addcomputed --fast and --unreproducible working

For these, use VURL and URL keys, with an "annex-compute:" URI prefix.

These URL keys will look something like this:

	URL--annex-compute&cbar4,63pconvert,3-f4d3d72cf3f16ac9c3e9a8012bde4462

Generally it's too long so most of it gets md5summed. It's a little
ugly, but it's what fell out of the existing URL key generation
machinery. I did consider special casing to eg
"URL--annex-compute&c4d3d72cf3f16ac9c3e9a8012bde4462". But it seems at
least possibly useful that the name of the file that was computed is
visible and perhaps one or two words of the git-annex compute command
parameters.

Note that two different output files from the same computation will get
the same URL key. And these keys should remain stable.
This commit is contained in:
Joey Hess 2025-02-25 16:36:22 -04:00
parent a154e91513
commit 16f529c05f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 50 additions and 28 deletions

View file

@ -20,8 +20,9 @@ import Annex.Ingest
import Types.RemoteConfig import Types.RemoteConfig
import Types.KeySource import Types.KeySource
import Messages.Progress import Messages.Progress
import Utility.MonotonicClock
import Logs.Location import Logs.Location
import Utility.MonotonicClock
import Backend.URL (fromUrl)
import qualified Data.Map as M import qualified Data.Map as M
import Data.Time.Clock import Data.Time.Clock
@ -42,19 +43,19 @@ optParser :: CmdParamsDesc -> Parser AddComputedOptions
optParser desc = AddComputedOptions optParser desc = AddComputedOptions
<$> cmdParams desc <$> cmdParams desc
<*> (mkParseRemoteOption <$> parseToOption) <*> (mkParseRemoteOption <$> parseToOption)
<*> (fromMaybe Unreproducible <$> parseReproducible) <*> (fromMaybe (Reproducible False) <$> parseReproducible)
data Reproducible = Reproducible | Unreproducible newtype Reproducible = Reproducible { isReproducible :: Bool }
parseReproducible :: Parser (Maybe Reproducible) parseReproducible :: Parser (Maybe Reproducible)
parseReproducible = r <|> unr parseReproducible = r <|> unr
where where
r = flag Nothing (Just Reproducible) r = flag Nothing (Just (Reproducible True))
( long "reproducible" ( long "reproducible"
<> short 'r' <> short 'r'
<> help "computation is fully reproducible" <> help "computation is fully reproducible"
) )
unr = flag Nothing (Just Unreproducible) unr = flag Nothing (Just (Reproducible False))
( long "unreproducible" ( long "unreproducible"
<> short 'u' <> short 'u'
<> help "computation is not fully reproducible" <> help "computation is not fully reproducible"
@ -90,17 +91,14 @@ perform o r program = do
, Remote.Compute.computeInputs = mempty , Remote.Compute.computeInputs = mempty
, Remote.Compute.computeOutputs = mempty , Remote.Compute.computeOutputs = mempty
, Remote.Compute.computeSubdir = subdir , Remote.Compute.computeSubdir = subdir
, Remote.Compute.computeReproducible = , Remote.Compute.computeReproducible = isreproducible
case reproducible o of
Reproducible -> True
Unreproducible -> False
} }
fast <- Annex.getRead Annex.fast fast <- Annex.getRead Annex.fast
starttime <- liftIO currentMonotonicTimestamp starttime <- liftIO currentMonotonicTimestamp
Remote.Compute.runComputeProgram program state Remote.Compute.runComputeProgram program state
(Remote.Compute.ImmutableState False) (Remote.Compute.ImmutableState False)
(getinputcontent fast) (getinputcontent fast)
(go starttime) (go starttime fast)
next $ return True next $ return True
where where
getinputcontent fast p = catKeyFile p >>= \case getinputcontent fast p = catKeyFile p >>= \case
@ -117,7 +115,7 @@ perform o r program = do
, giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p , giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p
) )
go starttime state tmpdir = do go starttime fast state tmpdir = do
endtime <- liftIO currentMonotonicTimestamp endtime <- liftIO currentMonotonicTimestamp
let ts = calcduration starttime endtime let ts = calcduration starttime endtime
let outputs = Remote.Compute.computeOutputs state let outputs = Remote.Compute.computeOutputs state
@ -125,7 +123,7 @@ perform o r program = do
giveup "The computation succeeded, but it did not generate any files." giveup "The computation succeeded, but it did not generate any files."
oks <- forM (M.keys outputs) $ \outputfile -> do oks <- forM (M.keys outputs) $ \outputfile -> do
showAction $ "adding " <> QuotedPath outputfile showAction $ "adding " <> QuotedPath outputfile
k <- catchNonAsync (addfile tmpdir outputfile) k <- catchNonAsync (addfile fast state tmpdir outputfile)
(\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err) (\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err)
return (outputfile, Just k) return (outputfile, Just k)
let state' = state let state' = state
@ -137,24 +135,32 @@ perform o r program = do
k ts state' k ts state'
logChange NoLiveUpdate k (Remote.uuid r) InfoPresent logChange NoLiveUpdate k (Remote.uuid r) InfoPresent
addfile tmpdir outputfile = do addfile fast state tmpdir outputfile
let outputfile' = tmpdir </> outputfile | fast || not isreproducible = do
let ld = LockedDown ldc $ KeySource let stateurl = Remote.Compute.computeStateUrl state outputfile
{ keyFilename = outputfile let k = fromUrl stateurl Nothing isreproducible
, contentLocation = outputfile' addSymlink outputfile k Nothing
, inodeCache = Nothing return k
} | otherwise = do
sz <- liftIO $ getFileSize outputfile' let outputfile' = tmpdir </> outputfile
metered Nothing sz Nothing $ \_ p -> let ld = LockedDown ldc $ KeySource
ingestAdd p (Just ld) >>= \case { keyFilename = outputfile
Nothing -> giveup "key generation failed" , contentLocation = outputfile'
Just k -> return k , inodeCache = Nothing
}
sz <- liftIO $ getFileSize outputfile'
metered Nothing sz Nothing $ \_ p ->
ingestAdd p (Just ld) >>= \case
Nothing -> giveup "key generation failed"
Just k -> return k
ldc = LockDownConfig ldc = LockDownConfig
{ lockingFile = True { lockingFile = True
, hardlinkFileTmpDir = Nothing , hardlinkFileTmpDir = Nothing
, checkWritePerms = True , checkWritePerms = True
} }
calcduration (MonotonicTimestamp starttime) (MonotonicTimestamp endtime) = calcduration (MonotonicTimestamp starttime) (MonotonicTimestamp endtime) =
fromIntegral (endtime - starttime) :: NominalDiffTime fromIntegral (endtime - starttime) :: NominalDiffTime
isreproducible = isReproducible (reproducible o)

View file

@ -12,6 +12,7 @@ module Remote.Compute (
ComputeState(..), ComputeState(..),
setComputeState, setComputeState,
getComputeStates, getComputeStates,
computeStateUrl,
ComputeProgram, ComputeProgram,
getComputeProgram, getComputeProgram,
runComputeProgram, runComputeProgram,
@ -36,6 +37,7 @@ import Utility.Metered
import Utility.TimeStamp import Utility.TimeStamp
import Utility.Env import Utility.Env
import Utility.Tmp.Dir import Utility.Tmp.Dir
import Utility.Url
import qualified Git import qualified Git
import qualified Utility.SimpleProtocol as Proto import qualified Utility.SimpleProtocol as Proto
@ -190,7 +192,10 @@ data ComputeState = ComputeState
- and computeOutputs are sorted in ascending order for stability. - and computeOutputs are sorted in ascending order for stability.
-} -}
formatComputeState :: Key -> ComputeState -> B.ByteString formatComputeState :: Key -> ComputeState -> B.ByteString
formatComputeState k st = renderQuery False $ concat formatComputeState k = formatComputeState' (Just k)
formatComputeState' :: Maybe Key -> ComputeState -> B.ByteString
formatComputeState' mk st = renderQuery False $ concat
[ map formatparam (computeParams st) [ map formatparam (computeParams st)
, map formatinput (M.toAscList (computeInputs st)) , map formatinput (M.toAscList (computeInputs st))
, mapMaybe formatoutput (M.toAscList (computeOutputs st)) , mapMaybe formatoutput (M.toAscList (computeOutputs st))
@ -202,7 +207,7 @@ formatComputeState k st = renderQuery False $ concat
("i" <> fromOsPath file, Just (serializeKey' key)) ("i" <> fromOsPath file, Just (serializeKey' key))
formatoutput (file, (Just key)) = Just $ formatoutput (file, (Just key)) = Just $
("o" <> fromOsPath file, ("o" <> fromOsPath file,
if key == k if Just key == mk
then Nothing then Nothing
else Just (serializeKey' key) else Just (serializeKey' key)
) )
@ -251,6 +256,17 @@ parseComputeState k b =
_ -> Nothing _ -> Nothing
in go c' rest in go c' rest
{- A compute: url for a given output file of a computation. -}
computeStateUrl :: ComputeState -> OsPath -> URLString
computeStateUrl st p =
"annex-compute:" ++ fromOsPath p ++ "?"
++ decodeBS (formatComputeState' Nothing st')
where
-- Omit computeOutputs, so this gives the same result whether
-- it's called on a ComputeState with the computeOutputs
-- Keys populated or not.
st' = st { computeOutputs = mempty }
{- The per remote metadata is used to store ComputeState. This allows {- The per remote metadata is used to store ComputeState. This allows
- recording multiple ComputeStates that generate the same key. - recording multiple ComputeStates that generate the same key.
- -

View file

@ -54,7 +54,7 @@ Some examples of how this might look:
* `--fast` * `--fast`
Adds computed files to the repository, without generating their content Adds computed files to the repository, without generating their content
yet. yet.
* `--unreproducible`, `-u` * `--unreproducible`, `-u`