many recompute improvements

I've lost track of them all, but it includes:

* Using the same key backend as was used in the original computation.
* Fixing bug that prevented updating the source file key in the compute
  state
* Handling --reproducible and --unreproducible.
* recompute --original of a file using VURL, when the result is
  different, but the key remains the same, makes the object file
  be updated with the new content
* Detecting some other ways the program behavior can change, just for
  completeness.
* Also adds --backend to addcomputed.
This commit is contained in:
Joey Hess 2025-02-27 15:12:29 -04:00
parent 1704b5e327
commit e6ae5e8d56
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 127 additions and 54 deletions

View file

@ -28,7 +28,7 @@ import qualified Data.Map as M
import Data.Time.Clock
cmd :: Command
cmd = notBareRepo $
cmd = notBareRepo $ withAnnexOptions [backendOption] $
command "addcomputed" SectionCommon "add computed files to annex"
(paramRepeating paramExpression)
(seek <$$> optParser)
@ -96,11 +96,22 @@ perform o r = do
Remote.Compute.runComputeProgram program state
(Remote.Compute.ImmutableState False)
(getInputContent fast)
(addComputed "adding" True r (reproducible o) Just fast)
(addComputed "adding" True r (reproducible o) chooseBackend Just fast)
next $ return True
addComputed :: StringContainingQuotedPath -> Bool -> Remote -> Maybe Reproducible -> (OsPath -> Maybe OsPath) -> Bool -> Remote.Compute.ComputeState -> OsPath -> NominalDiffTime -> Annex ()
addComputed addaction stagefiles r reproducibleconfig destfile fast state tmpdir ts = do
addComputed
:: StringContainingQuotedPath
-> Bool
-> Remote
-> Maybe Reproducible
-> (OsPath -> Annex Backend)
-> (OsPath -> Maybe OsPath)
-> Bool
-> Remote.Compute.ComputeState
-> OsPath
-> NominalDiffTime
-> Annex ()
addComputed addaction stagefiles r reproducibleconfig choosebackend destfile fast state tmpdir ts = do
let outputs = Remote.Compute.computeOutputs state
when (M.null outputs) $
giveup "The computation succeeded, but it did not generate any files."
@ -146,22 +157,24 @@ addComputed addaction stagefiles r reproducibleconfig destfile fast state tmpdir
, contentLocation = outputfile'
, inodeCache = Nothing
}
genkey f p = do
backend <- choosebackend outputfile
fst <$> genKey (ks f) p backend
makelink f k = void $ makeLink f k Nothing
ingesthelper f p mk
| stagefiles = ingestwith $ do
k <- maybe (genkey f p) return mk
ingestAdd' p (Just (ld f)) (Just k)
| otherwise = ingestwith $ do
k <- maybe (genkey f p) return mk
mk' <- fst <$> ingest p (Just (ld f)) (Just k)
maybe noop (makelink f) mk'
return mk'
ingestwith a = a >>= \case
Nothing -> giveup "ingestion failed"
Just k -> do
logStatus NoLiveUpdate k InfoPresent
return k
genkey f p = do
backend <- chooseBackend outputfile
fst <$> genKey (ks f) p backend
makelink f k = void $ makeLink f k Nothing
ingesthelper f p mk
| stagefiles = ingestwith $
ingestAdd' p (Just (ld f)) mk
| otherwise = ingestwith $ do
mk' <- fst <$> ingest p (Just (ld f)) mk
maybe noop (makelink f) mk'
return mk'
ldc = LockDownConfig
{ lockingFile = True

View file

@ -14,10 +14,13 @@ import qualified Annex
import qualified Remote.Compute
import qualified Remote
import qualified Types.Remote as Remote
import Annex.Content
import Annex.CatFile
import Git.FilePath
import Logs.Location
import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent, getInputContent', addComputed)
import Backend (maybeLookupBackendVariety, unknownBackendVarietyMessage)
import Types.Key
import qualified Data.Map as M
@ -62,7 +65,7 @@ seek' o = do
start :: RecomputeOptions -> Maybe Remote -> SeekInput -> OsPath -> Key -> CommandStart
start o (Just computeremote) si file key =
stopUnless (notElem (Remote.uuid computeremote) <$> loggedLocations key) $
stopUnless (elem (Remote.uuid computeremote) <$> loggedLocations key) $
start' o computeremote si file key
start o Nothing si file key = do
rs <- catMaybes <$> (mapM Remote.byUUID =<< loggedLocations key)
@ -103,31 +106,73 @@ start' o r si file key =
-- explains the problem.
Nothing -> True
-- TODO When reproducible is not set, preserve the
-- reproducible/unreproducible of the input key.
perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform
perform o r file key origstate = do
perform o r file origkey origstate = do
program <- Remote.Compute.getComputeProgram r
fast <- Annex.getRead Annex.fast
reproducibleconfig <- getreproducibleconfig
showOutput
Remote.Compute.runComputeProgram program origstate
(Remote.Compute.ImmutableState True)
(getinputcontent program fast)
(addComputed "processing" False r (reproducible o) destfile fast)
(Remote.Compute.ImmutableState False)
(getinputcontent program)
(go program reproducibleconfig)
next $ return True
where
getinputcontent program fast p
go program reproducibleconfig state tmpdir ts = do
checkbehaviorchange program state
addComputed "processing" False r reproducibleconfig
choosebackend destfile state tmpdir ts
checkbehaviorchange program state = do
let check s w a b = forM_ (M.keys (w a)) $ \f ->
unless (M.member f (w b)) $
Remote.Compute.computationBehaviorChangeError program s f
check "not using input file"
Remote.Compute.computeInputs origstate state
check "outputting"
Remote.Compute.computeOutputs state origstate
check "not outputting"
Remote.Compute.computeOutputs origstate state
getinputcontent program p
| originalOption o =
case M.lookup p (Remote.Compute.computeInputs origstate) of
Just inputkey -> getInputContent' fast inputkey
Just inputkey -> getInputContent' False inputkey
(fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")")
Nothing -> Remote.Compute.computationBehaviorChangeError program
"requesting a new input file" p
| otherwise = getInputContent fast p
| otherwise = getInputContent False p
destfile outputfile
| Just outputfile == origfile = Just file
| otherwise = Nothing
origfile = headMaybe $ M.keys $ M.filter (== Just key)
origfile = headMaybe $ M.keys $ M.filter (== Just origkey)
(Remote.Compute.computeOutputs origstate)
origbackendvariety = fromKey keyVariety origkey
getreproducibleconfig = case reproducible o of
Just (Reproducible True) -> return (Just (Reproducible True))
-- A VURL key is used when the computation was
-- unreproducible. So recomputing should too, but that
-- will result in the same VURL key. Since moveAnnex
-- will prefer the current annex object to a new one,
-- delete the annex object first, so that if recomputing
-- generates a new version of the file, it replaces
-- the old version.
v -> case origbackendvariety of
VURLKey -> do
lockContentForRemoval origkey noop removeAnnex
-- in case computation fails or is interupted
logStatus NoLiveUpdate origkey InfoMissing
return (Just (Reproducible False))
_ -> return v
choosebackend _outputfile
-- Use the same backend as was used to compute it before,
-- so if the computed file is the same, there will be
-- no change.
| otherwise = maybeLookupBackendVariety origbackendvariety >>= \case
Just b -> return b
Nothing -> giveup $ unknownBackendVarietyMessage origbackendvariety