From d6a010a6155e545142b59ca54042b91a98430c68 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 26 Feb 2025 15:51:31 -0400 Subject: [PATCH] recompute closer to working properly Proper behavior without --others implemented. And eliminated most of the code duplication through refactoring. Also, changed it to not stage recomputed files. This way, git diff will show files that have differences. --- Command/AddComputed.hs | 108 ++++++++++++++++++++++------------- Command/Recompute.hs | 78 ++++--------------------- doc/git-annex-recompute.mdwn | 8 +-- 3 files changed, 81 insertions(+), 113 deletions(-) diff --git a/Command/AddComputed.hs b/Command/AddComputed.hs index f27932405e..b2b55fb605 100644 --- a/Command/AddComputed.hs +++ b/Command/AddComputed.hs @@ -14,6 +14,7 @@ import qualified Git import qualified Annex import qualified Remote.Compute import qualified Types.Remote as Remote +import Backend import Annex.CatFile import Annex.Content.Presence import Annex.Ingest @@ -24,6 +25,7 @@ import Utility.Metered import Backend.URL (fromUrl) import qualified Data.Map as M +import Data.Time.Clock cmd :: Command cmd = notBareRepo $ @@ -94,73 +96,97 @@ perform o r = do Remote.Compute.runComputeProgram program state (Remote.Compute.ImmutableState False) (getInputContent fast) - (go fast) + (addComputed "adding" True r (reproducible o) (const True) fast) next $ return True + +addComputed :: StringContainingQuotedPath -> Bool -> Remote -> Maybe Reproducible -> (OsPath -> Bool) -> Bool -> Remote.Compute.ComputeState -> OsPath -> NominalDiffTime -> Annex () +addComputed addaction stagefiles r reproducibleconfig wantfile fast state tmpdir ts = do + let outputs = Remote.Compute.computeOutputs state + when (M.null outputs) $ + giveup "The computation succeeded, but it did not generate any files." + oks <- forM (M.keys outputs) $ \outputfile -> do + showAction $ addaction <> " " <> QuotedPath outputfile + k <- catchNonAsync (addfile outputfile) + (\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err) + return (outputfile, Just k) + let state' = state + { Remote.Compute.computeOutputs = M.fromList oks + } + forM_ (mapMaybe snd oks) $ \k -> do + Remote.Compute.setComputeState + (Remote.remoteStateHandle r) + k ts state' + logChange NoLiveUpdate k (Remote.uuid r) InfoPresent where - go fast state tmpdir ts = do - let outputs = Remote.Compute.computeOutputs state - when (M.null outputs) $ - giveup "The computation succeeded, but it did not generate any files." - oks <- forM (M.keys outputs) $ \outputfile -> do - showAction $ "adding " <> QuotedPath outputfile - k <- catchNonAsync (addfile fast state tmpdir outputfile) - (\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err) - return (outputfile, Just k) - let state' = state - { Remote.Compute.computeOutputs = M.fromList oks - } - forM_ (mapMaybe snd oks) $ \k -> do - Remote.Compute.setComputeState - (Remote.remoteStateHandle r) - k ts state' - logChange NoLiveUpdate k (Remote.uuid r) InfoPresent - - addfile fast state tmpdir outputfile + addfile outputfile | fast = do - addSymlink outputfile stateurlk Nothing + when (wantfile outputfile) $ + if stagefiles + then addSymlink outputfile stateurlk Nothing + else makelink stateurlk return stateurlk - | isreproducible state = do + | isreproducible = do sz <- liftIO $ getFileSize outputfile' metered Nothing sz Nothing $ \_ p -> - ingestwith $ ingestAdd p (Just ld) - | otherwise = ingestwith $ - ingestAdd' nullMeterUpdate (Just ld) (Just stateurlk) + if wantfile outputfile + then ingesthelper p Nothing + else genkey p + | otherwise = + if wantfile outputfile + then ingesthelper nullMeterUpdate + (Just stateurlk) + else return stateurlk where stateurl = Remote.Compute.computeStateUrl r state outputfile stateurlk = fromUrl stateurl Nothing True outputfile' = tmpdir outputfile - ld = LockedDown ldc $ KeySource - { keyFilename = outputfile - , contentLocation = outputfile' - , inodeCache = Nothing - } + ld = LockedDown ldc ks + ks = KeySource + { keyFilename = outputfile + , contentLocation = outputfile' + , inodeCache = Nothing + } ingestwith a = a >>= \case - Nothing -> giveup "key generation failed" + Nothing -> giveup "ingestion failed" Just k -> do logStatus NoLiveUpdate k InfoPresent return k - + genkey p = do + backend <- chooseBackend outputfile + fst <$> genKey ks p backend + makelink k = void $ makeLink outputfile k Nothing + ingesthelper p mk + | stagefiles = ingestwith $ + ingestAdd' p (Just ld) mk + | otherwise = ingestwith $ do + mk' <- fst <$> ingest p (Just ld) mk + maybe noop makelink mk' + return mk' + ldc = LockDownConfig { lockingFile = True , hardlinkFileTmpDir = Nothing , checkWritePerms = True } - isreproducible state = case reproducible o of + isreproducible = case reproducibleconfig of Just v -> isReproducible v Nothing -> Remote.Compute.computeReproducible state getInputContent :: Bool -> OsPath -> Annex (Key, Maybe OsPath) getInputContent fast p = catKeyFile p >>= \case - Just inputkey -> do - obj <- calcRepo (gitAnnexLocation inputkey) - if fast - then return (inputkey, Nothing) - else ifM (inAnnex inputkey) - ( return (inputkey, Just obj) - , giveup $ "The computation needs the content of a file which is not present: " ++ fromOsPath p - ) + Just inputkey -> getInputContent' fast inputkey (fromOsPath p) Nothing -> ifM (liftIO $ doesFileExist p) ( giveup $ "The computation needs an input file that is not an annexed file: " ++ fromOsPath p , giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p ) + +getInputContent' :: Bool -> Key -> String -> Annex (Key, Maybe OsPath) +getInputContent' fast inputkey filedesc = do + obj <- calcRepo (gitAnnexLocation inputkey) + if fast + then return (inputkey, Nothing) + else ifM (inAnnex inputkey) + ( return (inputkey, Just obj) + , giveup $ "The computation needs the content of a file which is not present: " ++ filedesc + ) diff --git a/Command/Recompute.hs b/Command/Recompute.hs index 42a313ee75..a5a82b7028 100644 --- a/Command/Recompute.hs +++ b/Command/Recompute.hs @@ -15,14 +15,9 @@ import qualified Remote.Compute import qualified Remote import qualified Types.Remote as Remote import Annex.CatFile -import Annex.Ingest import Git.FilePath -import Types.KeySource -import Messages.Progress import Logs.Location -import Utility.Metered -import Backend.URL (fromUrl) -import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent) +import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent, getInputContent', addComputed) import qualified Data.Map as M @@ -111,81 +106,28 @@ start' o r si file key = -- recompute. This way, the user will see the -- computation fail, with an error message that -- explains the problem. - -- XXX check that this works well Nothing -> True perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform perform o r file key oldstate = do program <- Remote.Compute.getComputeProgram r - let recomputestate = oldstate - { Remote.Compute.computeInputs = mempty - , Remote.Compute.computeOutputs = mempty - } fast <- Annex.getRead Annex.fast showOutput - Remote.Compute.runComputeProgram program recomputestate - (Remote.Compute.ImmutableState False) + Remote.Compute.runComputeProgram program oldstate + (Remote.Compute.ImmutableState True) (getinputcontent program fast) - (go fast) + (addComputed "processing" False r (reproducible o) wantfile fast) next $ return True where getinputcontent program fast p - | originalOption o = + | originalOption o = case M.lookup p (Remote.Compute.computeInputs oldstate) of - Just inputkey -> return (inputkey, Nothing) + Just inputkey -> getInputContent' fast inputkey + (fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")") Nothing -> Remote.Compute.computationBehaviorChangeError program "requesting a new input file" p | otherwise = getInputContent fast p - go fast state tmpdir ts = do - let outputs = Remote.Compute.computeOutputs state - when (M.null outputs) $ - giveup "The computation succeeded, but it did not generate any files." - oks <- forM (M.keys outputs) $ \outputfile -> do - showAction $ "adding " <> QuotedPath outputfile - k <- catchNonAsync (addfile fast state tmpdir outputfile) - (\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err) - return (outputfile, Just k) - let state' = state - { Remote.Compute.computeOutputs = M.fromList oks - } - forM_ (mapMaybe snd oks) $ \k -> do - Remote.Compute.setComputeState - (Remote.remoteStateHandle r) - k ts state' - logChange NoLiveUpdate k (Remote.uuid r) InfoPresent - - addfile fast state tmpdir outputfile - | fast = do - addSymlink outputfile stateurlk Nothing - return stateurlk - | isreproducible state = do - sz <- liftIO $ getFileSize outputfile' - metered Nothing sz Nothing $ \_ p -> - ingestwith $ ingestAdd p (Just ld) - | otherwise = ingestwith $ - ingestAdd' nullMeterUpdate (Just ld) (Just stateurlk) - where - stateurl = Remote.Compute.computeStateUrl r state outputfile - stateurlk = fromUrl stateurl Nothing True - outputfile' = tmpdir outputfile - ld = LockedDown ldc $ KeySource - { keyFilename = outputfile - , contentLocation = outputfile' - , inodeCache = Nothing - } - ingestwith a = a >>= \case - Nothing -> giveup "key generation failed" - Just k -> do - logStatus NoLiveUpdate k InfoPresent - return k - - ldc = LockDownConfig - { lockingFile = True - , hardlinkFileTmpDir = Nothing - , checkWritePerms = True - } - - isreproducible state = case reproducible o of - Just v -> isReproducible v - Nothing -> Remote.Compute.computeReproducible state + wantfile outputfile + | othersOption o = True + | otherwise = outputfile == file diff --git a/doc/git-annex-recompute.mdwn b/doc/git-annex-recompute.mdwn index 6e1a32f0d9..b5176285e7 100644 --- a/doc/git-annex-recompute.mdwn +++ b/doc/git-annex-recompute.mdwn @@ -11,12 +11,12 @@ git-annex recompute [path ...]` This updates computed files that were added with [[git-annex-addcomputed]](1). -When the output of the computation is different, the updated computed -file is staged in the repository. - By default, this only recomputes files whose input files have changed. The new contents of the input files are used to re-run the computation. +When the output of the computation is different, the computed file is +updated with the new content. + # OPTIONS * `--original` @@ -26,7 +26,7 @@ The new contents of the input files are used to re-run the computation. * `--others` When recomputing one file also generates new versions of other files, - stage those other files in the repository too. + update those other files too. * `--unreproducible`, `-u`