avoid unncessary git-annex branch changes for recompute and addcomputed

This commit is contained in:
Joey Hess 2025-03-06 12:41:30 -04:00
parent ccc454a791
commit c6c6e2632d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 96 additions and 55 deletions

View file

@ -20,6 +20,7 @@ import Backend
import Annex.CatFile import Annex.CatFile
import Annex.Content.Presence import Annex.Content.Presence
import Annex.Ingest import Annex.Ingest
import Annex.UUID
import Annex.GitShaKey import Annex.GitShaKey
import Types.KeySource import Types.KeySource
import Types.Key import Types.Key
@ -94,35 +95,35 @@ perform o r = do
, Remote.Compute.computeInputs = mempty , Remote.Compute.computeInputs = mempty
, Remote.Compute.computeOutputs = mempty , Remote.Compute.computeOutputs = mempty
, Remote.Compute.computeSubdir = subdir , Remote.Compute.computeSubdir = subdir
, Remote.Compute.computeReproducible = False
, Remote.Compute.computeInputsUnavailable = False
} }
fast <- Annex.getRead Annex.fast fast <- Annex.getRead Annex.fast
Remote.Compute.runComputeProgram program state Remote.Compute.runComputeProgram program state
(Remote.Compute.ImmutableState False) (Remote.Compute.ImmutableState False)
(getInputContent fast) (getInputContent fast)
Nothing Nothing
(addComputed "adding" True r (reproducible o) chooseBackend Just fast) (addComputed (Just "adding") True r (reproducible o) chooseBackend Just fast)
next $ return True next $ return True
addComputed addComputed
:: StringContainingQuotedPath :: Maybe StringContainingQuotedPath
-> Bool -> Bool
-> Remote -> Remote
-> Maybe Reproducible -> Maybe Reproducible
-> (OsPath -> Annex Backend) -> (OsPath -> Annex Backend)
-> (OsPath -> Maybe OsPath) -> (OsPath -> Maybe OsPath)
-> Bool -> Bool
-> Remote.Compute.ComputeState -> Remote.Compute.ComputeProgramResult
-> OsPath -> OsPath
-> NominalDiffTime -> NominalDiffTime
-> Annex () -> Annex ()
addComputed addaction stagefiles r reproducibleconfig choosebackend destfile fast state tmpdir ts = do addComputed maddaction stagefiles r reproducibleconfig choosebackend destfile fast result tmpdir ts = do
let outputs = Remote.Compute.computeOutputs state
when (M.null outputs) $ when (M.null outputs) $
giveup "The computation succeeded, but it did not generate any files." giveup "The computation succeeded, but it did not generate any files."
oks <- forM (M.keys outputs) $ \outputfile -> do oks <- forM (M.keys outputs) $ \outputfile -> do
showAction $ addaction <> " " <> QuotedPath outputfile case maddaction of
Just addaction -> showAction $
addaction <> " " <> QuotedPath outputfile
Nothing -> noop
k <- catchNonAsync (addfile outputfile) k <- catchNonAsync (addfile outputfile)
(\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err) (\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err)
return (outputfile, Just k) return (outputfile, Just k)
@ -133,8 +134,15 @@ addComputed addaction stagefiles r reproducibleconfig choosebackend destfile fas
Remote.Compute.setComputeState Remote.Compute.setComputeState
(Remote.remoteStateHandle r) (Remote.remoteStateHandle r)
k ts state' k ts state'
logChange NoLiveUpdate k (Remote.uuid r) InfoPresent
let u = Remote.uuid r
unlessM (elem u <$> loggedLocations k) $
logChange NoLiveUpdate k u InfoPresent
where where
state = Remote.Compute.computeState result
outputs = Remote.Compute.computeOutputs state
addfile outputfile addfile outputfile
| fast = do | fast = do
case destfile outputfile of case destfile outputfile of
@ -179,7 +187,9 @@ addComputed addaction stagefiles r reproducibleconfig choosebackend destfile fas
ingestwith a = a >>= \case ingestwith a = a >>= \case
Nothing -> giveup "ingestion failed" Nothing -> giveup "ingestion failed"
Just k -> do Just k -> do
logStatus NoLiveUpdate k InfoPresent u <- getUUID
unlessM (elem u <$> loggedLocations k) $
logStatus NoLiveUpdate k InfoPresent
when (fromKey keyVariety k == VURLKey) $ do when (fromKey keyVariety k == VURLKey) $ do
hb <- hashBackend hb <- hashBackend
void $ addEquivilantKey hb k void $ addEquivilantKey hb k
@ -194,7 +204,7 @@ addComputed addaction stagefiles r reproducibleconfig choosebackend destfile fas
isreproducible = case reproducibleconfig of isreproducible = case reproducibleconfig of
Just v -> isReproducible v Just v -> isReproducible v
Nothing -> Remote.Compute.computeReproducible state Nothing -> Remote.Compute.computeReproducible result
getInputContent :: Bool -> OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath)) getInputContent :: Bool -> OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath))
getInputContent fast p = catKeyFile p >>= \case getInputContent fast p = catKeyFile p >>= \case

View file

@ -15,6 +15,7 @@ import qualified Remote
import qualified Types.Remote as Remote import qualified Types.Remote as Remote
import qualified Git.Ref as Git import qualified Git.Ref as Git
import Annex.Content import Annex.Content
import Annex.UUID
import Annex.CatFile import Annex.CatFile
import Annex.GitShaKey import Annex.GitShaKey
import Git.FilePath import Git.FilePath
@ -131,12 +132,13 @@ perform o r file origkey origstate = do
(getinputcontent program) (getinputcontent program)
Nothing Nothing
(go program reproducibleconfig) (go program reproducibleconfig)
next $ return True next cleanup
where where
go program reproducibleconfig state tmpdir ts = do go program reproducibleconfig result tmpdir ts = do
checkbehaviorchange program state checkbehaviorchange program
addComputed "processing" False r reproducibleconfig (Remote.Compute.computeState result)
choosebackend destfile False state tmpdir ts addComputed Nothing False r reproducibleconfig
choosebackend destfile False result tmpdir ts
checkbehaviorchange program state = do checkbehaviorchange program state = do
let check s w a b = forM_ (M.keys (w a)) $ \f -> let check s w a b = forM_ (M.keys (w a)) $ \f ->
@ -168,6 +170,10 @@ perform o r file origkey origstate = do
origbackendvariety = fromKey keyVariety origkey origbackendvariety = fromKey keyVariety origkey
recomputingvurl = case origbackendvariety of
VURLKey -> True
_ -> False
getreproducibleconfig = case reproducible o of getreproducibleconfig = case reproducible o of
Just (Reproducible True) -> return (Just (Reproducible True)) Just (Reproducible True) -> return (Just (Reproducible True))
-- A VURL key is used when the computation was -- A VURL key is used when the computation was
@ -177,13 +183,22 @@ perform o r file origkey origstate = do
-- delete the annex object first, so that if recomputing -- delete the annex object first, so that if recomputing
-- generates a new version of the file, it replaces -- generates a new version of the file, it replaces
-- the old version. -- the old version.
v -> case origbackendvariety of v -> if recomputingvurl
VURLKey -> do then do
lockContentForRemoval origkey noop removeAnnex lockContentForRemoval origkey noop removeAnnex
-- in case computation fails or is interupted
logStatus NoLiveUpdate origkey InfoMissing
return (Just (Reproducible False)) return (Just (Reproducible False))
_ -> return v else return v
cleanup = do
case reproducible o of
Just (Reproducible True) -> noop
-- in case computation failed, update
-- location log for removal done earlier
_ -> when recomputingvurl $ do
u <- getUUID
unlessM (elem u <$> loggedLocations origkey) $
logStatus NoLiveUpdate origkey InfoMissing
return True
choosebackend _outputfile choosebackend _outputfile
-- Use the same backend as was used to compute it before, -- Use the same backend as was used to compute it before,

View file

@ -18,6 +18,7 @@ module Remote.Compute (
getComputeProgram, getComputeProgram,
runComputeProgram, runComputeProgram,
ImmutableState(..), ImmutableState(..),
ComputeProgramResult(..),
computationBehaviorChangeError, computationBehaviorChangeError,
defaultComputeParams, defaultComputeParams,
) where ) where
@ -222,8 +223,6 @@ data ComputeState = ComputeState
, computeInputs :: M.Map OsPath Key , computeInputs :: M.Map OsPath Key
, computeOutputs :: M.Map OsPath (Maybe Key) , computeOutputs :: M.Map OsPath (Maybe Key)
, computeSubdir :: OsPath , computeSubdir :: OsPath
, computeReproducible :: Bool
, computeInputsUnavailable :: Bool
} }
deriving (Show, Eq) deriving (Show, Eq)
@ -272,8 +271,6 @@ parseComputeState k b =
, computeInputs = mempty , computeInputs = mempty
, computeOutputs = mempty , computeOutputs = mempty
, computeSubdir = literalOsPath "." , computeSubdir = literalOsPath "."
, computeReproducible = False
, computeInputsUnavailable = False
} }
go :: ComputeState -> [QueryItem] -> ComputeState go :: ComputeState -> [QueryItem] -> ComputeState
@ -330,16 +327,23 @@ computeStateUrl r st p =
- The metadata fields are numbers (prefixed with "t" to make them legal - The metadata fields are numbers (prefixed with "t" to make them legal
- field names), which are estimates of how long it might take to run - field names), which are estimates of how long it might take to run
- the computation (in seconds). - the computation (in seconds).
-
- Avoids redundantly recording a ComputeState when the per remote metadata
- already contains it.
-} -}
setComputeState :: RemoteStateHandle -> Key -> NominalDiffTime -> ComputeState -> Annex () setComputeState :: RemoteStateHandle -> Key -> NominalDiffTime -> ComputeState -> Annex ()
setComputeState rs k ts st = addRemoteMetaData k rs $ MetaData $ M.singleton setComputeState rs k ts st = do
(mkMetaFieldUnchecked $ T.pack ('t':show (truncateResolution 1 ts))) l <- map snd <$> getComputeStatesUnsorted rs k
(S.singleton (MetaValue (CurrentlySet True) (formatComputeState k st))) unless (st `elem` l) go
where
go = addRemoteMetaData k rs $ MetaData $ M.singleton
(mkMetaFieldUnchecked $ T.pack ('t':show (truncateResolution 1 ts)))
(S.singleton (MetaValue (CurrentlySet True) (formatComputeState k st)))
{- When multiple ComputeStates have been recorded for the same key, {- When multiple ComputeStates have been recorded for the same key,
- this returns one that is probably less expensive to compute, - this returns one that is probably less expensive to compute,
- based on the original time it took to compute it. -} - based on the original time it took to compute it. -}
getComputeState:: RemoteStateHandle -> Key -> Annex (Maybe ComputeState) getComputeState :: RemoteStateHandle -> Key -> Annex (Maybe ComputeState)
getComputeState rs k = headMaybe . map snd . sortOn fst getComputeState rs k = headMaybe . map snd . sortOn fst
<$> getComputeStatesUnsorted rs k <$> getComputeStatesUnsorted rs k
@ -372,6 +376,12 @@ computeProgramEnvironment st = do
newtype ImmutableState = ImmutableState Bool newtype ImmutableState = ImmutableState Bool
data ComputeProgramResult = ComputeProgramResult
{ computeState :: ComputeState
, computeInputsUnavailable :: Bool
, computeReproducible :: Bool
}
runComputeProgram runComputeProgram
:: ComputeProgram :: ComputeProgram
-> ComputeState -> ComputeState
@ -381,7 +391,7 @@ runComputeProgram
-- content is not available -- content is not available
-> Maybe (Key, MeterUpdate) -> Maybe (Key, MeterUpdate)
-- ^ update meter for this key -- ^ update meter for this key
-> (ComputeState -> OsPath -> NominalDiffTime -> Annex v) -> (ComputeProgramResult -> OsPath -> NominalDiffTime -> Annex v)
-> Annex v -> Annex v
runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent meterkey cont = runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent meterkey cont =
withOtherTmp $ \othertmpdir -> withOtherTmp $ \othertmpdir ->
@ -398,12 +408,13 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
} }
showOutput showOutput
starttime <- liftIO currentMonotonicTimestamp starttime <- liftIO currentMonotonicTimestamp
state' <- withmeterfile $ \meterfile -> bracket let startresult = ComputeProgramResult state False False
result <- withmeterfile $ \meterfile -> bracket
(liftIO $ createProcess pr) (liftIO $ createProcess pr)
(liftIO . cleanupProcess) (liftIO . cleanupProcess)
(getinput tmpdir subdir state meterfile) (getinput tmpdir subdir startresult meterfile)
endtime <- liftIO currentMonotonicTimestamp endtime <- liftIO currentMonotonicTimestamp
cont state' subdir (calcduration starttime endtime) cont result subdir (calcduration starttime endtime)
getsubdir tmpdir = do getsubdir tmpdir = do
let subdir = tmpdir </> computeSubdir state let subdir = tmpdir </> computeSubdir state
@ -415,24 +426,25 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
, return tmpdir , return tmpdir
) )
getinput tmpdir subdir state' meterfile p = getinput tmpdir subdir result meterfile p =
liftIO (hGetLineUntilExitOrEOF (processHandle p) (stdoutHandle p)) >>= \case liftIO (hGetLineUntilExitOrEOF (processHandle p) (stdoutHandle p)) >>= \case
Just l Just l
| null l -> getinput tmpdir subdir state' meterfile p | null l -> getinput tmpdir subdir result meterfile p
| otherwise -> do | otherwise -> do
state'' <- parseoutput p tmpdir subdir state' meterfile l result' <- parseoutput p tmpdir subdir result meterfile l
getinput tmpdir subdir state'' meterfile p getinput tmpdir subdir result' meterfile p
Nothing -> do Nothing -> do
liftIO $ hClose (stdoutHandle p) liftIO $ hClose (stdoutHandle p)
liftIO $ hClose (stdinHandle p) liftIO $ hClose (stdinHandle p)
unlessM (liftIO $ checkSuccessProcess (processHandle p)) $ unlessM (liftIO $ checkSuccessProcess (processHandle p)) $
giveup $ program ++ " exited unsuccessfully" giveup $ program ++ " exited unsuccessfully"
return state' return result
parseoutput p tmpdir subdir state' meterfile l = case Proto.parseMessage l of parseoutput p tmpdir subdir result meterfile l = case Proto.parseMessage l of
Just (ProcessInput f) -> do Just (ProcessInput f) -> do
let f' = toOsPath f let f' = toOsPath f
let knowninput = M.member f' (computeInputs state') let knowninput = M.member f'
(computeInputs (computeState result))
checksafefile tmpdir subdir f' "input" checksafefile tmpdir subdir f' "input"
checkimmutable knowninput "inputting" f' $ do checkimmutable knowninput "inputting" f' $ do
(k, inputcontent) <- getinputcontent f' (k, inputcontent) <- getinputcontent f'
@ -446,21 +458,21 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
liftIO $ hPutStrLn (stdinHandle p) $ liftIO $ hPutStrLn (stdinHandle p) $
maybe "" fromOsPath mp maybe "" fromOsPath mp
liftIO $ hFlush (stdinHandle p) liftIO $ hFlush (stdinHandle p)
let state'' = state' let result' = result
{ computeInputsUnavailable = { computeInputsUnavailable =
isNothing mp || computeInputsUnavailable state' isNothing mp || computeInputsUnavailable result
} }
return $ if immutablestate return $ if immutablestate
then state'' then result'
else state'' else modresultstate result' $ \s -> s
{ computeInputs = { computeInputs =
M.insert f' k M.insert f' k
(computeInputs state') (computeInputs s)
} }
Just (ProcessOutput f) -> do Just (ProcessOutput f) -> do
let f' = toOsPath f let f' = toOsPath f
checksafefile tmpdir subdir f' "output" checksafefile tmpdir subdir f' "output"
knownoutput <- case M.lookup f' (computeOutputs state') of knownoutput <- case M.lookup f' (computeOutputs $ computeState result) of
Nothing -> return False Nothing -> return False
Just mk -> do Just mk -> do
when (mk == fmap fst meterkey) $ when (mk == fmap fst meterkey) $
@ -468,20 +480,23 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
return True return True
checkimmutable knownoutput "outputting" f' $ checkimmutable knownoutput "outputting" f' $
return $ if immutablestate return $ if immutablestate
then state' then result
else state' else modresultstate result $ \s -> s
{ computeOutputs = { computeOutputs =
M.insert f' Nothing M.insert f' Nothing
(computeOutputs state') (computeOutputs s)
} }
Just (ProcessProgress percent) -> do Just (ProcessProgress percent) -> do
liftIO $ updatepercent percent liftIO $ updatepercent percent
return state' return result
Just ProcessReproducible -> Just ProcessReproducible ->
return $ state' { computeReproducible = True } return $ result { computeReproducible = True }
Nothing -> giveup $ Nothing -> giveup $
program ++ " output an unparseable line: \"" ++ l ++ "\"" program ++ " output an unparseable line: \"" ++ l ++ "\""
modresultstate result f =
result { computeState = f (computeState result) }
checksafefile tmpdir subdir f fileaction = do checksafefile tmpdir subdir f fileaction = do
let err problem = giveup $ let err problem = giveup $
program ++ " tried to " ++ fileaction ++ " a file that is " ++ problem ++ ": " ++ fromOsPath f program ++ " tried to " ++ fileaction ++ " a file that is " ++ problem ++ ": " ++ fromOsPath f
@ -596,10 +611,11 @@ computeKey rs (ComputeProgram program) k _af dest meterupdate vc =
(keyfile : _) -> Just keyfile (keyfile : _) -> Just keyfile
[] -> Nothing [] -> Nothing
postcompute keyfile state tmpdir _ts postcompute keyfile result tmpdir _ts
| computeInputsUnavailable state = | computeInputsUnavailable result =
giveup "Input file(s) unavailable." giveup "Input file(s) unavailable."
| otherwise = postcompute' keyfile state tmpdir | otherwise =
postcompute' keyfile (computeState result) tmpdir
postcompute' keyfile state tmpdir = do postcompute' keyfile state tmpdir = do
hb <- hashBackend hb <- hashBackend