recompute closer to working properly

Proper behavior without --others implemented.

And eliminated most of the code duplication through refactoring.

Also, changed it to not stage recomputed files. This way, git diff will
show files that have differences.
This commit is contained in:
Joey Hess 2025-02-26 15:51:31 -04:00
parent 53d107ca47
commit d6a010a615
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 81 additions and 113 deletions

View file

@ -14,6 +14,7 @@ import qualified Git
import qualified Annex
import qualified Remote.Compute
import qualified Types.Remote as Remote
import Backend
import Annex.CatFile
import Annex.Content.Presence
import Annex.Ingest
@ -24,6 +25,7 @@ import Utility.Metered
import Backend.URL (fromUrl)
import qualified Data.Map as M
import Data.Time.Clock
cmd :: Command
cmd = notBareRepo $
@ -94,16 +96,17 @@ perform o r = do
Remote.Compute.runComputeProgram program state
(Remote.Compute.ImmutableState False)
(getInputContent fast)
(go fast)
(addComputed "adding" True r (reproducible o) (const True) fast)
next $ return True
where
go fast state tmpdir ts = do
addComputed :: StringContainingQuotedPath -> Bool -> Remote -> Maybe Reproducible -> (OsPath -> Bool) -> Bool -> Remote.Compute.ComputeState -> OsPath -> NominalDiffTime -> Annex ()
addComputed addaction stagefiles r reproducibleconfig wantfile fast state tmpdir ts = do
let outputs = Remote.Compute.computeOutputs state
when (M.null outputs) $
giveup "The computation succeeded, but it did not generate any files."
oks <- forM (M.keys outputs) $ \outputfile -> do
showAction $ "adding " <> QuotedPath outputfile
k <- catchNonAsync (addfile fast state tmpdir outputfile)
showAction $ addaction <> " " <> QuotedPath outputfile
k <- catchNonAsync (addfile outputfile)
(\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err)
return (outputfile, Just k)
let state' = state
@ -114,31 +117,51 @@ perform o r = do
(Remote.remoteStateHandle r)
k ts state'
logChange NoLiveUpdate k (Remote.uuid r) InfoPresent
addfile fast state tmpdir outputfile
where
addfile outputfile
| fast = do
addSymlink outputfile stateurlk Nothing
when (wantfile outputfile) $
if stagefiles
then addSymlink outputfile stateurlk Nothing
else makelink stateurlk
return stateurlk
| isreproducible state = do
| isreproducible = do
sz <- liftIO $ getFileSize outputfile'
metered Nothing sz Nothing $ \_ p ->
ingestwith $ ingestAdd p (Just ld)
| otherwise = ingestwith $
ingestAdd' nullMeterUpdate (Just ld) (Just stateurlk)
if wantfile outputfile
then ingesthelper p Nothing
else genkey p
| otherwise =
if wantfile outputfile
then ingesthelper nullMeterUpdate
(Just stateurlk)
else return stateurlk
where
stateurl = Remote.Compute.computeStateUrl r state outputfile
stateurlk = fromUrl stateurl Nothing True
outputfile' = tmpdir </> outputfile
ld = LockedDown ldc $ KeySource
ld = LockedDown ldc ks
ks = KeySource
{ keyFilename = outputfile
, contentLocation = outputfile'
, inodeCache = Nothing
}
ingestwith a = a >>= \case
Nothing -> giveup "key generation failed"
Nothing -> giveup "ingestion failed"
Just k -> do
logStatus NoLiveUpdate k InfoPresent
return k
genkey p = do
backend <- chooseBackend outputfile
fst <$> genKey ks p backend
makelink k = void $ makeLink outputfile k Nothing
ingesthelper p mk
| stagefiles = ingestwith $
ingestAdd' p (Just ld) mk
| otherwise = ingestwith $ do
mk' <- fst <$> ingest p (Just ld) mk
maybe noop makelink mk'
return mk'
ldc = LockDownConfig
{ lockingFile = True
@ -146,21 +169,24 @@ perform o r = do
, checkWritePerms = True
}
isreproducible state = case reproducible o of
isreproducible = case reproducibleconfig of
Just v -> isReproducible v
Nothing -> Remote.Compute.computeReproducible state
getInputContent :: Bool -> OsPath -> Annex (Key, Maybe OsPath)
getInputContent fast p = catKeyFile p >>= \case
Just inputkey -> do
Just inputkey -> getInputContent' fast inputkey (fromOsPath p)
Nothing -> ifM (liftIO $ doesFileExist p)
( giveup $ "The computation needs an input file that is not an annexed file: " ++ fromOsPath p
, giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p
)
getInputContent' :: Bool -> Key -> String -> Annex (Key, Maybe OsPath)
getInputContent' fast inputkey filedesc = do
obj <- calcRepo (gitAnnexLocation inputkey)
if fast
then return (inputkey, Nothing)
else ifM (inAnnex inputkey)
( return (inputkey, Just obj)
, giveup $ "The computation needs the content of a file which is not present: " ++ fromOsPath p
)
Nothing -> ifM (liftIO $ doesFileExist p)
( giveup $ "The computation needs an input file that is not an annexed file: " ++ fromOsPath p
, giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p
, giveup $ "The computation needs the content of a file which is not present: " ++ filedesc
)

View file

@ -15,14 +15,9 @@ import qualified Remote.Compute
import qualified Remote
import qualified Types.Remote as Remote
import Annex.CatFile
import Annex.Ingest
import Git.FilePath
import Types.KeySource
import Messages.Progress
import Logs.Location
import Utility.Metered
import Backend.URL (fromUrl)
import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent)
import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent, getInputContent', addComputed)
import qualified Data.Map as M
@ -111,81 +106,28 @@ start' o r si file key =
-- recompute. This way, the user will see the
-- computation fail, with an error message that
-- explains the problem.
-- XXX check that this works well
Nothing -> True
perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform
perform o r file key oldstate = do
program <- Remote.Compute.getComputeProgram r
let recomputestate = oldstate
{ Remote.Compute.computeInputs = mempty
, Remote.Compute.computeOutputs = mempty
}
fast <- Annex.getRead Annex.fast
showOutput
Remote.Compute.runComputeProgram program recomputestate
(Remote.Compute.ImmutableState False)
Remote.Compute.runComputeProgram program oldstate
(Remote.Compute.ImmutableState True)
(getinputcontent program fast)
(go fast)
(addComputed "processing" False r (reproducible o) wantfile fast)
next $ return True
where
getinputcontent program fast p
| originalOption o =
case M.lookup p (Remote.Compute.computeInputs oldstate) of
Just inputkey -> return (inputkey, Nothing)
Just inputkey -> getInputContent' fast inputkey
(fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")")
Nothing -> Remote.Compute.computationBehaviorChangeError program
"requesting a new input file" p
| otherwise = getInputContent fast p
go fast state tmpdir ts = do
let outputs = Remote.Compute.computeOutputs state
when (M.null outputs) $
giveup "The computation succeeded, but it did not generate any files."
oks <- forM (M.keys outputs) $ \outputfile -> do
showAction $ "adding " <> QuotedPath outputfile
k <- catchNonAsync (addfile fast state tmpdir outputfile)
(\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err)
return (outputfile, Just k)
let state' = state
{ Remote.Compute.computeOutputs = M.fromList oks
}
forM_ (mapMaybe snd oks) $ \k -> do
Remote.Compute.setComputeState
(Remote.remoteStateHandle r)
k ts state'
logChange NoLiveUpdate k (Remote.uuid r) InfoPresent
addfile fast state tmpdir outputfile
| fast = do
addSymlink outputfile stateurlk Nothing
return stateurlk
| isreproducible state = do
sz <- liftIO $ getFileSize outputfile'
metered Nothing sz Nothing $ \_ p ->
ingestwith $ ingestAdd p (Just ld)
| otherwise = ingestwith $
ingestAdd' nullMeterUpdate (Just ld) (Just stateurlk)
where
stateurl = Remote.Compute.computeStateUrl r state outputfile
stateurlk = fromUrl stateurl Nothing True
outputfile' = tmpdir </> outputfile
ld = LockedDown ldc $ KeySource
{ keyFilename = outputfile
, contentLocation = outputfile'
, inodeCache = Nothing
}
ingestwith a = a >>= \case
Nothing -> giveup "key generation failed"
Just k -> do
logStatus NoLiveUpdate k InfoPresent
return k
ldc = LockDownConfig
{ lockingFile = True
, hardlinkFileTmpDir = Nothing
, checkWritePerms = True
}
isreproducible state = case reproducible o of
Just v -> isReproducible v
Nothing -> Remote.Compute.computeReproducible state
wantfile outputfile
| othersOption o = True
| otherwise = outputfile == file

View file

@ -11,12 +11,12 @@ git-annex recompute [path ...]`
This updates computed files that were added with
[[git-annex-addcomputed]](1).
When the output of the computation is different, the updated computed
file is staged in the repository.
By default, this only recomputes files whose input files have changed.
The new contents of the input files are used to re-run the computation.
When the output of the computation is different, the computed file is
updated with the new content.
# OPTIONS
* `--original`
@ -26,7 +26,7 @@ The new contents of the input files are used to re-run the computation.
* `--others`
When recomputing one file also generates new versions of other files,
stage those other files in the repository too.
update those other files too.
* `--unreproducible`, `-u`