many recompute improvements
I've lost track of them all, but it includes: * Using the same key backend as was used in the original computation. * Fixing bug that prevented updating the source file key in the compute state * Handling --reproducible and --unreproducible. * recompute --original of a file using VURL, when the result is different, but the key remains the same, makes the object file be updated with the new content * Detecting some other ways the program behavior can change, just for completeness. * Also adds --backend to addcomputed.
This commit is contained in:
parent
1704b5e327
commit
e6ae5e8d56
6 changed files with 127 additions and 54 deletions
|
@ -28,7 +28,7 @@ import qualified Data.Map as M
|
||||||
import Data.Time.Clock
|
import Data.Time.Clock
|
||||||
|
|
||||||
cmd :: Command
|
cmd :: Command
|
||||||
cmd = notBareRepo $
|
cmd = notBareRepo $ withAnnexOptions [backendOption] $
|
||||||
command "addcomputed" SectionCommon "add computed files to annex"
|
command "addcomputed" SectionCommon "add computed files to annex"
|
||||||
(paramRepeating paramExpression)
|
(paramRepeating paramExpression)
|
||||||
(seek <$$> optParser)
|
(seek <$$> optParser)
|
||||||
|
@ -96,11 +96,22 @@ perform o r = do
|
||||||
Remote.Compute.runComputeProgram program state
|
Remote.Compute.runComputeProgram program state
|
||||||
(Remote.Compute.ImmutableState False)
|
(Remote.Compute.ImmutableState False)
|
||||||
(getInputContent fast)
|
(getInputContent fast)
|
||||||
(addComputed "adding" True r (reproducible o) Just fast)
|
(addComputed "adding" True r (reproducible o) chooseBackend Just fast)
|
||||||
next $ return True
|
next $ return True
|
||||||
|
|
||||||
addComputed :: StringContainingQuotedPath -> Bool -> Remote -> Maybe Reproducible -> (OsPath -> Maybe OsPath) -> Bool -> Remote.Compute.ComputeState -> OsPath -> NominalDiffTime -> Annex ()
|
addComputed
|
||||||
addComputed addaction stagefiles r reproducibleconfig destfile fast state tmpdir ts = do
|
:: StringContainingQuotedPath
|
||||||
|
-> Bool
|
||||||
|
-> Remote
|
||||||
|
-> Maybe Reproducible
|
||||||
|
-> (OsPath -> Annex Backend)
|
||||||
|
-> (OsPath -> Maybe OsPath)
|
||||||
|
-> Bool
|
||||||
|
-> Remote.Compute.ComputeState
|
||||||
|
-> OsPath
|
||||||
|
-> NominalDiffTime
|
||||||
|
-> Annex ()
|
||||||
|
addComputed addaction stagefiles r reproducibleconfig choosebackend destfile fast state tmpdir ts = do
|
||||||
let outputs = Remote.Compute.computeOutputs state
|
let outputs = Remote.Compute.computeOutputs state
|
||||||
when (M.null outputs) $
|
when (M.null outputs) $
|
||||||
giveup "The computation succeeded, but it did not generate any files."
|
giveup "The computation succeeded, but it did not generate any files."
|
||||||
|
@ -146,22 +157,24 @@ addComputed addaction stagefiles r reproducibleconfig destfile fast state tmpdir
|
||||||
, contentLocation = outputfile'
|
, contentLocation = outputfile'
|
||||||
, inodeCache = Nothing
|
, inodeCache = Nothing
|
||||||
}
|
}
|
||||||
|
genkey f p = do
|
||||||
|
backend <- choosebackend outputfile
|
||||||
|
fst <$> genKey (ks f) p backend
|
||||||
|
makelink f k = void $ makeLink f k Nothing
|
||||||
|
ingesthelper f p mk
|
||||||
|
| stagefiles = ingestwith $ do
|
||||||
|
k <- maybe (genkey f p) return mk
|
||||||
|
ingestAdd' p (Just (ld f)) (Just k)
|
||||||
|
| otherwise = ingestwith $ do
|
||||||
|
k <- maybe (genkey f p) return mk
|
||||||
|
mk' <- fst <$> ingest p (Just (ld f)) (Just k)
|
||||||
|
maybe noop (makelink f) mk'
|
||||||
|
return mk'
|
||||||
ingestwith a = a >>= \case
|
ingestwith a = a >>= \case
|
||||||
Nothing -> giveup "ingestion failed"
|
Nothing -> giveup "ingestion failed"
|
||||||
Just k -> do
|
Just k -> do
|
||||||
logStatus NoLiveUpdate k InfoPresent
|
logStatus NoLiveUpdate k InfoPresent
|
||||||
return k
|
return k
|
||||||
genkey f p = do
|
|
||||||
backend <- chooseBackend outputfile
|
|
||||||
fst <$> genKey (ks f) p backend
|
|
||||||
makelink f k = void $ makeLink f k Nothing
|
|
||||||
ingesthelper f p mk
|
|
||||||
| stagefiles = ingestwith $
|
|
||||||
ingestAdd' p (Just (ld f)) mk
|
|
||||||
| otherwise = ingestwith $ do
|
|
||||||
mk' <- fst <$> ingest p (Just (ld f)) mk
|
|
||||||
maybe noop (makelink f) mk'
|
|
||||||
return mk'
|
|
||||||
|
|
||||||
ldc = LockDownConfig
|
ldc = LockDownConfig
|
||||||
{ lockingFile = True
|
{ lockingFile = True
|
||||||
|
|
|
@ -14,10 +14,13 @@ import qualified Annex
|
||||||
import qualified Remote.Compute
|
import qualified Remote.Compute
|
||||||
import qualified Remote
|
import qualified Remote
|
||||||
import qualified Types.Remote as Remote
|
import qualified Types.Remote as Remote
|
||||||
|
import Annex.Content
|
||||||
import Annex.CatFile
|
import Annex.CatFile
|
||||||
import Git.FilePath
|
import Git.FilePath
|
||||||
import Logs.Location
|
import Logs.Location
|
||||||
import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent, getInputContent', addComputed)
|
import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent, getInputContent', addComputed)
|
||||||
|
import Backend (maybeLookupBackendVariety, unknownBackendVarietyMessage)
|
||||||
|
import Types.Key
|
||||||
|
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
|
||||||
|
@ -62,7 +65,7 @@ seek' o = do
|
||||||
|
|
||||||
start :: RecomputeOptions -> Maybe Remote -> SeekInput -> OsPath -> Key -> CommandStart
|
start :: RecomputeOptions -> Maybe Remote -> SeekInput -> OsPath -> Key -> CommandStart
|
||||||
start o (Just computeremote) si file key =
|
start o (Just computeremote) si file key =
|
||||||
stopUnless (notElem (Remote.uuid computeremote) <$> loggedLocations key) $
|
stopUnless (elem (Remote.uuid computeremote) <$> loggedLocations key) $
|
||||||
start' o computeremote si file key
|
start' o computeremote si file key
|
||||||
start o Nothing si file key = do
|
start o Nothing si file key = do
|
||||||
rs <- catMaybes <$> (mapM Remote.byUUID =<< loggedLocations key)
|
rs <- catMaybes <$> (mapM Remote.byUUID =<< loggedLocations key)
|
||||||
|
@ -103,31 +106,73 @@ start' o r si file key =
|
||||||
-- explains the problem.
|
-- explains the problem.
|
||||||
Nothing -> True
|
Nothing -> True
|
||||||
|
|
||||||
-- TODO When reproducible is not set, preserve the
|
|
||||||
-- reproducible/unreproducible of the input key.
|
|
||||||
perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform
|
perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform
|
||||||
perform o r file key origstate = do
|
perform o r file origkey origstate = do
|
||||||
program <- Remote.Compute.getComputeProgram r
|
program <- Remote.Compute.getComputeProgram r
|
||||||
fast <- Annex.getRead Annex.fast
|
reproducibleconfig <- getreproducibleconfig
|
||||||
showOutput
|
showOutput
|
||||||
Remote.Compute.runComputeProgram program origstate
|
Remote.Compute.runComputeProgram program origstate
|
||||||
(Remote.Compute.ImmutableState True)
|
(Remote.Compute.ImmutableState False)
|
||||||
(getinputcontent program fast)
|
(getinputcontent program)
|
||||||
(addComputed "processing" False r (reproducible o) destfile fast)
|
(go program reproducibleconfig)
|
||||||
next $ return True
|
next $ return True
|
||||||
where
|
where
|
||||||
getinputcontent program fast p
|
go program reproducibleconfig state tmpdir ts = do
|
||||||
|
checkbehaviorchange program state
|
||||||
|
addComputed "processing" False r reproducibleconfig
|
||||||
|
choosebackend destfile state tmpdir ts
|
||||||
|
|
||||||
|
checkbehaviorchange program state = do
|
||||||
|
let check s w a b = forM_ (M.keys (w a)) $ \f ->
|
||||||
|
unless (M.member f (w b)) $
|
||||||
|
Remote.Compute.computationBehaviorChangeError program s f
|
||||||
|
|
||||||
|
check "not using input file"
|
||||||
|
Remote.Compute.computeInputs origstate state
|
||||||
|
check "outputting"
|
||||||
|
Remote.Compute.computeOutputs state origstate
|
||||||
|
check "not outputting"
|
||||||
|
Remote.Compute.computeOutputs origstate state
|
||||||
|
|
||||||
|
getinputcontent program p
|
||||||
| originalOption o =
|
| originalOption o =
|
||||||
case M.lookup p (Remote.Compute.computeInputs origstate) of
|
case M.lookup p (Remote.Compute.computeInputs origstate) of
|
||||||
Just inputkey -> getInputContent' fast inputkey
|
Just inputkey -> getInputContent' False inputkey
|
||||||
(fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")")
|
(fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")")
|
||||||
Nothing -> Remote.Compute.computationBehaviorChangeError program
|
Nothing -> Remote.Compute.computationBehaviorChangeError program
|
||||||
"requesting a new input file" p
|
"requesting a new input file" p
|
||||||
| otherwise = getInputContent fast p
|
| otherwise = getInputContent False p
|
||||||
|
|
||||||
destfile outputfile
|
destfile outputfile
|
||||||
| Just outputfile == origfile = Just file
|
| Just outputfile == origfile = Just file
|
||||||
| otherwise = Nothing
|
| otherwise = Nothing
|
||||||
|
|
||||||
origfile = headMaybe $ M.keys $ M.filter (== Just key)
|
origfile = headMaybe $ M.keys $ M.filter (== Just origkey)
|
||||||
(Remote.Compute.computeOutputs origstate)
|
(Remote.Compute.computeOutputs origstate)
|
||||||
|
|
||||||
|
origbackendvariety = fromKey keyVariety origkey
|
||||||
|
|
||||||
|
getreproducibleconfig = case reproducible o of
|
||||||
|
Just (Reproducible True) -> return (Just (Reproducible True))
|
||||||
|
-- A VURL key is used when the computation was
|
||||||
|
-- unreproducible. So recomputing should too, but that
|
||||||
|
-- will result in the same VURL key. Since moveAnnex
|
||||||
|
-- will prefer the current annex object to a new one,
|
||||||
|
-- delete the annex object first, so that if recomputing
|
||||||
|
-- generates a new version of the file, it replaces
|
||||||
|
-- the old version.
|
||||||
|
v -> case origbackendvariety of
|
||||||
|
VURLKey -> do
|
||||||
|
lockContentForRemoval origkey noop removeAnnex
|
||||||
|
-- in case computation fails or is interupted
|
||||||
|
logStatus NoLiveUpdate origkey InfoMissing
|
||||||
|
return (Just (Reproducible False))
|
||||||
|
_ -> return v
|
||||||
|
|
||||||
|
choosebackend _outputfile
|
||||||
|
-- Use the same backend as was used to compute it before,
|
||||||
|
-- so if the computed file is the same, there will be
|
||||||
|
-- no change.
|
||||||
|
| otherwise = maybeLookupBackendVariety origbackendvariety >>= \case
|
||||||
|
Just b -> return b
|
||||||
|
Nothing -> giveup $ unknownBackendVarietyMessage origbackendvariety
|
||||||
|
|
|
@ -399,8 +399,8 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
||||||
liftIO $ hPutStrLn (stdinHandle p) $
|
liftIO $ hPutStrLn (stdinHandle p) $
|
||||||
maybe "" fromOsPath mp'
|
maybe "" fromOsPath mp'
|
||||||
liftIO $ hFlush (stdinHandle p)
|
liftIO $ hFlush (stdinHandle p)
|
||||||
return $ if knowninput
|
return $ if immutablestate
|
||||||
then state'
|
then state
|
||||||
else state'
|
else state'
|
||||||
{ computeInputs =
|
{ computeInputs =
|
||||||
M.insert f' k
|
M.insert f' k
|
||||||
|
@ -411,8 +411,8 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
||||||
checksafefile tmpdir subdir f' "output"
|
checksafefile tmpdir subdir f' "output"
|
||||||
let knownoutput = M.member f' (computeOutputs state')
|
let knownoutput = M.member f' (computeOutputs state')
|
||||||
checkimmutable knownoutput "outputting" f' $
|
checkimmutable knownoutput "outputting" f' $
|
||||||
return $ if knownoutput
|
return $ if immutablestate
|
||||||
then state'
|
then state
|
||||||
else state'
|
else state'
|
||||||
{ computeOutputs =
|
{ computeOutputs =
|
||||||
M.insert f' Nothing
|
M.insert f' Nothing
|
||||||
|
|
29
TODO-compute
29
TODO-compute
|
@ -1,3 +1,20 @@
|
||||||
|
* VURL keys don't currently have the hash key recorded in the equivilant
|
||||||
|
key log by addcompute or when getting from a compute remote.
|
||||||
|
|
||||||
|
* need progress bars for computations and implement PROGRESS message
|
||||||
|
|
||||||
|
* get input files for a computation (so `git-annex get .` gets every file,
|
||||||
|
even when input files in a directory are processed after computed files)
|
||||||
|
|
||||||
|
* autoinit security
|
||||||
|
|
||||||
|
* Support non-annexed files as inputs to computations.
|
||||||
|
|
||||||
|
* addcomputed should honor annex.addunlocked.
|
||||||
|
|
||||||
|
* Perhaps recompute should write a new version of a file as an unlocked
|
||||||
|
file when the file is currently unlocked?
|
||||||
|
|
||||||
* recompute could ingest keys for other files than the one being
|
* recompute could ingest keys for other files than the one being
|
||||||
recomputed, and remember them. Then recomputing those files could just
|
recomputed, and remember them. Then recomputing those files could just
|
||||||
use those keys, without re-running a computation. (Better than --others
|
use those keys, without re-running a computation. (Better than --others
|
||||||
|
@ -18,18 +35,6 @@
|
||||||
Or it could build a DAG and traverse it, but building a DAG of a large
|
Or it could build a DAG and traverse it, but building a DAG of a large
|
||||||
directory tree has its own problems.
|
directory tree has its own problems.
|
||||||
|
|
||||||
* recompute should use the same key backend for a file that it used before
|
|
||||||
(except when --reproducible/--unreproducible is passed).
|
|
||||||
|
|
||||||
* Check recompute's handling of --reproducible and --unreproducible.
|
|
||||||
|
|
||||||
* addcomputed should honor annex.addunlocked.
|
|
||||||
|
|
||||||
* Perhaps recompute should write a new version of a file as an unlocked
|
|
||||||
file when the file is currently unlocked?
|
|
||||||
|
|
||||||
* Support non-annexed files as inputs to computations.
|
|
||||||
|
|
||||||
* Should addcomputed honor annex.smallfiles? That would seem to imply
|
* Should addcomputed honor annex.smallfiles? That would seem to imply
|
||||||
that recompute should also support recomputing non-annexed files.
|
that recompute should also support recomputing non-annexed files.
|
||||||
Otherwise, adding a file and then recomputing it would vary in
|
Otherwise, adding a file and then recomputing it would vary in
|
||||||
|
|
|
@ -82,6 +82,10 @@ the parameters provided to `git-annex addcomputed`.
|
||||||
checksum verification error. One thing that can be done then is to use
|
checksum verification error. One thing that can be done then is to use
|
||||||
`git-annex recompute --original --unreproducible`.
|
`git-annex recompute --original --unreproducible`.
|
||||||
|
|
||||||
|
* `--backend`
|
||||||
|
|
||||||
|
Specifies which key-value backend to use.
|
||||||
|
|
||||||
* Also the [[git-annex-common-options]](1) can be used.
|
* Also the [[git-annex-common-options]](1) can be used.
|
||||||
|
|
||||||
# SEE ALSO
|
# SEE ALSO
|
||||||
|
|
|
@ -21,17 +21,7 @@ updated with the new content.
|
||||||
|
|
||||||
* `--original`
|
* `--original`
|
||||||
|
|
||||||
Use the original content of input files.
|
Re-run the computation with the original input files.
|
||||||
|
|
||||||
* `--unreproducible`, `-u`
|
|
||||||
|
|
||||||
Convert files that were added with `git-annex addcomputed --reproducible`
|
|
||||||
to be as if they were added without that option.
|
|
||||||
|
|
||||||
* `--reproducible`, `-r`
|
|
||||||
|
|
||||||
Convert files that were added with `git-annex addcomputed --unreproducible`
|
|
||||||
to be as if they were added with `--reproducible`.
|
|
||||||
|
|
||||||
* `--remote=name`
|
* `--remote=name`
|
||||||
|
|
||||||
|
@ -42,6 +32,22 @@ updated with the new content.
|
||||||
a file can be computed by multiple remotes, the one with the lowest
|
a file can be computed by multiple remotes, the one with the lowest
|
||||||
configured cost will be used.
|
configured cost will be used.
|
||||||
|
|
||||||
|
* `--unreproducible`, `-u`
|
||||||
|
|
||||||
|
Indicate that the computation is not expected to be fully reproducible.
|
||||||
|
It can vary, in ways that produce files that equivilant enough to
|
||||||
|
be interchangeable, but are not necessarily identical.
|
||||||
|
|
||||||
|
This is the default unless the compute remote indicates that it produces
|
||||||
|
reproducible output.
|
||||||
|
|
||||||
|
* `--reproducible`, `-r`
|
||||||
|
|
||||||
|
Indicate that the computation is expected to be fully reproducible.
|
||||||
|
|
||||||
|
This is the default when the compute remote indicates that it produces
|
||||||
|
reproducible output.
|
||||||
|
|
||||||
* matching options
|
* matching options
|
||||||
|
|
||||||
The [[git-annex-matching-options]](1) can be used to control what
|
The [[git-annex-matching-options]](1) can be used to control what
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue