many recompute improvements
I've lost track of them all, but it includes: * Using the same key backend as was used in the original computation. * Fixing bug that prevented updating the source file key in the compute state * Handling --reproducible and --unreproducible. * recompute --original of a file using VURL, when the result is different, but the key remains the same, makes the object file be updated with the new content * Detecting some other ways the program behavior can change, just for completeness. * Also adds --backend to addcomputed.
This commit is contained in:
parent
1704b5e327
commit
e6ae5e8d56
6 changed files with 127 additions and 54 deletions
|
@ -28,7 +28,7 @@ import qualified Data.Map as M
|
|||
import Data.Time.Clock
|
||||
|
||||
cmd :: Command
|
||||
cmd = notBareRepo $
|
||||
cmd = notBareRepo $ withAnnexOptions [backendOption] $
|
||||
command "addcomputed" SectionCommon "add computed files to annex"
|
||||
(paramRepeating paramExpression)
|
||||
(seek <$$> optParser)
|
||||
|
@ -96,11 +96,22 @@ perform o r = do
|
|||
Remote.Compute.runComputeProgram program state
|
||||
(Remote.Compute.ImmutableState False)
|
||||
(getInputContent fast)
|
||||
(addComputed "adding" True r (reproducible o) Just fast)
|
||||
(addComputed "adding" True r (reproducible o) chooseBackend Just fast)
|
||||
next $ return True
|
||||
|
||||
addComputed :: StringContainingQuotedPath -> Bool -> Remote -> Maybe Reproducible -> (OsPath -> Maybe OsPath) -> Bool -> Remote.Compute.ComputeState -> OsPath -> NominalDiffTime -> Annex ()
|
||||
addComputed addaction stagefiles r reproducibleconfig destfile fast state tmpdir ts = do
|
||||
addComputed
|
||||
:: StringContainingQuotedPath
|
||||
-> Bool
|
||||
-> Remote
|
||||
-> Maybe Reproducible
|
||||
-> (OsPath -> Annex Backend)
|
||||
-> (OsPath -> Maybe OsPath)
|
||||
-> Bool
|
||||
-> Remote.Compute.ComputeState
|
||||
-> OsPath
|
||||
-> NominalDiffTime
|
||||
-> Annex ()
|
||||
addComputed addaction stagefiles r reproducibleconfig choosebackend destfile fast state tmpdir ts = do
|
||||
let outputs = Remote.Compute.computeOutputs state
|
||||
when (M.null outputs) $
|
||||
giveup "The computation succeeded, but it did not generate any files."
|
||||
|
@ -146,22 +157,24 @@ addComputed addaction stagefiles r reproducibleconfig destfile fast state tmpdir
|
|||
, contentLocation = outputfile'
|
||||
, inodeCache = Nothing
|
||||
}
|
||||
genkey f p = do
|
||||
backend <- choosebackend outputfile
|
||||
fst <$> genKey (ks f) p backend
|
||||
makelink f k = void $ makeLink f k Nothing
|
||||
ingesthelper f p mk
|
||||
| stagefiles = ingestwith $ do
|
||||
k <- maybe (genkey f p) return mk
|
||||
ingestAdd' p (Just (ld f)) (Just k)
|
||||
| otherwise = ingestwith $ do
|
||||
k <- maybe (genkey f p) return mk
|
||||
mk' <- fst <$> ingest p (Just (ld f)) (Just k)
|
||||
maybe noop (makelink f) mk'
|
||||
return mk'
|
||||
ingestwith a = a >>= \case
|
||||
Nothing -> giveup "ingestion failed"
|
||||
Just k -> do
|
||||
logStatus NoLiveUpdate k InfoPresent
|
||||
return k
|
||||
genkey f p = do
|
||||
backend <- chooseBackend outputfile
|
||||
fst <$> genKey (ks f) p backend
|
||||
makelink f k = void $ makeLink f k Nothing
|
||||
ingesthelper f p mk
|
||||
| stagefiles = ingestwith $
|
||||
ingestAdd' p (Just (ld f)) mk
|
||||
| otherwise = ingestwith $ do
|
||||
mk' <- fst <$> ingest p (Just (ld f)) mk
|
||||
maybe noop (makelink f) mk'
|
||||
return mk'
|
||||
|
||||
ldc = LockDownConfig
|
||||
{ lockingFile = True
|
||||
|
|
|
@ -14,10 +14,13 @@ import qualified Annex
|
|||
import qualified Remote.Compute
|
||||
import qualified Remote
|
||||
import qualified Types.Remote as Remote
|
||||
import Annex.Content
|
||||
import Annex.CatFile
|
||||
import Git.FilePath
|
||||
import Logs.Location
|
||||
import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent, getInputContent', addComputed)
|
||||
import Backend (maybeLookupBackendVariety, unknownBackendVarietyMessage)
|
||||
import Types.Key
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
||||
|
@ -62,7 +65,7 @@ seek' o = do
|
|||
|
||||
start :: RecomputeOptions -> Maybe Remote -> SeekInput -> OsPath -> Key -> CommandStart
|
||||
start o (Just computeremote) si file key =
|
||||
stopUnless (notElem (Remote.uuid computeremote) <$> loggedLocations key) $
|
||||
stopUnless (elem (Remote.uuid computeremote) <$> loggedLocations key) $
|
||||
start' o computeremote si file key
|
||||
start o Nothing si file key = do
|
||||
rs <- catMaybes <$> (mapM Remote.byUUID =<< loggedLocations key)
|
||||
|
@ -103,31 +106,73 @@ start' o r si file key =
|
|||
-- explains the problem.
|
||||
Nothing -> True
|
||||
|
||||
-- TODO When reproducible is not set, preserve the
|
||||
-- reproducible/unreproducible of the input key.
|
||||
perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform
|
||||
perform o r file key origstate = do
|
||||
perform o r file origkey origstate = do
|
||||
program <- Remote.Compute.getComputeProgram r
|
||||
fast <- Annex.getRead Annex.fast
|
||||
reproducibleconfig <- getreproducibleconfig
|
||||
showOutput
|
||||
Remote.Compute.runComputeProgram program origstate
|
||||
(Remote.Compute.ImmutableState True)
|
||||
(getinputcontent program fast)
|
||||
(addComputed "processing" False r (reproducible o) destfile fast)
|
||||
(Remote.Compute.ImmutableState False)
|
||||
(getinputcontent program)
|
||||
(go program reproducibleconfig)
|
||||
next $ return True
|
||||
where
|
||||
getinputcontent program fast p
|
||||
go program reproducibleconfig state tmpdir ts = do
|
||||
checkbehaviorchange program state
|
||||
addComputed "processing" False r reproducibleconfig
|
||||
choosebackend destfile state tmpdir ts
|
||||
|
||||
checkbehaviorchange program state = do
|
||||
let check s w a b = forM_ (M.keys (w a)) $ \f ->
|
||||
unless (M.member f (w b)) $
|
||||
Remote.Compute.computationBehaviorChangeError program s f
|
||||
|
||||
check "not using input file"
|
||||
Remote.Compute.computeInputs origstate state
|
||||
check "outputting"
|
||||
Remote.Compute.computeOutputs state origstate
|
||||
check "not outputting"
|
||||
Remote.Compute.computeOutputs origstate state
|
||||
|
||||
getinputcontent program p
|
||||
| originalOption o =
|
||||
case M.lookup p (Remote.Compute.computeInputs origstate) of
|
||||
Just inputkey -> getInputContent' fast inputkey
|
||||
Just inputkey -> getInputContent' False inputkey
|
||||
(fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")")
|
||||
Nothing -> Remote.Compute.computationBehaviorChangeError program
|
||||
"requesting a new input file" p
|
||||
| otherwise = getInputContent fast p
|
||||
| otherwise = getInputContent False p
|
||||
|
||||
destfile outputfile
|
||||
| Just outputfile == origfile = Just file
|
||||
| otherwise = Nothing
|
||||
|
||||
origfile = headMaybe $ M.keys $ M.filter (== Just key)
|
||||
origfile = headMaybe $ M.keys $ M.filter (== Just origkey)
|
||||
(Remote.Compute.computeOutputs origstate)
|
||||
|
||||
origbackendvariety = fromKey keyVariety origkey
|
||||
|
||||
getreproducibleconfig = case reproducible o of
|
||||
Just (Reproducible True) -> return (Just (Reproducible True))
|
||||
-- A VURL key is used when the computation was
|
||||
-- unreproducible. So recomputing should too, but that
|
||||
-- will result in the same VURL key. Since moveAnnex
|
||||
-- will prefer the current annex object to a new one,
|
||||
-- delete the annex object first, so that if recomputing
|
||||
-- generates a new version of the file, it replaces
|
||||
-- the old version.
|
||||
v -> case origbackendvariety of
|
||||
VURLKey -> do
|
||||
lockContentForRemoval origkey noop removeAnnex
|
||||
-- in case computation fails or is interupted
|
||||
logStatus NoLiveUpdate origkey InfoMissing
|
||||
return (Just (Reproducible False))
|
||||
_ -> return v
|
||||
|
||||
choosebackend _outputfile
|
||||
-- Use the same backend as was used to compute it before,
|
||||
-- so if the computed file is the same, there will be
|
||||
-- no change.
|
||||
| otherwise = maybeLookupBackendVariety origbackendvariety >>= \case
|
||||
Just b -> return b
|
||||
Nothing -> giveup $ unknownBackendVarietyMessage origbackendvariety
|
||||
|
|
|
@ -399,8 +399,8 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
|||
liftIO $ hPutStrLn (stdinHandle p) $
|
||||
maybe "" fromOsPath mp'
|
||||
liftIO $ hFlush (stdinHandle p)
|
||||
return $ if knowninput
|
||||
then state'
|
||||
return $ if immutablestate
|
||||
then state
|
||||
else state'
|
||||
{ computeInputs =
|
||||
M.insert f' k
|
||||
|
@ -411,8 +411,8 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
|||
checksafefile tmpdir subdir f' "output"
|
||||
let knownoutput = M.member f' (computeOutputs state')
|
||||
checkimmutable knownoutput "outputting" f' $
|
||||
return $ if knownoutput
|
||||
then state'
|
||||
return $ if immutablestate
|
||||
then state
|
||||
else state'
|
||||
{ computeOutputs =
|
||||
M.insert f' Nothing
|
||||
|
|
29
TODO-compute
29
TODO-compute
|
@ -1,3 +1,20 @@
|
|||
* VURL keys don't currently have the hash key recorded in the equivilant
|
||||
key log by addcompute or when getting from a compute remote.
|
||||
|
||||
* need progress bars for computations and implement PROGRESS message
|
||||
|
||||
* get input files for a computation (so `git-annex get .` gets every file,
|
||||
even when input files in a directory are processed after computed files)
|
||||
|
||||
* autoinit security
|
||||
|
||||
* Support non-annexed files as inputs to computations.
|
||||
|
||||
* addcomputed should honor annex.addunlocked.
|
||||
|
||||
* Perhaps recompute should write a new version of a file as an unlocked
|
||||
file when the file is currently unlocked?
|
||||
|
||||
* recompute could ingest keys for other files than the one being
|
||||
recomputed, and remember them. Then recomputing those files could just
|
||||
use those keys, without re-running a computation. (Better than --others
|
||||
|
@ -18,18 +35,6 @@
|
|||
Or it could build a DAG and traverse it, but building a DAG of a large
|
||||
directory tree has its own problems.
|
||||
|
||||
* recompute should use the same key backend for a file that it used before
|
||||
(except when --reproducible/--unreproducible is passed).
|
||||
|
||||
* Check recompute's handling of --reproducible and --unreproducible.
|
||||
|
||||
* addcomputed should honor annex.addunlocked.
|
||||
|
||||
* Perhaps recompute should write a new version of a file as an unlocked
|
||||
file when the file is currently unlocked?
|
||||
|
||||
* Support non-annexed files as inputs to computations.
|
||||
|
||||
* Should addcomputed honor annex.smallfiles? That would seem to imply
|
||||
that recompute should also support recomputing non-annexed files.
|
||||
Otherwise, adding a file and then recomputing it would vary in
|
||||
|
|
|
@ -82,6 +82,10 @@ the parameters provided to `git-annex addcomputed`.
|
|||
checksum verification error. One thing that can be done then is to use
|
||||
`git-annex recompute --original --unreproducible`.
|
||||
|
||||
* `--backend`
|
||||
|
||||
Specifies which key-value backend to use.
|
||||
|
||||
* Also the [[git-annex-common-options]](1) can be used.
|
||||
|
||||
# SEE ALSO
|
||||
|
|
|
@ -21,17 +21,7 @@ updated with the new content.
|
|||
|
||||
* `--original`
|
||||
|
||||
Use the original content of input files.
|
||||
|
||||
* `--unreproducible`, `-u`
|
||||
|
||||
Convert files that were added with `git-annex addcomputed --reproducible`
|
||||
to be as if they were added without that option.
|
||||
|
||||
* `--reproducible`, `-r`
|
||||
|
||||
Convert files that were added with `git-annex addcomputed --unreproducible`
|
||||
to be as if they were added with `--reproducible`.
|
||||
Re-run the computation with the original input files.
|
||||
|
||||
* `--remote=name`
|
||||
|
||||
|
@ -42,6 +32,22 @@ updated with the new content.
|
|||
a file can be computed by multiple remotes, the one with the lowest
|
||||
configured cost will be used.
|
||||
|
||||
* `--unreproducible`, `-u`
|
||||
|
||||
Indicate that the computation is not expected to be fully reproducible.
|
||||
It can vary, in ways that produce files that equivilant enough to
|
||||
be interchangeable, but are not necessarily identical.
|
||||
|
||||
This is the default unless the compute remote indicates that it produces
|
||||
reproducible output.
|
||||
|
||||
* `--reproducible`, `-r`
|
||||
|
||||
Indicate that the computation is expected to be fully reproducible.
|
||||
|
||||
This is the default when the compute remote indicates that it produces
|
||||
reproducible output.
|
||||
|
||||
* matching options
|
||||
|
||||
The [[git-annex-matching-options]](1) can be used to control what
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue