diff --git a/Command/AddComputed.hs b/Command/AddComputed.hs index 071015e014..20eacf954f 100644 --- a/Command/AddComputed.hs +++ b/Command/AddComputed.hs @@ -28,7 +28,7 @@ import qualified Data.Map as M import Data.Time.Clock cmd :: Command -cmd = notBareRepo $ +cmd = notBareRepo $ withAnnexOptions [backendOption] $ command "addcomputed" SectionCommon "add computed files to annex" (paramRepeating paramExpression) (seek <$$> optParser) @@ -96,11 +96,22 @@ perform o r = do Remote.Compute.runComputeProgram program state (Remote.Compute.ImmutableState False) (getInputContent fast) - (addComputed "adding" True r (reproducible o) Just fast) + (addComputed "adding" True r (reproducible o) chooseBackend Just fast) next $ return True -addComputed :: StringContainingQuotedPath -> Bool -> Remote -> Maybe Reproducible -> (OsPath -> Maybe OsPath) -> Bool -> Remote.Compute.ComputeState -> OsPath -> NominalDiffTime -> Annex () -addComputed addaction stagefiles r reproducibleconfig destfile fast state tmpdir ts = do +addComputed + :: StringContainingQuotedPath + -> Bool + -> Remote + -> Maybe Reproducible + -> (OsPath -> Annex Backend) + -> (OsPath -> Maybe OsPath) + -> Bool + -> Remote.Compute.ComputeState + -> OsPath + -> NominalDiffTime + -> Annex () +addComputed addaction stagefiles r reproducibleconfig choosebackend destfile fast state tmpdir ts = do let outputs = Remote.Compute.computeOutputs state when (M.null outputs) $ giveup "The computation succeeded, but it did not generate any files." @@ -146,22 +157,24 @@ addComputed addaction stagefiles r reproducibleconfig destfile fast state tmpdir , contentLocation = outputfile' , inodeCache = Nothing } + genkey f p = do + backend <- choosebackend outputfile + fst <$> genKey (ks f) p backend + makelink f k = void $ makeLink f k Nothing + ingesthelper f p mk + | stagefiles = ingestwith $ do + k <- maybe (genkey f p) return mk + ingestAdd' p (Just (ld f)) (Just k) + | otherwise = ingestwith $ do + k <- maybe (genkey f p) return mk + mk' <- fst <$> ingest p (Just (ld f)) (Just k) + maybe noop (makelink f) mk' + return mk' ingestwith a = a >>= \case Nothing -> giveup "ingestion failed" Just k -> do logStatus NoLiveUpdate k InfoPresent return k - genkey f p = do - backend <- chooseBackend outputfile - fst <$> genKey (ks f) p backend - makelink f k = void $ makeLink f k Nothing - ingesthelper f p mk - | stagefiles = ingestwith $ - ingestAdd' p (Just (ld f)) mk - | otherwise = ingestwith $ do - mk' <- fst <$> ingest p (Just (ld f)) mk - maybe noop (makelink f) mk' - return mk' ldc = LockDownConfig { lockingFile = True diff --git a/Command/Recompute.hs b/Command/Recompute.hs index 2193216d29..4a3c8355ad 100644 --- a/Command/Recompute.hs +++ b/Command/Recompute.hs @@ -14,10 +14,13 @@ import qualified Annex import qualified Remote.Compute import qualified Remote import qualified Types.Remote as Remote +import Annex.Content import Annex.CatFile import Git.FilePath import Logs.Location import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent, getInputContent', addComputed) +import Backend (maybeLookupBackendVariety, unknownBackendVarietyMessage) +import Types.Key import qualified Data.Map as M @@ -62,7 +65,7 @@ seek' o = do start :: RecomputeOptions -> Maybe Remote -> SeekInput -> OsPath -> Key -> CommandStart start o (Just computeremote) si file key = - stopUnless (notElem (Remote.uuid computeremote) <$> loggedLocations key) $ + stopUnless (elem (Remote.uuid computeremote) <$> loggedLocations key) $ start' o computeremote si file key start o Nothing si file key = do rs <- catMaybes <$> (mapM Remote.byUUID =<< loggedLocations key) @@ -103,31 +106,73 @@ start' o r si file key = -- explains the problem. Nothing -> True --- TODO When reproducible is not set, preserve the --- reproducible/unreproducible of the input key. perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform -perform o r file key origstate = do +perform o r file origkey origstate = do program <- Remote.Compute.getComputeProgram r - fast <- Annex.getRead Annex.fast + reproducibleconfig <- getreproducibleconfig showOutput Remote.Compute.runComputeProgram program origstate - (Remote.Compute.ImmutableState True) - (getinputcontent program fast) - (addComputed "processing" False r (reproducible o) destfile fast) + (Remote.Compute.ImmutableState False) + (getinputcontent program) + (go program reproducibleconfig) next $ return True where - getinputcontent program fast p + go program reproducibleconfig state tmpdir ts = do + checkbehaviorchange program state + addComputed "processing" False r reproducibleconfig + choosebackend destfile state tmpdir ts + + checkbehaviorchange program state = do + let check s w a b = forM_ (M.keys (w a)) $ \f -> + unless (M.member f (w b)) $ + Remote.Compute.computationBehaviorChangeError program s f + + check "not using input file" + Remote.Compute.computeInputs origstate state + check "outputting" + Remote.Compute.computeOutputs state origstate + check "not outputting" + Remote.Compute.computeOutputs origstate state + + getinputcontent program p | originalOption o = case M.lookup p (Remote.Compute.computeInputs origstate) of - Just inputkey -> getInputContent' fast inputkey + Just inputkey -> getInputContent' False inputkey (fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")") Nothing -> Remote.Compute.computationBehaviorChangeError program "requesting a new input file" p - | otherwise = getInputContent fast p + | otherwise = getInputContent False p destfile outputfile | Just outputfile == origfile = Just file | otherwise = Nothing - origfile = headMaybe $ M.keys $ M.filter (== Just key) + origfile = headMaybe $ M.keys $ M.filter (== Just origkey) (Remote.Compute.computeOutputs origstate) + + origbackendvariety = fromKey keyVariety origkey + + getreproducibleconfig = case reproducible o of + Just (Reproducible True) -> return (Just (Reproducible True)) + -- A VURL key is used when the computation was + -- unreproducible. So recomputing should too, but that + -- will result in the same VURL key. Since moveAnnex + -- will prefer the current annex object to a new one, + -- delete the annex object first, so that if recomputing + -- generates a new version of the file, it replaces + -- the old version. + v -> case origbackendvariety of + VURLKey -> do + lockContentForRemoval origkey noop removeAnnex + -- in case computation fails or is interupted + logStatus NoLiveUpdate origkey InfoMissing + return (Just (Reproducible False)) + _ -> return v + + choosebackend _outputfile + -- Use the same backend as was used to compute it before, + -- so if the computed file is the same, there will be + -- no change. + | otherwise = maybeLookupBackendVariety origbackendvariety >>= \case + Just b -> return b + Nothing -> giveup $ unknownBackendVarietyMessage origbackendvariety diff --git a/Remote/Compute.hs b/Remote/Compute.hs index e3ec2a8fdd..a8a3cdd32e 100644 --- a/Remote/Compute.hs +++ b/Remote/Compute.hs @@ -399,8 +399,8 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) liftIO $ hPutStrLn (stdinHandle p) $ maybe "" fromOsPath mp' liftIO $ hFlush (stdinHandle p) - return $ if knowninput - then state' + return $ if immutablestate + then state else state' { computeInputs = M.insert f' k @@ -411,8 +411,8 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) checksafefile tmpdir subdir f' "output" let knownoutput = M.member f' (computeOutputs state') checkimmutable knownoutput "outputting" f' $ - return $ if knownoutput - then state' + return $ if immutablestate + then state else state' { computeOutputs = M.insert f' Nothing diff --git a/TODO-compute b/TODO-compute index 31d8aaa7b2..fe128b0e4d 100644 --- a/TODO-compute +++ b/TODO-compute @@ -1,3 +1,20 @@ +* VURL keys don't currently have the hash key recorded in the equivilant + key log by addcompute or when getting from a compute remote. + +* need progress bars for computations and implement PROGRESS message + +* get input files for a computation (so `git-annex get .` gets every file, + even when input files in a directory are processed after computed files) + +* autoinit security + +* Support non-annexed files as inputs to computations. + +* addcomputed should honor annex.addunlocked. + +* Perhaps recompute should write a new version of a file as an unlocked + file when the file is currently unlocked? + * recompute could ingest keys for other files than the one being recomputed, and remember them. Then recomputing those files could just use those keys, without re-running a computation. (Better than --others @@ -18,18 +35,6 @@ Or it could build a DAG and traverse it, but building a DAG of a large directory tree has its own problems. -* recompute should use the same key backend for a file that it used before - (except when --reproducible/--unreproducible is passed). - -* Check recompute's handling of --reproducible and --unreproducible. - -* addcomputed should honor annex.addunlocked. - -* Perhaps recompute should write a new version of a file as an unlocked - file when the file is currently unlocked? - -* Support non-annexed files as inputs to computations. - * Should addcomputed honor annex.smallfiles? That would seem to imply that recompute should also support recomputing non-annexed files. Otherwise, adding a file and then recomputing it would vary in diff --git a/doc/git-annex-addcomputed.mdwn b/doc/git-annex-addcomputed.mdwn index 58261da181..3301381c66 100644 --- a/doc/git-annex-addcomputed.mdwn +++ b/doc/git-annex-addcomputed.mdwn @@ -82,6 +82,10 @@ the parameters provided to `git-annex addcomputed`. checksum verification error. One thing that can be done then is to use `git-annex recompute --original --unreproducible`. +* `--backend` + + Specifies which key-value backend to use. + * Also the [[git-annex-common-options]](1) can be used. # SEE ALSO diff --git a/doc/git-annex-recompute.mdwn b/doc/git-annex-recompute.mdwn index b65488bae8..fb895aa75c 100644 --- a/doc/git-annex-recompute.mdwn +++ b/doc/git-annex-recompute.mdwn @@ -21,17 +21,7 @@ updated with the new content. * `--original` - Use the original content of input files. - -* `--unreproducible`, `-u` - - Convert files that were added with `git-annex addcomputed --reproducible` - to be as if they were added without that option. - -* `--reproducible`, `-r` - - Convert files that were added with `git-annex addcomputed --unreproducible` - to be as if they were added with `--reproducible`. + Re-run the computation with the original input files. * `--remote=name` @@ -42,6 +32,22 @@ updated with the new content. a file can be computed by multiple remotes, the one with the lowest configured cost will be used. +* `--unreproducible`, `-u` + + Indicate that the computation is not expected to be fully reproducible. + It can vary, in ways that produce files that equivilant enough to + be interchangeable, but are not necessarily identical. + + This is the default unless the compute remote indicates that it produces + reproducible output. + +* `--reproducible`, `-r` + + Indicate that the computation is expected to be fully reproducible. + + This is the default when the compute remote indicates that it produces + reproducible output. + * matching options The [[git-annex-matching-options]](1) can be used to control what