fix recompute of renamed files
When a computed file has been renamed, a recompute needs to write to the new filename. I decided to remove --others because it's not clear what it should do in the face of renames. Should it update only other files that have not been renamed? Or update files anywhere in the tree that use the old key to use the new key? Or write the other files to the cwd, ignoring renames? Since --others is just a way to save on compute time, adding this complexity at this point seems like a bad idea. May revisit later. Added temporary TODO-compute file.
This commit is contained in:
parent
5d2a608a56
commit
9c2c3002a6
4 changed files with 71 additions and 42 deletions
|
@ -96,11 +96,11 @@ perform o r = do
|
||||||
Remote.Compute.runComputeProgram program state
|
Remote.Compute.runComputeProgram program state
|
||||||
(Remote.Compute.ImmutableState False)
|
(Remote.Compute.ImmutableState False)
|
||||||
(getInputContent fast)
|
(getInputContent fast)
|
||||||
(addComputed "adding" True r (reproducible o) (const True) fast)
|
(addComputed "adding" True r (reproducible o) Just fast)
|
||||||
next $ return True
|
next $ return True
|
||||||
|
|
||||||
addComputed :: StringContainingQuotedPath -> Bool -> Remote -> Maybe Reproducible -> (OsPath -> Bool) -> Bool -> Remote.Compute.ComputeState -> OsPath -> NominalDiffTime -> Annex ()
|
addComputed :: StringContainingQuotedPath -> Bool -> Remote -> Maybe Reproducible -> (OsPath -> Maybe OsPath) -> Bool -> Remote.Compute.ComputeState -> OsPath -> NominalDiffTime -> Annex ()
|
||||||
addComputed addaction stagefiles r reproducibleconfig wantfile fast state tmpdir ts = do
|
addComputed addaction stagefiles r reproducibleconfig destfile fast state tmpdir ts = do
|
||||||
let outputs = Remote.Compute.computeOutputs state
|
let outputs = Remote.Compute.computeOutputs state
|
||||||
when (M.null outputs) $
|
when (M.null outputs) $
|
||||||
giveup "The computation succeeded, but it did not generate any files."
|
giveup "The computation succeeded, but it did not generate any files."
|
||||||
|
@ -120,29 +120,29 @@ addComputed addaction stagefiles r reproducibleconfig wantfile fast state tmpdir
|
||||||
where
|
where
|
||||||
addfile outputfile
|
addfile outputfile
|
||||||
| fast = do
|
| fast = do
|
||||||
when (wantfile outputfile) $
|
case destfile outputfile of
|
||||||
if stagefiles
|
Nothing -> noop
|
||||||
then addSymlink outputfile stateurlk Nothing
|
Just f
|
||||||
else makelink stateurlk
|
| stagefiles -> addSymlink f stateurlk Nothing
|
||||||
|
| otherwise -> makelink f stateurlk
|
||||||
return stateurlk
|
return stateurlk
|
||||||
| isreproducible = do
|
| isreproducible = do
|
||||||
sz <- liftIO $ getFileSize outputfile'
|
sz <- liftIO $ getFileSize outputfile'
|
||||||
metered Nothing sz Nothing $ \_ p ->
|
metered Nothing sz Nothing $ \_ p ->
|
||||||
if wantfile outputfile
|
case destfile outputfile of
|
||||||
then ingesthelper p Nothing
|
Just f -> ingesthelper f p Nothing
|
||||||
else genkey p
|
Nothing -> genkey outputfile p
|
||||||
| otherwise =
|
| otherwise = case destfile outputfile of
|
||||||
if wantfile outputfile
|
Just f -> ingesthelper f nullMeterUpdate
|
||||||
then ingesthelper nullMeterUpdate
|
(Just stateurlk)
|
||||||
(Just stateurlk)
|
Nothing -> return stateurlk
|
||||||
else return stateurlk
|
|
||||||
where
|
where
|
||||||
stateurl = Remote.Compute.computeStateUrl r state outputfile
|
stateurl = Remote.Compute.computeStateUrl r state outputfile
|
||||||
stateurlk = fromUrl stateurl Nothing True
|
stateurlk = fromUrl stateurl Nothing True
|
||||||
outputfile' = tmpdir </> outputfile
|
outputfile' = tmpdir </> outputfile
|
||||||
ld = LockedDown ldc ks
|
ld f = LockedDown ldc (ks f)
|
||||||
ks = KeySource
|
ks f = KeySource
|
||||||
{ keyFilename = outputfile
|
{ keyFilename = f
|
||||||
, contentLocation = outputfile'
|
, contentLocation = outputfile'
|
||||||
, inodeCache = Nothing
|
, inodeCache = Nothing
|
||||||
}
|
}
|
||||||
|
@ -151,16 +151,16 @@ addComputed addaction stagefiles r reproducibleconfig wantfile fast state tmpdir
|
||||||
Just k -> do
|
Just k -> do
|
||||||
logStatus NoLiveUpdate k InfoPresent
|
logStatus NoLiveUpdate k InfoPresent
|
||||||
return k
|
return k
|
||||||
genkey p = do
|
genkey f p = do
|
||||||
backend <- chooseBackend outputfile
|
backend <- chooseBackend outputfile
|
||||||
fst <$> genKey ks p backend
|
fst <$> genKey (ks f) p backend
|
||||||
makelink k = void $ makeLink outputfile k Nothing
|
makelink f k = void $ makeLink f k Nothing
|
||||||
ingesthelper p mk
|
ingesthelper f p mk
|
||||||
| stagefiles = ingestwith $
|
| stagefiles = ingestwith $
|
||||||
ingestAdd' p (Just ld) mk
|
ingestAdd' p (Just (ld f)) mk
|
||||||
| otherwise = ingestwith $ do
|
| otherwise = ingestwith $ do
|
||||||
mk' <- fst <$> ingest p (Just ld) mk
|
mk' <- fst <$> ingest p (Just (ld f)) mk
|
||||||
maybe noop makelink mk'
|
maybe noop (makelink f) mk'
|
||||||
return mk'
|
return mk'
|
||||||
|
|
||||||
ldc = LockDownConfig
|
ldc = LockDownConfig
|
||||||
|
|
|
@ -29,7 +29,6 @@ cmd = notBareRepo $
|
||||||
data RecomputeOptions = RecomputeOptions
|
data RecomputeOptions = RecomputeOptions
|
||||||
{ recomputeThese :: CmdParams
|
{ recomputeThese :: CmdParams
|
||||||
, originalOption :: Bool
|
, originalOption :: Bool
|
||||||
, othersOption :: Bool
|
|
||||||
, reproducible :: Maybe Reproducible
|
, reproducible :: Maybe Reproducible
|
||||||
, computeRemote :: Maybe (DeferredParse Remote)
|
, computeRemote :: Maybe (DeferredParse Remote)
|
||||||
}
|
}
|
||||||
|
@ -41,10 +40,6 @@ optParser desc = RecomputeOptions
|
||||||
( long "original"
|
( long "original"
|
||||||
<> help "recompute using original content of input files"
|
<> help "recompute using original content of input files"
|
||||||
)
|
)
|
||||||
<*> switch
|
|
||||||
( long "others"
|
|
||||||
<> help "stage other files that are recomputed in passing"
|
|
||||||
)
|
|
||||||
<*> parseReproducible
|
<*> parseReproducible
|
||||||
<*> optional (mkParseRemoteOption <$> parseRemoteOption)
|
<*> optional (mkParseRemoteOption <$> parseRemoteOption)
|
||||||
|
|
||||||
|
@ -111,25 +106,28 @@ start' o r si file key =
|
||||||
-- TODO When reproducible is not set, preserve the
|
-- TODO When reproducible is not set, preserve the
|
||||||
-- reproducible/unreproducible of the input key.
|
-- reproducible/unreproducible of the input key.
|
||||||
perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform
|
perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform
|
||||||
perform o r file key oldstate = do
|
perform o r file key origstate = do
|
||||||
program <- Remote.Compute.getComputeProgram r
|
program <- Remote.Compute.getComputeProgram r
|
||||||
fast <- Annex.getRead Annex.fast
|
fast <- Annex.getRead Annex.fast
|
||||||
showOutput
|
showOutput
|
||||||
Remote.Compute.runComputeProgram program oldstate
|
Remote.Compute.runComputeProgram program origstate
|
||||||
(Remote.Compute.ImmutableState True)
|
(Remote.Compute.ImmutableState True)
|
||||||
(getinputcontent program fast)
|
(getinputcontent program fast)
|
||||||
(addComputed "processing" False r (reproducible o) wantfile fast)
|
(addComputed "processing" False r (reproducible o) destfile fast)
|
||||||
next $ return True
|
next $ return True
|
||||||
where
|
where
|
||||||
getinputcontent program fast p
|
getinputcontent program fast p
|
||||||
| originalOption o =
|
| originalOption o =
|
||||||
case M.lookup p (Remote.Compute.computeInputs oldstate) of
|
case M.lookup p (Remote.Compute.computeInputs origstate) of
|
||||||
Just inputkey -> getInputContent' fast inputkey
|
Just inputkey -> getInputContent' fast inputkey
|
||||||
(fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")")
|
(fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")")
|
||||||
Nothing -> Remote.Compute.computationBehaviorChangeError program
|
Nothing -> Remote.Compute.computationBehaviorChangeError program
|
||||||
"requesting a new input file" p
|
"requesting a new input file" p
|
||||||
| otherwise = getInputContent fast p
|
| otherwise = getInputContent fast p
|
||||||
|
|
||||||
wantfile outputfile
|
destfile outputfile
|
||||||
| othersOption o = True
|
| Just outputfile == origfile = Just file
|
||||||
| otherwise = outputfile == file
|
| otherwise = Nothing
|
||||||
|
|
||||||
|
origfile = headMaybe $ M.keys $ M.filter (== Just key)
|
||||||
|
(Remote.Compute.computeOutputs origstate)
|
||||||
|
|
36
TODO-compute
Normal file
36
TODO-compute
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
* recompute could ingest keys for files other than the one being
|
||||||
|
recomputed, and remember them. Then recomputing those files could just
|
||||||
|
use those keys, without re-running a computation. (Better than --others
|
||||||
|
which got removed.)
|
||||||
|
|
||||||
|
* `git-annex recompute foo bar baz`, when foo depends on bar which depends
|
||||||
|
on baz, and when baz has changed, will not recompute foo, because bar has
|
||||||
|
not changed. It then recomputes bar. So running the command again is
|
||||||
|
needed to recompute foo.
|
||||||
|
|
||||||
|
What it could do is, after it recomputes bar, notice that it already
|
||||||
|
considered foo, and revisit foo, and recompute it then. It could either
|
||||||
|
use a bloom filter to remember the files it considered but did not
|
||||||
|
compute, or it could just notice that the command line includes foo
|
||||||
|
(or includes a directory that contains foo), and that foo is not
|
||||||
|
modified.
|
||||||
|
|
||||||
|
Or it could build a DAG and traverse it, but building a DAG of a large
|
||||||
|
directory tree has its own problems.
|
||||||
|
|
||||||
|
* recompute should use the same key backend for a file that it used before
|
||||||
|
(except when --reproducible/--unreproducible is passed).
|
||||||
|
|
||||||
|
* Check recompute's handling of --reproducible and --unreproducible.
|
||||||
|
|
||||||
|
* addcomputed should honor annex.addunlocked.
|
||||||
|
|
||||||
|
* Perhaps recompute should write a new version of a file as an unlocked
|
||||||
|
file when the file is currently unlocked?
|
||||||
|
|
||||||
|
* Support non-annexed files as inputs to computations.
|
||||||
|
|
||||||
|
* Should addcomputed honor annex.smallfiles? That would seem to imply
|
||||||
|
that recompute should also support recomputing non-annexed files.
|
||||||
|
Otherwise, adding a file and then recomputing it would vary in
|
||||||
|
what the content of the file is, depending on annex.smallfiles setting.
|
|
@ -23,11 +23,6 @@ updated with the new content.
|
||||||
|
|
||||||
Use the original content of input files.
|
Use the original content of input files.
|
||||||
|
|
||||||
* `--others`
|
|
||||||
|
|
||||||
When recomputing one file also generates new versions of other files,
|
|
||||||
update those other files too.
|
|
||||||
|
|
||||||
* `--unreproducible`, `-u`
|
* `--unreproducible`, `-u`
|
||||||
|
|
||||||
Convert files that were added with `git-annex addcomputed --reproducible`
|
Convert files that were added with `git-annex addcomputed --reproducible`
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue