addcomputed --fast and --unreproducible working
For these, use VURL and URL keys, with an "annex-compute:" URI prefix. These URL keys will look something like this: URL--annex-compute&cbar4,63pconvert,3-f4d3d72cf3f16ac9c3e9a8012bde4462 Generally it's too long so most of it gets md5summed. It's a little ugly, but it's what fell out of the existing URL key generation machinery. I did consider special casing to eg "URL--annex-compute&c4d3d72cf3f16ac9c3e9a8012bde4462". But it seems at least possibly useful that the name of the file that was computed is visible and perhaps one or two words of the git-annex compute command parameters. Note that two different output files from the same computation will get the same URL key. And these keys should remain stable.
This commit is contained in:
parent
a154e91513
commit
16f529c05f
3 changed files with 50 additions and 28 deletions
|
@ -20,8 +20,9 @@ import Annex.Ingest
|
||||||
import Types.RemoteConfig
|
import Types.RemoteConfig
|
||||||
import Types.KeySource
|
import Types.KeySource
|
||||||
import Messages.Progress
|
import Messages.Progress
|
||||||
import Utility.MonotonicClock
|
|
||||||
import Logs.Location
|
import Logs.Location
|
||||||
|
import Utility.MonotonicClock
|
||||||
|
import Backend.URL (fromUrl)
|
||||||
|
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import Data.Time.Clock
|
import Data.Time.Clock
|
||||||
|
@ -42,19 +43,19 @@ optParser :: CmdParamsDesc -> Parser AddComputedOptions
|
||||||
optParser desc = AddComputedOptions
|
optParser desc = AddComputedOptions
|
||||||
<$> cmdParams desc
|
<$> cmdParams desc
|
||||||
<*> (mkParseRemoteOption <$> parseToOption)
|
<*> (mkParseRemoteOption <$> parseToOption)
|
||||||
<*> (fromMaybe Unreproducible <$> parseReproducible)
|
<*> (fromMaybe (Reproducible False) <$> parseReproducible)
|
||||||
|
|
||||||
data Reproducible = Reproducible | Unreproducible
|
newtype Reproducible = Reproducible { isReproducible :: Bool }
|
||||||
|
|
||||||
parseReproducible :: Parser (Maybe Reproducible)
|
parseReproducible :: Parser (Maybe Reproducible)
|
||||||
parseReproducible = r <|> unr
|
parseReproducible = r <|> unr
|
||||||
where
|
where
|
||||||
r = flag Nothing (Just Reproducible)
|
r = flag Nothing (Just (Reproducible True))
|
||||||
( long "reproducible"
|
( long "reproducible"
|
||||||
<> short 'r'
|
<> short 'r'
|
||||||
<> help "computation is fully reproducible"
|
<> help "computation is fully reproducible"
|
||||||
)
|
)
|
||||||
unr = flag Nothing (Just Unreproducible)
|
unr = flag Nothing (Just (Reproducible False))
|
||||||
( long "unreproducible"
|
( long "unreproducible"
|
||||||
<> short 'u'
|
<> short 'u'
|
||||||
<> help "computation is not fully reproducible"
|
<> help "computation is not fully reproducible"
|
||||||
|
@ -90,17 +91,14 @@ perform o r program = do
|
||||||
, Remote.Compute.computeInputs = mempty
|
, Remote.Compute.computeInputs = mempty
|
||||||
, Remote.Compute.computeOutputs = mempty
|
, Remote.Compute.computeOutputs = mempty
|
||||||
, Remote.Compute.computeSubdir = subdir
|
, Remote.Compute.computeSubdir = subdir
|
||||||
, Remote.Compute.computeReproducible =
|
, Remote.Compute.computeReproducible = isreproducible
|
||||||
case reproducible o of
|
|
||||||
Reproducible -> True
|
|
||||||
Unreproducible -> False
|
|
||||||
}
|
}
|
||||||
fast <- Annex.getRead Annex.fast
|
fast <- Annex.getRead Annex.fast
|
||||||
starttime <- liftIO currentMonotonicTimestamp
|
starttime <- liftIO currentMonotonicTimestamp
|
||||||
Remote.Compute.runComputeProgram program state
|
Remote.Compute.runComputeProgram program state
|
||||||
(Remote.Compute.ImmutableState False)
|
(Remote.Compute.ImmutableState False)
|
||||||
(getinputcontent fast)
|
(getinputcontent fast)
|
||||||
(go starttime)
|
(go starttime fast)
|
||||||
next $ return True
|
next $ return True
|
||||||
where
|
where
|
||||||
getinputcontent fast p = catKeyFile p >>= \case
|
getinputcontent fast p = catKeyFile p >>= \case
|
||||||
|
@ -117,7 +115,7 @@ perform o r program = do
|
||||||
, giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p
|
, giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p
|
||||||
)
|
)
|
||||||
|
|
||||||
go starttime state tmpdir = do
|
go starttime fast state tmpdir = do
|
||||||
endtime <- liftIO currentMonotonicTimestamp
|
endtime <- liftIO currentMonotonicTimestamp
|
||||||
let ts = calcduration starttime endtime
|
let ts = calcduration starttime endtime
|
||||||
let outputs = Remote.Compute.computeOutputs state
|
let outputs = Remote.Compute.computeOutputs state
|
||||||
|
@ -125,7 +123,7 @@ perform o r program = do
|
||||||
giveup "The computation succeeded, but it did not generate any files."
|
giveup "The computation succeeded, but it did not generate any files."
|
||||||
oks <- forM (M.keys outputs) $ \outputfile -> do
|
oks <- forM (M.keys outputs) $ \outputfile -> do
|
||||||
showAction $ "adding " <> QuotedPath outputfile
|
showAction $ "adding " <> QuotedPath outputfile
|
||||||
k <- catchNonAsync (addfile tmpdir outputfile)
|
k <- catchNonAsync (addfile fast state tmpdir outputfile)
|
||||||
(\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err)
|
(\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err)
|
||||||
return (outputfile, Just k)
|
return (outputfile, Just k)
|
||||||
let state' = state
|
let state' = state
|
||||||
|
@ -137,24 +135,32 @@ perform o r program = do
|
||||||
k ts state'
|
k ts state'
|
||||||
logChange NoLiveUpdate k (Remote.uuid r) InfoPresent
|
logChange NoLiveUpdate k (Remote.uuid r) InfoPresent
|
||||||
|
|
||||||
addfile tmpdir outputfile = do
|
addfile fast state tmpdir outputfile
|
||||||
let outputfile' = tmpdir </> outputfile
|
| fast || not isreproducible = do
|
||||||
let ld = LockedDown ldc $ KeySource
|
let stateurl = Remote.Compute.computeStateUrl state outputfile
|
||||||
{ keyFilename = outputfile
|
let k = fromUrl stateurl Nothing isreproducible
|
||||||
, contentLocation = outputfile'
|
addSymlink outputfile k Nothing
|
||||||
, inodeCache = Nothing
|
return k
|
||||||
}
|
| otherwise = do
|
||||||
sz <- liftIO $ getFileSize outputfile'
|
let outputfile' = tmpdir </> outputfile
|
||||||
metered Nothing sz Nothing $ \_ p ->
|
let ld = LockedDown ldc $ KeySource
|
||||||
ingestAdd p (Just ld) >>= \case
|
{ keyFilename = outputfile
|
||||||
Nothing -> giveup "key generation failed"
|
, contentLocation = outputfile'
|
||||||
Just k -> return k
|
, inodeCache = Nothing
|
||||||
|
}
|
||||||
|
sz <- liftIO $ getFileSize outputfile'
|
||||||
|
metered Nothing sz Nothing $ \_ p ->
|
||||||
|
ingestAdd p (Just ld) >>= \case
|
||||||
|
Nothing -> giveup "key generation failed"
|
||||||
|
Just k -> return k
|
||||||
|
|
||||||
ldc = LockDownConfig
|
ldc = LockDownConfig
|
||||||
{ lockingFile = True
|
{ lockingFile = True
|
||||||
, hardlinkFileTmpDir = Nothing
|
, hardlinkFileTmpDir = Nothing
|
||||||
, checkWritePerms = True
|
, checkWritePerms = True
|
||||||
}
|
}
|
||||||
|
|
||||||
calcduration (MonotonicTimestamp starttime) (MonotonicTimestamp endtime) =
|
calcduration (MonotonicTimestamp starttime) (MonotonicTimestamp endtime) =
|
||||||
fromIntegral (endtime - starttime) :: NominalDiffTime
|
fromIntegral (endtime - starttime) :: NominalDiffTime
|
||||||
|
|
||||||
|
isreproducible = isReproducible (reproducible o)
|
||||||
|
|
|
@ -12,6 +12,7 @@ module Remote.Compute (
|
||||||
ComputeState(..),
|
ComputeState(..),
|
||||||
setComputeState,
|
setComputeState,
|
||||||
getComputeStates,
|
getComputeStates,
|
||||||
|
computeStateUrl,
|
||||||
ComputeProgram,
|
ComputeProgram,
|
||||||
getComputeProgram,
|
getComputeProgram,
|
||||||
runComputeProgram,
|
runComputeProgram,
|
||||||
|
@ -36,6 +37,7 @@ import Utility.Metered
|
||||||
import Utility.TimeStamp
|
import Utility.TimeStamp
|
||||||
import Utility.Env
|
import Utility.Env
|
||||||
import Utility.Tmp.Dir
|
import Utility.Tmp.Dir
|
||||||
|
import Utility.Url
|
||||||
import qualified Git
|
import qualified Git
|
||||||
import qualified Utility.SimpleProtocol as Proto
|
import qualified Utility.SimpleProtocol as Proto
|
||||||
|
|
||||||
|
@ -190,7 +192,10 @@ data ComputeState = ComputeState
|
||||||
- and computeOutputs are sorted in ascending order for stability.
|
- and computeOutputs are sorted in ascending order for stability.
|
||||||
-}
|
-}
|
||||||
formatComputeState :: Key -> ComputeState -> B.ByteString
|
formatComputeState :: Key -> ComputeState -> B.ByteString
|
||||||
formatComputeState k st = renderQuery False $ concat
|
formatComputeState k = formatComputeState' (Just k)
|
||||||
|
|
||||||
|
formatComputeState' :: Maybe Key -> ComputeState -> B.ByteString
|
||||||
|
formatComputeState' mk st = renderQuery False $ concat
|
||||||
[ map formatparam (computeParams st)
|
[ map formatparam (computeParams st)
|
||||||
, map formatinput (M.toAscList (computeInputs st))
|
, map formatinput (M.toAscList (computeInputs st))
|
||||||
, mapMaybe formatoutput (M.toAscList (computeOutputs st))
|
, mapMaybe formatoutput (M.toAscList (computeOutputs st))
|
||||||
|
@ -202,7 +207,7 @@ formatComputeState k st = renderQuery False $ concat
|
||||||
("i" <> fromOsPath file, Just (serializeKey' key))
|
("i" <> fromOsPath file, Just (serializeKey' key))
|
||||||
formatoutput (file, (Just key)) = Just $
|
formatoutput (file, (Just key)) = Just $
|
||||||
("o" <> fromOsPath file,
|
("o" <> fromOsPath file,
|
||||||
if key == k
|
if Just key == mk
|
||||||
then Nothing
|
then Nothing
|
||||||
else Just (serializeKey' key)
|
else Just (serializeKey' key)
|
||||||
)
|
)
|
||||||
|
@ -251,6 +256,17 @@ parseComputeState k b =
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
in go c' rest
|
in go c' rest
|
||||||
|
|
||||||
|
{- A compute: url for a given output file of a computation. -}
|
||||||
|
computeStateUrl :: ComputeState -> OsPath -> URLString
|
||||||
|
computeStateUrl st p =
|
||||||
|
"annex-compute:" ++ fromOsPath p ++ "?"
|
||||||
|
++ decodeBS (formatComputeState' Nothing st')
|
||||||
|
where
|
||||||
|
-- Omit computeOutputs, so this gives the same result whether
|
||||||
|
-- it's called on a ComputeState with the computeOutputs
|
||||||
|
-- Keys populated or not.
|
||||||
|
st' = st { computeOutputs = mempty }
|
||||||
|
|
||||||
{- The per remote metadata is used to store ComputeState. This allows
|
{- The per remote metadata is used to store ComputeState. This allows
|
||||||
- recording multiple ComputeStates that generate the same key.
|
- recording multiple ComputeStates that generate the same key.
|
||||||
-
|
-
|
||||||
|
|
|
@ -54,7 +54,7 @@ Some examples of how this might look:
|
||||||
* `--fast`
|
* `--fast`
|
||||||
|
|
||||||
Adds computed files to the repository, without generating their content
|
Adds computed files to the repository, without generating their content
|
||||||
yet.
|
yet.
|
||||||
|
|
||||||
* `--unreproducible`, `-u`
|
* `--unreproducible`, `-u`
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue