Merge branch 'compute'
This commit is contained in:
commit
6f78341fbf
47 changed files with 1771 additions and 161 deletions
|
@ -308,16 +308,12 @@ updateAdjustedBranch adj (AdjBranch currbranch) origbranch
|
|||
-- adjustment is stable.
|
||||
return True
|
||||
|
||||
{- Passed an action that, if it succeeds may get or drop the Key associated
|
||||
- with the file. When the adjusted branch needs to be refreshed to reflect
|
||||
{- Passed an action that, if it succeeds may get or drop a key.
|
||||
- When the adjusted branch needs to be refreshed to reflect
|
||||
- those changes, it's handled here.
|
||||
-
|
||||
- Note that the AssociatedFile must be verified by this to point to the
|
||||
- Key. In some cases, the value was provided by the user and might not
|
||||
- really be an associated file.
|
||||
-}
|
||||
adjustedBranchRefresh :: AssociatedFile -> Annex a -> Annex a
|
||||
adjustedBranchRefresh _af a = do
|
||||
adjustedBranchRefresh :: Annex a -> Annex a
|
||||
adjustedBranchRefresh a = do
|
||||
r <- a
|
||||
go
|
||||
return r
|
||||
|
|
|
@ -376,16 +376,16 @@ lockContentUsing contentlocker key fallback a = withContentLockFile key $ \mlock
|
|||
{- Runs an action, passing it the temp file to get,
|
||||
- and if the action succeeds, verifies the file matches
|
||||
- the key and moves the file into the annex as a key's content. -}
|
||||
getViaTmp :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> AssociatedFile -> Maybe FileSize -> (OsPath -> Annex (Bool, Verification)) -> Annex Bool
|
||||
getViaTmp rsp v key af sz action =
|
||||
getViaTmp :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> Maybe FileSize -> (OsPath -> Annex (Bool, Verification)) -> Annex Bool
|
||||
getViaTmp rsp v key sz action =
|
||||
checkDiskSpaceToGet key sz False $
|
||||
getViaTmpFromDisk rsp v key af action
|
||||
getViaTmpFromDisk rsp v key action
|
||||
|
||||
{- Like getViaTmp, but does not check that there is enough disk space
|
||||
- for the incoming key. For use when the key content is already on disk
|
||||
- and not being copied into place. -}
|
||||
getViaTmpFromDisk :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> AssociatedFile -> (OsPath -> Annex (Bool, Verification)) -> Annex Bool
|
||||
getViaTmpFromDisk rsp v key af action = checkallowed $ do
|
||||
getViaTmpFromDisk :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> (OsPath -> Annex (Bool, Verification)) -> Annex Bool
|
||||
getViaTmpFromDisk rsp v key action = checkallowed $ do
|
||||
tmpfile <- prepTmp key
|
||||
resuming <- liftIO $ doesPathExist tmpfile
|
||||
(ok, verification) <- action tmpfile
|
||||
|
@ -400,7 +400,7 @@ getViaTmpFromDisk rsp v key af action = checkallowed $ do
|
|||
else verification
|
||||
if ok
|
||||
then ifM (verifyKeyContentPostRetrieval rsp v verification' key tmpfile)
|
||||
( pruneTmpWorkDirBefore tmpfile (moveAnnex key af)
|
||||
( pruneTmpWorkDirBefore tmpfile (moveAnnex key)
|
||||
, do
|
||||
verificationOfContentFailed tmpfile
|
||||
return False
|
||||
|
@ -507,8 +507,8 @@ withTmp key action = do
|
|||
- accepted into the repository. Will display a warning message in this
|
||||
- case. May also throw exceptions in some cases.
|
||||
-}
|
||||
moveAnnex :: Key -> AssociatedFile -> OsPath -> Annex Bool
|
||||
moveAnnex key af src = ifM (checkSecureHashes' key)
|
||||
moveAnnex :: Key -> OsPath -> Annex Bool
|
||||
moveAnnex key src = ifM (checkSecureHashes' key)
|
||||
( do
|
||||
#ifdef mingw32_HOST_OS
|
||||
{- Windows prevents deletion of files that are not
|
||||
|
@ -523,7 +523,7 @@ moveAnnex key af src = ifM (checkSecureHashes' key)
|
|||
where
|
||||
storeobject dest = ifM (liftIO $ doesPathExist dest)
|
||||
( alreadyhave
|
||||
, adjustedBranchRefresh af $ modifyContentDir dest $ do
|
||||
, adjustedBranchRefresh $ modifyContentDir dest $ do
|
||||
liftIO $ moveFile src dest
|
||||
-- Freeze the object file now that it is in place.
|
||||
-- Waiting until now to freeze it allows for freeze
|
||||
|
@ -776,7 +776,7 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file ->
|
|||
-- it's unmodified.
|
||||
resetpointer file = unlessM (liftIO $ isSymbolicLink <$> R.getSymbolicLinkStatus (fromOsPath file)) $
|
||||
ifM (isUnmodified key file)
|
||||
( adjustedBranchRefresh (AssociatedFile (Just file)) $
|
||||
( adjustedBranchRefresh $
|
||||
depopulatePointerFile key file
|
||||
-- Modified file, so leave it alone.
|
||||
-- If it was a hard link to the annex object,
|
||||
|
|
|
@ -11,16 +11,13 @@ module Annex.Export where
|
|||
|
||||
import Annex
|
||||
import Annex.CatFile
|
||||
import Annex.GitShaKey
|
||||
import Types
|
||||
import Types.Key
|
||||
import qualified Git
|
||||
import qualified Types.Remote as Remote
|
||||
import Git.Quote
|
||||
import Messages
|
||||
|
||||
import Data.Maybe
|
||||
import qualified Data.ByteString.Short as S (fromShort, toShort)
|
||||
|
||||
-- From a sha pointing to the content of a file to the key
|
||||
-- to use to export it. When the file is annexed, it's the annexed key.
|
||||
-- When the file is stored in git, it's a special type of key to indicate
|
||||
|
@ -31,31 +28,6 @@ exportKey sha = mk <$> catKey sha
|
|||
mk (Just k) = k
|
||||
mk Nothing = gitShaKey sha
|
||||
|
||||
-- Encodes a git sha as a key. This is used to represent a non-annexed
|
||||
-- file that is stored on a special remote, which necessarily needs a
|
||||
-- key.
|
||||
--
|
||||
-- This is not the same as a SHA1 key, because the mapping needs to be
|
||||
-- bijective, also because git may not always use SHA1, and because git
|
||||
-- takes a SHA1 of the file size + content, while git-annex SHA1 keys
|
||||
-- only checksum the content.
|
||||
gitShaKey :: Git.Sha -> Key
|
||||
gitShaKey (Git.Ref s) = mkKey $ \kd -> kd
|
||||
{ keyName = S.toShort s
|
||||
, keyVariety = OtherKey "GIT"
|
||||
}
|
||||
|
||||
-- Reverse of gitShaKey
|
||||
keyGitSha :: Key -> Maybe Git.Sha
|
||||
keyGitSha k
|
||||
| fromKey keyVariety k == OtherKey "GIT" =
|
||||
Just (Git.Ref (S.fromShort (fromKey keyName k)))
|
||||
| otherwise = Nothing
|
||||
|
||||
-- Is a key storing a git sha, and not used for an annexed file?
|
||||
isGitShaKey :: Key -> Bool
|
||||
isGitShaKey = isJust . keyGitSha
|
||||
|
||||
warnExportImportConflict :: Remote -> Annex ()
|
||||
warnExportImportConflict r = do
|
||||
isimport <- Remote.isImportSupported r
|
||||
|
|
41
Annex/GitShaKey.hs
Normal file
41
Annex/GitShaKey.hs
Normal file
|
@ -0,0 +1,41 @@
|
|||
{- Encoding a git sha as a Key
|
||||
-
|
||||
- Copyright 2017-2025 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Annex.GitShaKey where
|
||||
|
||||
import Types
|
||||
import Types.Key
|
||||
import qualified Git
|
||||
|
||||
import Data.Maybe
|
||||
import qualified Data.ByteString.Short as S (fromShort, toShort)
|
||||
|
||||
-- Encodes a git sha as a Key. This is used to represent a non-annexed
|
||||
-- file. For example, when storing a git sha on a special remote.
|
||||
--
|
||||
-- This is not the same as a SHA1 key, because the mapping needs to be
|
||||
-- bijective, also because git may not always use SHA1, and because git
|
||||
-- takes a SHA1 of the file size + content, while git-annex SHA1 keys
|
||||
-- only checksum the content.
|
||||
gitShaKey :: Git.Sha -> Key
|
||||
gitShaKey (Git.Ref s) = mkKey $ \kd -> kd
|
||||
{ keyName = S.toShort s
|
||||
, keyVariety = OtherKey "GIT"
|
||||
}
|
||||
|
||||
-- Reverse of gitShaKey
|
||||
keyGitSha :: Key -> Maybe Git.Sha
|
||||
keyGitSha k
|
||||
| fromKey keyVariety k == OtherKey "GIT" =
|
||||
Just (Git.Ref (S.fromShort (fromKey keyName k)))
|
||||
| otherwise = Nothing
|
||||
|
||||
-- Is a key storing a git sha, and not used for an annexed file?
|
||||
isGitShaKey :: Key -> Bool
|
||||
isGitShaKey = isJust . keyGitSha
|
|
@ -38,12 +38,12 @@ import qualified Annex
|
|||
import Annex.Link
|
||||
import Annex.LockFile
|
||||
import Annex.Content
|
||||
import Annex.Export
|
||||
import Annex.RemoteTrackingBranch
|
||||
import Annex.HashObject
|
||||
import Annex.Transfer
|
||||
import Annex.CheckIgnore
|
||||
import Annex.CatFile
|
||||
import Annex.GitShaKey
|
||||
import Annex.VectorClock
|
||||
import Annex.SpecialRemote.Config
|
||||
import Command
|
||||
|
@ -863,7 +863,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
|||
ia loc [cid] tmpfile
|
||||
(Left k)
|
||||
(combineMeterUpdate p' p)
|
||||
ok <- moveAnnex k af tmpfile
|
||||
ok <- moveAnnex k tmpfile
|
||||
when ok $
|
||||
logStatus NoLiveUpdate k InfoPresent
|
||||
return (Just (k, ok))
|
||||
|
@ -906,7 +906,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
|||
p
|
||||
case keyGitSha k of
|
||||
Nothing -> do
|
||||
ok <- moveAnnex k af tmpfile
|
||||
ok <- moveAnnex k tmpfile
|
||||
when ok $ do
|
||||
recordcidkey cidmap cid k
|
||||
logStatus NoLiveUpdate k InfoPresent
|
||||
|
|
|
@ -198,17 +198,11 @@ ingest' preferredbackend meterupdate (Just (LockedDown cfg source)) mk restage =
|
|||
| otherwise = gounlocked key mcache
|
||||
|
||||
golocked key mcache =
|
||||
tryNonAsync (moveAnnex key naf (contentLocation source)) >>= \case
|
||||
tryNonAsync (moveAnnex key (contentLocation source)) >>= \case
|
||||
Right True -> success key mcache
|
||||
Right False -> giveup "failed to add content to annex"
|
||||
Left e -> restoreFile (keyFilename source) key e
|
||||
|
||||
-- moveAnnex uses the AssociatedFile provided to it to unlock
|
||||
-- locked files when getting a file in an adjusted branch.
|
||||
-- That case does not apply here, where we're adding an unlocked
|
||||
-- file, so provide it nothing.
|
||||
naf = AssociatedFile Nothing
|
||||
|
||||
gounlocked key (Just cache) = do
|
||||
-- Remove temp directory hard link first because
|
||||
-- linkToAnnex falls back to copying if a file
|
||||
|
@ -377,7 +371,7 @@ addAnnexedFile matcher file key mtmp = ifM (addUnlocked matcher mi (isJust mtmp)
|
|||
stagePointerFile file mode =<< hashPointerFile key
|
||||
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
|
||||
case mtmp of
|
||||
Just tmp -> ifM (moveAnnex key af tmp)
|
||||
Just tmp -> ifM (moveAnnex key tmp)
|
||||
( linkunlocked mode >> return True
|
||||
, writepointer mode >> return False
|
||||
)
|
||||
|
@ -388,11 +382,10 @@ addAnnexedFile matcher file key mtmp = ifM (addUnlocked matcher mi (isJust mtmp)
|
|||
, do
|
||||
addSymlink file key Nothing
|
||||
case mtmp of
|
||||
Just tmp -> moveAnnex key af tmp
|
||||
Just tmp -> moveAnnex key tmp
|
||||
Nothing -> return True
|
||||
)
|
||||
where
|
||||
af = AssociatedFile (Just file)
|
||||
mi = case mtmp of
|
||||
Just tmp -> MatchingFile $ FileInfo
|
||||
{ contentFile = tmp
|
||||
|
|
|
@ -78,7 +78,7 @@ download r key f d witness =
|
|||
Just StallDetectionDisabled -> go Nothing
|
||||
Just sd -> runTransferrer sd r key f d Download witness
|
||||
where
|
||||
go sd = getViaTmp (Remote.retrievalSecurityPolicy r) vc key f Nothing $ \dest ->
|
||||
go sd = getViaTmp (Remote.retrievalSecurityPolicy r) vc key Nothing $ \dest ->
|
||||
download' (Remote.uuid r) key f sd d (go' dest) witness
|
||||
go' dest p = verifiedAction $
|
||||
Remote.retrieveKeyFile r key f dest p vc
|
||||
|
|
12
Backend.hs
12
Backend.hs
|
@ -10,13 +10,14 @@
|
|||
module Backend (
|
||||
builtinList,
|
||||
defaultBackend,
|
||||
defaultHashBackend,
|
||||
hashBackend,
|
||||
genKey,
|
||||
getBackend,
|
||||
chooseBackend,
|
||||
lookupBackendVariety,
|
||||
lookupBuiltinBackendVariety,
|
||||
maybeLookupBackendVariety,
|
||||
unknownBackendVarietyMessage,
|
||||
isStableKey,
|
||||
isCryptographicallySecureKey,
|
||||
isCryptographicallySecure,
|
||||
|
@ -54,6 +55,15 @@ defaultBackend = maybe cache return =<< Annex.getState Annex.backend
|
|||
valid name = not (null name)
|
||||
lookupname = lookupBackendVariety . parseKeyVariety . encodeBS
|
||||
|
||||
{- A hashing backend. Takes git config into account, but
|
||||
- guarantees the backend is cryptographically secure. -}
|
||||
hashBackend :: Annex Backend
|
||||
hashBackend = do
|
||||
db <- defaultBackend
|
||||
return $ if isCryptographicallySecure db
|
||||
then db
|
||||
else defaultHashBackend
|
||||
|
||||
{- Generates a key for a file. -}
|
||||
genKey :: KeySource -> MeterUpdate -> Backend -> Annex (Key, Backend)
|
||||
genKey source meterupdate b = case B.genKey b of
|
||||
|
|
|
@ -10,10 +10,8 @@ module Backend.VURL.Utilities where
|
|||
import Annex.Common
|
||||
import Types.Key
|
||||
import Types.Backend
|
||||
import Types.KeySource
|
||||
import Logs.EquivilantKeys
|
||||
import qualified Backend.Hash
|
||||
import Utility.Metered
|
||||
|
||||
migrateFromURLToVURL :: Key -> Backend -> AssociatedFile -> Bool -> Annex (Maybe Key)
|
||||
migrateFromURLToVURL oldkey newbackend _af inannex
|
||||
|
@ -41,18 +39,3 @@ migrateFromVURLToURL oldkey newbackend _af _
|
|||
(keyData oldkey)
|
||||
{ keyVariety = URLKey }
|
||||
| otherwise = return Nothing
|
||||
|
||||
-- The Backend must use a cryptographically secure hash.
|
||||
generateEquivilantKey :: Backend -> OsPath -> Annex (Maybe Key)
|
||||
generateEquivilantKey b f =
|
||||
case genKey b of
|
||||
Just genkey -> do
|
||||
showSideAction (UnquotedString Backend.Hash.descChecksum)
|
||||
Just <$> genkey source nullMeterUpdate
|
||||
Nothing -> return Nothing
|
||||
where
|
||||
source = KeySource
|
||||
{ keyFilename = mempty -- avoid adding any extension
|
||||
, contentLocation = f
|
||||
, inodeCache = Nothing
|
||||
}
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
git-annex (10.20250116) UNRELEASED; urgency=medium
|
||||
|
||||
* Added the compute special remote.
|
||||
* addcomputed: New command, adds a file that is generated by a compute
|
||||
special remote.
|
||||
* recompute: New command, recomputes computed files.
|
||||
* Support help.autocorrect settings "prompt", "never", and "immediate".
|
||||
* Allow setting remote.foo.annex-tracking-branch to a branch name
|
||||
that contains "/", as long as it's not a remote tracking branch.
|
||||
|
|
|
@ -133,6 +133,8 @@ import qualified Command.ExtendCluster
|
|||
import qualified Command.UpdateProxy
|
||||
import qualified Command.MaxSize
|
||||
import qualified Command.Sim
|
||||
import qualified Command.AddComputed
|
||||
import qualified Command.Recompute
|
||||
import qualified Command.Version
|
||||
import qualified Command.RemoteDaemon
|
||||
#ifdef WITH_ASSISTANT
|
||||
|
@ -265,6 +267,8 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
|
|||
, Command.UpdateProxy.cmd
|
||||
, Command.MaxSize.cmd
|
||||
, Command.Sim.cmd
|
||||
, Command.AddComputed.cmd
|
||||
, Command.Recompute.cmd
|
||||
, Command.Version.cmd
|
||||
, Command.RemoteDaemon.cmd
|
||||
#ifdef WITH_ASSISTANT
|
||||
|
|
|
@ -927,7 +927,7 @@ downloadGitBundle rmt k = getKeyExportLocations rmt k >>= \case
|
|||
|
||||
getexport loc = catchNonAsync (getexport' loc) (const (pure False))
|
||||
getexport' loc =
|
||||
getViaTmp rsp vc k (AssociatedFile Nothing) Nothing $ \tmp -> do
|
||||
getViaTmp rsp vc k Nothing $ \tmp -> do
|
||||
v <- Remote.retrieveExport (Remote.exportActions rmt)
|
||||
k loc tmp nullMeterUpdate
|
||||
return (True, v)
|
||||
|
@ -986,7 +986,7 @@ generateGitBundle rmt bs manifest =
|
|||
tmp nullMeterUpdate
|
||||
if (bundlekey `notElem` inManifest manifest)
|
||||
then do
|
||||
unlessM (moveAnnex bundlekey (AssociatedFile Nothing) tmp) $
|
||||
unlessM (moveAnnex bundlekey tmp) $
|
||||
giveup "Unable to push"
|
||||
return (bundlekey, uploadaction bundlekey)
|
||||
else return (bundlekey, noop)
|
||||
|
|
236
Command/AddComputed.hs
Normal file
236
Command/AddComputed.hs
Normal file
|
@ -0,0 +1,236 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2025 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Command.AddComputed where
|
||||
|
||||
import Command
|
||||
import qualified Git
|
||||
import qualified Git.Types as Git
|
||||
import qualified Git.Ref as Git
|
||||
import qualified Annex
|
||||
import qualified Remote.Compute
|
||||
import qualified Types.Remote as Remote
|
||||
import Backend
|
||||
import Annex.CatFile
|
||||
import Annex.Content.Presence
|
||||
import Annex.Ingest
|
||||
import Annex.UUID
|
||||
import Annex.GitShaKey
|
||||
import Types.KeySource
|
||||
import Types.Key
|
||||
import Messages.Progress
|
||||
import Logs.Location
|
||||
import Logs.EquivilantKeys
|
||||
import Utility.Metered
|
||||
import Backend.URL (fromUrl)
|
||||
|
||||
import qualified Data.Map as M
|
||||
import Data.Time.Clock
|
||||
|
||||
cmd :: Command
|
||||
cmd = notBareRepo $ withAnnexOptions [backendOption] $
|
||||
command "addcomputed" SectionCommon "add computed files to annex"
|
||||
(paramRepeating paramExpression)
|
||||
(seek <$$> optParser)
|
||||
|
||||
data AddComputedOptions = AddComputedOptions
|
||||
{ computeParams :: CmdParams
|
||||
, computeRemote :: DeferredParse Remote
|
||||
, reproducible :: Maybe Reproducible
|
||||
}
|
||||
|
||||
optParser :: CmdParamsDesc -> Parser AddComputedOptions
|
||||
optParser desc = AddComputedOptions
|
||||
<$> cmdParams desc
|
||||
<*> (mkParseRemoteOption <$> parseToOption)
|
||||
<*> parseReproducible
|
||||
|
||||
newtype Reproducible = Reproducible { isReproducible :: Bool }
|
||||
|
||||
parseReproducible :: Parser (Maybe Reproducible)
|
||||
parseReproducible = r <|> unr
|
||||
where
|
||||
r = flag Nothing (Just (Reproducible True))
|
||||
( long "reproducible"
|
||||
<> short 'r'
|
||||
<> help "computation is fully reproducible"
|
||||
)
|
||||
unr = flag Nothing (Just (Reproducible False))
|
||||
( long "unreproducible"
|
||||
<> short 'u'
|
||||
<> help "computation is not fully reproducible"
|
||||
)
|
||||
|
||||
seek :: AddComputedOptions -> CommandSeek
|
||||
seek o = startConcurrency commandStages (seek' o)
|
||||
|
||||
seek' :: AddComputedOptions -> CommandSeek
|
||||
seek' o = do
|
||||
r <- getParsed (computeRemote o)
|
||||
unless (Remote.Compute.isComputeRemote r) $
|
||||
giveup "That is not a compute remote."
|
||||
|
||||
commandAction $ start o r
|
||||
|
||||
start :: AddComputedOptions -> Remote -> CommandStart
|
||||
start o r = starting "addcomputed" ai si $ perform o r
|
||||
where
|
||||
ai = ActionItemUUID (Remote.uuid r) (UnquotedString (Remote.name r))
|
||||
si = SeekInput (computeParams o)
|
||||
|
||||
perform :: AddComputedOptions -> Remote -> CommandPerform
|
||||
perform o r = do
|
||||
program <- Remote.Compute.getComputeProgram r
|
||||
repopath <- fromRepo Git.repoPath
|
||||
subdir <- liftIO $ relPathDirToFile repopath (literalOsPath ".")
|
||||
let state = Remote.Compute.ComputeState
|
||||
{ Remote.Compute.computeParams = computeParams o ++
|
||||
Remote.Compute.defaultComputeParams r
|
||||
, Remote.Compute.computeInputs = mempty
|
||||
, Remote.Compute.computeOutputs = mempty
|
||||
, Remote.Compute.computeSubdir = subdir
|
||||
}
|
||||
fast <- Annex.getRead Annex.fast
|
||||
Remote.Compute.runComputeProgram program state
|
||||
(Remote.Compute.ImmutableState False)
|
||||
(getInputContent fast)
|
||||
Nothing
|
||||
(addComputed (Just "adding") True r (reproducible o) chooseBackend Just fast)
|
||||
next $ return True
|
||||
|
||||
addComputed
|
||||
:: Maybe StringContainingQuotedPath
|
||||
-> Bool
|
||||
-> Remote
|
||||
-> Maybe Reproducible
|
||||
-> (OsPath -> Annex Backend)
|
||||
-> (OsPath -> Maybe OsPath)
|
||||
-> Bool
|
||||
-> Remote.Compute.ComputeProgramResult
|
||||
-> OsPath
|
||||
-> NominalDiffTime
|
||||
-> Annex ()
|
||||
addComputed maddaction stagefiles r reproducibleconfig choosebackend destfile fast result tmpdir ts = do
|
||||
when (M.null outputs) $
|
||||
giveup "The computation succeeded, but it did not generate any files."
|
||||
oks <- forM (M.keys outputs) $ \outputfile -> do
|
||||
case maddaction of
|
||||
Just addaction -> showAction $
|
||||
addaction <> " " <> QuotedPath outputfile
|
||||
Nothing -> noop
|
||||
k <- catchNonAsync (addfile outputfile)
|
||||
(\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err)
|
||||
return (outputfile, Just k)
|
||||
let state' = state
|
||||
{ Remote.Compute.computeOutputs = M.fromList oks
|
||||
}
|
||||
forM_ (mapMaybe snd oks) $ \k -> do
|
||||
Remote.Compute.setComputeState
|
||||
(Remote.remoteStateHandle r)
|
||||
k ts state'
|
||||
|
||||
let u = Remote.uuid r
|
||||
unlessM (elem u <$> loggedLocations k) $
|
||||
logChange NoLiveUpdate k u InfoPresent
|
||||
where
|
||||
state = Remote.Compute.computeState result
|
||||
|
||||
outputs = Remote.Compute.computeOutputs state
|
||||
|
||||
addfile outputfile
|
||||
| fast = do
|
||||
case destfile outputfile of
|
||||
Nothing -> noop
|
||||
Just f
|
||||
| stagefiles -> addSymlink f stateurlk Nothing
|
||||
| otherwise -> makelink f stateurlk
|
||||
return stateurlk
|
||||
| isreproducible = do
|
||||
sz <- liftIO $ getFileSize outputfile'
|
||||
metered Nothing sz Nothing $ \_ p ->
|
||||
case destfile outputfile of
|
||||
Just f -> ingesthelper f p Nothing
|
||||
Nothing -> genkey outputfile p
|
||||
| otherwise = case destfile outputfile of
|
||||
Just f -> ingesthelper f nullMeterUpdate
|
||||
(Just stateurlk)
|
||||
Nothing -> return stateurlk
|
||||
where
|
||||
stateurl = Remote.Compute.computeStateUrl r state outputfile
|
||||
stateurlk = fromUrl stateurl Nothing True
|
||||
outputfile' = tmpdir </> outputfile
|
||||
ld f = LockedDown ldc (ks f)
|
||||
ks f = KeySource
|
||||
{ keyFilename = f
|
||||
, contentLocation = outputfile'
|
||||
, inodeCache = Nothing
|
||||
}
|
||||
genkey f p = do
|
||||
backend <- choosebackend outputfile
|
||||
fst <$> genKey (ks f) p backend
|
||||
makelink f k = void $ makeLink f k Nothing
|
||||
ingesthelper f p mk
|
||||
| stagefiles = ingestwith $ do
|
||||
k <- maybe (genkey f p) return mk
|
||||
ingestAdd' p (Just (ld f)) (Just k)
|
||||
| otherwise = ingestwith $ do
|
||||
k <- maybe (genkey f p) return mk
|
||||
mk' <- fst <$> ingest p (Just (ld f)) (Just k)
|
||||
maybe noop (makelink f) mk'
|
||||
return mk'
|
||||
ingestwith a = a >>= \case
|
||||
Nothing -> giveup "ingestion failed"
|
||||
Just k -> do
|
||||
u <- getUUID
|
||||
unlessM (elem u <$> loggedLocations k) $
|
||||
logStatus NoLiveUpdate k InfoPresent
|
||||
when (fromKey keyVariety k == VURLKey) $ do
|
||||
hb <- hashBackend
|
||||
void $ addEquivilantKey hb k
|
||||
=<< calcRepo (gitAnnexLocation k)
|
||||
return k
|
||||
|
||||
ldc = LockDownConfig
|
||||
{ lockingFile = True
|
||||
, hardlinkFileTmpDir = Nothing
|
||||
, checkWritePerms = True
|
||||
}
|
||||
|
||||
isreproducible = case reproducibleconfig of
|
||||
Just v -> isReproducible v
|
||||
Nothing -> Remote.Compute.computeReproducible result
|
||||
|
||||
getInputContent :: Bool -> OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath))
|
||||
getInputContent fast p = catKeyFile p >>= \case
|
||||
Just inputkey -> getInputContent' fast inputkey filedesc
|
||||
Nothing -> inRepo (Git.fileRef p) >>= \case
|
||||
Just fileref -> catObjectMetaData fileref >>= \case
|
||||
Just (sha, _, t)
|
||||
| t == Git.BlobObject ->
|
||||
getInputContent' fast (gitShaKey sha) filedesc
|
||||
| otherwise ->
|
||||
badinput $ ", not a git " ++ decodeBS (Git.fmtObjectType t)
|
||||
Nothing -> notcheckedin
|
||||
Nothing -> notcheckedin
|
||||
where
|
||||
filedesc = fromOsPath p
|
||||
badinput s = giveup $ "The computation needs an input file " ++ s ++ ": " ++ fromOsPath p
|
||||
notcheckedin = badinput "that is not checked into the git repository"
|
||||
|
||||
getInputContent' :: Bool -> Key -> String -> Annex (Key, Maybe (Either Git.Sha OsPath))
|
||||
getInputContent' fast inputkey filedesc
|
||||
| fast = return (inputkey, Nothing)
|
||||
| otherwise = case keyGitSha inputkey of
|
||||
Nothing -> ifM (inAnnex inputkey)
|
||||
( do
|
||||
obj <- calcRepo (gitAnnexLocation inputkey)
|
||||
return (inputkey, Just (Right obj))
|
||||
, giveup $ "The computation needs the content of an annexed file which is not present: " ++ filedesc
|
||||
)
|
||||
Just sha -> return (inputkey, Just (Left sha))
|
|
@ -26,6 +26,7 @@ import Types.Remote
|
|||
import Types.Export
|
||||
import Annex.Export
|
||||
import Annex.Content
|
||||
import Annex.GitShaKey
|
||||
import Annex.Transfer
|
||||
import Annex.CatFile
|
||||
import Annex.FileMatcher
|
||||
|
|
|
@ -213,7 +213,7 @@ storeReceived f = do
|
|||
warning $ "Received a file " <> QuotedPath f <> " that is not a git-annex key. Deleting this file."
|
||||
liftIO $ removeWhenExistsWith removeFile f
|
||||
Just k -> void $ logStatusAfter NoLiveUpdate k $
|
||||
getViaTmpFromDisk RetrievalVerifiableKeysSecure AlwaysVerify k (AssociatedFile Nothing) $ \dest -> unVerified $
|
||||
getViaTmpFromDisk RetrievalVerifiableKeysSecure AlwaysVerify k $ \dest -> unVerified $
|
||||
liftIO $ catchBoolIO $ do
|
||||
renameFile f dest
|
||||
return True
|
||||
|
|
|
@ -128,7 +128,7 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file)
|
|||
- and vulnerable to corruption. -}
|
||||
linkKey' :: VerifyConfig -> Key -> Key -> Annex Bool
|
||||
linkKey' v oldkey newkey =
|
||||
getViaTmpFromDisk RetrievalAllKeysSecure v newkey (AssociatedFile Nothing) $ \tmp -> unVerified $ do
|
||||
getViaTmpFromDisk RetrievalAllKeysSecure v newkey $ \tmp -> unVerified $ do
|
||||
oldobj <- calcRepo (gitAnnexLocation oldkey)
|
||||
isJust <$> linkOrCopy' (return True) newkey oldobj tmp Nothing
|
||||
|
||||
|
|
209
Command/Recompute.hs
Normal file
209
Command/Recompute.hs
Normal file
|
@ -0,0 +1,209 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2025 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Command.Recompute where
|
||||
|
||||
import Command
|
||||
import qualified Remote.Compute
|
||||
import qualified Remote
|
||||
import qualified Types.Remote as Remote
|
||||
import qualified Git.Ref as Git
|
||||
import Annex.Content
|
||||
import Annex.UUID
|
||||
import Annex.CatFile
|
||||
import Annex.GitShaKey
|
||||
import Git.FilePath
|
||||
import Logs.Location
|
||||
import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent, getInputContent', addComputed)
|
||||
import Backend (maybeLookupBackendVariety, unknownBackendVarietyMessage)
|
||||
import Types.Key
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
||||
cmd :: Command
|
||||
cmd = notBareRepo $
|
||||
command "recompute" SectionCommon "recompute computed files"
|
||||
paramPaths (seek <$$> optParser)
|
||||
|
||||
data RecomputeOptions = RecomputeOptions
|
||||
{ recomputeThese :: CmdParams
|
||||
, originalOption :: Bool
|
||||
, reproducible :: Maybe Reproducible
|
||||
, computeRemote :: Maybe (DeferredParse Remote)
|
||||
}
|
||||
|
||||
optParser :: CmdParamsDesc -> Parser RecomputeOptions
|
||||
optParser desc = RecomputeOptions
|
||||
<$> cmdParams desc
|
||||
<*> switch
|
||||
( long "original"
|
||||
<> help "recompute using original content of input files"
|
||||
)
|
||||
<*> parseReproducible
|
||||
<*> optional (mkParseRemoteOption <$> parseRemoteOption)
|
||||
|
||||
seek :: RecomputeOptions -> CommandSeek
|
||||
seek o = startConcurrency commandStages (seek' o)
|
||||
|
||||
seek' :: RecomputeOptions -> CommandSeek
|
||||
seek' o = do
|
||||
computeremote <- maybe (pure Nothing) (Just <$$> getParsed)
|
||||
(computeRemote o)
|
||||
let seeker = AnnexedFileSeeker
|
||||
{ startAction = const $ start o computeremote
|
||||
, checkContentPresent = Nothing
|
||||
, usesLocationLog = True
|
||||
}
|
||||
withFilesInGitAnnex ww seeker
|
||||
=<< workTreeItems ww (recomputeThese o)
|
||||
where
|
||||
ww = WarnUnmatchLsFiles "recompute"
|
||||
|
||||
start :: RecomputeOptions -> Maybe Remote -> SeekInput -> OsPath -> Key -> CommandStart
|
||||
start o (Just computeremote) si file key =
|
||||
stopUnless (elem (Remote.uuid computeremote) <$> loggedLocations key) $
|
||||
start' o computeremote si file key
|
||||
start o Nothing si file key = do
|
||||
rs <- catMaybes <$> (mapM Remote.byUUID =<< loggedLocations key)
|
||||
case sortOn Remote.cost $ filter Remote.Compute.isComputeRemote rs of
|
||||
[] -> stop
|
||||
(r:_) -> start' o r si file key
|
||||
|
||||
start' :: RecomputeOptions -> Remote -> SeekInput -> OsPath -> Key -> CommandStart
|
||||
start' o r si file key =
|
||||
Remote.Compute.getComputeState
|
||||
(Remote.remoteStateHandle r) key >>= \case
|
||||
Nothing -> stop
|
||||
Just state -> shouldrecompute state >>= \case
|
||||
Nothing -> stop
|
||||
Just mreason -> starting "recompute" ai si $ do
|
||||
maybe noop showNote mreason
|
||||
perform o r file key state
|
||||
where
|
||||
ai = mkActionItem (key, file)
|
||||
|
||||
shouldrecompute state
|
||||
| originalOption o = return (Just Nothing)
|
||||
| otherwise = firstM (inputchanged state)
|
||||
(M.toList (Remote.Compute.computeInputs state))
|
||||
>>= return . \case
|
||||
Nothing -> Nothing
|
||||
Just (inputfile, _) -> Just $ Just $
|
||||
QuotedPath inputfile <> " changed"
|
||||
|
||||
inputchanged state (inputfile, inputkey) = do
|
||||
-- Note that the paths from the remote state are not to be
|
||||
-- trusted to point to a file in the repository, but using
|
||||
-- the path with git cat-file will only succeed if it
|
||||
-- is checked into the repository.
|
||||
p <- fromRepo $ fromTopFilePath $ asTopFilePath $
|
||||
Remote.Compute.computeSubdir state </> inputfile
|
||||
case keyGitSha inputkey of
|
||||
Nothing ->
|
||||
catKeyFile p >>= return . \case
|
||||
Just k -> k /= inputkey
|
||||
Nothing -> inputfilemissing
|
||||
Just inputgitsha -> inRepo (Git.fileRef p) >>= \case
|
||||
Just fileref -> catObjectMetaData fileref >>= return . \case
|
||||
Just (sha, _, _) -> sha /= inputgitsha
|
||||
Nothing -> inputfilemissing
|
||||
Nothing -> return inputfilemissing
|
||||
where
|
||||
-- When an input file is missing, go ahead and recompute.
|
||||
-- This way, the user will see the computation fail,
|
||||
-- with an error message that explains the problem.
|
||||
-- Or, if the input file is only optionally used by the
|
||||
-- computation, it might succeed.
|
||||
inputfilemissing = True
|
||||
|
||||
perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform
|
||||
perform o r file origkey origstate = do
|
||||
program <- Remote.Compute.getComputeProgram r
|
||||
reproducibleconfig <- getreproducibleconfig
|
||||
showOutput
|
||||
Remote.Compute.runComputeProgram program origstate
|
||||
(Remote.Compute.ImmutableState False)
|
||||
(getinputcontent program)
|
||||
Nothing
|
||||
(go program reproducibleconfig)
|
||||
next cleanup
|
||||
where
|
||||
go program reproducibleconfig result tmpdir ts = do
|
||||
checkbehaviorchange program
|
||||
(Remote.Compute.computeState result)
|
||||
addComputed Nothing False r reproducibleconfig
|
||||
choosebackend destfile False result tmpdir ts
|
||||
|
||||
checkbehaviorchange program state = do
|
||||
let check s w a b = forM_ (M.keys (w a)) $ \f ->
|
||||
unless (M.member f (w b)) $
|
||||
Remote.Compute.computationBehaviorChangeError program s f
|
||||
|
||||
check "not using input file"
|
||||
Remote.Compute.computeInputs origstate state
|
||||
check "outputting"
|
||||
Remote.Compute.computeOutputs state origstate
|
||||
check "not outputting"
|
||||
Remote.Compute.computeOutputs origstate state
|
||||
|
||||
getinputcontent program p
|
||||
| originalOption o =
|
||||
case M.lookup p (Remote.Compute.computeInputs origstate) of
|
||||
Just inputkey -> getInputContent' False inputkey
|
||||
(fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")")
|
||||
Nothing -> Remote.Compute.computationBehaviorChangeError program
|
||||
"requesting a new input file" p
|
||||
| otherwise = getInputContent False p
|
||||
|
||||
destfile outputfile
|
||||
| Just outputfile == origfile = Just file
|
||||
| otherwise = Nothing
|
||||
|
||||
origfile = headMaybe $ M.keys $ M.filter (== Just origkey)
|
||||
(Remote.Compute.computeOutputs origstate)
|
||||
|
||||
origbackendvariety = fromKey keyVariety origkey
|
||||
|
||||
recomputingvurl = case origbackendvariety of
|
||||
VURLKey -> True
|
||||
_ -> False
|
||||
|
||||
getreproducibleconfig = case reproducible o of
|
||||
Just (Reproducible True) -> return (Just (Reproducible True))
|
||||
-- A VURL key is used when the computation was
|
||||
-- unreproducible. So recomputing should too, but that
|
||||
-- will result in the same VURL key. Since moveAnnex
|
||||
-- will prefer the current annex object to a new one,
|
||||
-- delete the annex object first, so that if recomputing
|
||||
-- generates a new version of the file, it replaces
|
||||
-- the old version.
|
||||
v -> if recomputingvurl
|
||||
then do
|
||||
lockContentForRemoval origkey noop removeAnnex
|
||||
return (Just (Reproducible False))
|
||||
else return v
|
||||
|
||||
cleanup = do
|
||||
case reproducible o of
|
||||
Just (Reproducible True) -> noop
|
||||
-- in case computation failed, update
|
||||
-- location log for removal done earlier
|
||||
_ -> when recomputingvurl $ do
|
||||
u <- getUUID
|
||||
unlessM (elem u <$> loggedLocations origkey) $
|
||||
logStatus NoLiveUpdate origkey InfoMissing
|
||||
return True
|
||||
|
||||
choosebackend _outputfile
|
||||
-- Use the same backend as was used to compute it before,
|
||||
-- so if the computed file is the same, there will be
|
||||
-- no change.
|
||||
| otherwise = maybeLookupBackendVariety origbackendvariety >>= \case
|
||||
Just b -> return b
|
||||
Nothing -> giveup $ unknownBackendVarietyMessage origbackendvariety
|
|
@ -28,7 +28,7 @@ start :: (SeekInput, Key) -> CommandStart
|
|||
start (_, key) = fieldTransfer Download key $ \_p -> do
|
||||
-- This matches the retrievalSecurityPolicy of Remote.Git
|
||||
let rsp = RetrievalAllKeysSecure
|
||||
ifM (getViaTmp rsp DefaultVerify key (AssociatedFile Nothing) Nothing go)
|
||||
ifM (getViaTmp rsp DefaultVerify key Nothing go)
|
||||
( do
|
||||
logStatus NoLiveUpdate key InfoPresent
|
||||
_ <- quiesce True
|
||||
|
|
|
@ -129,7 +129,7 @@ perform src key = do
|
|||
)
|
||||
where
|
||||
move = checkDiskSpaceToGet key Nothing False $
|
||||
moveAnnex key (AssociatedFile Nothing) src
|
||||
moveAnnex key src
|
||||
|
||||
cleanup :: Key -> CommandCleanup
|
||||
cleanup key = do
|
||||
|
|
|
@ -36,7 +36,7 @@ perform file key = do
|
|||
-- the file might be on a different filesystem, so moveFile is used
|
||||
-- rather than simply calling moveAnnex; disk space is also
|
||||
-- checked this way.
|
||||
ok <- getViaTmp RetrievalAllKeysSecure DefaultVerify key (AssociatedFile Nothing) Nothing $ \dest -> unVerified $
|
||||
ok <- getViaTmp RetrievalAllKeysSecure DefaultVerify key Nothing $ \dest -> unVerified $
|
||||
if dest /= file
|
||||
then liftIO $ catchBoolIO $ do
|
||||
moveFile file dest
|
||||
|
|
|
@ -301,7 +301,7 @@ test runannex mkr mkk =
|
|||
Just verifier -> do
|
||||
loc <- Annex.calcRepo (gitAnnexLocation k)
|
||||
verifier k loc
|
||||
get r k = logStatusAfter NoLiveUpdate k $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k (AssociatedFile Nothing) Nothing $ \dest ->
|
||||
get r k = logStatusAfter NoLiveUpdate k $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k Nothing $ \dest ->
|
||||
tryNonAsync (Remote.retrieveKeyFile r k (AssociatedFile Nothing) dest nullMeterUpdate (RemoteVerify r)) >>= \case
|
||||
Right v -> return (True, v)
|
||||
Left _ -> return (False, UnVerified)
|
||||
|
@ -375,13 +375,13 @@ testUnavailable runannex mkr mkk =
|
|||
, check (`notElem` [Right True, Right False]) "checkPresent" $ \r k ->
|
||||
Remote.checkPresent r k
|
||||
, check (== Right False) "retrieveKeyFile" $ \r k ->
|
||||
logStatusAfter NoLiveUpdate k $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k (AssociatedFile Nothing) Nothing $ \dest ->
|
||||
logStatusAfter NoLiveUpdate k $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k Nothing $ \dest ->
|
||||
tryNonAsync (Remote.retrieveKeyFile r k (AssociatedFile Nothing) dest nullMeterUpdate (RemoteVerify r)) >>= \case
|
||||
Right v -> return (True, v)
|
||||
Left _ -> return (False, UnVerified)
|
||||
, check (== Right False) "retrieveKeyFileCheap" $ \r k -> case Remote.retrieveKeyFileCheap r of
|
||||
Nothing -> return False
|
||||
Just a -> logStatusAfter NoLiveUpdate k $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k (AssociatedFile Nothing) Nothing $ \dest ->
|
||||
Just a -> logStatusAfter NoLiveUpdate k $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k Nothing $ \dest ->
|
||||
unVerified $ isRight
|
||||
<$> tryNonAsync (a k (AssociatedFile Nothing) dest)
|
||||
]
|
||||
|
@ -443,7 +443,7 @@ randKey sz = withTmpFile (literalOsPath "randkey") $ \f h -> do
|
|||
k <- case Types.Backend.genKey Backend.Hash.testKeyBackend of
|
||||
Just a -> a ks nullMeterUpdate
|
||||
Nothing -> giveup "failed to generate random key (backend problem)"
|
||||
_ <- moveAnnex k (AssociatedFile Nothing) f
|
||||
_ <- moveAnnex k f
|
||||
return k
|
||||
|
||||
getReadonlyKey :: Remote -> OsPath -> Annex Key
|
||||
|
|
|
@ -63,7 +63,7 @@ toPerform key af remote = go Upload af $
|
|||
fromPerform :: Key -> AssociatedFile -> Remote -> CommandPerform
|
||||
fromPerform key af remote = go Upload af $
|
||||
download' (uuid remote) key af Nothing stdRetry $ \p ->
|
||||
logStatusAfter NoLiveUpdate key $ getViaTmp (retrievalSecurityPolicy remote) vc key af Nothing $ \t ->
|
||||
logStatusAfter NoLiveUpdate key $ getViaTmp (retrievalSecurityPolicy remote) vc key Nothing $ \t ->
|
||||
tryNonAsync (Remote.retrieveKeyFile remote key af t p vc) >>= \case
|
||||
Right v -> return (True, v)
|
||||
Left e -> do
|
||||
|
|
|
@ -50,7 +50,7 @@ start = do
|
|||
return True
|
||||
| otherwise = notifyTransfer direction af $
|
||||
download' (Remote.uuid remote) key af Nothing stdRetry $ \p ->
|
||||
logStatusAfter NoLiveUpdate key $ getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key af Nothing $ \t -> do
|
||||
logStatusAfter NoLiveUpdate key $ getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key Nothing $ \t -> do
|
||||
r <- tryNonAsync (Remote.retrieveKeyFile remote key af t p (RemoteVerify remote)) >>= \case
|
||||
Left e -> do
|
||||
warning (UnquotedString (show e))
|
||||
|
|
|
@ -55,7 +55,7 @@ start = do
|
|||
-- so caller is responsible for doing notification
|
||||
-- and for retrying, and updating location log,
|
||||
-- and stall canceling.
|
||||
let go p = getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key af Nothing $ \t -> do
|
||||
let go p = getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key Nothing $ \t -> do
|
||||
Remote.verifiedAction (Remote.retrieveKeyFile remote key af t p (RemoteVerify remote))
|
||||
in download' (Remote.uuid remote) key af Nothing noRetry go
|
||||
noNotification
|
||||
|
@ -72,7 +72,7 @@ start = do
|
|||
runner (AssistantDownloadRequest _ key (TransferAssociatedFile file)) remote =
|
||||
notifyTransfer Download file $
|
||||
download' (Remote.uuid remote) key file Nothing stdRetry $ \p ->
|
||||
logStatusAfter NoLiveUpdate key $ getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key file Nothing $ \t -> do
|
||||
logStatusAfter NoLiveUpdate key $ getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key Nothing $ \t -> do
|
||||
r <- tryNonAsync (Remote.retrieveKeyFile remote key file t p (RemoteVerify remote)) >>= \case
|
||||
Left e -> do
|
||||
warning (UnquotedString (show e))
|
||||
|
|
|
@ -145,7 +145,7 @@ newtype RefDate = RefDate String
|
|||
|
||||
{- Types of objects that can be stored in git. -}
|
||||
data ObjectType = BlobObject | CommitObject | TreeObject
|
||||
deriving (Show)
|
||||
deriving (Show, Eq)
|
||||
|
||||
readObjectType :: S.ByteString -> Maybe ObjectType
|
||||
readObjectType "blob" = Just BlobObject
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- Logs listing keys that are equivalent to a key.
|
||||
-
|
||||
- Copyright 2024 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2024-2025 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -10,6 +10,9 @@
|
|||
module Logs.EquivilantKeys (
|
||||
getEquivilantKeys,
|
||||
setEquivilantKey,
|
||||
updateEquivilantKeys,
|
||||
addEquivilantKey,
|
||||
generateEquivilantKey,
|
||||
) where
|
||||
|
||||
import Annex.Common
|
||||
|
@ -17,6 +20,11 @@ import qualified Annex
|
|||
import Logs
|
||||
import Logs.Presence
|
||||
import qualified Annex.Branch
|
||||
import qualified Backend.Hash
|
||||
import Types.KeySource
|
||||
import Types.Backend
|
||||
import Types.Remote (Verification(..))
|
||||
import Utility.Metered
|
||||
|
||||
getEquivilantKeys :: Key -> Annex [Key]
|
||||
getEquivilantKeys key = do
|
||||
|
@ -29,3 +37,34 @@ setEquivilantKey key equivkey = do
|
|||
config <- Annex.getGitConfig
|
||||
addLog (Annex.Branch.RegardingUUID []) (equivilantKeysLogFile config key)
|
||||
InfoPresent (LogInfo (serializeKey' equivkey))
|
||||
|
||||
-- This returns Verified when when an equivilant key has been added to the
|
||||
-- log (or was already in the log). This is to avoid hashing the object
|
||||
-- again later.
|
||||
updateEquivilantKeys :: Backend -> OsPath -> Key -> [Key] -> Annex (Maybe Verification)
|
||||
updateEquivilantKeys b obj key eks = generateEquivilantKey b obj >>= \case
|
||||
Nothing -> return Nothing
|
||||
Just ek -> do
|
||||
unless (ek `elem` eks) $
|
||||
setEquivilantKey key ek
|
||||
return (Just Verified)
|
||||
|
||||
addEquivilantKey :: Backend -> Key -> OsPath -> Annex (Maybe Verification)
|
||||
addEquivilantKey b key obj =
|
||||
updateEquivilantKeys b obj key
|
||||
=<< getEquivilantKeys key
|
||||
|
||||
-- The Backend must use a cryptographically secure hash.
|
||||
generateEquivilantKey :: Backend -> OsPath -> Annex (Maybe Key)
|
||||
generateEquivilantKey b obj =
|
||||
case genKey b of
|
||||
Just genkey -> do
|
||||
showSideAction (UnquotedString Backend.Hash.descChecksum)
|
||||
Just <$> genkey source nullMeterUpdate
|
||||
Nothing -> return Nothing
|
||||
where
|
||||
source = KeySource
|
||||
{ keyFilename = mempty -- avoid adding any extension
|
||||
, contentLocation = obj
|
||||
, inodeCache = Nothing
|
||||
}
|
||||
|
|
|
@ -81,7 +81,7 @@ runLocal runst runner a = case a of
|
|||
iv <- startVerifyKeyContentIncrementally DefaultVerify k
|
||||
let runtransfer ti =
|
||||
Right <$> transfer download' k af Nothing (\p ->
|
||||
logStatusAfter NoLiveUpdate k $ getViaTmp rsp DefaultVerify k af Nothing $ \tmp ->
|
||||
logStatusAfter NoLiveUpdate k $ getViaTmp rsp DefaultVerify k Nothing $ \tmp ->
|
||||
storefile tmp o l getb iv validitycheck p ti)
|
||||
let fallback = return $ Left $
|
||||
ProtoFailureMessage "transfer already in progress, or unable to take transfer lock"
|
||||
|
|
36
Remote.hs
36
Remote.hs
|
@ -319,22 +319,11 @@ remoteFromUUID u = ifM ((==) u <$> getUUID)
|
|||
remotesChanged
|
||||
findinmap
|
||||
|
||||
{- Filters a list of remotes to ones that have the listed uuids. -}
|
||||
remotesWithUUID :: [Remote] -> [UUID] -> [Remote]
|
||||
remotesWithUUID rs us = filter (\r -> uuid r `elem` us) rs
|
||||
|
||||
{- Filters a list of remotes to ones that do not have the listed uuids. -}
|
||||
remotesWithoutUUID :: [Remote] -> [UUID] -> [Remote]
|
||||
remotesWithoutUUID rs us = filter (\r -> uuid r `notElem` us) rs
|
||||
|
||||
{- List of repository UUIDs that the location log indicates may have a key.
|
||||
- Dead repositories are excluded. -}
|
||||
keyLocations :: Key -> Annex [UUID]
|
||||
keyLocations key = trustExclude DeadTrusted =<< loggedLocations key
|
||||
|
||||
{- Whether to include remotes that have annex-ignore set. -}
|
||||
newtype IncludeIgnored = IncludeIgnored Bool
|
||||
|
||||
{- Cost ordered lists of remotes that the location log indicates
|
||||
- may have a key.
|
||||
-
|
||||
|
@ -342,33 +331,16 @@ newtype IncludeIgnored = IncludeIgnored Bool
|
|||
-}
|
||||
keyPossibilities :: IncludeIgnored -> Key -> Annex [Remote]
|
||||
keyPossibilities ii key = do
|
||||
u <- getUUID
|
||||
-- uuids of all remotes that are recorded to have the key
|
||||
locations <- filter (/= u) <$> keyLocations key
|
||||
speclocations <- map uuid
|
||||
. filter (remoteAnnexSpeculatePresent . gitconfig)
|
||||
<$> remoteList
|
||||
-- there are unlikely to be many speclocations, so building a Set
|
||||
-- is not worth the expense
|
||||
let locations' = speclocations ++ filter (`notElem` speclocations) locations
|
||||
fst <$> remoteLocations ii locations' []
|
||||
locations <- keyLocations key
|
||||
keyPossibilities' ii key locations =<< remoteList
|
||||
|
||||
{- Given a list of locations of a key, and a list of all
|
||||
- trusted repositories, generates a cost-ordered list of
|
||||
- remotes that contain the key, and a list of trusted locations of the key.
|
||||
-}
|
||||
remoteLocations :: IncludeIgnored -> [UUID] -> [UUID] -> Annex ([Remote], [UUID])
|
||||
remoteLocations (IncludeIgnored ii) locations trusted = do
|
||||
let validtrustedlocations = nub locations `intersect` trusted
|
||||
|
||||
-- remotes that match uuids that have the key
|
||||
allremotes <- remoteList
|
||||
>>= if not ii
|
||||
then filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . gitconfig)
|
||||
else return
|
||||
let validremotes = remotesWithUUID allremotes locations
|
||||
|
||||
return (sortBy (comparing cost) validremotes, validtrustedlocations)
|
||||
remoteLocations ii locations trusted =
|
||||
remoteLocations' ii locations trusted =<< remoteList
|
||||
|
||||
{- Displays known locations of a key and helps the user take action
|
||||
- to make them accessible. -}
|
||||
|
|
709
Remote/Compute.hs
Normal file
709
Remote/Compute.hs
Normal file
|
@ -0,0 +1,709 @@
|
|||
{- Compute remote.
|
||||
-
|
||||
- Copyright 2025 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Remote.Compute (
|
||||
remote,
|
||||
isComputeRemote,
|
||||
ComputeState(..),
|
||||
setComputeState,
|
||||
getComputeState,
|
||||
computeStateUrl,
|
||||
ComputeProgram,
|
||||
getComputeProgram,
|
||||
runComputeProgram,
|
||||
ImmutableState(..),
|
||||
ComputeProgramResult(..),
|
||||
computationBehaviorChangeError,
|
||||
defaultComputeParams,
|
||||
) where
|
||||
|
||||
import Annex.Common
|
||||
import qualified Annex
|
||||
import Types.Remote
|
||||
import Types.ProposedAccepted
|
||||
import Types.MetaData
|
||||
import Types.Creds
|
||||
import Config
|
||||
import Config.Cost
|
||||
import Remote.Helper.Special
|
||||
import Remote.Helper.ExportImport
|
||||
import Remote.List.Util
|
||||
import Annex.SpecialRemote.Config
|
||||
import Annex.UUID
|
||||
import Annex.Content
|
||||
import Annex.Tmp
|
||||
import Annex.GitShaKey
|
||||
import Annex.CatFile
|
||||
import Annex.RepoSize.LiveUpdate
|
||||
import qualified Annex.Transfer
|
||||
import Logs.MetaData
|
||||
import Logs.EquivilantKeys
|
||||
import Logs.Location
|
||||
import Messages.Progress
|
||||
import Utility.Metered
|
||||
import Utility.TimeStamp
|
||||
import Utility.Env
|
||||
import Utility.Tmp.Dir
|
||||
import Utility.Url
|
||||
import Utility.MonotonicClock
|
||||
import Types.Key
|
||||
import Backend
|
||||
import qualified Git
|
||||
import qualified Utility.FileIO as F
|
||||
import qualified Utility.SimpleProtocol as Proto
|
||||
|
||||
import Network.HTTP.Types.URI
|
||||
import Data.Time.Clock
|
||||
import Text.Read
|
||||
import Control.Concurrent.STM
|
||||
import Control.Concurrent.Async
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
|
||||
remote :: RemoteType
|
||||
remote = RemoteType
|
||||
{ typename = "compute"
|
||||
, enumerate = const $ findSpecialRemotes "compute"
|
||||
, generate = gen
|
||||
, configParser = computeConfigParser
|
||||
, setup = setupInstance
|
||||
, exportSupported = exportUnsupported
|
||||
, importSupported = importUnsupported
|
||||
, thirdPartyPopulated = False
|
||||
}
|
||||
|
||||
isComputeRemote :: Remote -> Bool
|
||||
isComputeRemote r = typename (remotetype r) == typename remote
|
||||
|
||||
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
|
||||
gen r u rc gc rs = case getComputeProgram' rc of
|
||||
Left _err -> return Nothing
|
||||
Right program -> do
|
||||
c <- parsedRemoteConfig remote rc
|
||||
cst <- remoteCost gc c veryExpensiveRemoteCost
|
||||
return $ Just $ mk program c cst
|
||||
where
|
||||
mk program c cst = Remote
|
||||
{ uuid = u
|
||||
, cost = cst
|
||||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyUnsupported
|
||||
, retrieveKeyFile = computeKey rs program
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
||||
, removeKey = dropKey rs
|
||||
, lockContent = Nothing
|
||||
, checkPresent = checkKey rs
|
||||
, checkPresentCheap = False
|
||||
, exportActions = exportUnsupported
|
||||
, importActions = importUnsupported
|
||||
, whereisKey = Nothing
|
||||
, remoteFsck = Nothing
|
||||
, repairRepo = Nothing
|
||||
, config = c
|
||||
, gitconfig = gc
|
||||
, localpath = Nothing
|
||||
, getRepo = return r
|
||||
, readonly = True
|
||||
, appendonly = False
|
||||
, untrustworthy = False
|
||||
, availability = pure LocallyAvailable
|
||||
, remotetype = remote
|
||||
, mkUnavailable = return Nothing
|
||||
, getInfo = return []
|
||||
, claimUrl = Nothing
|
||||
, checkUrl = Nothing
|
||||
, remoteStateHandle = rs
|
||||
}
|
||||
|
||||
setupInstance :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
|
||||
setupInstance ss mu _ c _ = do
|
||||
ComputeProgram program <- either giveup return $ getComputeProgram' c
|
||||
allowedprograms <- maybe [] words . annexAllowedComputePrograms
|
||||
<$> Annex.getGitConfig
|
||||
case ss of
|
||||
Init -> noop
|
||||
_ -> unless (program `elem` allowedprograms) $ do
|
||||
let remotename = fromMaybe "(unknown)" (lookupName c)
|
||||
giveup $ unwords
|
||||
[ "Unable to enable compute special remote"
|
||||
, remotename
|
||||
, "because its compute program"
|
||||
, program
|
||||
, "is not listed in annex.security-allowed-compute-programs"
|
||||
]
|
||||
unlessM (liftIO $ inSearchPath program) $
|
||||
giveup $ "Cannot find " ++ program ++ " in PATH"
|
||||
u <- maybe (liftIO genUUID) return mu
|
||||
gitConfigSpecialRemote u c [("compute", "true")]
|
||||
return (c, u)
|
||||
|
||||
computeConfigParser :: RemoteConfig -> Annex RemoteConfigParser
|
||||
computeConfigParser _ = return $ RemoteConfigParser
|
||||
{ remoteConfigFieldParsers =
|
||||
[ optionalStringParser programField
|
||||
(FieldDesc $ "compute program (must start with \"" ++ safetyPrefix ++ "\")")
|
||||
]
|
||||
-- Pass through all other params, which git-annex addcomputed adds
|
||||
-- to the input params.
|
||||
, remoteConfigRestPassthrough = Just
|
||||
( const True
|
||||
, [("*", FieldDesc "all other parameters are passed to compute program")]
|
||||
)
|
||||
}
|
||||
|
||||
defaultComputeParams :: Remote -> [String]
|
||||
defaultComputeParams = map mk . M.toList . getRemoteConfigPassedThrough . config
|
||||
where
|
||||
mk (f, v) = fromProposedAccepted f ++ '=' : v
|
||||
|
||||
newtype ComputeProgram = ComputeProgram String
|
||||
deriving (Show)
|
||||
|
||||
getComputeProgram :: Remote -> Annex ComputeProgram
|
||||
getComputeProgram r =
|
||||
case getComputeProgram' (unparsedRemoteConfig (config r)) of
|
||||
Right program -> return program
|
||||
Left err -> giveup $
|
||||
"Problem with the configuration of compute remote " ++ name r ++ ": " ++ err
|
||||
|
||||
getComputeProgram' :: RemoteConfig -> Either String ComputeProgram
|
||||
getComputeProgram' c = case fromProposedAccepted <$> M.lookup programField c of
|
||||
Just program
|
||||
| safetyPrefix `isPrefixOf` program ->
|
||||
Right (ComputeProgram program)
|
||||
| otherwise -> Left $
|
||||
"The program's name must begin with \"" ++ safetyPrefix ++ "\""
|
||||
Nothing -> Left "Specify program="
|
||||
|
||||
-- Limiting the program to "git-annex-compute-" prefix is important for
|
||||
-- security, it prevents autoenabled compute remotes from running arbitrary
|
||||
-- programs.
|
||||
safetyPrefix :: String
|
||||
safetyPrefix = "git-annex-compute-"
|
||||
|
||||
programField :: RemoteConfigField
|
||||
programField = Accepted "program"
|
||||
|
||||
data ProcessCommand
|
||||
= ProcessInput FilePath
|
||||
| ProcessOutput FilePath
|
||||
| ProcessReproducible
|
||||
| ProcessProgress PercentFloat
|
||||
deriving (Show, Eq)
|
||||
|
||||
instance Proto.Receivable ProcessCommand where
|
||||
parseCommand "INPUT" = Proto.parse1 ProcessInput
|
||||
parseCommand "OUTPUT" = Proto.parse1 ProcessOutput
|
||||
parseCommand "REPRODUCIBLE" = Proto.parse0 ProcessReproducible
|
||||
parseCommand "PROGRESS" = Proto.parse1 ProcessProgress
|
||||
parseCommand _ = Proto.parseFail
|
||||
|
||||
newtype PercentFloat = PercentFloat Float
|
||||
deriving (Show, Eq)
|
||||
|
||||
instance Proto.Serializable PercentFloat where
|
||||
serialize (PercentFloat p) = show p ++ "%"
|
||||
deserialize s = do
|
||||
s' <- reverse <$> stripPrefix "%" (reverse s)
|
||||
PercentFloat <$> readMaybe s'
|
||||
|
||||
data ComputeState = ComputeState
|
||||
{ computeParams :: [String]
|
||||
, computeInputs :: M.Map OsPath Key
|
||||
, computeOutputs :: M.Map OsPath (Maybe Key)
|
||||
, computeSubdir :: OsPath
|
||||
}
|
||||
deriving (Show, Eq)
|
||||
|
||||
{- Formats a ComputeState as an URL query string.
|
||||
-
|
||||
- Prefixes computeParams with 'p', computeInputs with 'i',
|
||||
- and computeOutputs with 'o'. Uses "d" for computeSubdir.
|
||||
-
|
||||
- When the passed Key is an output, rather than duplicate it
|
||||
- in the query string, that output has no value.
|
||||
-
|
||||
- Example: "psomefile&pdestfile&pbaz&isomefile=WORM--foo&odestfile=&d=subdir"
|
||||
-
|
||||
- The computeParams are in the order they were given. The computeInputs
|
||||
- and computeOutputs are sorted in ascending order for stability.
|
||||
-}
|
||||
formatComputeState :: Key -> ComputeState -> B.ByteString
|
||||
formatComputeState k = formatComputeState' (Just k)
|
||||
|
||||
formatComputeState' :: Maybe Key -> ComputeState -> B.ByteString
|
||||
formatComputeState' mk st = renderQuery False $ concat
|
||||
[ map formatparam (computeParams st)
|
||||
, map formatinput (M.toAscList (computeInputs st))
|
||||
, mapMaybe formatoutput (M.toAscList (computeOutputs st))
|
||||
, [("d", Just (fromOsPath (computeSubdir st)))]
|
||||
]
|
||||
where
|
||||
formatparam p = ("p" <> encodeBS p, Nothing)
|
||||
formatinput (file, key) =
|
||||
("i" <> fromOsPath file, Just (serializeKey' key))
|
||||
formatoutput (file, (Just key)) = Just $
|
||||
("o" <> fromOsPath file,
|
||||
if Just key == mk
|
||||
then Nothing
|
||||
else Just (serializeKey' key)
|
||||
)
|
||||
formatoutput (_, Nothing) = Nothing
|
||||
|
||||
parseComputeState :: Key -> B.ByteString -> Maybe ComputeState
|
||||
parseComputeState k b =
|
||||
let st = go emptycomputestate (parseQuery b)
|
||||
in if st == emptycomputestate then Nothing else Just st
|
||||
where
|
||||
emptycomputestate = ComputeState
|
||||
{ computeParams = mempty
|
||||
, computeInputs = mempty
|
||||
, computeOutputs = mempty
|
||||
, computeSubdir = literalOsPath "."
|
||||
}
|
||||
|
||||
go :: ComputeState -> [QueryItem] -> ComputeState
|
||||
go c [] = c { computeParams = reverse (computeParams c) }
|
||||
go c ((f, v):rest) =
|
||||
let c' = fromMaybe c $ case decodeBS f of
|
||||
('p':p) -> Just $ c
|
||||
{ computeParams = p : computeParams c
|
||||
}
|
||||
('i':i) -> do
|
||||
key <- deserializeKey' =<< v
|
||||
Just $ c
|
||||
{ computeInputs =
|
||||
M.insert (toOsPath i) key
|
||||
(computeInputs c)
|
||||
}
|
||||
('o':o) -> case v of
|
||||
Just kv -> do
|
||||
key <- deserializeKey' kv
|
||||
Just $ c
|
||||
{ computeOutputs =
|
||||
M.insert (toOsPath o)
|
||||
(Just key)
|
||||
(computeOutputs c)
|
||||
}
|
||||
Nothing -> Just $ c
|
||||
{ computeOutputs =
|
||||
M.insert (toOsPath o)
|
||||
(Just k)
|
||||
(computeOutputs c)
|
||||
}
|
||||
('d':[]) -> do
|
||||
subdir <- v
|
||||
Just $ c
|
||||
{ computeSubdir = toOsPath subdir
|
||||
}
|
||||
_ -> Nothing
|
||||
in go c' rest
|
||||
|
||||
{- A compute: url for a given output file of a computation. -}
|
||||
computeStateUrl :: Remote -> ComputeState -> OsPath -> URLString
|
||||
computeStateUrl r st p =
|
||||
"annex-compute:" ++ fromUUID (uuid r) ++ "/" ++ fromOsPath p ++ "?"
|
||||
++ decodeBS (formatComputeState' Nothing st')
|
||||
where
|
||||
-- Omit computeOutputs, so this gives the same result whether
|
||||
-- it's called on a ComputeState with the computeOutputs
|
||||
-- Keys populated or not.
|
||||
st' = st { computeOutputs = mempty }
|
||||
|
||||
{- The per remote metadata is used to store ComputeState. This allows
|
||||
- recording multiple ComputeStates that generate the same key.
|
||||
-
|
||||
- The metadata fields are numbers (prefixed with "t" to make them legal
|
||||
- field names), which are estimates of how long it might take to run
|
||||
- the computation (in seconds).
|
||||
-
|
||||
- Avoids redundantly recording a ComputeState when the per remote metadata
|
||||
- already contains it.
|
||||
-}
|
||||
setComputeState :: RemoteStateHandle -> Key -> NominalDiffTime -> ComputeState -> Annex ()
|
||||
setComputeState rs k ts st = do
|
||||
l <- map snd <$> getComputeStatesUnsorted rs k
|
||||
unless (st `elem` l) go
|
||||
where
|
||||
go = addRemoteMetaData k rs $ MetaData $ M.singleton
|
||||
(mkMetaFieldUnchecked $ T.pack ('t':show (truncateResolution 1 ts)))
|
||||
(S.singleton (MetaValue (CurrentlySet True) (formatComputeState k st)))
|
||||
|
||||
{- When multiple ComputeStates have been recorded for the same key,
|
||||
- this returns one that is probably less expensive to compute,
|
||||
- based on the original time it took to compute it. -}
|
||||
getComputeState :: RemoteStateHandle -> Key -> Annex (Maybe ComputeState)
|
||||
getComputeState rs k = headMaybe . map snd . sortOn fst
|
||||
<$> getComputeStatesUnsorted rs k
|
||||
|
||||
getComputeStatesUnsorted :: RemoteStateHandle -> Key -> Annex [(NominalDiffTime, ComputeState)]
|
||||
getComputeStatesUnsorted rs k = do
|
||||
RemoteMetaData _ (MetaData m) <- getCurrentRemoteMetaData rs k
|
||||
return $ go [] (M.toList m)
|
||||
where
|
||||
go c [] = concat c
|
||||
go c ((f, s) : rest) =
|
||||
let sts = mapMaybe (parseComputeState k . fromMetaValue)
|
||||
(S.toList s)
|
||||
in case parsePOSIXTime (T.encodeUtf8 (T.drop 1 (fromMetaField f))) of
|
||||
Just ts -> go (zip (repeat ts) sts : c) rest
|
||||
Nothing -> go c rest
|
||||
|
||||
computeProgramEnvironment :: ComputeState -> Annex [(String, String)]
|
||||
computeProgramEnvironment st = do
|
||||
environ <- filter (caninherit . fst) <$> liftIO getEnvironment
|
||||
let addenv = mapMaybe go (computeParams st)
|
||||
return $ environ ++ addenv
|
||||
where
|
||||
envprefix = "ANNEX_COMPUTE_"
|
||||
caninherit v = not (envprefix `isPrefixOf` v)
|
||||
go p
|
||||
| '=' `elem` p =
|
||||
let (f, v) = separate (== '=') p
|
||||
in Just (envprefix ++ f, v)
|
||||
| otherwise = Nothing
|
||||
|
||||
newtype ImmutableState = ImmutableState Bool
|
||||
|
||||
data ComputeProgramResult = ComputeProgramResult
|
||||
{ computeState :: ComputeState
|
||||
, computeInputsUnavailable :: Bool
|
||||
, computeReproducible :: Bool
|
||||
}
|
||||
|
||||
runComputeProgram
|
||||
:: ComputeProgram
|
||||
-> ComputeState
|
||||
-> ImmutableState
|
||||
-> (OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath)))
|
||||
-- ^ get input file's content, or Nothing the input file's
|
||||
-- content is not available
|
||||
-> Maybe (Key, MeterUpdate)
|
||||
-- ^ update meter for this key
|
||||
-> (ComputeProgramResult -> OsPath -> NominalDiffTime -> Annex v)
|
||||
-> Annex v
|
||||
runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent meterkey cont =
|
||||
withOtherTmp $ \othertmpdir ->
|
||||
withTmpDirIn othertmpdir (literalOsPath "compute") go
|
||||
where
|
||||
go tmpdir = do
|
||||
environ <- computeProgramEnvironment state
|
||||
subdir <- liftIO $ getsubdir tmpdir
|
||||
let pr = (proc program (computeParams state))
|
||||
{ cwd = Just (fromOsPath subdir)
|
||||
, std_in = CreatePipe
|
||||
, std_out = CreatePipe
|
||||
, env = Just environ
|
||||
}
|
||||
showOutput
|
||||
starttime <- liftIO currentMonotonicTimestamp
|
||||
let startresult = ComputeProgramResult state False False
|
||||
result <- withmeterfile $ \meterfile -> bracket
|
||||
(liftIO $ createProcess pr)
|
||||
(liftIO . cleanupProcess)
|
||||
(getinput tmpdir subdir startresult meterfile)
|
||||
endtime <- liftIO currentMonotonicTimestamp
|
||||
cont result subdir (calcduration starttime endtime)
|
||||
|
||||
getsubdir tmpdir = do
|
||||
let subdir = tmpdir </> computeSubdir state
|
||||
ifM (dirContains <$> absPath tmpdir <*> absPath subdir)
|
||||
( do
|
||||
createDirectoryIfMissing True subdir
|
||||
return subdir
|
||||
-- Ignore unsafe value in state.
|
||||
, return tmpdir
|
||||
)
|
||||
|
||||
getinput tmpdir subdir result meterfile p =
|
||||
liftIO (hGetLineUntilExitOrEOF (processHandle p) (stdoutHandle p)) >>= \case
|
||||
Just l
|
||||
| null l -> getinput tmpdir subdir result meterfile p
|
||||
| otherwise -> do
|
||||
result' <- parseoutput p tmpdir subdir result meterfile l
|
||||
getinput tmpdir subdir result' meterfile p
|
||||
Nothing -> do
|
||||
liftIO $ hClose (stdoutHandle p)
|
||||
liftIO $ hClose (stdinHandle p)
|
||||
unlessM (liftIO $ checkSuccessProcess (processHandle p)) $
|
||||
giveup $ program ++ " exited unsuccessfully"
|
||||
return result
|
||||
|
||||
parseoutput p tmpdir subdir result meterfile l = case Proto.parseMessage l of
|
||||
Just (ProcessInput f) -> do
|
||||
let f' = toOsPath f
|
||||
let knowninput = M.member f'
|
||||
(computeInputs (computeState result))
|
||||
checksafefile tmpdir subdir f' "input"
|
||||
checkimmutable knowninput "inputting" f' $ do
|
||||
(k, inputcontent) <- getinputcontent f'
|
||||
mp <- case inputcontent of
|
||||
Nothing -> pure Nothing
|
||||
Just (Right f'') -> liftIO $
|
||||
Just <$> relPathDirToFile subdir f''
|
||||
Just (Left gitsha) ->
|
||||
Just <$> (liftIO . relPathDirToFile subdir
|
||||
=<< populategitsha gitsha tmpdir)
|
||||
liftIO $ hPutStrLn (stdinHandle p) $
|
||||
maybe "" fromOsPath mp
|
||||
liftIO $ hFlush (stdinHandle p)
|
||||
let result' = result
|
||||
{ computeInputsUnavailable =
|
||||
isNothing mp || computeInputsUnavailable result
|
||||
}
|
||||
return $ if immutablestate
|
||||
then result'
|
||||
else modresultstate result' $ \s -> s
|
||||
{ computeInputs =
|
||||
M.insert f' k
|
||||
(computeInputs s)
|
||||
}
|
||||
Just (ProcessOutput f) -> do
|
||||
let f' = toOsPath f
|
||||
checksafefile tmpdir subdir f' "output"
|
||||
knownoutput <- case M.lookup f' (computeOutputs $ computeState result) of
|
||||
Nothing -> return False
|
||||
Just mk -> do
|
||||
when (mk == fmap fst meterkey) $
|
||||
meterfile (subdir </> f')
|
||||
return True
|
||||
checkimmutable knownoutput "outputting" f' $
|
||||
return $ if immutablestate
|
||||
then result
|
||||
else modresultstate result $ \s -> s
|
||||
{ computeOutputs =
|
||||
M.insert f' Nothing
|
||||
(computeOutputs s)
|
||||
}
|
||||
Just (ProcessProgress percent) -> do
|
||||
liftIO $ updatepercent percent
|
||||
return result
|
||||
Just ProcessReproducible ->
|
||||
return $ result { computeReproducible = True }
|
||||
Nothing -> giveup $
|
||||
program ++ " output an unparseable line: \"" ++ l ++ "\""
|
||||
|
||||
modresultstate result f =
|
||||
result { computeState = f (computeState result) }
|
||||
|
||||
checksafefile tmpdir subdir f fileaction = do
|
||||
let err problem = giveup $
|
||||
program ++ " tried to " ++ fileaction ++ " a file that is " ++ problem ++ ": " ++ fromOsPath f
|
||||
unlessM (liftIO $ dirContains <$> absPath tmpdir <*> absPath (subdir </> f)) $
|
||||
err "outside the git repository"
|
||||
when (any (\p -> dropTrailingPathSeparator p == literalOsPath ".git") (splitPath f)) $
|
||||
err "inside the .git directory"
|
||||
|
||||
checkimmutable True _ _ a = a
|
||||
checkimmutable False requestdesc p a
|
||||
| not immutablestate = a
|
||||
| otherwise = computationBehaviorChangeError (ComputeProgram program) requestdesc p
|
||||
|
||||
calcduration (MonotonicTimestamp starttime) (MonotonicTimestamp endtime) =
|
||||
fromIntegral (endtime - starttime) :: NominalDiffTime
|
||||
|
||||
-- Writes to a .git/objects/ file in the tmpdir, rather than
|
||||
-- using the input filename, to avoid exposing the input filename
|
||||
-- to the program as a parameter, which could parse it as a dashed
|
||||
-- option or other special parameter.
|
||||
populategitsha gitsha tmpdir = do
|
||||
let f = tmpdir </> literalOsPath ".git" </> literalOsPath "objects"
|
||||
</> toOsPath (Git.fromRef' gitsha)
|
||||
liftIO $ createDirectoryIfMissing True $ takeDirectory f
|
||||
liftIO . F.writeFile f =<< catObject gitsha
|
||||
return f
|
||||
|
||||
withmeterfile a = case meterkey of
|
||||
Nothing -> a (const noop)
|
||||
Just (_, progress) -> do
|
||||
filev <- liftIO newEmptyTMVarIO
|
||||
endv <- liftIO $ newEmptyTMVarIO
|
||||
let meterfile = void . liftIO
|
||||
. atomically . tryPutTMVar filev
|
||||
let endmeterfile = atomically $ putTMVar endv ()
|
||||
tid <- liftIO $ async $ do
|
||||
v <- liftIO $ atomically $
|
||||
(Right <$> takeTMVar filev)
|
||||
`orElse`
|
||||
(Left <$> takeTMVar endv)
|
||||
case v of
|
||||
Right f -> watchFileSize f progress $ \_ ->
|
||||
void $ liftIO $ atomically $
|
||||
takeTMVar endv
|
||||
Left () -> return ()
|
||||
a meterfile
|
||||
`finally` liftIO (endmeterfile >> wait tid)
|
||||
|
||||
updatepercent (PercentFloat percent) = case meterkey of
|
||||
Nothing -> noop
|
||||
Just (k, progress) -> case fromKey keySize k of
|
||||
Nothing -> noop
|
||||
Just sz ->
|
||||
progress $ BytesProcessed $ floor $
|
||||
fromIntegral sz * percent / 100
|
||||
|
||||
computationBehaviorChangeError :: ComputeProgram -> String -> OsPath -> Annex a
|
||||
computationBehaviorChangeError (ComputeProgram program) requestdesc p =
|
||||
giveup $ program ++ " is not behaving the same way it used to, now " ++ requestdesc ++ ": " ++ fromOsPath p
|
||||
|
||||
computeKey :: RemoteStateHandle -> ComputeProgram -> Key -> AssociatedFile -> OsPath -> MeterUpdate -> VerifyConfig -> Annex Verification
|
||||
computeKey rs (ComputeProgram program) k _af dest meterupdate vc =
|
||||
getComputeState rs k >>= \case
|
||||
Just state ->
|
||||
case computeskey state of
|
||||
Just keyfile -> go state keyfile
|
||||
Nothing -> missingstate
|
||||
Nothing -> missingstate
|
||||
where
|
||||
missingstate = giveup "Missing compute state"
|
||||
|
||||
go state keyfile = metered (Just meterupdate) k Nothing $ \_ p ->
|
||||
runComputeProgram (ComputeProgram program) state
|
||||
(ImmutableState True)
|
||||
(getinputcontent state)
|
||||
(Just (k, p))
|
||||
(postcompute keyfile)
|
||||
|
||||
getinputcontent state f =
|
||||
case M.lookup f (computeInputs state) of
|
||||
Just inputkey -> case keyGitSha inputkey of
|
||||
Nothing ->
|
||||
let retkey = do
|
||||
obj <- calcRepo (gitAnnexLocation inputkey)
|
||||
return (inputkey, Just (Right obj))
|
||||
in ifM (inAnnex inputkey)
|
||||
( retkey
|
||||
, ifM (getinputcontent' f inputkey)
|
||||
( retkey
|
||||
, return (inputkey, Nothing)
|
||||
)
|
||||
)
|
||||
Just gitsha ->
|
||||
return (inputkey, Just (Left gitsha))
|
||||
Nothing -> error "internal"
|
||||
|
||||
getinputcontent' f inputkey = do
|
||||
remotes <- avoidCycles [k] inputkey
|
||||
=<< keyPossibilities inputkey
|
||||
anyM (getinputcontentfrom f inputkey) remotes
|
||||
|
||||
getinputcontentfrom f inputkey r = do
|
||||
showAction $ "getting input " <> QuotedPath f
|
||||
<> " from " <> UnquotedString (name r)
|
||||
lu <- prepareLiveUpdate Nothing inputkey AddingKey
|
||||
logStatusAfter lu inputkey $
|
||||
Annex.Transfer.download r inputkey (AssociatedFile (Just f))
|
||||
Annex.Transfer.stdRetry Annex.Transfer.noNotification
|
||||
|
||||
computeskey state =
|
||||
case M.keys $ M.filter (== Just k) (computeOutputs state) of
|
||||
(keyfile : _) -> Just keyfile
|
||||
[] -> Nothing
|
||||
|
||||
postcompute keyfile result tmpdir _ts
|
||||
| computeInputsUnavailable result =
|
||||
giveup "Input file(s) unavailable."
|
||||
| otherwise =
|
||||
postcompute' keyfile (computeState result) tmpdir
|
||||
|
||||
postcompute' keyfile state tmpdir = do
|
||||
hb <- hashBackend
|
||||
let updatevurl key getobj =
|
||||
if (fromKey keyVariety key == VURLKey)
|
||||
then addEquivilantKey hb key =<< getobj
|
||||
else return Nothing
|
||||
|
||||
let keyfile' = tmpdir </> keyfile
|
||||
unlessM (liftIO $ doesFileExist keyfile') $
|
||||
giveup $ program ++ " exited sucessfully, but failed to write the computed file"
|
||||
catchNonAsync (liftIO $ moveFile keyfile' dest)
|
||||
(\err -> giveup $ "failed to move the computed file: " ++ show err)
|
||||
mverification <- updatevurl k (pure dest)
|
||||
|
||||
-- Try to move any other computed object files into the annex.
|
||||
forM_ (M.toList $ computeOutputs state) $ \case
|
||||
(file, (Just key)) ->
|
||||
when (k /= key) $ do
|
||||
let file' = tmpdir </> file
|
||||
whenM (liftIO $ doesFileExist file') $ do
|
||||
whenM (verifyKeyContentPostRetrieval RetrievalAllKeysSecure vc MustVerify key file') $ do
|
||||
moved <- moveAnnex key file' `catchNonAsync` const (pure False)
|
||||
when moved $
|
||||
void $ updatevurl key (calcRepo (gitAnnexLocation key))
|
||||
_ -> noop
|
||||
|
||||
-- The program might not be reproducible,
|
||||
-- so require strong verification.
|
||||
return $ fromMaybe MustVerify mverification
|
||||
|
||||
keyPossibilities :: Key -> Annex [Remote]
|
||||
keyPossibilities key = do
|
||||
remotelist <- Annex.getState Annex.remotes
|
||||
locs <- loggedLocations key
|
||||
keyPossibilities' (IncludeIgnored False) key locs remotelist
|
||||
|
||||
{- Filter out any remotes that, in order to compute the inputkey, would
|
||||
- need to get the outputkey from some remote.
|
||||
-
|
||||
- This only finds cycles of compute special remotes, not any other
|
||||
- similar type of special remote that might have its own input keys.
|
||||
- There are no other such special remotes in git-annex itself, so this
|
||||
- is the best that can be done.
|
||||
-
|
||||
- Note that, in a case where a compute special remote needs the outputkey
|
||||
- to compute the inputkey, but could get the outputkey from either this
|
||||
- remote, or some other, non-compute remote, that is filtered out as a
|
||||
- cycle because it's not possible to prevent that remote getting from this
|
||||
- remote.
|
||||
-}
|
||||
avoidCycles :: [Key] -> Key -> [Remote] -> Annex [Remote]
|
||||
avoidCycles outputkeys inputkey = filterM go
|
||||
where
|
||||
go r
|
||||
| iscomputeremote r =
|
||||
getComputeState (remoteStateHandle r) inputkey >>= \case
|
||||
Nothing -> return True
|
||||
Just state
|
||||
| inputsoutput state -> return False
|
||||
| otherwise -> checkdeeper state
|
||||
| otherwise = return True
|
||||
|
||||
iscomputeremote r = remotetype r == remote
|
||||
|
||||
inputsoutput state = not $ M.null $
|
||||
M.filter (`elem` outputkeys)
|
||||
(computeInputs state)
|
||||
|
||||
checkdeeper state =
|
||||
flip allM (M.elems (computeInputs state)) $ \inputkey' -> do
|
||||
rs <- keyPossibilities inputkey'
|
||||
rs' <- avoidCycles (inputkey:outputkeys) inputkey' rs
|
||||
return (rs' == rs)
|
||||
|
||||
-- Make sure that the compute state exists.
|
||||
checkKey :: RemoteStateHandle -> Key -> Annex Bool
|
||||
checkKey rs k = do
|
||||
states <- getComputeStatesUnsorted rs k
|
||||
if null states
|
||||
then giveup "Missing compute state"
|
||||
else return True
|
||||
|
||||
-- Unsetting the compute state will prevent computing the key.
|
||||
dropKey :: RemoteStateHandle -> Maybe SafeDropProof -> Key -> Annex ()
|
||||
dropKey rs _ k = do
|
||||
RemoteMetaData _ old <- getCurrentRemoteMetaData rs k
|
||||
addRemoteMetaData k rs (modMeta old DelAllMeta)
|
||||
|
||||
storeKeyUnsupported :: Key -> AssociatedFile -> Maybe OsPath -> MeterUpdate -> Annex ()
|
||||
storeKeyUnsupported _ _ _ _ = giveup "transfer to compute remote not supported; use git-annex addcomputed instead"
|
|
@ -682,7 +682,7 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key af o meterupdate
|
|||
let checksuccess = liftIO checkio >>= \case
|
||||
Just err -> giveup err
|
||||
Nothing -> return True
|
||||
logStatusAfter NoLiveUpdate key $ Annex.Content.getViaTmp rsp verify key af (Just sz) $ \dest ->
|
||||
logStatusAfter NoLiveUpdate key $ Annex.Content.getViaTmp rsp verify key (Just sz) $ \dest ->
|
||||
metered (Just (combineMeterUpdate meterupdate p)) key bwlimit $ \_ p' ->
|
||||
copier object dest key p' checksuccess verify
|
||||
)
|
||||
|
|
|
@ -40,6 +40,7 @@ import qualified Remote.Borg
|
|||
import qualified Remote.Rclone
|
||||
import qualified Remote.Hook
|
||||
import qualified Remote.External
|
||||
import qualified Remote.Compute
|
||||
|
||||
remoteTypes :: [RemoteType]
|
||||
remoteTypes = map adjustExportImportRemoteType
|
||||
|
@ -63,6 +64,7 @@ remoteTypes = map adjustExportImportRemoteType
|
|||
, Remote.Rclone.remote
|
||||
, Remote.Hook.remote
|
||||
, Remote.External.remote
|
||||
, Remote.Compute.remote
|
||||
]
|
||||
|
||||
{- Builds a list of all Remotes.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex remote list utils
|
||||
-
|
||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2025 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -10,6 +10,11 @@ module Remote.List.Util where
|
|||
import Annex.Common
|
||||
import qualified Annex
|
||||
import qualified Git.Config
|
||||
import Annex.UUID
|
||||
import Types.Remote
|
||||
import Config.DynamicConfig
|
||||
|
||||
import Data.Ord
|
||||
|
||||
{- Call when remotes have changed. Re-reads the git config, and
|
||||
- invalidates the cache so the remoteList will be re-generated next time
|
||||
|
@ -22,3 +27,44 @@ remotesChanged = do
|
|||
, Annex.gitremotes = Nothing
|
||||
, Annex.repo = newg
|
||||
}
|
||||
|
||||
{- Whether to include remotes that have annex-ignore set. -}
|
||||
newtype IncludeIgnored = IncludeIgnored Bool
|
||||
|
||||
keyPossibilities'
|
||||
:: IncludeIgnored
|
||||
-> Key
|
||||
-> [UUID]
|
||||
-- ^ uuids of remotes that are recorded to have the key
|
||||
-> [Remote]
|
||||
-- ^ all remotes
|
||||
-> Annex [Remote]
|
||||
keyPossibilities' ii _key remotelocations rs = do
|
||||
u <- getUUID
|
||||
let locations = filter (/= u) remotelocations
|
||||
let speclocations = map uuid
|
||||
$ filter (remoteAnnexSpeculatePresent . gitconfig) rs
|
||||
-- there are unlikely to be many speclocations, so building a Set
|
||||
-- is not worth the expense
|
||||
let locations' = speclocations ++ filter (`notElem` speclocations) locations
|
||||
fst <$> remoteLocations' ii locations' [] rs
|
||||
|
||||
remoteLocations' :: IncludeIgnored -> [UUID] -> [UUID] -> [Remote] -> Annex ([Remote], [UUID])
|
||||
remoteLocations' (IncludeIgnored ii) locations trusted rs = do
|
||||
let validtrustedlocations = nub locations `intersect` trusted
|
||||
|
||||
-- remotes that match uuids that have the key
|
||||
allremotes <- if not ii
|
||||
then filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . gitconfig) rs
|
||||
else return rs
|
||||
let validremotes = remotesWithUUID allremotes locations
|
||||
|
||||
return (sortBy (comparing cost) validremotes, validtrustedlocations)
|
||||
|
||||
{- Filters a list of remotes to ones that have the listed uuids. -}
|
||||
remotesWithUUID :: [Remote] -> [UUID] -> [Remote]
|
||||
remotesWithUUID rs us = filter (\r -> uuid r `elem` us) rs
|
||||
|
||||
{- Filters a list of remotes to ones that do not have the listed uuids. -}
|
||||
remotesWithoutUUID :: [Remote] -> [UUID] -> [Remote]
|
||||
remotesWithoutUUID rs us = filter (\r -> uuid r `notElem` us) rs
|
||||
|
|
|
@ -30,7 +30,6 @@ import Annex.SpecialRemote.Config
|
|||
import Logs.Remote
|
||||
import Logs.EquivilantKeys
|
||||
import Backend
|
||||
import Backend.VURL.Utilities (generateEquivilantKey)
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
||||
|
@ -169,18 +168,8 @@ downloadKey urlincludeexclude key _af dest p vc =
|
|||
| otherwise = return (Just v)
|
||||
|
||||
recordvurlkey eks = do
|
||||
-- Make sure to pick a backend that is cryptographically
|
||||
-- secure.
|
||||
db <- defaultBackend
|
||||
let b = if isCryptographicallySecure db
|
||||
then db
|
||||
else defaultHashBackend
|
||||
generateEquivilantKey b dest >>= \case
|
||||
Nothing -> return Nothing
|
||||
Just ek -> do
|
||||
unless (ek `elem` eks) $
|
||||
setEquivilantKey key ek
|
||||
return (Just Verified)
|
||||
b <- hashBackend
|
||||
updateEquivilantKeys b dest key eks
|
||||
|
||||
uploadKey :: Key -> AssociatedFile -> Maybe OsPath -> MeterUpdate -> Annex ()
|
||||
uploadKey _ _ _ _ = giveup "upload to web not supported"
|
||||
|
|
|
@ -146,6 +146,7 @@ data GitConfig = GitConfig
|
|||
, annexAllowedUrlSchemes :: S.Set Scheme
|
||||
, annexAllowedIPAddresses :: String
|
||||
, annexAllowUnverifiedDownloads :: Bool
|
||||
, annexAllowedComputePrograms :: Maybe String
|
||||
, annexMaxExtensionLength :: Maybe Int
|
||||
, annexMaxExtensions :: Maybe Int
|
||||
, annexJobs :: Concurrency
|
||||
|
@ -261,6 +262,8 @@ extractGitConfig configsource r = GitConfig
|
|||
getmaybe (annexConfig "security.allowed-http-addresses") -- old name
|
||||
, annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
|
||||
getmaybe (annexConfig "security.allow-unverified-downloads")
|
||||
, annexAllowedComputePrograms =
|
||||
getmaybe (annexConfig "security.allowed-compute-programs")
|
||||
, annexMaxExtensionLength = getmayberead (annexConfig "maxextensionlength")
|
||||
, annexMaxExtensions = getmayberead (annexConfig "maxextensions")
|
||||
, annexJobs = fromMaybe NonConcurrent $
|
||||
|
|
|
@ -25,8 +25,7 @@ upgrade = do
|
|||
olddir <- fromRepo gitAnnexDir
|
||||
keys <- getKeysPresent0 olddir
|
||||
forM_ keys $ \k ->
|
||||
moveAnnex k (AssociatedFile Nothing)
|
||||
(olddir </> toOsPath (keyFile0 k))
|
||||
moveAnnex k (olddir </> toOsPath (keyFile0 k))
|
||||
|
||||
-- update the symlinks to the key files
|
||||
-- No longer needed here; V1.upgrade does the same thing
|
||||
|
|
|
@ -85,7 +85,7 @@ moveContent = do
|
|||
let d = parentDir f
|
||||
liftIO $ allowWrite d
|
||||
liftIO $ allowWrite f
|
||||
_ <- moveAnnex k (AssociatedFile Nothing) f
|
||||
_ <- moveAnnex k f
|
||||
liftIO $ removeDirectory d
|
||||
|
||||
updateSymlinks :: Annex ()
|
||||
|
|
|
@ -23,14 +23,17 @@ that is in the form "foo=bar" will also result in an environment variable
|
|||
being set, eg `ANNEX_COMPUTE_passes=10` or `ANNEX_COMPUTE_--level=9`.
|
||||
|
||||
For security, the program should avoid exposing user input to the shell
|
||||
unprotected, or otherwise executing it.
|
||||
unprotected, or otherwise executing it. And when running a command, make
|
||||
sure that whatever user input is passed to it can result in only safe and
|
||||
expected behavior.
|
||||
|
||||
The program is run in a temporary directory, which will be cleaned up after
|
||||
it exits.
|
||||
it exits. Note that it may be run in a subdirectory of a temporary
|
||||
directory. This is done when `git-annex addcomputed` was run in a subdirectory
|
||||
of the git repository.
|
||||
|
||||
The content of any annexed file in the repository can be an input
|
||||
to the computation. The program requests an input by writing a line to
|
||||
stdout:
|
||||
The content of any file in the repository can be an input to the
|
||||
computation. The program requests an input by writing a line to stdout:
|
||||
|
||||
INPUT file.raw
|
||||
|
||||
|
@ -38,8 +41,8 @@ Then it can read a line from stdin, which will be the path to the content
|
|||
(eg a `.git/annex/objects/` path).
|
||||
|
||||
If the program needs multiple input files, it should output multiple
|
||||
`INPUT` lines at once, and then read multiple paths from stdin. This
|
||||
allows retrival of the inputs to potentially run in parallel.
|
||||
`INPUT` lines first, and then read multiple paths from stdin. This
|
||||
allows retrieval of the inputs to potentially run in parallel.
|
||||
|
||||
If an input file is not available, the program's stdin will be closed
|
||||
without a path being written to it. So when reading from stdin fails,
|
||||
|
@ -90,16 +93,17 @@ An example `git-annex-compute-foo` shell script follows:
|
|||
#!/bin/sh
|
||||
set -e
|
||||
if [ "$1" != "convert" ]; then
|
||||
echo "Usage: convert input output [passes=n]" >&2
|
||||
exit 1
|
||||
echo "Usage: convert input output [passes=n]" >&2
|
||||
exit 1
|
||||
fi
|
||||
if [ -z "$ANNEX_COMPUTE_passes" ];
|
||||
ANNEX_COMPUTE_passes=1
|
||||
if [ -z "$ANNEX_COMPUTE_passes" ]; then
|
||||
ANNEX_COMPUTE_passes=1
|
||||
fi
|
||||
echo "INPUT "$2"
|
||||
echo "INPUT $2"
|
||||
read input
|
||||
echo "OUTPUT $3"
|
||||
echo REPRODUCIBLE
|
||||
if [ -n "$input" ]; then
|
||||
frobnicate --passes="$ANNEX_COMPUTE_passes" <"$input" >"$3"
|
||||
mkdir -p "$(dirname "$3")"
|
||||
frobnicate --passes="$ANNEX_COMPUTE_passes" <"$input" >"$3"
|
||||
fi
|
||||
|
|
103
doc/git-annex-addcomputed.mdwn
Normal file
103
doc/git-annex-addcomputed.mdwn
Normal file
|
@ -0,0 +1,103 @@
|
|||
# NAME
|
||||
|
||||
git-annex addcomputed - adds computed files to the repository
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
git annex addcomputed `--to=remote -- ...`
|
||||
|
||||
# DESCRIPTION
|
||||
|
||||
Adds files to the annex that are computed from input files in the
|
||||
repository, using a compute special remote.
|
||||
|
||||
Once a file has been added to a compute remote, commands
|
||||
like `git-annex get` will use it to compute the content of the file.
|
||||
|
||||
The syntax of this command after the `--` is up to the program that
|
||||
the compute special remote is set up to run to perform the comuptation.
|
||||
|
||||
To see the program's usage, you can run:
|
||||
|
||||
git-annex addcomputed --to=foo
|
||||
|
||||
Generally you will provide an input file (or files), and often also an
|
||||
output filename, and additional parameters to control the computation.
|
||||
|
||||
There can be more than one input file that are combined to compute an
|
||||
output file. And multiple output files can be computed at the same time.
|
||||
|
||||
Some examples of how this might look:
|
||||
|
||||
git-annex addcomputed --to=x -- convert file.raw file.jpeg passes=10
|
||||
git-annex addcomputed --to=y -- compress foo --level=9
|
||||
git-annex addcomputed --to=z -- clip foo 2:01-3:00 combine with bar to baz
|
||||
|
||||
Note that parameters that were passed to `git-annex initremote`
|
||||
when setting up the compute special remote will be appended to the end of
|
||||
the parameters provided to `git-annex addcomputed`.
|
||||
|
||||
# OPTIONS
|
||||
|
||||
* `--to=remote`
|
||||
|
||||
Specify which remote will compute the files.
|
||||
|
||||
This must be a compute remote. For example, one can be
|
||||
initialized with:
|
||||
|
||||
git-annex initremote photoconv type=compute \
|
||||
program=git-annex-compute-photoconv
|
||||
|
||||
For details about compute remotes, and a list of some
|
||||
of the programs that are available, see
|
||||
<https://git-annex.branchable.com/special_remotes/compute/>
|
||||
|
||||
* `--fast`
|
||||
|
||||
Adds computed files to the repository, without doing any work yet to
|
||||
compute their content.
|
||||
|
||||
This implies `--unreproducible`, because even if the compute remote
|
||||
produces reproducible output, it's not available.
|
||||
|
||||
* `--unreproducible`, `-u`
|
||||
|
||||
Indicate that the computation is not expected to be fully reproducible.
|
||||
It can vary, in ways that produce files that equivilant enough to
|
||||
be interchangeable, but are not necessarily identical.
|
||||
|
||||
This is the default unless the compute remote indicates that it produces
|
||||
reproducible output.
|
||||
|
||||
* `--reproducible`, `-r`
|
||||
|
||||
Indicate that the computation is expected to be fully reproducible.
|
||||
|
||||
This is the default when the compute remote indicates that it produces
|
||||
reproducible output (except when using `--fast`).
|
||||
|
||||
If a computation turns out not to be fully reproducible, then getting
|
||||
a computed file from the compute remote will later fail with a
|
||||
checksum verification error. One thing that can be done then is to use
|
||||
`git-annex recompute --original --unreproducible`.
|
||||
|
||||
* `--backend`
|
||||
|
||||
Specifies which key-value backend to use.
|
||||
|
||||
* Also the [[git-annex-common-options]](1) can be used.
|
||||
|
||||
# SEE ALSO
|
||||
|
||||
[[git-annex]](1)
|
||||
|
||||
[[git-annex-recompute]](1)
|
||||
|
||||
[[git-annex-initremote]](1)
|
||||
|
||||
# AUTHOR
|
||||
|
||||
Joey Hess <id@joeyh.name>
|
||||
|
||||
Warning: Automatically converted into a man page by mdwn2man. Edit with care.
|
|
@ -52,7 +52,7 @@ want to use `git annex renameremote`.
|
|||
|
||||
git annex initremote mys3 type=S3 --whatelse
|
||||
|
||||
For a machine-readable list of the parameters, use this with --json.
|
||||
For a machine-readable list of the parameters, use this with `--json`.
|
||||
|
||||
* `--fast`
|
||||
|
||||
|
|
69
doc/git-annex-recompute.mdwn
Normal file
69
doc/git-annex-recompute.mdwn
Normal file
|
@ -0,0 +1,69 @@
|
|||
# NAME
|
||||
|
||||
git-annex recompute - recompute computed files
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
git-annex recompute [path ...]`
|
||||
|
||||
# DESCRIPTION
|
||||
|
||||
This updates computed files that were added with
|
||||
[[git-annex-addcomputed]](1).
|
||||
|
||||
By default, this only recomputes files whose input files have changed.
|
||||
The new contents of the input files are used to re-run the computation.
|
||||
|
||||
When the output of the computation is different, the computed file is
|
||||
updated with the new content. The updated file is written to the worktree,
|
||||
but is not staged, in order to avoid overwriting any staged changes.
|
||||
|
||||
# OPTIONS
|
||||
|
||||
* `--original`
|
||||
|
||||
Re-run the computation with the original input files.
|
||||
|
||||
* `--remote=name`
|
||||
|
||||
Only recompute files that were computed by this compute remote.
|
||||
|
||||
When this option is not used, all computed files are recomputed using
|
||||
whatever compute remote was originally used to add them. (In cases where
|
||||
a file can be computed by multiple remotes, the one with the lowest
|
||||
configured cost is used.)
|
||||
|
||||
* `--unreproducible`, `-u`
|
||||
|
||||
Indicate that the computation is not expected to be fully reproducible.
|
||||
It can vary, in ways that produce files that equivilant enough to
|
||||
be interchangeable, but are not necessarily identical.
|
||||
|
||||
This is the default unless the compute remote indicates that it produces
|
||||
reproducible output.
|
||||
|
||||
* `--reproducible`, `-r`
|
||||
|
||||
Indicate that the computation is expected to be fully reproducible.
|
||||
|
||||
This is the default when the compute remote indicates that it produces
|
||||
reproducible output.
|
||||
|
||||
* matching options
|
||||
|
||||
The [[git-annex-matching-options]](1) can be used to control what
|
||||
files to recompute.
|
||||
|
||||
* Also the [[git-annex-common-options]](1) can be used.
|
||||
|
||||
# SEE ALSO
|
||||
|
||||
[[git-annex]](1)
|
||||
|
||||
[[git-annex-addcomputed]](1)
|
||||
|
||||
# AUTHOR
|
||||
|
||||
Joey Hess <id@joeyh.name>
|
||||
|
||||
Warning: Automatically converted into a man page by mdwn2man. Edit with care.
|
|
@ -186,6 +186,18 @@ content from the key-value store.
|
|||
|
||||
See [[git-annex-undo]](1) for details.
|
||||
|
||||
* `addcomputed`
|
||||
|
||||
Adds computed files to the repository.
|
||||
|
||||
See [[git-annex-addcomputed]](1) for details.
|
||||
|
||||
* `recompute`
|
||||
|
||||
Recomputes computed files.
|
||||
|
||||
See [[git-annex-recompute]](1) for details.
|
||||
|
||||
* `multicast`
|
||||
|
||||
Multicast file distribution.
|
||||
|
@ -1945,6 +1957,11 @@ Remotes are configured using these settings in `.git/config`.
|
|||
the location of the borg repository to use. Normally this is automatically
|
||||
set up by `git annex initremote`, but you can change it if needed.
|
||||
|
||||
* `remote.<name>.annex-compute`
|
||||
|
||||
Used to identify compute special remotes.
|
||||
Normally this is automatically set up by `git annex initremote`.
|
||||
|
||||
* `remote.<name>.annex-ddarrepo`
|
||||
|
||||
Used by ddar special remotes, this configures
|
||||
|
@ -2184,6 +2201,13 @@ Remotes are configured using these settings in `.git/config`.
|
|||
|
||||
Per-remote configuration of annex.security.allow-unverified-downloads.
|
||||
|
||||
* `annex.security.allowed-compute-programs`
|
||||
|
||||
This is a space separated list of compute programs eg
|
||||
"git-annex-compute-foo git-annex-compute-bar". Listing a compute
|
||||
program here allows compute special remotes that use that program to be
|
||||
enabled by `git-annex enableremote` or autoenabled.
|
||||
|
||||
# CONFIGURATION OF ASSISTANT
|
||||
|
||||
* `annex.delayadd`
|
||||
|
|
|
@ -11,6 +11,7 @@ the content of files.
|
|||
* [[Amazon_Glacier|glacier]]
|
||||
* [[bittorrent]]
|
||||
* [[bup]]
|
||||
* [[compute]]
|
||||
* [[ddar]]
|
||||
* [[directory]]
|
||||
* [[gcrypt]] (encrypted git repositories!)
|
||||
|
|
37
doc/special_remotes/compute.mdwn
Normal file
37
doc/special_remotes/compute.mdwn
Normal file
|
@ -0,0 +1,37 @@
|
|||
While other remotes store the contents of annexed files somewhere,
|
||||
this special remote uses a program to compute the contents of annexed
|
||||
files.
|
||||
|
||||
To add a file to a compute special remote, use the [[git-annex-addcomputed]]
|
||||
command. Once a file has been added to a compute special remote, commands
|
||||
like `git-annex get` will use it to compute the content of the file.
|
||||
|
||||
To enable an instance of this special remote:
|
||||
|
||||
# git-annex initremote myremote type=compute program=git-annex-compute-foo
|
||||
|
||||
The `program` parameter is the only required parameter. It is the name of the
|
||||
program to use to compute the contents of annexed files. It must start with
|
||||
"git-annex-compute-". The program needs to be installed somewhere in the
|
||||
`PATH`.
|
||||
|
||||
Any program can be passed to `git-annex initremote`. However, when enabling
|
||||
a compute special remote later with `git-annex enableremote` or due to
|
||||
"autoenable=true", the program must be listed in the git config
|
||||
`annex.security.allowed-compute-programs`.
|
||||
|
||||
All other "field=value" parameters passed to `initremote` will be passed
|
||||
to the program when running [[git-annex-addcomputed]]. Note that when the
|
||||
program takes a dashed option, it can be provided after "--":
|
||||
|
||||
# git-annex initremote myremote type=compute program=git-annex-compute-foo -- --level=9
|
||||
|
||||
## compute programs
|
||||
|
||||
To write programs used by the compute special remote, see the
|
||||
[[design/compute_special_remote_interface]].
|
||||
|
||||
Have you written a generally useful (and secure) compute program?
|
||||
List it here!
|
||||
|
||||
* ...
|
|
@ -0,0 +1,22 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""Re: DataLad exploration of the compute on demand space"""
|
||||
date="2025-03-06T17:39:04Z"
|
||||
content="""
|
||||
Thanks for explaining the design points of datalad-remake. Some
|
||||
different design choices than I have made, but mostly they strike me as
|
||||
implementing what is easier/possible from outside git-annex.
|
||||
|
||||
Eg, storing the compute inputs under `.datalad` in the branch is fine --
|
||||
and might even be useful if you want to make a branch that changes
|
||||
something in there -- but of course in the git-annex implementation it
|
||||
stores the equvilant thing in the git-annex branch.
|
||||
|
||||
I do hope I'm not closing off the design space from such differences
|
||||
by dropping a compute special remote right into git-annex. But I also
|
||||
expect that having a standard and easy way for at least simple
|
||||
computations will lead to a lot of contributions as others use it.
|
||||
|
||||
Your fMRI case seems like one that my compute remote could handle well
|
||||
and easily.
|
||||
"""]]
|
|
@ -0,0 +1,69 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 22"""
|
||||
date="2025-03-06T17:54:50Z"
|
||||
content="""
|
||||
I've merged the compute special remote now.
|
||||
See [[special_remotes/compute]], [[git-annex-addcomputed]]
|
||||
and [[git-annex-recompute]].
|
||||
|
||||
I have opened [[todo/compute_special_remote_remaining_todos]] with
|
||||
some various ways that I want to improve it further. Including, notably,
|
||||
computing on inputs from submodules, which is not currently supported at
|
||||
all.
|
||||
|
||||
----
|
||||
|
||||
Here I'll go down mih's original and quite useful design criteria and see
|
||||
how the compute special remote applies to them:
|
||||
|
||||
### Generate annex keys (that have never existed)
|
||||
|
||||
`git-annex addcomputed --fast`
|
||||
|
||||
### Re-generate annex keys
|
||||
|
||||
`git-annex addcomputed` optionally with the --reproducible option,
|
||||
followed by a later `git-annex get`
|
||||
|
||||
Another thing that fits under this heading is when one of the original
|
||||
input files has gotten modified, and you want to compute a new version of
|
||||
the output file from it, using the same method as was used to compute it
|
||||
before. That's `git-annex recompute $output_file`
|
||||
|
||||
### Worktree provisioning?
|
||||
|
||||
This is the main thing I didn't implement. Given that git-annex is working
|
||||
with large files and needs to support various filesystems and OS's that
|
||||
lack hardlinks and softlinks, it's hard to do this inexpensively.
|
||||
|
||||
Also, it turned out to make sense for the compute program to request
|
||||
the input files it needs, since this lets git-annex learn what the input
|
||||
files are, so it can make them available when regenerating a computed file
|
||||
later. And so the protocol just has git-annex respond with the path to
|
||||
the content of the file.
|
||||
|
||||
### Request one key, receive many
|
||||
|
||||
This is supported. (So is using multiple inputs to produce one (or more)
|
||||
outputs.)
|
||||
|
||||
### Instruction deposition
|
||||
|
||||
`git-annex addcomputed`
|
||||
|
||||
### Storage redundancy tests
|
||||
|
||||
It did make sense to have it automatically `git-annex get` the inputs.
|
||||
Well, I think it makes sense in most cases, this may become a tunable
|
||||
setting of the compute special remote.
|
||||
|
||||
### Trust
|
||||
|
||||
Handled by requiring the user install a `git-annex-compute-foo` command
|
||||
in PATH, and provide the name of the command to `initremote`.
|
||||
|
||||
And for later `enableremote` or `autoenable=true`, it will only
|
||||
allow programs that are listed in the annex.security.allowed-compute-programs
|
||||
git config.
|
||||
"""]]
|
69
doc/todo/compute_special_remote_remaining_todos.mdwn
Normal file
69
doc/todo/compute_special_remote_remaining_todos.mdwn
Normal file
|
@ -0,0 +1,69 @@
|
|||
This is the remainder of my todo list while I was building the
|
||||
compute special remote. --[[Joey]]
|
||||
|
||||
* write a tip showing how to use this
|
||||
|
||||
* Write some simple compute programs so we have something to start with.
|
||||
|
||||
- convert between images eg jpeg to png
|
||||
- run a command in a singularity container (that is one of the inputs)
|
||||
- run a wasm binary (that is one of the inputs)
|
||||
|
||||
* compute on input files in submodules
|
||||
|
||||
* annex.diskreserve can be violated if getting a file computes it but also
|
||||
some other output files, which get added to the annex.
|
||||
|
||||
* would be nice to have a way to see what computations are used by a
|
||||
compute remote for a file. Put it in `whereis` output? But it's not an
|
||||
url. Maybe a separate command? That would also allow querying for eg,
|
||||
what files are inputs for another file. Or it could be exposed in the
|
||||
Remote interface, and made into a file matching option.
|
||||
|
||||
* "getting input from <file>" message uses the original filename,
|
||||
but that file might have been renamed. Would be more clear to use
|
||||
whatever file in the tree currently points to the key it's getting
|
||||
(what if there is not one?)
|
||||
|
||||
* allow git-annex enableremote with program= explicitly specified,
|
||||
without checking annex.security.allowed-compute-programs
|
||||
|
||||
* addcomputed should honor annex.addunlocked.
|
||||
|
||||
What about recompute? It seems it should either write the new version of
|
||||
the file as an unlocked file when the old version was unlocked, or also
|
||||
honor annex.addunlocked.
|
||||
|
||||
Problem: Since recompute does not stage the file, it would have to write
|
||||
the content to the working tree. And then the user would need to
|
||||
git-annex add. But then, if the key was a VURL key, it would add it with
|
||||
the default backend instead, and the file would no longer use a computed
|
||||
key.
|
||||
|
||||
So it, seems that, for this to be done, recompute would need to stage the
|
||||
pointer file.
|
||||
|
||||
* recompute could ingest keys for other files than the one being
|
||||
recomputed, and remember them. Then recomputing those files could just
|
||||
use those keys, without re-running a computation. (Better than --others
|
||||
which got removed.)
|
||||
|
||||
* `git-annex recompute foo bar baz`, when foo depends on bar which depends
|
||||
on baz, and when baz has changed, will not recompute foo, because bar has
|
||||
not changed. It then recomputes bar. So running the command again is
|
||||
needed to recompute foo.
|
||||
|
||||
What it could do is, after it recomputes bar, notice that it already
|
||||
considered foo, and revisit foo, and recompute it then. It could either
|
||||
use a bloom filter to remember the files it considered but did not
|
||||
compute, or it could just notice that the command line includes foo
|
||||
(or includes a directory that contains foo), and then foo is not
|
||||
modified.
|
||||
|
||||
Or it could build a DAG and traverse it, but building a DAG of a large
|
||||
directory tree has its own problems.
|
||||
|
||||
* Should addcomputed honor annex.smallfiles? That would seem to imply
|
||||
that recompute should also support recomputing non-annexed files.
|
||||
Otherwise, adding a file and then recomputing it would vary in
|
||||
what the content of the file is, depending on annex.smallfiles setting.
|
|
@ -562,6 +562,7 @@ Executable git-annex
|
|||
Annex.FileMatcher
|
||||
Annex.Fixup
|
||||
Annex.GitOverlay
|
||||
Annex.GitShaKey
|
||||
Annex.HashObject
|
||||
Annex.Hook
|
||||
Annex.Import
|
||||
|
@ -654,6 +655,7 @@ Executable git-annex
|
|||
CmdLine.Usage
|
||||
Command
|
||||
Command.Add
|
||||
Command.AddComputed
|
||||
Command.AddUnused
|
||||
Command.AddUrl
|
||||
Command.Adjust
|
||||
|
@ -727,6 +729,7 @@ Executable git-annex
|
|||
Command.Proxy
|
||||
Command.Pull
|
||||
Command.Push
|
||||
Command.Recompute
|
||||
Command.ReKey
|
||||
Command.ReadPresentKey
|
||||
Command.RecvKey
|
||||
|
@ -930,6 +933,7 @@ Executable git-annex
|
|||
Remote.BitTorrent
|
||||
Remote.Borg
|
||||
Remote.Bup
|
||||
Remote.Compute
|
||||
Remote.Ddar
|
||||
Remote.Directory
|
||||
Remote.Directory.LegacyChunked
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue