2024-06-18 14:51:37 +00:00
|
|
|
{- proxying
|
|
|
|
-
|
|
|
|
- Copyright 2024 Joey Hess <id@joeyh.name>
|
|
|
|
-
|
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
module Annex.Proxy where
|
|
|
|
|
|
|
|
import Annex.Common
|
2024-07-25 17:15:05 +00:00
|
|
|
import qualified Annex
|
2024-06-28 17:22:56 +00:00
|
|
|
import qualified Remote
|
|
|
|
import qualified Types.Remote as Remote
|
|
|
|
import qualified Remote.Git
|
2024-07-25 17:15:05 +00:00
|
|
|
import P2P.Proxy
|
|
|
|
import P2P.Protocol
|
|
|
|
import P2P.IO
|
2024-06-18 14:51:37 +00:00
|
|
|
import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
|
2024-06-28 17:22:56 +00:00
|
|
|
import Annex.Concurrent
|
2024-06-28 19:32:00 +00:00
|
|
|
import Annex.Tmp
|
2024-07-29 17:39:28 +00:00
|
|
|
import Annex.Verify
|
2024-08-06 18:18:30 +00:00
|
|
|
import Annex.UUID
|
2024-07-25 17:15:05 +00:00
|
|
|
import Logs.Proxy
|
|
|
|
import Logs.Cluster
|
|
|
|
import Logs.UUID
|
2024-07-26 14:24:23 +00:00
|
|
|
import Logs.Location
|
2024-06-28 19:32:00 +00:00
|
|
|
import Utility.Tmp.Dir
|
|
|
|
import Utility.Metered
|
2024-08-06 18:18:30 +00:00
|
|
|
import Git.Types
|
proxy stores received keys to known export locations
This handles the workflow where the branch is first pushed to the proxy,
and then files in the exported tree are later are copied to the proxied remote.
Turns out that the way the export log is structured, nothing needs
to be done to finalize the export once the last key is sent to it. Which
is great because that would have been a lot of complication. On
receiving the push, Command.Export runs and calls recordExportBeginning,
does as much as it can to update the export with the files currently
on it, and then calls recordExportUnderway. At that point, the
export.log records the export as "complete", but it's not really. And
that's fine. The same happens when using `git-annex export` when some
files are not available to send. Other repositories that have
access to the special remote can already retrieve files from it. As
the missing files get copied to the exported remote, all that needs
to be done is record each in the export db.
At this point, proxying to exporttree=yes annexobjects=yes special remotes
is fully working. Except for in the case where multiple files in the
tree use the same key, and the files are sent to the proxied remote
before pushing the tree.
It seems that even special remotes without annexobjects=yes will work if
used with the workflow where the git-annex branch is pushed before
copying files. But not with the `git-annex push` workflow.
2024-08-07 13:38:15 +00:00
|
|
|
import qualified Database.Export as Export
|
2024-06-18 14:51:37 +00:00
|
|
|
|
2024-06-28 17:22:56 +00:00
|
|
|
import Control.Concurrent.STM
|
|
|
|
import Control.Concurrent.Async
|
2024-07-28 19:34:59 +00:00
|
|
|
import qualified Data.ByteString as B
|
2024-06-28 17:22:56 +00:00
|
|
|
import qualified Data.ByteString.Lazy as L
|
2024-06-28 19:32:00 +00:00
|
|
|
import qualified System.FilePath.ByteString as P
|
2024-07-25 17:15:05 +00:00
|
|
|
import qualified Data.Map as M
|
|
|
|
import qualified Data.Set as S
|
2024-06-28 17:22:56 +00:00
|
|
|
|
|
|
|
proxyRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide
|
|
|
|
proxyRemoteSide clientmaxversion bypass r
|
|
|
|
| Remote.remotetype r == Remote.Git.remote =
|
|
|
|
proxyGitRemoteSide clientmaxversion bypass r
|
|
|
|
| otherwise =
|
|
|
|
proxySpecialRemoteSide clientmaxversion r
|
|
|
|
|
|
|
|
proxyGitRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide
|
|
|
|
proxyGitRemoteSide clientmaxversion bypass r = mkRemoteSide r $
|
2024-06-27 18:36:55 +00:00
|
|
|
openP2PShellConnection' r clientmaxversion bypass >>= \case
|
|
|
|
Just conn@(OpenConnection (remoterunst, remoteconn, _)) ->
|
|
|
|
return $ Just
|
|
|
|
( remoterunst
|
|
|
|
, remoteconn
|
|
|
|
, void $ liftIO $ closeP2PShellConnection conn
|
|
|
|
)
|
|
|
|
_ -> return Nothing
|
2024-06-28 17:22:56 +00:00
|
|
|
|
|
|
|
proxySpecialRemoteSide :: ProtocolVersion -> Remote -> Annex RemoteSide
|
|
|
|
proxySpecialRemoteSide clientmaxversion r = mkRemoteSide r $ do
|
|
|
|
let protoversion = min clientmaxversion maxProtocolVersion
|
|
|
|
remoterunst <- Serving (Remote.uuid r) Nothing <$>
|
|
|
|
liftIO (newTVarIO protoversion)
|
|
|
|
ihdl <- liftIO newEmptyTMVarIO
|
|
|
|
ohdl <- liftIO newEmptyTMVarIO
|
2024-06-28 21:07:01 +00:00
|
|
|
iwaitv <- liftIO newEmptyTMVarIO
|
|
|
|
owaitv <- liftIO newEmptyTMVarIO
|
2024-07-26 19:25:15 +00:00
|
|
|
iclosedv <- liftIO newEmptyTMVarIO
|
|
|
|
oclosedv <- liftIO newEmptyTMVarIO
|
proxy stores received keys to known export locations
This handles the workflow where the branch is first pushed to the proxy,
and then files in the exported tree are later are copied to the proxied remote.
Turns out that the way the export log is structured, nothing needs
to be done to finalize the export once the last key is sent to it. Which
is great because that would have been a lot of complication. On
receiving the push, Command.Export runs and calls recordExportBeginning,
does as much as it can to update the export with the files currently
on it, and then calls recordExportUnderway. At that point, the
export.log records the export as "complete", but it's not really. And
that's fine. The same happens when using `git-annex export` when some
files are not available to send. Other repositories that have
access to the special remote can already retrieve files from it. As
the missing files get copied to the exported remote, all that needs
to be done is record each in the export db.
At this point, proxying to exporttree=yes annexobjects=yes special remotes
is fully working. Except for in the case where multiple files in the
tree use the same key, and the files are sent to the proxied remote
before pushing the tree.
It seems that even special remotes without annexobjects=yes will work if
used with the workflow where the git-annex branch is pushed before
copying files. But not with the `git-annex push` workflow.
2024-08-07 13:38:15 +00:00
|
|
|
exportdb <- ifM (Remote.isExportSupported r)
|
|
|
|
( Just <$> Export.openDb (Remote.uuid r)
|
|
|
|
, pure Nothing
|
|
|
|
)
|
2024-06-28 17:22:56 +00:00
|
|
|
worker <- liftIO . async =<< forkState
|
proxy stores received keys to known export locations
This handles the workflow where the branch is first pushed to the proxy,
and then files in the exported tree are later are copied to the proxied remote.
Turns out that the way the export log is structured, nothing needs
to be done to finalize the export once the last key is sent to it. Which
is great because that would have been a lot of complication. On
receiving the push, Command.Export runs and calls recordExportBeginning,
does as much as it can to update the export with the files currently
on it, and then calls recordExportUnderway. At that point, the
export.log records the export as "complete", but it's not really. And
that's fine. The same happens when using `git-annex export` when some
files are not available to send. Other repositories that have
access to the special remote can already retrieve files from it. As
the missing files get copied to the exported remote, all that needs
to be done is record each in the export db.
At this point, proxying to exporttree=yes annexobjects=yes special remotes
is fully working. Except for in the case where multiple files in the
tree use the same key, and the files are sent to the proxied remote
before pushing the tree.
It seems that even special remotes without annexobjects=yes will work if
used with the workflow where the git-annex branch is pushed before
copying files. But not with the `git-annex push` workflow.
2024-08-07 13:38:15 +00:00
|
|
|
(proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv exportdb)
|
2024-06-28 17:22:56 +00:00
|
|
|
let remoteconn = P2PConnection
|
|
|
|
{ connRepo = Nothing
|
|
|
|
, connCheckAuth = const False
|
2024-07-26 19:25:15 +00:00
|
|
|
, connIhdl = P2PHandleTMVar ihdl (Just iwaitv) iclosedv
|
|
|
|
, connOhdl = P2PHandleTMVar ohdl (Just owaitv) oclosedv
|
2024-06-28 17:22:56 +00:00
|
|
|
, connIdent = ConnIdent (Just (Remote.name r))
|
|
|
|
}
|
|
|
|
let closeremoteconn = do
|
2024-07-28 19:11:31 +00:00
|
|
|
liftIO $ atomically $ putTMVar oclosedv ()
|
2024-06-28 17:22:56 +00:00
|
|
|
join $ liftIO (wait worker)
|
proxy stores received keys to known export locations
This handles the workflow where the branch is first pushed to the proxy,
and then files in the exported tree are later are copied to the proxied remote.
Turns out that the way the export log is structured, nothing needs
to be done to finalize the export once the last key is sent to it. Which
is great because that would have been a lot of complication. On
receiving the push, Command.Export runs and calls recordExportBeginning,
does as much as it can to update the export with the files currently
on it, and then calls recordExportUnderway. At that point, the
export.log records the export as "complete", but it's not really. And
that's fine. The same happens when using `git-annex export` when some
files are not available to send. Other repositories that have
access to the special remote can already retrieve files from it. As
the missing files get copied to the exported remote, all that needs
to be done is record each in the export db.
At this point, proxying to exporttree=yes annexobjects=yes special remotes
is fully working. Except for in the case where multiple files in the
tree use the same key, and the files are sent to the proxied remote
before pushing the tree.
It seems that even special remotes without annexobjects=yes will work if
used with the workflow where the git-annex branch is pushed before
copying files. But not with the `git-annex push` workflow.
2024-08-07 13:38:15 +00:00
|
|
|
maybe noop Export.closeDb exportdb
|
2024-06-28 17:22:56 +00:00
|
|
|
return $ Just
|
|
|
|
( remoterunst
|
|
|
|
, remoteconn
|
|
|
|
, closeremoteconn
|
|
|
|
)
|
|
|
|
|
|
|
|
-- Proxy for the special remote, speaking the P2P protocol.
|
|
|
|
proxySpecialRemote
|
|
|
|
:: ProtocolVersion
|
|
|
|
-> Remote
|
|
|
|
-> TMVar (Either L.ByteString Message)
|
|
|
|
-> TMVar (Either L.ByteString Message)
|
|
|
|
-> TMVar ()
|
2024-06-28 21:07:01 +00:00
|
|
|
-> TMVar ()
|
proxy stores received keys to known export locations
This handles the workflow where the branch is first pushed to the proxy,
and then files in the exported tree are later are copied to the proxied remote.
Turns out that the way the export log is structured, nothing needs
to be done to finalize the export once the last key is sent to it. Which
is great because that would have been a lot of complication. On
receiving the push, Command.Export runs and calls recordExportBeginning,
does as much as it can to update the export with the files currently
on it, and then calls recordExportUnderway. At that point, the
export.log records the export as "complete", but it's not really. And
that's fine. The same happens when using `git-annex export` when some
files are not available to send. Other repositories that have
access to the special remote can already retrieve files from it. As
the missing files get copied to the exported remote, all that needs
to be done is record each in the export db.
At this point, proxying to exporttree=yes annexobjects=yes special remotes
is fully working. Except for in the case where multiple files in the
tree use the same key, and the files are sent to the proxied remote
before pushing the tree.
It seems that even special remotes without annexobjects=yes will work if
used with the workflow where the git-annex branch is pushed before
copying files. But not with the `git-annex push` workflow.
2024-08-07 13:38:15 +00:00
|
|
|
-> Maybe Export.ExportHandle
|
2024-06-28 17:22:56 +00:00
|
|
|
-> Annex ()
|
proxy stores received keys to known export locations
This handles the workflow where the branch is first pushed to the proxy,
and then files in the exported tree are later are copied to the proxied remote.
Turns out that the way the export log is structured, nothing needs
to be done to finalize the export once the last key is sent to it. Which
is great because that would have been a lot of complication. On
receiving the push, Command.Export runs and calls recordExportBeginning,
does as much as it can to update the export with the files currently
on it, and then calls recordExportUnderway. At that point, the
export.log records the export as "complete", but it's not really. And
that's fine. The same happens when using `git-annex export` when some
files are not available to send. Other repositories that have
access to the special remote can already retrieve files from it. As
the missing files get copied to the exported remote, all that needs
to be done is record each in the export db.
At this point, proxying to exporttree=yes annexobjects=yes special remotes
is fully working. Except for in the case where multiple files in the
tree use the same key, and the files are sent to the proxied remote
before pushing the tree.
It seems that even special remotes without annexobjects=yes will work if
used with the workflow where the git-annex branch is pushed before
copying files. But not with the `git-annex push` workflow.
2024-08-07 13:38:15 +00:00
|
|
|
proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go
|
2024-06-28 17:22:56 +00:00
|
|
|
where
|
2024-06-28 19:32:00 +00:00
|
|
|
go :: Annex ()
|
|
|
|
go = liftIO receivemessage >>= \case
|
2024-06-28 17:22:56 +00:00
|
|
|
Just (CHECKPRESENT k) -> do
|
|
|
|
tryNonAsync (Remote.checkPresent r k) >>= \case
|
2024-06-28 19:32:00 +00:00
|
|
|
Right True -> liftIO $ sendmessage SUCCESS
|
|
|
|
Right False -> liftIO $ sendmessage FAILURE
|
|
|
|
Left err -> liftIO $ propagateerror err
|
2024-06-28 17:22:56 +00:00
|
|
|
go
|
|
|
|
Just (LOCKCONTENT _) -> do
|
|
|
|
-- Special remotes do not support locking content.
|
2024-06-28 19:32:00 +00:00
|
|
|
liftIO $ sendmessage FAILURE
|
2024-06-28 17:22:56 +00:00
|
|
|
go
|
|
|
|
Just (REMOVE k) -> do
|
toward SafeDropProof expiry checking
Added Maybe POSIXTime to SafeDropProof, which gets set when the proof is
based on a LockedCopy. If there are several LockedCopies, it uses the
closest expiry time. That is not optimal, it may be that the proof
expires based on one LockedCopy but another one has not expired. But
that seems unlikely to really happen, and anyway the user can just
re-run a drop if it fails due to expiry.
Pass the SafeDropProof to removeKey, which is responsible for checking
it for expiry in situations where that could be a problem. Which really
only means in Remote.Git.
Made Remote.Git check expiry when dropping from a local remote.
Checking expiry when dropping from a P2P remote is not yet implemented.
P2P.Protocol.remove has SafeDropProof plumbed through to it for that
purpose.
Fixing the remaining 2 build warnings should complete this work.
Note that the use of a POSIXTime here means that if the clock gets set
forward while git-annex is in the middle of a drop, it may say that
dropping took too long. That seems ok. Less ok is that if the clock gets
turned back a sufficient amount (eg 5 minutes), proof expiry won't be
noticed. It might be better to use the Monotonic clock, but that doesn't
advance when a laptop is suspended, and while there is the linux
Boottime clock, that is not available on other systems. Perhaps a
combination of POSIXTime and the Monotonic clock could detect laptop
suspension and also detect clock being turned back?
There is a potential future flag day where
p2pDefaultLockContentRetentionDuration is not assumed, but is probed
using the P2P protocol, and peers that don't support it can no longer
produce a LockedCopy. Until that happens, when git-annex is
communicating with older peers there is a risk of data loss when
a ssh connection closes during LOCKCONTENT.
2024-07-04 16:23:46 +00:00
|
|
|
tryNonAsync (Remote.removeKey r Nothing k) >>= \case
|
2024-06-28 19:32:00 +00:00
|
|
|
Right () -> liftIO $ sendmessage SUCCESS
|
|
|
|
Left err -> liftIO $ propagateerror err
|
2024-06-28 17:22:56 +00:00
|
|
|
go
|
2024-06-28 21:07:01 +00:00
|
|
|
Just (PUT (ProtoAssociatedFile af) k) -> do
|
|
|
|
proxyput af k
|
|
|
|
go
|
2024-06-28 19:32:00 +00:00
|
|
|
Just (GET offset (ProtoAssociatedFile af) k) -> do
|
|
|
|
proxyget offset af k
|
|
|
|
go
|
2024-06-28 18:07:23 +00:00
|
|
|
Just (BYPASS _) -> go
|
2024-06-28 17:22:56 +00:00
|
|
|
Just (CONNECT _) ->
|
|
|
|
-- Not supported and the protocol ends here.
|
2024-06-28 19:32:00 +00:00
|
|
|
liftIO $ sendmessage $ CONNECTDONE (ExitFailure 1)
|
2024-06-28 17:22:56 +00:00
|
|
|
Just NOTIFYCHANGE -> do
|
2024-06-28 19:32:00 +00:00
|
|
|
liftIO $ sendmessage $
|
|
|
|
ERROR "NOTIFYCHANGE unsupported for a special remote"
|
2024-06-28 17:22:56 +00:00
|
|
|
go
|
2024-07-26 01:36:10 +00:00
|
|
|
Just _ -> giveup "protocol error"
|
2024-06-28 17:22:56 +00:00
|
|
|
Nothing -> return ()
|
|
|
|
|
2024-07-28 19:11:31 +00:00
|
|
|
receivemessage = liftIO (atomically recv) >>= \case
|
2024-06-28 17:22:56 +00:00
|
|
|
Right (Right m) -> return (Just m)
|
|
|
|
Right (Left _b) -> giveup "unexpected ByteString received from P2P MVar"
|
|
|
|
Left () -> return Nothing
|
2024-07-28 19:11:31 +00:00
|
|
|
where
|
|
|
|
recv =
|
|
|
|
(Right <$> takeTMVar ohdl)
|
|
|
|
`orElse`
|
|
|
|
(Left <$> readTMVar oclosedv)
|
2024-06-28 21:07:01 +00:00
|
|
|
|
2024-07-28 19:11:31 +00:00
|
|
|
receivebytestring = atomically recv >>= \case
|
2024-07-29 17:39:28 +00:00
|
|
|
Right (Left b) -> return (Just b)
|
2024-07-28 19:11:31 +00:00
|
|
|
Right (Right _m) -> giveup "did not receive ByteString from P2P MVar"
|
2024-07-29 17:39:28 +00:00
|
|
|
Left () -> return Nothing
|
2024-07-28 19:11:31 +00:00
|
|
|
where
|
|
|
|
recv =
|
|
|
|
(Right <$> takeTMVar ohdl)
|
|
|
|
`orElse`
|
|
|
|
(Left <$> readTMVar oclosedv)
|
2024-06-28 17:22:56 +00:00
|
|
|
|
2024-06-28 19:32:00 +00:00
|
|
|
sendmessage m = atomically $ putTMVar ihdl (Right m)
|
|
|
|
|
|
|
|
sendbytestring b = atomically $ putTMVar ihdl (Left b)
|
2024-06-28 17:42:25 +00:00
|
|
|
|
|
|
|
propagateerror err = sendmessage $ ERROR $
|
|
|
|
"proxied special remote reports: " ++ show err
|
2024-06-28 19:32:00 +00:00
|
|
|
|
|
|
|
-- Not using gitAnnexTmpObjectLocation because there might be
|
|
|
|
-- several concurrent GET and PUTs of the same key being proxied
|
|
|
|
-- from this special remote or others, and each needs to happen
|
|
|
|
-- independently. Also, this key is not getting added into the
|
|
|
|
-- local annex objects.
|
|
|
|
withproxytmpfile k a = withOtherTmp $ \othertmpdir ->
|
|
|
|
withTmpDirIn (fromRawFilePath othertmpdir) "proxy" $ \tmpdir ->
|
|
|
|
a (toRawFilePath tmpdir P.</> keyFile k)
|
2024-06-28 21:07:01 +00:00
|
|
|
|
2024-07-29 17:39:28 +00:00
|
|
|
-- Verify the content received from the client, to avoid bad content
|
|
|
|
-- being stored in the special remote.
|
2024-06-28 21:07:01 +00:00
|
|
|
proxyput af k = do
|
2024-07-01 14:53:49 +00:00
|
|
|
liftIO $ sendmessage $ PUT_FROM (Offset 0)
|
|
|
|
withproxytmpfile k $ \tmpfile -> do
|
proxy stores received keys to known export locations
This handles the workflow where the branch is first pushed to the proxy,
and then files in the exported tree are later are copied to the proxied remote.
Turns out that the way the export log is structured, nothing needs
to be done to finalize the export once the last key is sent to it. Which
is great because that would have been a lot of complication. On
receiving the push, Command.Export runs and calls recordExportBeginning,
does as much as it can to update the export with the files currently
on it, and then calls recordExportUnderway. At that point, the
export.log records the export as "complete", but it's not really. And
that's fine. The same happens when using `git-annex export` when some
files are not available to send. Other repositories that have
access to the special remote can already retrieve files from it. As
the missing files get copied to the exported remote, all that needs
to be done is record each in the export db.
At this point, proxying to exporttree=yes annexobjects=yes special remotes
is fully working. Except for in the case where multiple files in the
tree use the same key, and the files are sent to the proxied remote
before pushing the tree.
It seems that even special remotes without annexobjects=yes will work if
used with the workflow where the git-annex branch is pushed before
copying files. But not with the `git-annex push` workflow.
2024-08-07 13:38:15 +00:00
|
|
|
let store = tryNonAsync (storeput k af (decodeBS tmpfile)) >>= \case
|
2024-07-01 14:53:49 +00:00
|
|
|
Right () -> liftIO $ sendmessage SUCCESS
|
2024-06-28 21:07:01 +00:00
|
|
|
Left err -> liftIO $ propagateerror err
|
|
|
|
liftIO receivemessage >>= \case
|
2024-07-28 19:34:59 +00:00
|
|
|
Just (DATA (Len len)) -> do
|
2024-07-29 17:39:28 +00:00
|
|
|
iv <- startVerifyKeyContentIncrementally Remote.AlwaysVerify k
|
2024-07-28 19:34:59 +00:00
|
|
|
h <- liftIO $ openFile (fromRawFilePath tmpfile) WriteMode
|
2024-07-29 17:39:28 +00:00
|
|
|
gotall <- liftIO $ receivetofile iv h len
|
2024-07-28 19:34:59 +00:00
|
|
|
liftIO $ hClose h
|
2024-07-29 17:39:28 +00:00
|
|
|
verified <- if gotall
|
|
|
|
then fst <$> finishVerifyKeyContentIncrementally' True iv
|
|
|
|
else pure False
|
2024-06-28 21:07:01 +00:00
|
|
|
if protoversion > ProtocolVersion 1
|
2024-07-01 14:53:49 +00:00
|
|
|
then liftIO receivemessage >>= \case
|
2024-07-29 17:39:28 +00:00
|
|
|
Just (VALIDITY Valid)
|
|
|
|
| verified -> store
|
|
|
|
| otherwise -> liftIO $ sendmessage FAILURE
|
2024-07-01 14:53:49 +00:00
|
|
|
Just (VALIDITY Invalid) ->
|
2024-07-28 13:14:42 +00:00
|
|
|
liftIO $ sendmessage FAILURE
|
2024-07-26 01:36:10 +00:00
|
|
|
_ -> giveup "protocol error"
|
2024-07-01 14:53:49 +00:00
|
|
|
else store
|
2024-07-26 01:36:10 +00:00
|
|
|
_ -> giveup "protocol error"
|
2024-07-28 18:36:45 +00:00
|
|
|
liftIO $ removeWhenExistsWith removeFile (fromRawFilePath tmpfile)
|
2024-06-28 19:32:00 +00:00
|
|
|
|
proxy stores received keys to known export locations
This handles the workflow where the branch is first pushed to the proxy,
and then files in the exported tree are later are copied to the proxied remote.
Turns out that the way the export log is structured, nothing needs
to be done to finalize the export once the last key is sent to it. Which
is great because that would have been a lot of complication. On
receiving the push, Command.Export runs and calls recordExportBeginning,
does as much as it can to update the export with the files currently
on it, and then calls recordExportUnderway. At that point, the
export.log records the export as "complete", but it's not really. And
that's fine. The same happens when using `git-annex export` when some
files are not available to send. Other repositories that have
access to the special remote can already retrieve files from it. As
the missing files get copied to the exported remote, all that needs
to be done is record each in the export db.
At this point, proxying to exporttree=yes annexobjects=yes special remotes
is fully working. Except for in the case where multiple files in the
tree use the same key, and the files are sent to the proxied remote
before pushing the tree.
It seems that even special remotes without annexobjects=yes will work if
used with the workflow where the git-annex branch is pushed before
copying files. But not with the `git-annex push` workflow.
2024-08-07 13:38:15 +00:00
|
|
|
storeput k af tmpfile = case mexportdb of
|
|
|
|
Just exportdb -> liftIO (Export.getExportTree exportdb k) >>= \case
|
|
|
|
[] -> storeputkey k af tmpfile
|
|
|
|
locs -> do
|
|
|
|
havelocs <- liftIO $ S.fromList
|
|
|
|
<$> Export.getExportedLocation exportdb k
|
|
|
|
let locs' = filter (`S.notMember` havelocs) locs
|
|
|
|
forM_ locs' $ \loc ->
|
|
|
|
storeputexport exportdb k loc tmpfile
|
|
|
|
liftIO $ Export.flushDbQueue exportdb
|
|
|
|
Nothing -> storeputkey k af tmpfile
|
|
|
|
|
|
|
|
storeputkey k af tmpfile =
|
|
|
|
Remote.storeKey r k af (Just tmpfile) nullMeterUpdate
|
|
|
|
|
|
|
|
storeputexport exportdb k loc tmpfile = do
|
|
|
|
Remote.storeExport (Remote.exportActions r) tmpfile k loc nullMeterUpdate
|
|
|
|
liftIO $ Export.addExportedLocation exportdb k loc
|
|
|
|
|
2024-07-29 17:39:28 +00:00
|
|
|
receivetofile iv h n = liftIO receivebytestring >>= \case
|
|
|
|
Just b -> do
|
|
|
|
liftIO $ atomically $
|
|
|
|
putTMVar owaitv ()
|
|
|
|
`orElse`
|
|
|
|
readTMVar oclosedv
|
|
|
|
n' <- storetofile iv h n (L.toChunks b)
|
|
|
|
-- Normally all the data is sent in a single
|
|
|
|
-- lazy bytestring. However, when the special
|
|
|
|
-- remote is a node in a cluster, a PUT is
|
|
|
|
-- streamed to it in multiple chunks.
|
|
|
|
if n' == 0
|
|
|
|
then return True
|
|
|
|
else receivetofile iv h n'
|
|
|
|
Nothing -> return False
|
2024-07-28 19:34:59 +00:00
|
|
|
|
2024-07-29 17:39:28 +00:00
|
|
|
storetofile _ _ n [] = pure n
|
|
|
|
storetofile iv h n (b:bs) = do
|
|
|
|
writeVerifyChunk iv h b
|
|
|
|
storetofile iv h (n - fromIntegral (B.length b)) bs
|
2024-07-28 19:34:59 +00:00
|
|
|
|
2024-06-28 19:32:00 +00:00
|
|
|
proxyget offset af k = withproxytmpfile k $ \tmpfile -> do
|
|
|
|
-- Don't verify the content from the remote,
|
|
|
|
-- because the client will do its own verification.
|
|
|
|
let vc = Remote.NoVerify
|
|
|
|
tryNonAsync (Remote.retrieveKeyFile r k af (fromRawFilePath tmpfile) nullMeterUpdate vc) >>= \case
|
2024-07-29 15:18:03 +00:00
|
|
|
Right _ -> liftIO $ senddata offset tmpfile
|
2024-06-28 19:32:00 +00:00
|
|
|
Left err -> liftIO $ propagateerror err
|
|
|
|
|
|
|
|
senddata (Offset offset) f = do
|
|
|
|
size <- fromIntegral <$> getFileSize f
|
|
|
|
let n = max 0 (size - offset)
|
|
|
|
sendmessage $ DATA (Len n)
|
|
|
|
withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do
|
|
|
|
hSeek h AbsoluteSeek offset
|
|
|
|
sendbs =<< L.hGetContents h
|
|
|
|
-- Important to keep the handle open until
|
|
|
|
-- the client responds. The bytestring
|
|
|
|
-- could still be lazily streaming out to
|
|
|
|
-- the client.
|
|
|
|
waitclientresponse
|
|
|
|
where
|
|
|
|
sendbs bs = do
|
|
|
|
sendbytestring bs
|
|
|
|
when (protoversion > ProtocolVersion 0) $
|
|
|
|
sendmessage (VALIDITY Valid)
|
|
|
|
|
|
|
|
waitclientresponse =
|
|
|
|
receivemessage >>= \case
|
|
|
|
Just SUCCESS -> return ()
|
|
|
|
Just FAILURE -> return ()
|
2024-07-26 01:36:10 +00:00
|
|
|
Just _ -> giveup "protocol error"
|
2024-06-28 19:32:00 +00:00
|
|
|
Nothing -> return ()
|
2024-07-25 17:15:05 +00:00
|
|
|
|
|
|
|
{- Check if this repository can proxy for a specified remote uuid,
|
|
|
|
- and if so enable proxying for it. -}
|
|
|
|
checkCanProxy :: UUID -> UUID -> Annex Bool
|
2024-08-06 18:18:30 +00:00
|
|
|
checkCanProxy remoteuuid myuuid = do
|
|
|
|
myproxies <- M.lookup myuuid <$> getProxies
|
|
|
|
checkCanProxy' myproxies remoteuuid >>= \case
|
2024-07-28 20:04:20 +00:00
|
|
|
Right v -> do
|
|
|
|
Annex.changeState $ \st -> st { Annex.proxyremote = Just v }
|
|
|
|
return True
|
|
|
|
Left Nothing -> return False
|
|
|
|
Left (Just err) -> giveup err
|
2024-07-25 17:15:05 +00:00
|
|
|
|
2024-07-28 20:04:20 +00:00
|
|
|
checkCanProxy' :: Maybe (S.Set Proxy) -> UUID -> Annex (Either (Maybe String) (Either ClusterUUID Remote))
|
|
|
|
checkCanProxy' Nothing _ = return (Left Nothing)
|
|
|
|
checkCanProxy' (Just proxies) remoteuuid =
|
|
|
|
case filter (\p -> proxyRemoteUUID p == remoteuuid) (S.toList proxies) of
|
|
|
|
[] -> notconfigured
|
|
|
|
ps -> case mkClusterUUID remoteuuid of
|
|
|
|
Just cu -> proxyforcluster cu
|
|
|
|
Nothing -> proxyfor ps
|
2024-07-25 17:15:05 +00:00
|
|
|
where
|
|
|
|
proxyfor ps = do
|
|
|
|
rs <- concat . Remote.byCost <$> Remote.remoteList
|
|
|
|
myclusters <- annexClusters <$> Annex.getGitConfig
|
2024-08-06 18:18:30 +00:00
|
|
|
case canProxyForRemote rs ps myclusters remoteuuid of
|
2024-07-25 17:15:05 +00:00
|
|
|
Nothing -> notconfigured
|
|
|
|
Just r -> return (Right (Right r))
|
|
|
|
|
|
|
|
proxyforcluster cu = do
|
|
|
|
clusters <- getClusters
|
|
|
|
if M.member cu (clusterUUIDs clusters)
|
|
|
|
then return (Right (Left cu))
|
|
|
|
else notconfigured
|
|
|
|
|
|
|
|
notconfigured = M.lookup remoteuuid <$> uuidDescMap >>= \case
|
|
|
|
Just desc -> return $ Left $ Just $
|
|
|
|
"not configured to proxy for repository " ++ fromUUIDDesc desc
|
|
|
|
Nothing -> return $ Left Nothing
|
2024-07-26 14:24:23 +00:00
|
|
|
|
2024-08-06 18:18:30 +00:00
|
|
|
{- Remotes that this repository is configured to proxy for.
|
|
|
|
-
|
|
|
|
- When there are multiple remotes that access the same repository,
|
|
|
|
- this picks the lowest cost one that is configured to be used as a proxy.
|
|
|
|
-}
|
|
|
|
proxyForRemotes :: Annex [Remote]
|
|
|
|
proxyForRemotes = do
|
|
|
|
myuuid <- getUUID
|
|
|
|
(M.lookup myuuid <$> getProxies) >>= \case
|
|
|
|
Nothing -> return []
|
|
|
|
Just myproxies -> do
|
|
|
|
let myproxies' = S.toList myproxies
|
|
|
|
rs <- concat . Remote.byCost <$> Remote.remoteList
|
|
|
|
myclusters <- annexClusters <$> Annex.getGitConfig
|
|
|
|
return $ mapMaybe (canProxyForRemote rs myproxies' myclusters . Remote.uuid) rs
|
|
|
|
|
|
|
|
-- Only proxy for a remote when the git configuration allows it.
|
|
|
|
-- This is important to prevent changes to the git-annex branch
|
|
|
|
-- causing unexpected proxying for remotes.
|
|
|
|
canProxyForRemote
|
|
|
|
:: [Remote] -- ^ must be sorted by cost
|
|
|
|
-> [Proxy]
|
|
|
|
-> M.Map RemoteName ClusterUUID
|
|
|
|
-> UUID
|
|
|
|
-> (Maybe Remote)
|
|
|
|
canProxyForRemote rs myproxies myclusters remoteuuid =
|
|
|
|
headMaybe $ filter canproxy rs
|
|
|
|
where
|
|
|
|
canproxy r =
|
|
|
|
sameuuid r &&
|
|
|
|
proxyisconfigured r &&
|
|
|
|
any (isproxyfor r) myproxies
|
|
|
|
|
|
|
|
sameuuid r = Remote.uuid r == remoteuuid
|
|
|
|
|
|
|
|
isproxyfor r p =
|
|
|
|
proxyRemoteUUID p == remoteuuid &&
|
|
|
|
Remote.name r == proxyRemoteName p
|
|
|
|
|
|
|
|
proxyisconfigured r
|
|
|
|
| remoteAnnexProxy (Remote.gitconfig r) = True
|
|
|
|
-- Proxy for remotes that are configured as cluster nodes.
|
|
|
|
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True
|
|
|
|
-- Proxy for a remote when it is proxied by another remote
|
|
|
|
-- which is itself configured as a cluster gateway.
|
|
|
|
| otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of
|
|
|
|
Just proxyuuid -> not $ null $
|
|
|
|
concatMap (remoteAnnexClusterGateway . Remote.gitconfig) $
|
|
|
|
filter (\p -> Remote.uuid p == proxyuuid) rs
|
|
|
|
Nothing -> False
|
|
|
|
|
2024-07-26 14:24:23 +00:00
|
|
|
mkProxyMethods :: ProxyMethods
|
|
|
|
mkProxyMethods = ProxyMethods
|
2024-08-23 20:35:12 +00:00
|
|
|
{ removedContent = \u k -> logChange NoLiveUpdate k u InfoMissing
|
|
|
|
, addedContent = \u k -> logChange NoLiveUpdate k u InfoPresent
|
2024-07-26 14:24:23 +00:00
|
|
|
}
|