Merge branch 'httpproto'

This commit is contained in:
Joey Hess 2024-07-29 11:33:39 -04:00
commit db66612b8f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 53 additions and 40 deletions

View file

@ -16,7 +16,6 @@ import P2P.Proxy
import P2P.Protocol import P2P.Protocol
import P2P.IO import P2P.IO
import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection) import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
import Annex.Content
import Annex.Concurrent import Annex.Concurrent
import Annex.Tmp import Annex.Tmp
import Logs.Proxy import Logs.Proxy
@ -209,12 +208,7 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
-- because the client will do its own verification. -- because the client will do its own verification.
let vc = Remote.NoVerify let vc = Remote.NoVerify
tryNonAsync (Remote.retrieveKeyFile r k af (fromRawFilePath tmpfile) nullMeterUpdate vc) >>= \case tryNonAsync (Remote.retrieveKeyFile r k af (fromRawFilePath tmpfile) nullMeterUpdate vc) >>= \case
Right v -> Right _ -> liftIO $ senddata offset tmpfile
ifM (verifyKeyContentPostRetrieval Remote.RetrievalVerifiableKeysSecure vc v k tmpfile)
( liftIO $ senddata offset tmpfile
, liftIO $ sendmessage $
ERROR "verification of content failed"
)
Left err -> liftIO $ propagateerror err Left err -> liftIO $ propagateerror err
senddata (Offset offset) f = do senddata (Offset offset) f = do

View file

@ -1,4 +1,4 @@
git-annex (10.20240702) UNRELEASED; urgency=medium git-annex (10.20240731) UNRELEASED; urgency=medium
* New HTTP API that is equivalent to the P2P protocol. * New HTTP API that is equivalent to the P2P protocol.
* annex+http and annex+https urls can be configured for * annex+http and annex+https urls can be configured for
@ -13,6 +13,7 @@ git-annex (10.20240702) UNRELEASED; urgency=medium
* Avoid potential data loss in situations where git-annex-shell or * Avoid potential data loss in situations where git-annex-shell or
git-annex remotedaemon is killed while locking a key to prevent its git-annex remotedaemon is killed while locking a key to prevent its
removal. removal.
* When proxying for a special remote, avoid unnecessary hashing.
* Added a dependency on clock. * Added a dependency on clock.
* Propagate --force to git-annex transferrer. * Propagate --force to git-annex transferrer.

View file

@ -314,9 +314,9 @@ servePut st resultmangle su apiver (DataLength len) (B64Key k) cu bypass baf mof
tooshortv <- liftIO newEmptyTMVarIO tooshortv <- liftIO newEmptyTMVarIO
content <- liftIO $ S.unSourceT stream (gather validityv tooshortv) content <- liftIO $ S.unSourceT stream (gather validityv tooshortv)
res <- withP2PConnection' apiver st cu su bypass sec auth WriteAction res <- withP2PConnection' apiver st cu su bypass sec auth WriteAction
(\cst -> cst { connectionWaitVar = False }) $ \conn -> (\cst -> cst { connectionWaitVar = False }) $ \conn -> do
liftIO $ void $ async $ checktooshort conn tooshortv
liftIO (protoaction conn content validitycheck) liftIO (protoaction conn content validitycheck)
`finally` checktooshort conn tooshortv
case res of case res of
Right (Right (Just plusuuids)) -> return $ resultmangle $ Right (Right (Just plusuuids)) -> return $ resultmangle $
PutResultPlus True (map B64UUID plusuuids) PutResultPlus True (map B64UUID plusuuids)
@ -385,8 +385,8 @@ servePut st resultmangle su apiver (DataLength len) (B64Key k) cu bypass baf mof
-- The connection can no longer be used when too short a DATA has -- The connection can no longer be used when too short a DATA has
-- been written to it. -- been written to it.
checktooshort conn tooshortv = checktooshort conn tooshortv = do
liftIO $ whenM (atomically $ fromMaybe True <$> tryTakeTMVar tooshortv) $ liftIO $ whenM (atomically $ takeTMVar tooshortv) $
closeP2PConnection conn closeP2PConnection conn
servePutOffset servePutOffset

View file

@ -220,7 +220,7 @@ withP2PConnections workerpool proxyconnectionpoolsize clusterconcurrency a = do
>>= atomically . putTMVar respvar >>= atomically . putTMVar respvar
servicer myuuid myproxies proxypool reqv relv endv servicer myuuid myproxies proxypool reqv relv endv
Left (Right releaseconn) -> do Left (Right releaseconn) -> do
releaseconn void $ tryNonAsync releaseconn
servicer myuuid myproxies proxypool reqv relv endv servicer myuuid myproxies proxypool reqv relv endv
Left (Left ()) -> return () Left (Left ()) -> return ()
@ -378,12 +378,18 @@ proxyConnection proxyconnectionpoolsize relv connparams workerpool proxypool pro
liftIO $ runNetProto proxyfromclientrunst proxyfromclientconn $ liftIO $ runNetProto proxyfromclientrunst proxyfromclientconn $
P2P.net P2P.receiveMessage P2P.net P2P.receiveMessage
let releaseconn returntopool = let closebothsides = do
liftIO $ closeConnection proxyfromclientconn
liftIO $ closeConnection clientconn
let releaseconn connstillusable = do
atomically $ void $ tryPutTMVar relv $ do atomically $ void $ tryPutTMVar relv $ do
unless connstillusable
closebothsides
r <- liftIO $ wait asyncworker r <- liftIO $ wait asyncworker
liftIO $ closeConnection proxyfromclientconn when connstillusable
liftIO $ closeConnection clientconn closebothsides
if returntopool if connstillusable
then liftIO $ do then liftIO $ do
now <- getPOSIXTime now <- getPOSIXTime
evicted <- atomically $ putProxyConnectionPool proxypool proxyconnectionpoolsize connparams $ evicted <- atomically $ putProxyConnectionPool proxypool proxyconnectionpoolsize connparams $
@ -539,13 +545,20 @@ instance Show ProxyConnection where
openedProxyConnection openedProxyConnection
:: UUID :: UUID
-> String
-> Proxy.ProxySelector -> Proxy.ProxySelector
-> Annex () -> Annex ()
-> Proxy.ConcurrencyConfig -> Proxy.ConcurrencyConfig
-> IO ProxyConnection -> Annex ProxyConnection
openedProxyConnection u selector closer concurrency = do openedProxyConnection u desc selector closer concurrency = do
now <- getPOSIXTime now <- liftIO getPOSIXTime
return $ ProxyConnection u selector closer concurrency now fastDebug "P2P.Http" ("Opened proxy connection to " ++ desc)
return $ ProxyConnection u selector closer' concurrency now
where
closer' = do
fastDebug "P2P.Http" ("Closing proxy connection to " ++ desc)
closer
fastDebug "P2P.Http" ("Closed proxy connection to " ++ desc)
openProxyConnectionToRemote openProxyConnectionToRemote
:: AnnexWorkerPool :: AnnexWorkerPool
@ -557,7 +570,8 @@ openProxyConnectionToRemote workerpool clientmaxversion bypass remote =
inAnnexWorker' workerpool $ do inAnnexWorker' workerpool $ do
remoteside <- proxyRemoteSide clientmaxversion bypass remote remoteside <- proxyRemoteSide clientmaxversion bypass remote
concurrencyconfig <- Proxy.noConcurrencyConfig concurrencyconfig <- Proxy.noConcurrencyConfig
liftIO $ openedProxyConnection (Remote.uuid remote) openedProxyConnection (Remote.uuid remote)
("remote " ++ Remote.name remote)
(Proxy.singleProxySelector remoteside) (Proxy.singleProxySelector remoteside)
(Proxy.closeRemoteSide remoteside) (Proxy.closeRemoteSide remoteside)
concurrencyconfig concurrencyconfig
@ -576,7 +590,8 @@ openProxyConnectionToCluster workerpool clientmaxversion bypass clusteruuid conc
(proxyselector, closenodes) <- (proxyselector, closenodes) <-
clusterProxySelector clusteruuid clientmaxversion bypass clusterProxySelector clusteruuid clientmaxversion bypass
concurrencyconfig <- Proxy.mkConcurrencyConfig concurrency concurrencyconfig <- Proxy.mkConcurrencyConfig concurrency
liftIO $ openedProxyConnection (fromClusterUUID clusteruuid) openedProxyConnection (fromClusterUUID clusteruuid)
("cluster " ++ fromUUID (fromClusterUUID clusteruuid))
proxyselector closenodes concurrencyconfig proxyselector closenodes concurrencyconfig
type ProxyConnectionPool = (Integer, M.Map ProxyConnectionPoolKey [ProxyConnection]) type ProxyConnectionPool = (Integer, M.Map ProxyConnectionPoolKey [ProxyConnection])

View file

@ -9,14 +9,9 @@
module P2P.Http.Url where module P2P.Http.Url where
import Types.UUID
import Utility.FileSystemEncoding
import Utility.PartialPrelude
import Data.List import Data.List
import Network.URI import Network.URI
import System.FilePath.Posix as P import System.FilePath.Posix as P
import qualified Data.UUID as UUID
#ifdef WITH_SERVANT #ifdef WITH_SERVANT
import Servant.Client (BaseUrl(..), Scheme(..)) import Servant.Client (BaseUrl(..), Scheme(..))
import Text.Read import Text.Read

View file

@ -227,7 +227,7 @@ gen r u rc gc rs
, localpath = localpathCalc r , localpath = localpathCalc r
, getRepo = getRepoFromState st , getRepo = getRepoFromState st
, gitconfig = gc , gitconfig = gc
, readonly = Git.repoIsHttp r , readonly = Git.repoIsHttp r && not (isP2PHttp' gc)
, appendonly = False , appendonly = False
, untrustworthy = False , untrustworthy = False
, availability = repoAvail r , availability = repoAvail r

View file

@ -3,8 +3,8 @@ as shown in the tip [[setup_a_public_repository_on_a_web_site]].
That's limited to basic read-only repository access though. Git That's limited to basic read-only repository access though. Git
has [smart HTTP](https://git-scm.com/book/en/v2/Git-on-the-Server-Smart-HTTP) has [smart HTTP](https://git-scm.com/book/en/v2/Git-on-the-Server-Smart-HTTP)
that can be used to allow pushes over http. And git-annex has an that can be used to allow pushes over http. And git-annex
equivalent, the [[git annex-p2phttp command|/git-annex-p2phttp]]. has the [[git annex-p2phttp command|/git-annex-p2phttp]].
As well as allowing write access to authorized users over http, As well as allowing write access to authorized users over http,
`git-annex p2phttp` also allows accessing [[clusters]], and other proxied `git-annex p2phttp` also allows accessing [[clusters]], and other proxied
@ -37,3 +37,7 @@ In the git config file of the repository, set `annex.url` to the "annex+http"
(or "annex+https") url. The first time it uses a http remote, git-annex (or "annex+https") url. The first time it uses a http remote, git-annex
downloads its git config file, and sets `remote.name.annexUrl` to the value downloads its git config file, and sets `remote.name.annexUrl` to the value
of the remote's `annex.url`. of the remote's `annex.url`.
Support for this first appeared in git-annex version 10.20240731. Users of
older git-annex won't be able to use the smart http server, but can still
clone the repository from the dumb http server.

View file

@ -28,13 +28,6 @@ Planned schedule of work:
## work notes ## work notes
* An interrupted PUT to cluster that has a node that is a special remote
over http leaves open the connection to the cluster, so the next request
opens another one.
So does an interrupted PUT directly to the proxied
special remote over http.
* When part of a file has been sent to a cluster via the http server, * When part of a file has been sent to a cluster via the http server,
the transfer interrupted, and another node is added to the cluster, the transfer interrupted, and another node is added to the cluster,
and the transfer of the file performed again, there is a failure and the transfer of the file performed again, there is a failure
@ -52,7 +45,10 @@ Planned schedule of work:
special remote. This violates a usual invariant that any data being special remote. This violates a usual invariant that any data being
received into a repository gets verified in passing. Although on the received into a repository gets verified in passing. Although on the
other hand, when sending data to a special remote normally, there is also other hand, when sending data to a special remote normally, there is also
no verification. no verification. On the third hand, a p2p http proxy (or for that matter
a ssh server) may have users who are allowed to store objects, but are
not really trusted, and if they can upload garbage without verification,
that could be bad.
## items deferred until later for p2p protocol over http ## items deferred until later for p2p protocol over http
@ -60,7 +56,15 @@ Planned schedule of work:
time (not as proxied remotes), so that eg, every git-annex repository time (not as proxied remotes), so that eg, every git-annex repository
on a server can be served on the same port. on a server can be served on the same port.
* Support proxying to git remotes that use annex+http urls. * Support proxying to git remotes that use annex+http urls. This needs a
translation from P2P protocol to servant-client to P2P protocol.
* Should be possible to use a git-remote-annex annex::$uuid url as
remote.foo.url with remote.foo.annexUrl using annex+http, and so
not need a separate web server to serve the git repository. Doesn't work
currently because git-remote-annex urls only support special remotes.
It would need a new form of git-remote-annex url, eg:
annex::$uuid?annex+http://example.com/git-annex/
* `git-annex p2phttp` could support systemd socket activation. This would * `git-annex p2phttp` could support systemd socket activation. This would
allow making a systemd unit that listens on port 80. allow making a systemd unit that listens on port 80.