Merge branch 'httpproto'

Joey Hess 2024-07-29 11:33:39 -04:00
commit db66612b8f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 53 additions and 40 deletions


@@ -16,7 +16,6 @@ import P2P.Proxy
import P2P.Protocol
import P2P.IO
import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
import Annex.Content
import Annex.Concurrent
import Annex.Tmp
import Logs.Proxy
@@ -209,12 +208,7 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
-- because the client will do its own verification.
let vc = Remote.NoVerify
tryNonAsync (Remote.retrieveKeyFile r k af (fromRawFilePath tmpfile) nullMeterUpdate vc) >>= \case
Right v ->
ifM (verifyKeyContentPostRetrieval Remote.RetrievalVerifiableKeysSecure vc v k tmpfile)
( liftIO $ senddata offset tmpfile
, liftIO $ sendmessage $
ERROR "verification of content failed"
)
Right _ -> liftIO $ senddata offset tmpfile
Left err -> liftIO $ propagateerror err
senddata (Offset offset) f = do
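
To make the new shape concrete, here is a minimal standalone sketch of the same fetch-then-send flow, with plain IO placeholders rather than git-annex's Remote API (all names here are illustrative):

    {-# LANGUAGE LambdaCase #-}
    import Control.Exception (SomeException, try)

    -- Retrieve the content to a temporary file, then either stream it to the
    -- client or report the failure. No hash verification happens on the proxy
    -- side, since the client verifies what it receives.
    fetchThenSend :: IO () -> (FilePath -> IO ()) -> (String -> IO ()) -> FilePath -> IO ()
    fetchThenSend retrieve senddata senderror tmpfile =
        (try retrieve :: IO (Either SomeException ())) >>= \case
            Right () -> senddata tmpfile
            Left err -> senderror (show err)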


@@ -1,4 +1,4 @@
git-annex (10.20240702) UNRELEASED; urgency=medium
git-annex (10.20240731) UNRELEASED; urgency=medium
* New HTTP API that is equivalent to the P2P protocol.
* annex+http and annex+https urls can be configured for
@@ -13,6 +13,7 @@ git-annex (10.20240702) UNRELEASED; urgency=medium
* Avoid potential data loss in situations where git-annex-shell or
git-annex remotedaemon is killed while locking a key to prevent its
removal.
* When proxying for a special remote, avoid unnecessary hashing.
* Added a dependency on clock.
* Propagate --force to git-annex transferrer.


@@ -314,9 +314,9 @@ servePut st resultmangle su apiver (DataLength len) (B64Key k) cu bypass baf mof
tooshortv <- liftIO newEmptyTMVarIO
content <- liftIO $ S.unSourceT stream (gather validityv tooshortv)
res <- withP2PConnection' apiver st cu su bypass sec auth WriteAction
(\cst -> cst { connectionWaitVar = False }) $ \conn ->
(\cst -> cst { connectionWaitVar = False }) $ \conn -> do
liftIO $ void $ async $ checktooshort conn tooshortv
liftIO (protoaction conn content validitycheck)
`finally` checktooshort conn tooshortv
case res of
Right (Right (Just plusuuids)) -> return $ resultmangle $
PutResultPlus True (map B64UUID plusuuids)
@@ -385,8 +385,8 @@ servePut st resultmangle su apiver (DataLength len) (B64Key k) cu bypass baf mof
-- The connection can no longer be used when too short a DATA has
-- been written to it.
checktooshort conn tooshortv =
liftIO $ whenM (atomically $ fromMaybe True <$> tryTakeTMVar tooshortv) $
checktooshort conn tooshortv = do
liftIO $ whenM (atomically $ takeTMVar tooshortv) $
closeP2PConnection conn
servePutOffset
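
The rearranged code registers the too-short check as a background watcher before running the protocol action, instead of as a `finally` cleanup, and the watcher now blocks on the TMVar rather than polling it. A minimal sketch of that pattern using only async and stm (no git-annex types; names are illustrative):

    import Control.Concurrent.Async (async)
    import Control.Concurrent.STM
    import Control.Monad (void, when)

    -- Spawn a watcher that blocks until the TMVar is filled, and closes the
    -- connection if too short a DATA was written to it, while the main
    -- protocol action runs concurrently.
    withShortWriteWatcher :: TMVar Bool -> IO () -> IO a -> IO a
    withShortWriteWatcher tooshortv closeconn protoaction = do
        void $ async $ do
            tooshort <- atomically $ takeTMVar tooshortv
            when tooshort closeconn
        protoaction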


@@ -220,7 +220,7 @@ withP2PConnections workerpool proxyconnectionpoolsize clusterconcurrency a = do
>>= atomically . putTMVar respvar
servicer myuuid myproxies proxypool reqv relv endv
Left (Right releaseconn) -> do
releaseconn
void $ tryNonAsync releaseconn
servicer myuuid myproxies proxypool reqv relv endv
Left (Left ()) -> return ()
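
Wrapping `releaseconn` in `tryNonAsync` keeps a failure while releasing one connection from escaping and killing the servicer loop. A rough standalone equivalent with base's Control.Exception (git-annex's `tryNonAsync` additionally lets asynchronous exceptions propagate):

    import Control.Exception (SomeException, try)
    import Control.Monad (void)

    -- Run a release action and swallow any synchronous exception it throws,
    -- so the calling service loop keeps going.
    releaseSafely :: IO () -> IO ()
    releaseSafely release = void (try release :: IO (Either SomeException ()))
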
@@ -378,12 +378,18 @@ proxyConnection proxyconnectionpoolsize relv connparams workerpool proxypool pro
liftIO $ runNetProto proxyfromclientrunst proxyfromclientconn $
P2P.net P2P.receiveMessage
let releaseconn returntopool =
let closebothsides = do
liftIO $ closeConnection proxyfromclientconn
liftIO $ closeConnection clientconn
let releaseconn connstillusable = do
atomically $ void $ tryPutTMVar relv $ do
unless connstillusable
closebothsides
r <- liftIO $ wait asyncworker
liftIO $ closeConnection proxyfromclientconn
liftIO $ closeConnection clientconn
if returntopool
when connstillusable
closebothsides
if connstillusable
then liftIO $ do
now <- getPOSIXTime
evicted <- atomically $ putProxyConnectionPool proxypool proxyconnectionpoolsize connparams $
@@ -539,13 +545,20 @@ instance Show ProxyConnection where
openedProxyConnection
:: UUID
-> String
-> Proxy.ProxySelector
-> Annex ()
-> Proxy.ConcurrencyConfig
-> IO ProxyConnection
openedProxyConnection u selector closer concurrency = do
now <- getPOSIXTime
return $ ProxyConnection u selector closer concurrency now
-> Annex ProxyConnection
openedProxyConnection u desc selector closer concurrency = do
now <- liftIO getPOSIXTime
fastDebug "P2P.Http" ("Opened proxy connection to " ++ desc)
return $ ProxyConnection u selector closer' concurrency now
where
closer' = do
fastDebug "P2P.Http" ("Closing proxy connection to " ++ desc)
closer
fastDebug "P2P.Http" ("Closed proxy connection to " ++ desc)
openProxyConnectionToRemote
:: AnnexWorkerPool
@@ -557,7 +570,8 @@ openProxyConnectionToRemote workerpool clientmaxversion bypass remote =
inAnnexWorker' workerpool $ do
remoteside <- proxyRemoteSide clientmaxversion bypass remote
concurrencyconfig <- Proxy.noConcurrencyConfig
liftIO $ openedProxyConnection (Remote.uuid remote)
openedProxyConnection (Remote.uuid remote)
("remote " ++ Remote.name remote)
(Proxy.singleProxySelector remoteside)
(Proxy.closeRemoteSide remoteside)
concurrencyconfig
@@ -576,7 +590,8 @@ openProxyConnectionToCluster workerpool clientmaxversion bypass clusteruuid conc
(proxyselector, closenodes) <-
clusterProxySelector clusteruuid clientmaxversion bypass
concurrencyconfig <- Proxy.mkConcurrencyConfig concurrency
liftIO $ openedProxyConnection (fromClusterUUID clusteruuid)
openedProxyConnection (fromClusterUUID clusteruuid)
("cluster " ++ fromUUID (fromClusterUUID clusteruuid))
proxyselector closenodes concurrencyconfig
type ProxyConnectionPool = (Integer, M.Map ProxyConnectionPoolKey [ProxyConnection])
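
The pool type above pairs a size counter with a map from connection parameters to cached connections. As an illustration only (this is not the real `putProxyConnectionPool`), a bounded insert over that shape could look like:

    import Control.Concurrent.STM
    import qualified Data.Map.Strict as M

    -- A size-bounded pool kept in a TVar: cache the connection under its key,
    -- or, if the pool is already at its limit, hand the connection back so
    -- the caller can close it instead of caching it.
    putPool :: Ord k => TVar (Integer, M.Map k [c]) -> Integer -> k -> c -> STM (Maybe c)
    putPool poolv maxsize key conn = do
        (n, m) <- readTVar poolv
        if n < maxsize
            then do
                writeTVar poolv (n + 1, M.insertWith (++) key [conn] m)
                return Nothing
            else return (Just conn)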


@@ -9,14 +9,9 @@
module P2P.Http.Url where
import Types.UUID
import Utility.FileSystemEncoding
import Utility.PartialPrelude
import Data.List
import Network.URI
import System.FilePath.Posix as P
import qualified Data.UUID as UUID
#ifdef WITH_SERVANT
import Servant.Client (BaseUrl(..), Scheme(..))
import Text.Read


@@ -227,7 +227,7 @@ gen r u rc gc rs
, localpath = localpathCalc r
, getRepo = getRepoFromState st
, gitconfig = gc
, readonly = Git.repoIsHttp r
, readonly = Git.repoIsHttp r && not (isP2PHttp' gc)
, appendonly = False
, untrustworthy = False
, availability = repoAvail r


@@ -3,8 +3,8 @@ as shown in the tip [[setup_a_public_repository_on_a_web_site]].
That's limited to basic read-only repository access though. Git
has [smart HTTP](https://git-scm.com/book/en/v2/Git-on-the-Server-Smart-HTTP)
that can be used to allow pushes over http. And git-annex has an
equivilant, the [[git annex-p2phttp command|/git-annex-p2phttp]].
that can be used to allow pushes over http. And git-annex
has the [[git annex-p2phttp command|/git-annex-p2phttp]].
As well as allowing write access to authorized users over http,
`git-annex p2phttp` also allows accessing [[clusters]], and other proxied
@@ -37,3 +37,7 @@ In the git config file of the repository, set `annex.url` to the "annex+http"
(or "annex+https") url. The first time it uses a http remote, git-annex
downloads its git config file, and sets `remote.name.annexUrl` to the value
of the remote's `annex.url`.
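
For example, on the server, in the repository being served (the url is a placeholder; use the address that `git-annex p2phttp` actually serves):

    git config annex.url annex+http://example.com/git-annex/
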
Support for this first appeared in git-annex version 10.20240731. Users of
older git-annex won't be able to use the smart http server, but can still
clone the repository from the dumb http server.


@@ -28,13 +28,6 @@ Planned schedule of work:
## work notes
* An interrupted PUT to cluster that has a node that is a special remote
over http leaves open the connection to the cluster, so the next request
opens another one.
So does an interrupted PUT directly to the proxied
special remote over http.
* When part of a file has been sent to a cluster via the http server,
the transfer interrupted, and another node is added to the cluster,
and the transfer of the file performed again, there is a failure
@@ -52,7 +45,10 @@ Planned schedule of work:
special remote. This violates a usual invariant that any data being
received into a repository gets verified in passing. Although on the
other hand, when sending data to a special remote normally, there is also
no verification.
no verification. On the third hand, a p2p http proxy (or for that matter
a ssh server) may have users who are allowed to store objects, but are
not really trusted, and if they can upload garbage without verification,
that could be bad.
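
For contrast, "verified in passing" means folding each received chunk into a hash as it is stored, so checking the content against its key needs no extra read at the end. A minimal sketch of the idea, assuming SHA256 and cryptonite's Crypto.Hash (this is not git-annex's actual verification code):

    import Crypto.Hash (Digest, SHA256, hashInit, hashUpdate, hashFinalize)
    import qualified Data.ByteString as B

    -- Store each chunk while folding it into a running hash, then compare
    -- the final digest against the expected one for the key.
    verifyInPassing :: Digest SHA256 -> [B.ByteString] -> (B.ByteString -> IO ()) -> IO Bool
    verifyInPassing expected chunks store = go hashInit chunks
      where
        go ctx [] = return (hashFinalize ctx == expected)
        go ctx (c:cs) = do
            store c
            go (hashUpdate ctx c) cs
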
## items deferred until later for p2p protocol over http
@@ -60,7 +56,15 @@ Planned schedule of work:
time (not as proxied remotes), so that eg, every git-annex repository
on a server can be served on the same port.
* Support proxying to git remotes that use annex+http urls.
* Support proxying to git remotes that use annex+http urls. This needs a
translation from P2P protocol to servant-client to P2P protocol.
* Should be possible to use a git-remote-annex annex::$uuid url as
remote.foo.url with remote.foo.annexUrl using annex+http, and so
not need a separate web server to serve the git repository. Doesn't work
currently because git-remote-annex urls only support special remotes.
It would need a new form of git-remote-annex url, eg:
annex::$uuid?annex+http://example.com/git-annex/
* `git-annex p2phttp` could support systemd socket activation. This would
allow making a systemd unit that listens on port 80.