diff --git a/Annex/Proxy.hs b/Annex/Proxy.hs index 854ce289e2..16d4fc2cdd 100644 --- a/Annex/Proxy.hs +++ b/Annex/Proxy.hs @@ -16,7 +16,6 @@ import P2P.Proxy import P2P.Protocol import P2P.IO import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection) -import Annex.Content import Annex.Concurrent import Annex.Tmp import Logs.Proxy @@ -209,12 +208,7 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go -- because the client will do its own verification. let vc = Remote.NoVerify tryNonAsync (Remote.retrieveKeyFile r k af (fromRawFilePath tmpfile) nullMeterUpdate vc) >>= \case - Right v -> - ifM (verifyKeyContentPostRetrieval Remote.RetrievalVerifiableKeysSecure vc v k tmpfile) - ( liftIO $ senddata offset tmpfile - , liftIO $ sendmessage $ - ERROR "verification of content failed" - ) + Right _ -> liftIO $ senddata offset tmpfile Left err -> liftIO $ propagateerror err senddata (Offset offset) f = do diff --git a/CHANGELOG b/CHANGELOG index db33d0ba37..36397dbfa9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,4 @@ -git-annex (10.20240702) UNRELEASED; urgency=medium +git-annex (10.20240731) UNRELEASED; urgency=medium * New HTTP API that is equivilant to the P2P protocol. * annex+http and annex+https urls can be configured for @@ -13,6 +13,7 @@ git-annex (10.20240702) UNRELEASED; urgency=medium * Avoid potential data loss in situations where git-annex-shell or git-annex remotedaemon is killed while locking a key to prevent its removal. + * When proxying for a special remote, avoid unnecessary hashing. * Added a dependency on clock. * Propagate --force to git-annex transferrer. 
diff --git a/P2P/Http/Server.hs b/P2P/Http/Server.hs index 044dd4f950..8bc0284db4 100644 --- a/P2P/Http/Server.hs +++ b/P2P/Http/Server.hs @@ -314,9 +314,9 @@ servePut st resultmangle su apiver (DataLength len) (B64Key k) cu bypass baf mof tooshortv <- liftIO newEmptyTMVarIO content <- liftIO $ S.unSourceT stream (gather validityv tooshortv) res <- withP2PConnection' apiver st cu su bypass sec auth WriteAction - (\cst -> cst { connectionWaitVar = False }) $ \conn -> + (\cst -> cst { connectionWaitVar = False }) $ \conn -> do + liftIO $ void $ async $ checktooshort conn tooshortv liftIO (protoaction conn content validitycheck) - `finally` checktooshort conn tooshortv case res of Right (Right (Just plusuuids)) -> return $ resultmangle $ PutResultPlus True (map B64UUID plusuuids) @@ -385,8 +385,8 @@ servePut st resultmangle su apiver (DataLength len) (B64Key k) cu bypass baf mof -- The connection can no longer be used when too short a DATA has -- been written to it. - checktooshort conn tooshortv = - liftIO $ whenM (atomically $ fromMaybe True <$> tryTakeTMVar tooshortv) $ + checktooshort conn tooshortv = do + liftIO $ whenM (atomically $ takeTMVar tooshortv) $ closeP2PConnection conn servePutOffset diff --git a/P2P/Http/State.hs b/P2P/Http/State.hs index 36f6d6ce69..7e43df81de 100644 --- a/P2P/Http/State.hs +++ b/P2P/Http/State.hs @@ -220,7 +220,7 @@ withP2PConnections workerpool proxyconnectionpoolsize clusterconcurrency a = do >>= atomically . 
putTMVar respvar servicer myuuid myproxies proxypool reqv relv endv Left (Right releaseconn) -> do - releaseconn + void $ tryNonAsync releaseconn servicer myuuid myproxies proxypool reqv relv endv Left (Left ()) -> return () @@ -378,12 +378,18 @@ proxyConnection proxyconnectionpoolsize relv connparams workerpool proxypool pro liftIO $ runNetProto proxyfromclientrunst proxyfromclientconn $ P2P.net P2P.receiveMessage - let releaseconn returntopool = + let closebothsides = do + liftIO $ closeConnection proxyfromclientconn + liftIO $ closeConnection clientconn + + let releaseconn connstillusable = do atomically $ void $ tryPutTMVar relv $ do + unless connstillusable + closebothsides r <- liftIO $ wait asyncworker - liftIO $ closeConnection proxyfromclientconn - liftIO $ closeConnection clientconn - if returntopool + when connstillusable + closebothsides + if connstillusable then liftIO $ do now <- getPOSIXTime evicted <- atomically $ putProxyConnectionPool proxypool proxyconnectionpoolsize connparams $ @@ -539,13 +545,20 @@ instance Show ProxyConnection where openedProxyConnection :: UUID + -> String -> Proxy.ProxySelector -> Annex () -> Proxy.ConcurrencyConfig - -> IO ProxyConnection -openedProxyConnection u selector closer concurrency = do - now <- getPOSIXTime - return $ ProxyConnection u selector closer concurrency now + -> Annex ProxyConnection +openedProxyConnection u desc selector closer concurrency = do + now <- liftIO getPOSIXTime + fastDebug "P2P.Http" ("Opened proxy connection to " ++ desc) + return $ ProxyConnection u selector closer' concurrency now + where + closer' = do + fastDebug "P2P.Http" ("Closing proxy connection to " ++ desc) + closer + fastDebug "P2P.Http" ("Closed proxy connection to " ++ desc) openProxyConnectionToRemote :: AnnexWorkerPool @@ -557,7 +570,8 @@ openProxyConnectionToRemote workerpool clientmaxversion bypass remote = inAnnexWorker' workerpool $ do remoteside <- proxyRemoteSide clientmaxversion bypass remote concurrencyconfig <- 
Proxy.noConcurrencyConfig - liftIO $ openedProxyConnection (Remote.uuid remote) + openedProxyConnection (Remote.uuid remote) + ("remote " ++ Remote.name remote) (Proxy.singleProxySelector remoteside) (Proxy.closeRemoteSide remoteside) concurrencyconfig @@ -576,7 +590,8 @@ openProxyConnectionToCluster workerpool clientmaxversion bypass clusteruuid conc (proxyselector, closenodes) <- clusterProxySelector clusteruuid clientmaxversion bypass concurrencyconfig <- Proxy.mkConcurrencyConfig concurrency - liftIO $ openedProxyConnection (fromClusterUUID clusteruuid) + openedProxyConnection (fromClusterUUID clusteruuid) + ("cluster " ++ fromUUID (fromClusterUUID clusteruuid)) proxyselector closenodes concurrencyconfig type ProxyConnectionPool = (Integer, M.Map ProxyConnectionPoolKey [ProxyConnection]) diff --git a/P2P/Http/Url.hs b/P2P/Http/Url.hs index b4d0a86264..09a8e56f9a 100644 --- a/P2P/Http/Url.hs +++ b/P2P/Http/Url.hs @@ -9,14 +9,9 @@ module P2P.Http.Url where -import Types.UUID -import Utility.FileSystemEncoding -import Utility.PartialPrelude - import Data.List import Network.URI import System.FilePath.Posix as P -import qualified Data.UUID as UUID #ifdef WITH_SERVANT import Servant.Client (BaseUrl(..), Scheme(..)) import Text.Read diff --git a/Remote/Git.hs b/Remote/Git.hs index 90fb301c34..f59ef528b6 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -227,7 +227,7 @@ gen r u rc gc rs , localpath = localpathCalc r , getRepo = getRepoFromState st , gitconfig = gc - , readonly = Git.repoIsHttp r + , readonly = Git.repoIsHttp r && not (isP2PHttp' gc) , appendonly = False , untrustworthy = False , availability = repoAvail r diff --git a/doc/tips/smart_http_server.mdwn b/doc/tips/smart_http_server.mdwn index d7a56d0596..54264e3499 100644 --- a/doc/tips/smart_http_server.mdwn +++ b/doc/tips/smart_http_server.mdwn @@ -3,8 +3,8 @@ as shown in the tip [[setup_a_public_repository_on_a_web_site]]. That's limited to basic read-only repository access though. 
Git has [smart HTTP](https://git-scm.com/book/en/v2/Git-on-the-Server-Smart-HTTP) -that can be used to allow pushes over http. And git-annex has an -equivilant, the [[git annex-p2phttp command|/git-annex-p2phttp]]. +that can be used to allow pushes over http. And git-annex +has the [[git annex-p2phttp command|/git-annex-p2phttp]]. As well as allowing write access to authorized users over http, `git-annex p2phttp` also allows accessing [[clusters]], and other proxied @@ -37,3 +37,7 @@ In the git config file of the repository, set `annex.url` to the "annex+http" (or "annex+https") url. The first time it uses a http remote, git-annex downloads its git config file, and sets `remote.name.annexUrl` to the value of the remote's `annex.url`. + +Support for this first appeared in git-annex version 10.20240731. Users of +older git-annex won't be able to use the smart http server, but can still +clone the repository from the dumb http server. diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index fcec9843fe..7d2af32c0f 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -28,13 +28,6 @@ Planned schedule of work: ## work notes -* An interrupted PUT to cluster that has a node that is a special remote - over http leaves open the connection to the cluster, so the next request - opens another one. - - So does an interrupted PUT directly to the proxied ; - special remote over http. - * When part of a file has been sent to a cluster via the http server, the transfer interrupted, and another node is added to the cluster, and the transfer of the file performed again, there is a failure @@ -52,7 +45,10 @@ Planned schedule of work: special remote. This violates a usual invariant that any data being received into a repository gets verified in passing. Although on the other hand, when sending data to a special remote normally, there is also - no verification. + no verification. 
On the third hand, a p2p http proxy (or for that matter + a ssh server) may have users who are allowed to store objects, but are + not really trusted, and if they can upload garbage without verification, + that could be bad. ## items deferred until later for p2p protocol over http @@ -60,7 +56,15 @@ Planned schedule of work: time (not as proxied remotes), so that eg, every git-annex repository on a server can be served on the same port. -* Support proxying to git remotes that use annex+http urls. +* Support proxying to git remotes that use annex+http urls. This needs a + translation from P2P protocol to servant-client to P2P protocol. + +* Should be possible to use a git-remote-annex annex::$uuid url as + remote.foo.url with remote.foo.annexUrl using annex+http, and so + not need a separate web server to serve the git repository. Doesn't work + currently because git-remote-annex urls only support special remotes. + It would need a new form of git-remote-annex url, eg: + annex::$uuid?annex+http://example.com/git-annex/ * `git-annex p2phttp` could support systemd socket activation. This would allow making a systemd unit that listens on port 80.