2016-12-06 16:19:47 +00:00
|
|
|
{- git remotes using the git-annex P2P protocol
|
|
|
|
-
|
2018-03-08 20:11:00 +00:00
|
|
|
- Copyright 2016-2018 Joey Hess <id@joeyh.name>
|
2016-12-06 16:19:47 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2016-12-06 16:19:47 +00:00
|
|
|
-}
|
|
|
|
|
|
|
|
module Remote.P2P (
|
|
|
|
remote,
|
|
|
|
chainGen
|
|
|
|
) where
|
|
|
|
|
|
|
|
import Annex.Common
|
2016-12-06 19:08:00 +00:00
|
|
|
import qualified Annex
|
|
|
|
import qualified P2P.Protocol as P2P
|
2016-12-06 16:19:47 +00:00
|
|
|
import P2P.Address
|
2016-12-06 19:08:00 +00:00
|
|
|
import P2P.Annex
|
2016-12-06 19:40:31 +00:00
|
|
|
import P2P.IO
|
2016-12-06 19:49:39 +00:00
|
|
|
import P2P.Auth
|
2016-12-06 16:19:47 +00:00
|
|
|
import Types.Remote
|
|
|
|
import qualified Git
|
2016-12-06 19:49:39 +00:00
|
|
|
import Annex.UUID
|
2016-12-06 16:19:47 +00:00
|
|
|
import Config
|
|
|
|
import Config.Cost
|
|
|
|
import Remote.Helper.Git
|
2019-02-20 19:55:01 +00:00
|
|
|
import Remote.Helper.ExportImport
|
2018-03-08 20:11:00 +00:00
|
|
|
import Remote.Helper.P2P
|
2016-12-06 19:49:39 +00:00
|
|
|
import Utility.AuthToken
|
fix encryption of content to gcrypt and git-lfs
Fix serious regression in gcrypt and encrypted git-lfs remotes.
Since version 7.20200202.7, git-annex incorrectly stored content
on those remotes without encrypting it.
Problem was, Remote.Git enumerates all git remotes, including git-lfs
and gcrypt. It then dispatches to those. So, Remote.List used the
RemoteConfigParser from Remote.Git, instead of from git-lfs or gcrypt,
and that parser does not know about encryption fields, so did not
include them in the ParsedRemoteConfig. (Also didn't include other
fields specific to those remotes, perhaps chunking etc also didn't
get through.)
To fix, had to move RemoteConfig parsing down into the generate methods
of each remote, rather than doing it in Remote.List.
And a consequence of that was that ParsedRemoteConfig had to change to
include the RemoteConfig that got parsed, so that testremote can
generate a new remote based on an existing remote.
(I would have rather fixed this just inside Remote.Git, but that was not
practical, at least not w/o re-doing work that Remote.List already did.
Big ugly mostly mechanical patch seemed preferable to making git-annex
slower.)
2020-02-26 21:20:56 +00:00
|
|
|
import Annex.SpecialRemote.Config
|
2016-12-06 19:08:00 +00:00
|
|
|
|
|
|
|
import Control.Concurrent.STM
|
2016-12-06 16:19:47 +00:00
|
|
|
|
|
|
|
-- | The "p2p" remote type.
--
-- This record does not enumerate or generate any remotes itself.
-- Remote.Git takes care of enumerating P2P remotes, and will call
-- chainGen on them.
remote :: RemoteType
remote = RemoteType
    { typename = "p2p"
    , enumerate = \_ -> return []
    , generate = \_ _ _ _ _ -> return Nothing
    , configParser = mkRemoteConfigParser []
    -- No setup action is provided; the error message names the command
    -- that is used instead.
    , setup = error "P2P remotes are set up using git-annex p2p"
    , exportSupported = exportUnsupported
    , importSupported = importUnsupported
    , thirdPartyPopulated = False
    }
|
2016-12-06 16:19:47 +00:00
|
|
|
|
fix encryption of content to gcrypt and git-lfs
Fix serious regression in gcrypt and encrypted git-lfs remotes.
Since version 7.20200202.7, git-annex incorrectly stored content
on those remotes without encrypting it.
Problem was, Remote.Git enumerates all git remotes, including git-lfs
and gcrypt. It then dispatches to those. So, Remote.List used the
RemoteConfigParser from Remote.Git, instead of from git-lfs or gcrypt,
and that parser does not know about encryption fields, so did not
include them in the ParsedRemoteConfig. (Also didn't include other
fields specific to those remotes, perhaps chunking etc also didn't
get through.)
To fix, had to move RemoteConfig parsing down into the generate methods
of each remote, rather than doing it in Remote.List.
And a consequence of that was that ParsedRemoteConfig had to change to
include the RemoteConfig that got parsed, so that testremote can
generate a new remote based on an existing remote.
(I would have rather fixed this just inside Remote.Git, but that was not
practical, at least not w/o re-doing work that Remote.List already did.
Big ugly mostly mechanical patch seemed preferable to making git-annex
slower.)
2020-02-26 21:20:56 +00:00
|
|
|
-- | Builds the Remote for the peer at the given P2PAddress.
--
-- A new connection pool is created for the remote, and every action of
-- the remote that talks to the peer goes through that pool. The result
-- is always a Just.
chainGen :: P2PAddress -> Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
chainGen addr r u rc gc rs = do
    c <- parsedRemoteConfig remote rc
    connpool <- mkConnectionPool
    cst <- remoteCost gc veryExpensiveRemoteCost
    let withconn = withConnection u addr connpool
        protorunner = runProto u addr connpool
        this = Remote
            { uuid = u
            , cost = cst
            , name = Git.repoDescribe r
            -- Content transfer and presence checking all run the P2P
            -- protocol over a pooled connection.
            , storeKey = store gc (const protorunner)
            , retrieveKeyFile = retrieve gc (const protorunner)
            , retrieveKeyFileCheap = Nothing
            , retrievalSecurityPolicy = RetrievalAllKeysSecure
            , removeKey = remove protorunner
            , lockContent = Just (lock withconn runProtoConn u)
            , checkPresent = checkpresent protorunner
            , checkPresentCheap = False
            -- Optional features that this remote does not provide.
            , exportActions = exportUnsupported
            , importActions = importUnsupported
            , whereisKey = Nothing
            , remoteFsck = Nothing
            , repairRepo = Nothing
            -- Configuration and identity.
            , config = c
            , localpath = Nothing
            , getRepo = return r
            , gitconfig = gc
            , readonly = False
            , appendonly = False
            , untrustworthy = False
            , availability = GloballyAvailable
            , remotetype = remote
            , mkUnavailable = return Nothing
            -- Refers back to the Remote being defined here.
            , getInfo = gitRepoInfo this
            , claimUrl = Nothing
            , checkUrl = Nothing
            , remoteStateHandle = rs
            }
    return $ Just this
|
|
|
|
|
2018-03-08 18:02:18 +00:00
|
|
|
-- | A connection to the peer, which can be closed.
|
2018-03-12 17:43:19 +00:00
|
|
|
-- While open, it carries the protocol RunState paired with the
-- P2PConnection to the peer.
type Connection = ClosableConnection (RunState, P2PConnection)
|
2016-12-06 19:08:00 +00:00
|
|
|
|
|
|
|
-- | Connections that are not currently in use, held in a TVar.
-- withConnection takes a connection from the head of the list, and adds
-- connections back to the front of it.
type ConnectionPool = TVar [Connection]
|
2016-12-06 16:19:47 +00:00
|
|
|
|
2016-12-06 19:08:00 +00:00
|
|
|
-- | Makes a new connection pool, which starts out empty.
mkConnectionPool :: Annex ConnectionPool
mkConnectionPool = liftIO (newTVarIO [])
|
2016-12-06 16:19:47 +00:00
|
|
|
|
2016-12-06 19:08:00 +00:00
|
|
|
-- | Runs the Proto action using a connection obtained with
-- withConnection.
--
-- The result is Nothing when runProtoConn could not run the action,
-- either because the connection was closed or because the protocol
-- failed.
runProto :: UUID -> P2PAddress -> ConnectionPool -> P2P.Proto a -> Annex (Maybe a)
runProto u addr connpool proto =
    withConnection u addr connpool $ runProtoConn proto
|
2016-12-06 16:19:47 +00:00
|
|
|
|
2018-03-08 20:11:00 +00:00
|
|
|
-- | Runs the Proto action on the given connection.
--
-- Returns the connection to continue using, along with the result of
-- the action. A closed connection is returned unchanged, with Nothing.
runProtoConn :: P2P.Proto a -> Connection -> Annex (Connection, Maybe a)
runProtoConn _ ClosedConnection = return (ClosedConnection, Nothing)
runProtoConn a c@(OpenConnection (runst, conn)) =
    runFullProto runst conn a >>= either lostconnection stillopen
  where
    -- The action ran to completion, so the connection stays open.
    stillopen r = return (c, Just r)

    -- When runFullProto fails, the connection is no longer usable,
    -- so close it.
    lostconnection e = do
        warning $ "Lost connection to peer (" ++ describeProtoFailure e ++ ")"
        liftIO $ closeConnection conn
        return (ClosedConnection, Nothing)
|
2016-12-06 16:19:47 +00:00
|
|
|
|
2016-12-06 19:08:00 +00:00
|
|
|
-- | Runs an action with a connection to the peer.
--
-- A connection from the ConnectionPool is used if one is available;
-- otherwise a new connection is opened.
--
-- The action returns the connection to keep. Once the action is done,
-- that connection is added back to the ConnectionPool, unless it's no
-- longer open.
--
-- NOTE(review): bracketOnError is presumably the usual combinator that
-- runs its cleanup only when the action throws; in that case the
-- connection that was checked out is the one handed to checkin.
withConnection :: UUID -> P2PAddress -> ConnectionPool -> (Connection -> Annex (Connection, a)) -> Annex a
withConnection u addr connpool a = bracketOnError checkout checkin run
  where
    -- Take a pooled connection when there is one, and open a new
    -- connection otherwise.
    checkout =
        liftIO (atomically takepooled)
            >>= maybe (openConnection u addr) return

    -- Removes the first connection from the pool, if the pool has any.
    takepooled = do
        pool <- readTVar connpool
        writeTVar connpool (drop 1 pool)
        return $ case pool of
            [] -> Nothing
            (c:_) -> Just c

    -- Only connections that are still open go back into the pool.
    checkin ClosedConnection = return ()
    checkin conn = liftIO $ atomically $ modifyTVar' connpool (conn:)

    run conn = do
        (conn', r) <- a conn
        checkin conn'
        return r
|
2016-12-06 16:19:47 +00:00
|
|
|
|
2016-12-07 16:39:28 +00:00
|
|
|
-- | Connects to the peer at the address and authenticates with it.
--
-- The result is an open connection only when the peer authenticates and
-- reports the expected UUID. In every other case a warning is displayed
-- and ClosedConnection is returned.
openConnection :: UUID -> P2PAddress -> Annex Connection
openConnection u addr = do
    g <- Annex.gitRepo
    liftIO (tryNonAsync $ connectPeer g addr)
        >>= either connectfailed authenticate
  where
    connectfailed e = do
        warning $ "Unable to connect to peer. (" ++ show e ++ ")"
        return ClosedConnection

    authenticate conn = do
        myuuid <- getUUID
        authtoken <- fromMaybe nullAuthToken <$> loadP2PRemoteAuthToken addr
        -- Before 6.20180312, the protocol server had a bug that made
        -- negotiating the protocol version terminate the connection.
        -- So, this must stay disabled until the old version is not in
        -- use anywhere.
        --P2P.negotiateProtocolVersion P2P.maxProtocolVersion
        let proto = P2P.auth myuuid authtoken (return ())
        runst <- liftIO $ mkRunState Client
        res <- liftIO $ runNetProto runst conn proto
        case res of
            Right (Just theiruuid)
                | u == theiruuid ->
                    return (OpenConnection (runst, conn))
                | otherwise -> do
                    liftIO $ closeConnection conn
                    warning "Remote peer uuid seems to have changed."
                    return ClosedConnection
            Right Nothing -> do
                warning "Unable to authenticate with peer."
                closed conn
            Left e -> do
                warning $ "Problem communicating with peer. (" ++ describeProtoFailure e ++ ")"
                closed conn

    -- Closes the connection to the peer and yields ClosedConnection.
    closed conn = do
        liftIO $ closeConnection conn
        return ClosedConnection
|