2018-03-08 20:11:00 +00:00
|
|
|
{- Helpers for remotes using the git-annex P2P protocol.
|
|
|
|
-
|
2024-06-28 17:42:25 +00:00
|
|
|
- Copyright 2016-2024 Joey Hess <id@joeyh.name>
|
2018-03-08 20:11:00 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2018-03-08 20:11:00 +00:00
|
|
|
-}
|
|
|
|
|
|
|
|
{-# LANGUAGE RankNTypes #-}
|
|
|
|
|
|
|
|
module Remote.Helper.P2P where
|
|
|
|
|
|
|
|
import Annex.Common
|
|
|
|
import qualified P2P.Protocol as P2P
|
|
|
|
import P2P.IO
|
|
|
|
import Types.Remote
|
|
|
|
import Annex.Content
|
|
|
|
import Messages.Progress
|
|
|
|
import Utility.Metered
|
disk free checking for unsized keys
Improve disk free space checking when transferring unsized keys to
local git remotes. Since the size of the object file is known, can
check that instead.
Getting unsized keys from local git remotes does not check the actual
object size. It would be harder to handle that direction because the size
check is run locally, before anything involving the remote is done. So it
doesn't know the size of the file on the remote.
Also, transferring unsized keys to other remotes, including ssh remotes and
p2p remotes, doesn't do disk size checking for unsized keys. This would need a
change in protocol.
(It does seem like it would be possible to implement the same thing for
directory special remotes though.)
In some sense, it might be better to not ever do disk free checking for
unsized keys, than to do it only sometimes. A user might notice this
direction working and consider it a bug that the other direction does not.
On the other hand, disk reserve checking is not implemented for most
special remotes at all, and yet it is implemented for a few, which is also
inconsistent, but best effort. And so doing this best effort seems to make
some sense. Fundamentally, if the user wants the size to always be checked,
they should not use unsized keys.
Sponsored-by: Brock Spratlen on Patreon
2024-01-16 18:29:10 +00:00
|
|
|
import Utility.Tuple
|
2018-03-08 20:11:00 +00:00
|
|
|
import Types.NumCopies
|
2021-07-27 18:07:23 +00:00
|
|
|
import Annex.Verify
|
2024-06-18 16:07:01 +00:00
|
|
|
import Logs.Location
|
2024-06-28 17:42:25 +00:00
|
|
|
import Utility.SafeOutput
|
2018-03-08 20:11:00 +00:00
|
|
|
|
|
|
|
import Control.Concurrent
|
|
|
|
|
|
|
|
-- A ProtoRunner sets up a connection itself and runs a Proto action
-- over it. The helpers in this module treat a Nothing result as the
-- remote being unavailable.
type ProtoRunner a =
    P2P.Proto a -> Annex (Maybe a)
|
|
|
|
|
|
|
|
-- A ProtoConnRunner runs a Proto action over a ClosableConnection that
-- is passed in. It returns the connection to use from then on, paired
-- with the action's result, if there was one.
type ProtoConnRunner c =
    forall a. P2P.Proto a
        -> ClosableConnection c
        -> Annex (ClosableConnection c, Maybe a)
|
|
|
|
|
|
|
|
-- A WithConn takes a connection from the pool and runs an Annex action
-- with it. The action hands back a connection, which is added back to
-- the pool when done, along with its result.
type WithConn a c =
    (ClosableConnection c -> Annex (ClosableConnection c, a))
        -> Annex a
|
|
|
|
|
2024-06-23 12:21:11 +00:00
|
|
|
-- Sends the content of a key to the remote with P2P.put.
--
-- The transfer is metered. Its rate is limited by the remote's upload
-- bandwidth limit when that is configured, and otherwise by its general
-- bandwidth limit. Gives up when the remote reports that the transfer
-- failed, or when the runner could not produce a result at all.
store :: UUID -> RemoteGitConfig -> ProtoRunner (Maybe [UUID]) -> Key -> AssociatedFile -> MeterUpdate -> Annex ()
store remoteuuid gc runner k af p =
    metered (Just p) sizer bwlimit $ \_ meterupdate ->
        runner (P2P.put k af meterupdate)
            >>= maybe remoteUnavail recordstored
  where
    -- The meter is sized from the key, together with the file that
    -- prepSendAnnex provides for it.
    sizer = KeySizer k (fmap (toRawFilePath . fst3) <$> prepSendAnnex k)

    bwlimit = remoteAnnexBwLimitUpload gc <|> remoteAnnexBwLimit gc

    recordstored Nothing = giveup "Transfer failed"
    -- Storing on the remote can cause it to be stored on additional
    -- UUIDs, so record those. The remote's own UUID is skipped here.
    recordstored (Just fanoutuuids) =
        mapM_ (\u -> logChange k u InfoPresent)
            (filter (/= remoteuuid) fanoutuuids)
|
2018-03-08 20:11:00 +00:00
|
|
|
|
remove git-annex-shell compat code
* Removed support for accessing git remotes that use versions of
git-annex older than 6.20180312.
* git-annex-shell: Removed several commands that were only needed to
support git-annex versions older than 6.20180312.
(lockcontent, recvkey, sendkey, transferinfo, commit)
The P2P protocol was added in that version, and used ever since, so
this code was only needed for interop with older versions.
"git-annex-shell commit" is used by newer git-annex versions, though
unnecessarily so, because the p2pstdio command makes a single commit at
shutdown. Luckily, it was run with stderr and stdout sent to /dev/null,
and non-zero exit status or other exceptions are caught and ignored. So,
that was able to be removed from git-annex-shell too.
git-annex-shell inannex, recvkey, sendkey, and dropkey are still used by
gcrypt special remotes accessed over ssh, so those had to be kept.
It would probably be possible to convert that to using the P2P protocol,
but it would be another multi-year transition.
Some git-annex-shell fields were able to be removed. I hoped to remove
all of them, and the very concept of them, but unfortunately autoinit
is used by git-annex sync, and gcrypt uses remoteuuid.
The main win here is really in Remote.Git, removing piles of hairy fallback
code.
Sponsored-by: Luke Shumaker
2021-10-11 19:35:54 +00:00
|
|
|
-- Downloads the content of a key from the remote to dest with P2P.get.
--
-- Incremental verification is started before the transfer and handed
-- to P2P.get. The transfer is metered, and its rate is limited by the
-- remote's download bandwidth limit when that is configured, and
-- otherwise by its general bandwidth limit.
--
-- On success, the Verification reported by the protocol is returned.
-- Gives up when the transfer failed, or when the runner could not
-- produce a result at all.
retrieve :: RemoteGitConfig -> (ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
retrieve gc runner k af dest p verifyconfig = do
    iv <- startVerifyKeyContentIncrementally verifyconfig k
    metered (Just p) k bwlimit $ \meter meterupdate ->
        runner (P2P.get dest k iv af meter meterupdate)
            >>= maybe remoteUnavail finish
  where
    bwlimit = remoteAnnexBwLimitDownload gc <|> remoteAnnexBwLimit gc

    finish (ok, verification)
        | ok = return verification
        | otherwise = giveup "Transfer failed"
|
2018-03-08 20:11:00 +00:00
|
|
|
|
2024-06-23 13:28:18 +00:00
|
|
|
-- Asks the remote to remove the content of a key with P2P.remove.
--
-- Whether or not the removal succeeded, any other UUIDs the remote
-- reports are recorded first. Gives up when the remote reports failure,
-- or when the runner could not produce a result at all.
remove :: UUID -> ProtoRunner (Bool, Maybe [UUID]) -> Key -> Annex ()
remove remoteuuid runner k = do
    result <- runner (P2P.remove k)
    case result of
        Nothing -> remoteUnavail
        Just (removed, alsoremoveduuids) -> do
            recordremoved (fromMaybe [] alsoremoveduuids)
            when (not removed) $
                giveup "removing content from remote failed"
  where
    -- The remote reports removal from other UUIDs than its own,
    -- so record those.
    recordremoved uuids =
        mapM_ (\u -> logChange k u InfoMissing)
            (filter (/= remoteuuid) uuids)
|
2018-03-08 20:11:00 +00:00
|
|
|
|
2024-06-28 17:42:25 +00:00
|
|
|
-- Asks the remote whether it has the content of a key, using
-- P2P.checkPresent.
--
-- An error message from the remote is passed through safeOutput before
-- giving up with it. Also gives up when the runner could not produce a
-- result at all.
checkpresent :: ProtoRunner (Either String Bool) -> Key -> Annex Bool
checkpresent runner k =
    maybe remoteUnavail (either (giveup . safeOutput) return)
        =<< runner (P2P.checkPresent k)
|
2018-03-08 20:11:00 +00:00
|
|
|
|
|
|
|
-- Runs the callback while P2P.lockContentWhile is in effect for the
-- key on the remote, passing it a LockedCopy verified copy for the
-- remote's UUID. Gives up when the content cannot be locked.
--
-- The connection obtained from withconn is kept in an MVar. Each
-- protocol action takes the current connection out and puts back the
-- one that connrunner returns. Once locking has finished, the
-- connection left in the MVar is handed back to withconn together with
-- the callback's result.
lock :: WithConn a c -> ProtoConnRunner c -> UUID -> Key -> (VerifiedCopy -> Annex a) -> Annex a
lock withconn connrunner u k callback = withconn $ \initialconn -> do
    connvar <- liftIO (newMVar initialconn)
    -- Runs one protocol action over the stored connection, falling
    -- back to the supplied default when connrunner has no result.
    let runproto fallback proto = do
            current <- liftIO (takeMVar connvar)
            (next, mresult) <- connrunner proto current
            liftIO (putMVar connvar next)
            return (fromMaybe fallback mresult)
    result <- P2P.lockContentWhile runproto k withlocked
    finalconn <- liftIO (takeMVar connvar)
    return (finalconn, result)
  where
    withlocked True = withVerifiedCopy LockedCopy u (return True) callback
    withlocked False = giveup "can't lock content"
|
2020-05-14 18:08:09 +00:00
|
|
|
|
|
|
|
-- Gives up with a message saying that the remote could not be connected
-- to. The helpers in this module use it when a runner yields Nothing.
remoteUnavail :: a
remoteUnavail = giveup "can't connect to remote"
|