use REMOVE-BEFORE in P2P protocol
Only clusters still need to be fixed to close this todo.
This commit is contained in:
parent
1243af4a18
commit
99b7a0cfe9
5 changed files with 72 additions and 47 deletions
|
@ -23,12 +23,12 @@ safeDropProofExpired :: Annex ()
|
|||
safeDropProofExpired = do
|
||||
showNote "unsafe"
|
||||
showLongNote $ UnquotedString
|
||||
"Dropping took too long, and locks on remotes may have expired."
|
||||
"Dropping took too long, and locks may have expired."
|
||||
|
||||
checkSafeDropProofEndTime :: Maybe SafeDropProof -> IO Bool
|
||||
checkSafeDropProofEndTime p = case safeDropProofEndTime =<< p of
|
||||
Nothing -> return True
|
||||
Just t -> do
|
||||
Just endtime -> do
|
||||
now <- getPOSIXTime
|
||||
return (t < now)
|
||||
return (endtime > now)
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ import Annex.Verify
|
|||
|
||||
import Control.Monad.Free
|
||||
import Control.Concurrent.STM
|
||||
import Data.Time.Clock.POSIX
|
||||
import qualified Data.ByteString as S
|
||||
|
||||
-- Full interpreter for Proto, that can receive and send objects.
|
||||
|
@ -156,7 +157,10 @@ runLocal runst runner a = case a of
|
|||
UpdateMeterTotalSize m sz next -> do
|
||||
liftIO $ setMeterTotalSize m sz
|
||||
runner next
|
||||
RunValidityCheck checkaction next -> runner . next =<< checkaction
|
||||
RunValidityCheck checkaction next ->
|
||||
runner . next =<< checkaction
|
||||
GetLocalCurrentTime next ->
|
||||
runner . next =<< liftIO getPOSIXTime
|
||||
where
|
||||
transfer mk k af sd ta = case runst of
|
||||
-- Update transfer logs when serving.
|
||||
|
|
|
@ -42,6 +42,7 @@ import qualified Data.ByteString as B
|
|||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.Set as S
|
||||
import Data.Char
|
||||
import Data.Time.Clock.POSIX
|
||||
import Control.Applicative
|
||||
import Prelude
|
||||
|
||||
|
@ -327,6 +328,8 @@ data LocalF c
|
|||
-- not known until the data is being received.
|
||||
| RunValidityCheck (Annex Validity) (Validity -> c)
|
||||
-- ^ Runs a deferred validity check.
|
||||
| GetLocalCurrentTime (POSIXTime -> c)
|
||||
-- ^ Gets the local time.
|
||||
deriving (Functor)
|
||||
|
||||
type Local = Free LocalF
|
||||
|
@ -397,9 +400,49 @@ lockContentWhile runproto key a = bracket setup cleanup a
|
|||
cleanup False = return ()
|
||||
|
||||
remove :: Maybe SafeDropProof -> Key -> Proto (Either String Bool, Maybe [UUID])
|
||||
remove proof key = do
|
||||
net $ sendMessage (REMOVE key)
|
||||
checkSuccessFailurePlus
|
||||
remove proof key =
|
||||
case safeDropProofEndTime =<< proof of
|
||||
Nothing -> removeanytime
|
||||
Just endtime -> do
|
||||
ver <- net getProtocolVersion
|
||||
if ver >= ProtocolVersion 3
|
||||
then removeBefore endtime key
|
||||
-- Peer is too old to support REMOVE-BEFORE
|
||||
else removeanytime
|
||||
where
|
||||
removeanytime = do
|
||||
net $ sendMessage (REMOVE key)
|
||||
checkSuccessFailurePlus
|
||||
|
||||
{- The endtime is the last local time at which the key can be removed.
|
||||
- To tell the remote how long it has to remove the key, get its current
|
||||
- timestamp, and add to it the number of seconds from the current local
|
||||
- time until the endtime.
|
||||
-
|
||||
- Order of retrieving timestamps matters. Getting the local time after the
|
||||
- remote timestamp means that, if there is some delay in getting the
|
||||
- response from the remote, that is reflected in the local time, and so
|
||||
- reduces the allowed time.
|
||||
-}
|
||||
removeBefore :: POSIXTime -> Key -> Proto (Either String Bool, Maybe [UUID])
|
||||
removeBefore endtime key = do
|
||||
net $ sendMessage GETTIMESTAMP
|
||||
net receiveMessage >>= \case
|
||||
Just (TIMESTAMP remotetime) -> do
|
||||
localtime <- local getLocalCurrentTime
|
||||
let timeleft = endtime - localtime
|
||||
let timeleft' = MonotonicTimestamp (floor timeleft)
|
||||
let remoteendtime = remotetime + timeleft'
|
||||
if timeleft <= 0
|
||||
then return (Right False, Nothing)
|
||||
else do
|
||||
net $ sendMessage $
|
||||
REMOVE_BEFORE remoteendtime key
|
||||
checkSuccessFailurePlus
|
||||
Just (ERROR err) -> return (Left err, Nothing)
|
||||
_ -> do
|
||||
net $ sendMessage (ERROR "expected TIMESTAMP")
|
||||
return (Right False, Nothing)
|
||||
|
||||
get :: FilePath -> Key -> Maybe IncrementalVerifier -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification)
|
||||
get dest key iv af m p =
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
- License: BSD-2-clause
|
||||
-}
|
||||
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Utility.MonotonicClock where
|
||||
|
@ -19,7 +20,7 @@ import Utility.Exception
|
|||
#endif
|
||||
|
||||
newtype MonotonicTimestamp = MonotonicTimestamp Integer
|
||||
deriving (Show, Eq, Ord)
|
||||
deriving (Show, Eq, Ord, Num)
|
||||
|
||||
-- On linux, this uses a clock that advances while the system is suspended,
|
||||
-- except for on very old kernels (eg 2.6.32).
|
||||
|
|
|
@ -47,7 +47,7 @@ remotedaemon` for tor, or something similar for future P2P over HTTP
|
|||
process is kept running. An admin may bounce the HTTP server at any point,
|
||||
or the whole system may reboot.
|
||||
|
||||
----
|
||||
## retention locking
|
||||
|
||||
So, this needs a way to make lockContentShared guarantee it remains
|
||||
locked for an amount of time even after the process has exited.
|
||||
|
@ -64,41 +64,7 @@ OTOH putting the timestamp in the lock file may be hard (eg on Windows).
|
|||
> P2P LOCKCONTENT uses a 10 minute retention in case it gets killed,
|
||||
> but other values can be used in the future safely.
|
||||
|
||||
----
|
||||
|
||||
Extending the P2P protocol is a bit tricky, because the same P2P
|
||||
protocol connection could be used for several different things at
|
||||
the same time. A PRE-REMOVE N Key might be followed by removals of other
|
||||
keys, and eventually a removal of the requested key. There are
|
||||
sometimes pools of P2P connections that get used like this.
|
||||
So the server would need to cache some number of PRE-REMOVE timestamps.
|
||||
How many?
|
||||
|
||||
Certainly care would need to be taken to send PRE-REMOVE to the same
|
||||
connection as REMOVE. How?
|
||||
|
||||
Could this be done without extending the REMOVE side of the P2P protocol?
|
||||
|
||||
1. check start time
|
||||
2. LOCKCONTENT
|
||||
3. prepare to remove
|
||||
4. in checkVerifiedCopy,
|
||||
check current time.. fail if more than 10 minutes from start
|
||||
5. REMOVE
|
||||
|
||||
The issue with this is that git-annex could be paused for any amount of
|
||||
time between steps 4 and 5. Usually it won't pause..
|
||||
mkSafeDropProof calls checkVerifiedCopy and constructs the proof,
|
||||
and then it immediately sends REMOVE. But of course sending REMOVE
|
||||
could take arbitrarily long. Or git-annex could be paused at just the wrong
|
||||
point.
|
||||
|
||||
Ok, let's reconsider... Add GETTIMESTAMP which causes the server to
|
||||
return its current timestamp. The same timestamp must be returned on any
|
||||
connection to the server, eg the server must have a single clock.
|
||||
That can be called before LOCKCONTENT.
|
||||
Then REMOVE Key Timestamp can fail if the current time is past the
|
||||
specified timestamp.
|
||||
## clusters
|
||||
|
||||
How to handle this when proxying to a cluster? In a cluster, each node
|
||||
has a different clock. So GETTIMESTAMP will return a bunch of times.
|
||||
|
@ -107,13 +73,24 @@ Then REMOVE Key Timestamp can have the timestamp adjusted when it's sent
|
|||
out to each client, by calling GETTIMESTAMP again and applying the offsets
|
||||
between the cluster's clock and each node's clock.
|
||||
|
||||
This approach would need to use a monotonic clock!
|
||||
TODO
|
||||
|
||||
---
|
||||
## future flag day
|
||||
|
||||
There is a potential future flag day where
|
||||
p2pDefaultLockContentRetentionDuration is not assumed, but is probed
|
||||
using the P2P protocol, and peers that don't support it can no longer
|
||||
produce a LockedCopy. Until that happens, when git-annex is
|
||||
produce a LockedCopy. And P2P.Protocol.remove does not fall back to REMOVE
|
||||
when the peer does not support REMOVE-BEFORE and there's a proof expiry time.
|
||||
|
||||
Until that flag day, when git-annex is
|
||||
communicating with older peers there is a risk of data loss when
|
||||
a ssh connection closes during LOCKCONTENT.
|
||||
|
||||
I think that now is not the right time for that flag day, because it will
|
||||
cause disruption. Everyone would have to upgrade remote git-annex versions
|
||||
in order to drop content from those remotes, or with content locked on
|
||||
those remotes. This problem is not likely enough to occur to seem worth
|
||||
that disruption.
|
||||
|
||||
A flag day might be worth doing in a couple of years though. --[[Joey]]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue