Merge remote-tracking branch 'origin/httpproto'

This commit is contained in:
Joey Hess 2024-07-29 11:25:27 -04:00
commit 74f81ebd04
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
46 changed files with 4090 additions and 1024 deletions

View file

@ -115,7 +115,8 @@ newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState, AnnexRead) IO a
-- Values that can be read, but not modified by an Annex action.
data AnnexRead = AnnexRead
{ activekeys :: TVar (M.Map Key ThreadId)
{ branchstate :: MVar BranchState
, activekeys :: TVar (M.Map Key ThreadId)
, activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
, keysdbhandle :: Keys.DbHandle
, sshstalecleaned :: TMVar Bool
@ -137,6 +138,7 @@ data AnnexRead = AnnexRead
newAnnexRead :: GitConfig -> IO AnnexRead
newAnnexRead c = do
bs <- newMVar startBranchState
emptyactivekeys <- newTVarIO M.empty
emptyactiveremotes <- newMVar M.empty
kh <- Keys.newDbHandle
@ -146,7 +148,8 @@ newAnnexRead c = do
cm <- newTMVarIO M.empty
cc <- newTMVarIO (CredentialCache M.empty)
return $ AnnexRead
{ activekeys = emptyactivekeys
{ branchstate = bs
, activekeys = emptyactivekeys
, activeremotes = emptyactiveremotes
, keysdbhandle = kh
, sshstalecleaned = sc
@ -180,7 +183,6 @@ data AnnexState = AnnexState
, output :: MessageState
, concurrency :: ConcurrencySetting
, daemon :: Bool
, branchstate :: BranchState
, repoqueue :: Maybe (Git.Queue.Queue Annex)
, catfilehandles :: CatFileHandles
, hashobjecthandle :: Maybe (ResourcePool HashObjectHandle)
@ -235,7 +237,6 @@ newAnnexState c r = do
, output = o
, concurrency = ConcurrencyCmdLine NonConcurrent
, daemon = False
, branchstate = startBranchState
, repoqueue = Nothing
, catfilehandles = catFileHandlesNonConcurrent
, hashobjecthandle = Nothing

View file

@ -262,7 +262,7 @@ updateTo' pairs = do
else commitIndex jl branchref merge_desc commitrefs
)
addMergedRefs tomerge
invalidateCache
invalidateCacheAll
stagejournalwhen dirty jl a
| dirty = stageJournal jl a
@ -487,7 +487,7 @@ set jl ru f c = do
-- evaluating a Journalable Builder twice, which is not very
-- efficient. Instead, assume that it's not common to need to read
-- a log file immediately after writing it.
invalidateCache
invalidateCache f
{- Appends content to the journal file. -}
append :: Journalable content => JournalLocked -> RawFilePath -> AppendableJournalFile -> content -> Annex ()
@ -495,7 +495,7 @@ append jl f appendable toappend = do
journalChanged
appendJournalFile jl appendable toappend
fastDebug "Annex.Branch" ("append " ++ fromRawFilePath f)
invalidateCache
invalidateCache f
{- Commit message used when making a commit of whatever data has changed
- to the git-annex branch. -}

View file

@ -2,7 +2,7 @@
-
- Runtime state about the git-annex branch, and a small cache.
-
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -16,14 +16,18 @@ import qualified Annex
import Logs
import qualified Git
import Control.Concurrent
import qualified Data.ByteString.Lazy as L
getState :: Annex BranchState
getState = Annex.getState Annex.branchstate
getState = do
v <- Annex.getRead Annex.branchstate
liftIO $ readMVar v
changeState :: (BranchState -> BranchState) -> Annex ()
changeState changer = Annex.changeState $ \s ->
s { Annex.branchstate = changer (Annex.branchstate s) }
changeState changer = do
v <- Annex.getRead Annex.branchstate
liftIO $ modifyMVar_ v $ return . changer
{- Runs an action to check that the index file exists, if it's not been
- checked before in this run of git-annex. -}
@ -130,5 +134,11 @@ getCache file state = go (cachedFileContents state)
| f == file && not (needInteractiveAccess state) = Just c
| otherwise = go rest
invalidateCache :: Annex ()
invalidateCache = changeState $ \s -> s { cachedFileContents = [] }
invalidateCache :: RawFilePath -> Annex ()
invalidateCache f = changeState $ \s -> s
{ cachedFileContents = filter (\(f', _) -> f' /= f)
(cachedFileContents s)
}
invalidateCacheAll :: Annex ()
invalidateCacheAll = changeState $ \s -> s { cachedFileContents = [] }

View file

@ -18,6 +18,7 @@ import P2P.Protocol
import P2P.IO
import Annex.Proxy
import Annex.UUID
import Annex.BranchState
import Logs.Location
import Logs.PreferredContent
import Types.Command
@ -38,22 +39,17 @@ proxyCluster
-> (forall a. Annex () -> ((a -> CommandPerform) -> Annex (Either ProtoFailure a) -> CommandPerform))
-> CommandPerform
proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do
enableInteractiveBranchAccess
getClientProtocolVersion (fromClusterUUID clusteruuid) clientside
withclientversion (protoerrhandler noop)
where
proxymethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing
, addedContent = \u k -> logChange k u InfoPresent
}
withclientversion (Just (clientmaxversion, othermsg)) = do
-- The protocol versions supported by the nodes are not
-- known at this point, and would be too expensive to
-- determine. Instead, pick the newest protocol version
-- that we and the client both speak. The proxy code
-- checks protocol versions when operating on multiple
-- nodes, and allows nodes to have different protocol
-- versions.
-- checks protocol versions of remotes, so nodes can
-- have different protocol versions.
let protocolversion = min maxProtocolVersion clientmaxversion
sendClientProtocolVersion clientside othermsg protocolversion
(getclientbypass protocolversion) (protoerrhandler noop)
@ -64,16 +60,29 @@ proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do
(withclientbypass protocolversion) (protoerrhandler noop)
withclientbypass protocolversion (bypassuuids, othermsg) = do
(selectnode, closenodes) <- clusterProxySelector clusteruuid
protocolversion bypassuuids
concurrencyconfig <- getConcurrencyConfig
(selectnode, closenodes) <-
clusterProxySelector clusteruuid
protocolversion bypassuuids
proxystate <- liftIO mkProxyState
proxy proxydone proxymethods proxystate servermode clientside
(fromClusterUUID clusteruuid)
selectnode concurrencyconfig protocolversion
othermsg (protoerrhandler closenodes)
concurrencyconfig <- concurrencyConfigJobs
let proxyparams = ProxyParams
{ proxyMethods = mkProxyMethods
, proxyState = proxystate
, proxyServerMode = servermode
, proxyClientSide = clientside
, proxyUUID = fromClusterUUID clusteruuid
, proxySelector = selectnode
, proxyConcurrencyConfig = concurrencyconfig
, proxyClientProtocolVersion = protocolversion
}
proxy proxydone proxyparams othermsg
(protoerrhandler closenodes)
clusterProxySelector :: ClusterUUID -> ProtocolVersion -> Bypass -> Annex (ProxySelector, Annex ())
clusterProxySelector
:: ClusterUUID
-> ProtocolVersion
-> Bypass
-> Annex (ProxySelector, Annex ())
clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do
nodeuuids <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs)
<$> getClusters
@ -113,7 +122,6 @@ clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do
-- instead it can be locked on individual nodes that are
-- proxied to the client.
, proxyLOCKCONTENT = const (pure Nothing)
, proxyUNLOCKCONTENT = pure Nothing
}
return (proxyselector, closenodes)
where

View file

@ -181,11 +181,13 @@ data GetPrivate = GetPrivate Bool
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent
getJournalFileStale (GetPrivate getprivate) file = do
st <- Annex.getState id
let repo = Annex.repo st
bs <- getState
liftIO $
if getprivate && privateUUIDsKnown' st
then do
x <- getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st))
getfrom (gitAnnexPrivateJournalDir (Annex.branchstate st) (Annex.repo st)) >>= \case
x <- getfrom (gitAnnexJournalDir bs repo)
getfrom (gitAnnexPrivateJournalDir bs repo) >>= \case
Nothing -> return $ case x of
Nothing -> NoJournalledContent
Just b -> JournalledContent b
@ -195,7 +197,7 @@ getJournalFileStale (GetPrivate getprivate) file = do
-- happens in a merge of two
-- git-annex branches.
Just x' -> x' <> y
else getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) >>= return . \case
else getfrom (gitAnnexJournalDir bs repo) >>= return . \case
Nothing -> NoJournalledContent
Just b -> JournalledContent b
where
@ -223,8 +225,9 @@ discardIncompleteAppend v
- journal is staged as it is run. -}
getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath]
getJournalledFilesStale getjournaldir = do
st <- Annex.getState id
let d = getjournaldir (Annex.branchstate st) (Annex.repo st)
bs <- getState
repo <- Annex.gitRepo
let d = getjournaldir bs repo
fs <- liftIO $ catchDefaultIO [] $
getDirectoryContents (fromRawFilePath d)
return $ filter (`notElem` [".", ".."]) $
@ -233,8 +236,9 @@ getJournalledFilesStale getjournaldir = do
{- Directory handle open on a journal directory. -}
withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
withJournalHandle getjournaldir a = do
st <- Annex.getState id
let d = getjournaldir (Annex.branchstate st) (Annex.repo st)
bs <- getState
repo <- Annex.gitRepo
let d = getjournaldir bs repo
bracket (opendir d) (liftIO . closeDirectory) (liftIO . a)
where
-- avoid overhead of creating the journal directory when it already

View file

@ -8,23 +8,31 @@
module Annex.Proxy where
import Annex.Common
import P2P.Proxy
import P2P.Protocol
import P2P.IO
import qualified Annex
import qualified Remote
import qualified Types.Remote as Remote
import qualified Remote.Git
import P2P.Proxy
import P2P.Protocol
import P2P.IO
import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
import Annex.Content
import Annex.Concurrent
import Annex.Tmp
import Logs.Proxy
import Logs.Cluster
import Logs.UUID
import Logs.Location
import Utility.Tmp.Dir
import Utility.Metered
import Control.Concurrent.STM
import Control.Concurrent.Async
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified System.FilePath.ByteString as P
import qualified Data.Map as M
import qualified Data.Set as S
proxyRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide
proxyRemoteSide clientmaxversion bypass r
@ -53,18 +61,19 @@ proxySpecialRemoteSide clientmaxversion r = mkRemoteSide r $ do
ohdl <- liftIO newEmptyTMVarIO
iwaitv <- liftIO newEmptyTMVarIO
owaitv <- liftIO newEmptyTMVarIO
endv <- liftIO newEmptyTMVarIO
iclosedv <- liftIO newEmptyTMVarIO
oclosedv <- liftIO newEmptyTMVarIO
worker <- liftIO . async =<< forkState
(proxySpecialRemote protoversion r ihdl ohdl owaitv endv)
(proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv)
let remoteconn = P2PConnection
{ connRepo = Nothing
, connCheckAuth = const False
, connIhdl = P2PHandleTMVar ihdl iwaitv
, connOhdl = P2PHandleTMVar ohdl owaitv
, connIhdl = P2PHandleTMVar ihdl (Just iwaitv) iclosedv
, connOhdl = P2PHandleTMVar ohdl (Just owaitv) oclosedv
, connIdent = ConnIdent (Just (Remote.name r))
}
let closeremoteconn = do
liftIO $ atomically $ putTMVar endv ()
liftIO $ atomically $ putTMVar oclosedv ()
join $ liftIO (wait worker)
return $ Just
( remoterunst
@ -81,7 +90,7 @@ proxySpecialRemote
-> TMVar ()
-> TMVar ()
-> Annex ()
proxySpecialRemote protoversion r ihdl ohdl owaitv endv = go
proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
where
go :: Annex ()
go = liftIO receivemessage >>= \case
@ -114,23 +123,28 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv endv = go
liftIO $ sendmessage $
ERROR "NOTIFYCHANGE unsupported for a special remote"
go
Just _ -> giveup "protocol error M"
Just _ -> giveup "protocol error"
Nothing -> return ()
getnextmessageorend =
liftIO $ atomically $
(Right <$> takeTMVar ohdl)
`orElse`
(Left <$> readTMVar endv)
receivemessage = getnextmessageorend >>= \case
receivemessage = liftIO (atomically recv) >>= \case
Right (Right m) -> return (Just m)
Right (Left _b) -> giveup "unexpected ByteString received from P2P MVar"
Left () -> return Nothing
where
recv =
(Right <$> takeTMVar ohdl)
`orElse`
(Left <$> readTMVar oclosedv)
receivebytestring = atomically (takeTMVar ohdl) >>= \case
Left b -> return b
Right _m -> giveup "did not receive ByteString from P2P MVar"
receivebytestring = atomically recv >>= \case
Right (Left b) -> return b
Right (Right _m) -> giveup "did not receive ByteString from P2P MVar"
Left () -> giveup "connection closed"
where
recv =
(Right <$> takeTMVar ohdl)
`orElse`
(Left <$> readTMVar oclosedv)
sendmessage m = atomically $ putTMVar ihdl (Right m)
@ -155,21 +169,40 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv endv = go
Right () -> liftIO $ sendmessage SUCCESS
Left err -> liftIO $ propagateerror err
liftIO receivemessage >>= \case
Just (DATA (Len _)) -> do
b <- liftIO receivebytestring
liftIO $ L.writeFile (fromRawFilePath tmpfile) b
-- Signal that the whole bytestring
-- has been received.
liftIO $ atomically $ putTMVar owaitv ()
Just (DATA (Len len)) -> do
h <- liftIO $ openFile (fromRawFilePath tmpfile) WriteMode
liftIO $ receivetofile h len
liftIO $ hClose h
if protoversion > ProtocolVersion 1
then liftIO receivemessage >>= \case
Just (VALIDITY Valid) ->
store
Just (VALIDITY Invalid) ->
return ()
_ -> giveup "protocol error N"
liftIO $ sendmessage FAILURE
_ -> giveup "protocol error"
else store
_ -> giveup "protocol error O"
_ -> giveup "protocol error"
liftIO $ removeWhenExistsWith removeFile (fromRawFilePath tmpfile)
receivetofile h n = do
b <- liftIO receivebytestring
liftIO $ atomically $
putTMVar owaitv ()
`orElse`
readTMVar oclosedv
n' <- storetofile h n (L.toChunks b)
-- Normally all the data is sent in a single
-- lazy bytestring. However, when the special
-- remote is a node in a cluster, a PUT is
-- streamed to it in multiple chunks.
if n' == 0
then return ()
else receivetofile h n'
storetofile _ n [] = pure n
storetofile h n (b:bs) = do
B.hPut h b
storetofile h (n - fromIntegral (B.length b)) bs
proxyget offset af k = withproxytmpfile k $ \tmpfile -> do
-- Don't verify the content from the remote,
@ -206,6 +239,70 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv endv = go
receivemessage >>= \case
Just SUCCESS -> return ()
Just FAILURE -> return ()
Just _ -> giveup "protocol error P"
Just _ -> giveup "protocol error"
Nothing -> return ()
{- Check if this repository can proxy for a specified remote uuid,
- and if so enable proxying for it. -}
checkCanProxy :: UUID -> UUID -> Annex Bool
checkCanProxy remoteuuid ouruuid = do
ourproxies <- M.lookup ouruuid <$> getProxies
checkCanProxy' ourproxies remoteuuid >>= \case
Right v -> do
Annex.changeState $ \st -> st { Annex.proxyremote = Just v }
return True
Left Nothing -> return False
Left (Just err) -> giveup err
checkCanProxy' :: Maybe (S.Set Proxy) -> UUID -> Annex (Either (Maybe String) (Either ClusterUUID Remote))
checkCanProxy' Nothing _ = return (Left Nothing)
checkCanProxy' (Just proxies) remoteuuid =
case filter (\p -> proxyRemoteUUID p == remoteuuid) (S.toList proxies) of
[] -> notconfigured
ps -> case mkClusterUUID remoteuuid of
Just cu -> proxyforcluster cu
Nothing -> proxyfor ps
where
-- This repository may have multiple remotes that access the same
-- repository. Proxy for the lowest cost one that is configured to
-- be used as a proxy.
proxyfor ps = do
rs <- concat . Remote.byCost <$> Remote.remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
let sameuuid r = Remote.uuid r == remoteuuid
let samename r p = Remote.name r == proxyRemoteName p
case headMaybe (filter (\r -> sameuuid r && proxyisconfigured rs myclusters r && any (samename r) ps) rs) of
Nothing -> notconfigured
Just r -> return (Right (Right r))
-- Only proxy for a remote when the git configuration
-- allows it. This is important to prevent changes to
-- the git-annex branch causing unexpected proxying for remotes.
proxyisconfigured rs myclusters r
| remoteAnnexProxy (Remote.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . Remote.gitconfig) $
filter (\p -> Remote.uuid p == proxyuuid) rs
Nothing -> False
proxyforcluster cu = do
clusters <- getClusters
if M.member cu (clusterUUIDs clusters)
then return (Right (Left cu))
else notconfigured
notconfigured = M.lookup remoteuuid <$> uuidDescMap >>= \case
Just desc -> return $ Left $ Just $
"not configured to proxy for repository " ++ fromUUIDDesc desc
Nothing -> return $ Left Nothing
mkProxyMethods :: ProxyMethods
mkProxyMethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing
, addedContent = \u k -> logChange k u InfoPresent
}

View file

@ -1,6 +1,6 @@
{- verification
-
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -21,6 +21,8 @@ module Annex.Verify (
finishVerifyKeyContentIncrementally,
verifyKeyContentIncrementally,
IncrementalVerifier(..),
writeVerifyChunk,
resumeVerifyFromOffset,
tailVerify,
) where
@ -32,6 +34,7 @@ import qualified Types.Backend
import qualified Backend
import Types.Remote (unVerified, Verification(..), RetrievalSecurityPolicy(..))
import Utility.Hash (IncrementalVerifier(..))
import Utility.Metered
import Annex.WorkerPool
import Types.WorkerPool
import Types.Key
@ -213,6 +216,44 @@ verifyKeyContentIncrementally verifyconfig k a = do
a miv
snd <$> finishVerifyKeyContentIncrementally miv
writeVerifyChunk :: Maybe IncrementalVerifier -> Handle -> S.ByteString -> IO ()
writeVerifyChunk (Just iv) h c = do
S.hPut h c
updateIncrementalVerifier iv c
writeVerifyChunk Nothing h c = S.hPut h c
{- Given a file handle that is open for reading (and likely also for writing),
- and an offset, feeds the current content of the file up to the offset to
- the IncrementalVerifier. Leaves the file seeked to the offset.
- Returns the meter with the offset applied. -}
resumeVerifyFromOffset
:: Integer
-> Maybe IncrementalVerifier
-> MeterUpdate
-> Handle
-> IO MeterUpdate
resumeVerifyFromOffset o incrementalverifier meterupdate h
| o /= 0 = do
maybe noop (`go` o) incrementalverifier
-- Make sure the handle is seeked to the offset.
-- (Reading the file probably left it there
-- when that was done, but let's be sure.)
hSeek h AbsoluteSeek o
return offsetmeterupdate
| otherwise = return meterupdate
where
offsetmeterupdate = offsetMeterUpdate meterupdate (toBytesProcessed o)
go iv n
| n == 0 = return ()
| otherwise = do
let c = if n > fromIntegral defaultChunkSize
then defaultChunkSize
else fromIntegral n
b <- S.hGet h c
updateIncrementalVerifier iv b
unless (b == S.empty) $
go iv (n - fromIntegral (S.length b))
-- | Runs a writer action that retrieves to a file. In another thread,
-- reads the file as it grows, and feeds it to the incremental verifier.
--

View file

@ -97,13 +97,11 @@ changeStageTo mytid tv getnewstage = liftIO $
-- | Waits until there's an idle StartStage worker in the worker pool,
-- removes it from the pool, and returns its state.
--
-- If the worker pool is not already allocated, returns Nothing.
waitStartWorkerSlot :: TMVar (WorkerPool t) -> STM (Maybe (t, WorkerStage))
waitStartWorkerSlot :: TMVar (WorkerPool t) -> STM (t, WorkerStage)
waitStartWorkerSlot tv = do
pool <- takeTMVar tv
v <- go pool
return $ Just (v, StartStage)
return (v, StartStage)
where
go pool = case spareVals pool of
[] -> retry

View file

@ -52,6 +52,11 @@ buildFlags = filter (not . null)
#ifdef WITH_MAGICMIME
, "MagicMime"
#endif
#ifdef WITH_SERVANT
, "Servant"
#else
#warning Building without servant, will not support annex+http urls or git-annex p2phttp.
#endif
#ifdef WITH_BENCHMARK
, "Benchmark"
#endif

View file

@ -1,5 +1,13 @@
git-annex (10.20240702) UNRELEASED; urgency=medium
* New HTTP API that is equivilant to the P2P protocol.
* annex+http and annex+https urls can be configured for
remote.name.annexUrl to use the HTTP API to communicate with a server.
This supports writable repositories, as well as accessing clusters
and proxied remotes over HTTP.
* New p2phttp command to serve the HTTP API.
* Added a build flag for servant, enabling annex+http urls and
git-annex p2phttp.
* assistant: Fix a race condition that could cause a pointer file to
get ingested into the annex.
* Avoid potential data loss in situations where git-annex-shell or

View file

@ -87,9 +87,8 @@ commandAction start = do
runconcurrent sizelimit Nothing = runnonconcurrent sizelimit
runconcurrent sizelimit (Just tv) =
liftIO (atomically (waitStartWorkerSlot tv)) >>= maybe
(runnonconcurrent sizelimit)
(runconcurrent' sizelimit tv)
liftIO (atomically (waitStartWorkerSlot tv))
>>= runconcurrent' sizelimit tv
runconcurrent' sizelimit tv (workerstrd, workerstage) = do
aid <- liftIO $ async $ snd
<$> Annex.run workerstrd

View file

@ -1,6 +1,6 @@
{- git-annex main program
-
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -118,6 +118,9 @@ import qualified Command.Upgrade
import qualified Command.Forget
import qualified Command.OldKeys
import qualified Command.P2P
#ifdef WITH_SERVANT
import qualified Command.P2PHttp
#endif
import qualified Command.Proxy
import qualified Command.DiffDriver
import qualified Command.Smudge
@ -245,6 +248,9 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
, Command.Forget.cmd
, Command.OldKeys.cmd
, Command.P2P.cmd
#ifdef WITH_SERVANT
, Command.P2PHttp.cmd
#endif
, Command.Proxy.cmd
, Command.DiffDriver.cmd
, Command.Smudge.cmd

View file

@ -8,7 +8,6 @@
module CmdLine.GitAnnexShell where
import Annex.Common
import qualified Annex
import qualified Git.Construct
import qualified Git.Config
import CmdLine
@ -20,11 +19,7 @@ import CmdLine.GitAnnexShell.Fields
import Remote.GCrypt (getGCryptUUID)
import P2P.Protocol (ServerMode(..))
import Git.Types
import qualified Types.Remote as R
import Logs.Proxy
import Logs.Cluster
import Logs.UUID
import Remote
import Annex.Proxy
import qualified Command.ConfigList
import qualified Command.NotifyChanges
@ -36,7 +31,6 @@ import qualified Command.SendKey
import qualified Command.DropKey
import qualified Data.Map as M
import qualified Data.Set as S
cmdsMap :: M.Map ServerMode [Command]
cmdsMap = M.fromList $ map mk
@ -90,7 +84,7 @@ commonShellOptions =
check u
| u == toUUID expected = noop
| otherwise =
unlessM (checkProxy (toUUID expected) u) $
unlessM (checkCanProxy (toUUID expected) u) $
unexpectedUUID expected u
checkGCryptUUID expected = check =<< getGCryptUUID True =<< gitRepo
@ -184,61 +178,3 @@ checkField (field, val)
| field == fieldName remoteUUID = fieldCheck remoteUUID val
| field == fieldName autoInit = fieldCheck autoInit val
| otherwise = False
{- Check if this repository can proxy for a specified remote uuid,
- and if so enable proxying for it. -}
checkProxy :: UUID -> UUID -> Annex Bool
checkProxy remoteuuid ouruuid = M.lookup ouruuid <$> getProxies >>= \case
Nothing -> return False
-- This repository has (or had) proxying enabled. So it's
-- ok to display error messages that talk about proxies.
Just proxies ->
case filter (\p -> proxyRemoteUUID p == remoteuuid) (S.toList proxies) of
[] -> notconfigured
ps -> case mkClusterUUID remoteuuid of
Just cu -> proxyforcluster cu
Nothing -> proxyfor ps
where
-- This repository may have multiple remotes that access the same
-- repository. Proxy for the lowest cost one that is configured to
-- be used as a proxy.
proxyfor ps = do
rs <- concat . byCost <$> remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
let sameuuid r = uuid r == remoteuuid
let samename r p = name r == proxyRemoteName p
case headMaybe (filter (\r -> sameuuid r && proxyisconfigured rs myclusters r && any (samename r) ps) rs) of
Nothing -> notconfigured
Just r -> do
Annex.changeState $ \st ->
st { Annex.proxyremote = Just (Right r) }
return True
-- Only proxy for a remote when the git configuration
-- allows it. This is important to prevent changes to
-- the git-annex branch making git-annex-shell unexpectedly
-- proxy for remotes.
proxyisconfigured rs myclusters r
| remoteAnnexProxy (R.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ R.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (R.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . R.gitconfig) $
filter (\p -> R.uuid p == proxyuuid) rs
Nothing -> False
proxyforcluster cu = do
clusters <- getClusters
if M.member cu (clusterUUIDs clusters)
then do
Annex.changeState $ \st ->
st { Annex.proxyremote = Just (Left cu) }
return True
else notconfigured
notconfigured = M.lookup remoteuuid <$> uuidDescMap >>= \case
Just desc -> giveup $ "not configured to proxy for repository " ++ fromUUIDDesc desc
Nothing -> return False

View file

@ -50,7 +50,7 @@ start cu clustername gatewayremote = starting "extendcluster" ai si $ do
let setcus f = setConfig f (fromUUID (fromClusterUUID cu))
unless (M.member clustername myclusters) $ do
setcus $ annexConfig ("cluster." <> encodeBS clustername)
setcus $ remoteAnnexConfig gatewayremote $
setcus $ mkRemoteConfigKey gatewayremote $
remoteGitConfigKey ClusterGatewayField
next $ return True
where

173
Command/P2PHttp.hs Normal file
View file

@ -0,0 +1,173 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
module Command.P2PHttp where
import Command
import P2P.Http.Server
import P2P.Http.Url
import qualified P2P.Protocol as P2P
import Utility.Env
import Servant
import qualified Network.Wai.Handler.Warp as Warp
import qualified Network.Wai.Handler.WarpTLS as Warp
import Network.Socket (PortNumber)
import qualified Data.Map as M
import Data.String
cmd :: Command
cmd = withAnnexOptions [jobsOption] $ command "p2phttp" SectionPlumbing
"communicate in P2P protocol over http"
paramNothing (seek <$$> optParser)
data Options = Options
{ portOption :: Maybe PortNumber
, bindOption :: Maybe String
, certFileOption :: Maybe FilePath
, privateKeyFileOption :: Maybe FilePath
, chainFileOption :: [FilePath]
, authEnvOption :: Bool
, authEnvHttpOption :: Bool
, unauthReadOnlyOption :: Bool
, unauthAppendOnlyOption :: Bool
, wideOpenOption :: Bool
, proxyConnectionsOption :: Maybe Integer
, clusterJobsOption :: Maybe Int
}
optParser :: CmdParamsDesc -> Parser Options
optParser _ = Options
<$> optional (option auto
( long "port" <> metavar paramNumber
<> help "specify port to listen on"
))
<*> optional (strOption
( long "bind" <> metavar paramAddress
<> help "specify address to bind to"
))
<*> optional (strOption
( long "certfile" <> metavar paramFile
<> help "TLS certificate file for HTTPS"
))
<*> optional (strOption
( long "privatekeyfile" <> metavar paramFile
<> help "TLS private key file for HTTPS"
))
<*> many (strOption
( long "chainfile" <> metavar paramFile
<> help "TLS chain file"
))
<*> switch
( long "authenv"
<> help "authenticate users from environment (https only)"
)
<*> switch
( long "authenv-http"
<> help "authenticate users from environment (including http)"
)
<*> switch
( long "unauth-readonly"
<> help "allow unauthenticated users to read the repository"
)
<*> switch
( long "unauth-appendonly"
<> help "allow unauthenticated users to read and append to the repository"
)
<*> switch
( long "wideopen"
<> help "give unauthenticated users full read+write access"
)
<*> optional (option auto
( long "proxyconnections" <> metavar paramNumber
<> help "maximum number of idle connections when proxying"
))
<*> optional (option auto
( long "clusterjobs" <> metavar paramNumber
<> help "number of concurrent node accesses per connection"
))
seek :: Options -> CommandSeek
seek o = getAnnexWorkerPool $ \workerpool ->
withP2PConnections workerpool
(fromMaybe 1 $ proxyConnectionsOption o)
(fromMaybe 1 $ clusterJobsOption o)
(go workerpool)
where
go workerpool acquireconn = liftIO $ do
authenv <- getAuthEnv
st <- mkP2PHttpServerState acquireconn workerpool $
mkGetServerMode authenv o
let settings = Warp.setPort port $ Warp.setHost host $
Warp.defaultSettings
case (certFileOption o, privateKeyFileOption o) of
(Nothing, Nothing) -> Warp.runSettings settings (p2pHttpApp st)
(Just certfile, Just privatekeyfile) -> do
let tlssettings = Warp.tlsSettingsChain
certfile (chainFileOption o) privatekeyfile
Warp.runTLS tlssettings settings (p2pHttpApp st)
_ -> giveup "You must use both --certfile and --privatekeyfile options to enable HTTPS."
port = maybe
(fromIntegral defaultP2PHttpProtocolPort)
fromIntegral
(portOption o)
host = maybe
(fromString "*") -- both ipv4 and ipv6
fromString
(bindOption o)
mkGetServerMode :: M.Map Auth P2P.ServerMode -> Options -> GetServerMode
mkGetServerMode _ o _ Nothing
| wideOpenOption o = Just P2P.ServeReadWrite
| unauthAppendOnlyOption o = Just P2P.ServeAppendOnly
| unauthReadOnlyOption o = Just P2P.ServeReadOnly
| otherwise = Nothing
mkGetServerMode authenv o issecure (Just auth) =
case (issecure, authEnvOption o, authEnvHttpOption o) of
(Secure, True, _) -> checkauth
(NotSecure, _, True) -> checkauth
_ -> noauth
where
checkauth = case M.lookup auth authenv of
Just servermode -> Just servermode
Nothing -> noauth
noauth = mkGetServerMode authenv o issecure Nothing
getAuthEnv :: IO (M.Map Auth P2P.ServerMode)
getAuthEnv = do
environ <- getEnvironment
let permmap = M.fromList (mapMaybe parseperms environ)
return $ M.fromList $
map (addperms permmap) $
mapMaybe parseusername environ
where
parseperms (k, v) = case deprefix "GIT_ANNEX_P2PHTTP_PERMISSIONS_" k of
Nothing -> Nothing
Just username -> case v of
"readonly" -> Just
(encodeBS username, P2P.ServeReadOnly)
"appendonly" -> Just
(encodeBS username, P2P.ServeAppendOnly)
_ -> Nothing
parseusername (k, v) = case deprefix "GIT_ANNEX_P2PHTTP_PASSWORD_" k of
Nothing -> Nothing
Just username -> Just $ Auth (encodeBS username) (encodeBS v)
deprefix prefix s
| prefix `isPrefixOf` s = Just (drop (length prefix) s)
| otherwise = Nothing
addperms permmap auth@(Auth user _) =
case M.lookup user permmap of
Nothing -> (auth, P2P.ServeReadWrite)
Just perms -> (auth, perms)

View file

@ -16,7 +16,6 @@ import qualified Annex
import Annex.Proxy
import Annex.UUID
import qualified CmdLine.GitAnnexShell.Checks as Checks
import Logs.Location
import Logs.Cluster
import Annex.Cluster
import qualified Remote
@ -61,7 +60,7 @@ performLocal theiruuid servermode = do
performProxy :: UUID -> P2P.ServerMode -> Remote -> CommandPerform
performProxy clientuuid servermode r = do
clientside <- proxyClientSide clientuuid
clientside <- mkProxyClientSide clientuuid
getClientProtocolVersion (Remote.uuid r) clientside
(withclientversion clientside)
(p2pErrHandler noop)
@ -77,29 +76,29 @@ performProxy clientuuid servermode r = do
p2pDone
let errhandler = p2pErrHandler (closeRemoteSide remoteside)
proxystate <- liftIO mkProxyState
let runproxy othermsg' = proxy closer
proxymethods proxystate
servermode clientside
(Remote.uuid r)
(singleProxySelector remoteside)
concurrencyconfig
protocolversion othermsg' errhandler
let proxyparams = ProxyParams
{ proxyMethods = mkProxyMethods
, proxyState = proxystate
, proxyServerMode = servermode
, proxyClientSide = clientside
, proxyUUID = Remote.uuid r
, proxySelector = singleProxySelector remoteside
, proxyConcurrencyConfig = concurrencyconfig
, proxyClientProtocolVersion = protocolversion
}
let runproxy othermsg' = proxy closer proxyparams
othermsg' errhandler
sendClientProtocolVersion clientside othermsg protocolversion
runproxy errhandler
withclientversion _ Nothing = p2pDone
proxymethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing
, addedContent = \u k -> logChange k u InfoPresent
}
performProxyCluster :: UUID -> ClusterUUID -> P2P.ServerMode -> CommandPerform
performProxyCluster clientuuid clusteruuid servermode = do
clientside <- proxyClientSide clientuuid
clientside <- mkProxyClientSide clientuuid
proxyCluster clusteruuid p2pDone servermode clientside p2pErrHandler
proxyClientSide :: UUID -> Annex ClientSide
proxyClientSide clientuuid = do
mkProxyClientSide :: UUID -> Annex ClientSide
mkProxyClientSide clientuuid = do
clientrunst <- liftIO (mkRunState $ Serving clientuuid Nothing)
ClientSide clientrunst <$> liftIO (stdioP2PConnectionDupped Nothing)

View file

@ -42,7 +42,7 @@ getBasicAuthFromCredential r ccv u = do
Just c -> go (const noop) c
Nothing -> do
let storeincache = \c -> atomically $ do
(CredentialCache cc') <- takeTMVar ccv
CredentialCache cc' <- takeTMVar ccv
putTMVar ccv (CredentialCache (M.insert bu c cc'))
go storeincache =<< getUrlCredential u r
Nothing -> go (const noop) =<< getUrlCredential u r
@ -113,7 +113,9 @@ data CredentialCache = CredentialCache (M.Map CredentialBaseURL Credential)
-- when credential.useHttpPath is false, one Credential is cached
-- for each git repo accessed, and there are a reasonably small number of
-- those, so the cache will not grow too large.
data CredentialBaseURL = CredentialBaseURL URI
data CredentialBaseURL
= CredentialBaseURI URI
| CredentialBaseURL String
deriving (Show, Eq, Ord)
mkCredentialBaseURL :: Repo -> URLString -> Maybe CredentialBaseURL
@ -123,4 +125,4 @@ mkCredentialBaseURL r s = do
Config.get (ConfigKey "credential.useHttpPath") (ConfigValue "") r
if usehttppath
then Nothing
else Just $ CredentialBaseURL $ u { uriPath = "" }
else Just $ CredentialBaseURI $ u { uriPath = "" }

View file

@ -29,7 +29,6 @@ import Annex.Verify
import Control.Monad.Free
import Control.Concurrent.STM
import Data.Time.Clock.POSIX
import qualified Data.ByteString as S
-- Full interpreter for Proto, that can receive and send objects.
runFullProto :: RunState -> P2PConnection -> Proto a -> Annex (Either ProtoFailure a)
@ -101,6 +100,26 @@ runLocal runst runner a = case a of
Left e -> return $ Left $ ProtoFailureException e
Right (Left e) -> return $ Left e
Right (Right ok) -> runner (next ok)
SendContentWith consumer getb validitycheck next -> do
v <- tryNonAsync $ do
let fallback = return $ Left $
ProtoFailureMessage "Transfer failed"
let consumer' b ti = do
validator <- consumer b
indicatetransferred ti
return validator
runner getb >>= \case
Left e -> giveup $ describeProtoFailure e
Right b -> checktransfer (\ti -> Right <$> consumer' b ti) fallback >>= \case
Left e -> return (Left e)
Right validator ->
runner validitycheck >>= \case
Right v -> Right <$> validator v
_ -> Right <$> validator Nothing
case v of
Left e -> return $ Left $ ProtoFailureException e
Right (Left e) -> return $ Left e
Right (Right ok) -> runner (next ok)
SetPresent k u next -> do
v <- tryNonAsync $ logChange k u InfoPresent
case v of
@ -171,43 +190,13 @@ runLocal runst runner a = case a of
-- a client.
Client _ -> ta nullMeterUpdate
resumefromoffset o incrementalverifier p h
| o /= 0 = do
p' <- case incrementalverifier of
Just iv -> do
go iv o
return p
_ -> return $ offsetMeterUpdate p (toBytesProcessed o)
-- Make sure the handle is seeked to the offset.
-- (Reading the file probably left it there
-- when that was done, but let's be sure.)
hSeek h AbsoluteSeek o
return p'
| otherwise = return p
where
go iv n
| n == 0 = return ()
| otherwise = do
let c = if n > fromIntegral defaultChunkSize
then defaultChunkSize
else fromIntegral n
b <- S.hGet h c
updateIncrementalVerifier iv b
unless (b == S.empty) $
go iv (n - fromIntegral (S.length b))
storefile dest (Offset o) (Len l) getb incrementalverifier validitycheck p ti = do
v <- runner getb
case v of
Right b -> do
liftIO $ withBinaryFile dest ReadWriteMode $ \h -> do
p' <- resumefromoffset o incrementalverifier p h
let writechunk = case incrementalverifier of
Nothing -> \c -> S.hPut h c
Just iv -> \c -> do
S.hPut h c
updateIncrementalVerifier iv c
meteredWrite p' writechunk b
p' <- resumeVerifyFromOffset o incrementalverifier p h
meteredWrite p' (writeVerifyChunk incrementalverifier h) b
indicatetransferred ti
rightsize <- do

184
P2P/Http.hs Normal file
View file

@ -0,0 +1,184 @@
{- P2P protocol over HTTP
-
- https://git-annex.branchable.com/design/p2p_protocol_over_http/
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
module P2P.Http (
module P2P.Http,
module P2P.Http.Types,
) where
import P2P.Http.Types
import Servant
import qualified Data.ByteString as B
type P2PHttpAPI
= "git-annex" :> SU :> PV3 :> "key" :> GetAPI
:<|> "git-annex" :> SU :> PV2 :> "key" :> GetAPI
:<|> "git-annex" :> SU :> PV1 :> "key" :> GetAPI
:<|> "git-annex" :> SU :> PV0 :> "key" :> GetAPI
:<|> "git-annex" :> SU :> PV3 :> "checkpresent" :> CheckPresentAPI
:<|> "git-annex" :> SU :> PV2 :> "checkpresent" :> CheckPresentAPI
:<|> "git-annex" :> SU :> PV1 :> "checkpresent" :> CheckPresentAPI
:<|> "git-annex" :> SU :> PV0 :> "checkpresent" :> CheckPresentAPI
:<|> "git-annex" :> SU :> PV3 :> "remove" :> RemoveAPI RemoveResultPlus
:<|> "git-annex" :> SU :> PV2 :> "remove" :> RemoveAPI RemoveResultPlus
:<|> "git-annex" :> SU :> PV1 :> "remove" :> RemoveAPI RemoveResult
:<|> "git-annex" :> SU :> PV0 :> "remove" :> RemoveAPI RemoveResult
:<|> "git-annex" :> SU :> PV3 :> "remove-before" :> RemoveBeforeAPI
:<|> "git-annex" :> SU :> PV3 :> "gettimestamp" :> GetTimestampAPI
:<|> "git-annex" :> SU :> PV3 :> "put" :> PutAPI PutResultPlus
:<|> "git-annex" :> SU :> PV2 :> "put" :> PutAPI PutResultPlus
:<|> "git-annex" :> SU :> PV1 :> "put" :> PutAPI PutResult
:<|> "git-annex" :> SU :> PV0 :> "put" :> PutAPI PutResult
:<|> "git-annex" :> SU :> PV3 :> "putoffset"
:> PutOffsetAPI PutOffsetResultPlus
:<|> "git-annex" :> SU :> PV2 :> "putoffset"
:> PutOffsetAPI PutOffsetResultPlus
:<|> "git-annex" :> SU :> PV1 :> "putoffset"
:> PutOffsetAPI PutOffsetResult
:<|> "git-annex" :> SU :> PV3 :> "lockcontent" :> LockContentAPI
:<|> "git-annex" :> SU :> PV2 :> "lockcontent" :> LockContentAPI
:<|> "git-annex" :> SU :> PV1 :> "lockcontent" :> LockContentAPI
:<|> "git-annex" :> SU :> PV0 :> "lockcontent" :> LockContentAPI
:<|> "git-annex" :> SU :> PV3 :> "keeplocked" :> KeepLockedAPI
:<|> "git-annex" :> SU :> PV2 :> "keeplocked" :> KeepLockedAPI
:<|> "git-annex" :> SU :> PV1 :> "keeplocked" :> KeepLockedAPI
:<|> "git-annex" :> SU :> PV0 :> "keeplocked" :> KeepLockedAPI
:<|> "git-annex" :> SU :> "key" :> GetGenericAPI
p2pHttpAPI :: Proxy P2PHttpAPI
p2pHttpAPI = Proxy
type GetGenericAPI
= CaptureKey
:> CU Optional
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> StreamGet NoFraming OctetStream
(Headers '[DataLengthHeader] (SourceIO B.ByteString))
type GetAPI
= CaptureKey
:> CU Required
:> BypassUUIDs
:> AssociatedFileParam
:> OffsetParam
:> IsSecure
:> AuthHeader
:> StreamGet NoFraming OctetStream
(Headers '[DataLengthHeader] (SourceIO B.ByteString))
type CheckPresentAPI
= KeyParam
:> CU Required
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> Post '[JSON] CheckPresentResult
type RemoveAPI result
= KeyParam
:> CU Required
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> Post '[JSON] result
type RemoveBeforeAPI
= KeyParam
:> CU Required
:> BypassUUIDs
:> QueryParam' '[Required] "timestamp" Timestamp
:> IsSecure
:> AuthHeader
:> Post '[JSON] RemoveResultPlus
type GetTimestampAPI
= CU Required
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> Post '[JSON] GetTimestampResult
type PutAPI result
= DataLengthHeaderRequired
:> KeyParam
:> CU Required
:> BypassUUIDs
:> AssociatedFileParam
:> OffsetParam
:> StreamBody NoFraming OctetStream (SourceIO B.ByteString)
:> IsSecure
:> AuthHeader
:> Post '[JSON] result
type PutOffsetAPI result
= KeyParam
:> CU Required
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> Post '[JSON] result
type LockContentAPI
= KeyParam
:> CU Required
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> Post '[JSON] LockResult
type KeepLockedAPI
= LockIDParam
:> CU Optional
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> Header "Connection" ConnectionKeepAlive
:> Header "Keep-Alive" KeepAlive
:> StreamBody NewlineFraming JSON (SourceIO UnlockRequest)
:> Post '[JSON] LockResult
type SU = Capture "serveruuid" (B64UUID ServerSide)
type CU req = QueryParam' '[req] "clientuuid" (B64UUID ClientSide)
type BypassUUIDs = QueryParams "bypass" (B64UUID Bypass)
type CaptureKey = Capture "key" B64Key
type KeyParam = QueryParam' '[Required] "key" B64Key
type AssociatedFileParam = QueryParam "associatedfile" B64FilePath
type OffsetParam = QueryParam "offset" Offset
type DataLengthHeader = Header DataLengthHeader' DataLength
type DataLengthHeaderRequired = Header' '[Required] DataLengthHeader' DataLength
type DataLengthHeader' = "X-git-annex-data-length"
type LockIDParam = QueryParam' '[Required] "lockid" LockID
type AuthHeader = Header "Authorization" Auth
type PV3 = Capture "v3" V3
type PV2 = Capture "v2" V2
type PV1 = Capture "v1" V1
type PV0 = Capture "v0" V0

535
P2P/Http/Client.hs Normal file
View file

@ -0,0 +1,535 @@
{- P2P protocol over HTTP, client
-
- https://git-annex.branchable.com/design/p2p_protocol_over_http/
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds, TypeApplications #-}
{-# LANGUAGE CPP #-}
module P2P.Http.Client (
module P2P.Http.Client,
module P2P.Http.Types,
Validity(..),
) where
import Types
import P2P.Http.Types
import P2P.Protocol hiding (Offset, Bypass, auth, FileSize)
import Utility.Metered
import Utility.FileSize
import Types.NumCopies
#ifdef WITH_SERVANT
import qualified Annex
import Annex.UUID
import Annex.Url
import Types.Remote
import P2P.Http
import P2P.Http.Url
import Annex.Common
import Annex.Concurrent
import Utility.Url (BasicAuth(..))
import Utility.HumanTime
import qualified Git.Credential as Git
import Servant hiding (BasicAuthData(..))
import Servant.Client.Streaming
import qualified Servant.Types.SourceT as S
import Network.HTTP.Types.Status
import Network.HTTP.Client
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy.Internal as LI
import qualified Data.Map as M
import Data.Time.Clock.POSIX
import Control.Concurrent.STM
import Control.Concurrent.Async
import Control.Concurrent
import System.IO.Unsafe
#endif
import qualified Data.ByteString.Lazy as L
type ClientAction a
#ifdef WITH_SERVANT
= ClientEnv
-> ProtocolVersion
-> B64UUID ServerSide
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> Maybe Auth
-> Annex (Either ClientError a)
#else
= ()
#endif
p2pHttpClient
:: Remote
-> (String -> Annex a)
-> ClientAction a
-> Annex a
p2pHttpClient rmt fallback clientaction =
p2pHttpClientVersions (const True) rmt fallback clientaction >>= \case
Just res -> return res
Nothing -> fallback "git-annex HTTP API server is missing an endpoint"
p2pHttpClientVersions
:: (ProtocolVersion -> Bool)
-> Remote
-> (String -> Annex a)
-> ClientAction a
-> Annex (Maybe a)
#ifdef WITH_SERVANT
p2pHttpClientVersions allowedversion rmt fallback clientaction =
case p2pHttpBaseUrl <$> remoteAnnexP2PHttpUrl (gitconfig rmt) of
Nothing -> error "internal"
Just baseurl -> do
mgr <- httpManager <$> getUrlOptions
let clientenv = mkClientEnv mgr baseurl
ccv <- Annex.getRead Annex.gitcredentialcache
Git.CredentialCache cc <- liftIO $ atomically $
readTMVar ccv
case M.lookup (Git.CredentialBaseURL credentialbaseurl) cc of
Nothing -> go clientenv Nothing False Nothing versions
Just cred -> go clientenv (Just cred) True (credauth cred) versions
where
versions = filter allowedversion allProtocolVersions
go clientenv mcred credcached mauth (v:vs) = do
myuuid <- getUUID
res <- clientaction clientenv v
(B64UUID (uuid rmt))
(B64UUID myuuid)
[]
mauth
case res of
Right resp -> do
unless credcached $ cachecred mcred
return (Just resp)
Left (FailureResponse _ resp)
| statusCode (responseStatusCode resp) == 404 && not (null vs) ->
go clientenv mcred credcached mauth vs
| statusCode (responseStatusCode resp) == 401 ->
case mcred of
Nothing -> authrequired clientenv (v:vs)
Just cred -> do
inRepo $ Git.rejectUrlCredential cred
Just <$> fallback (showstatuscode resp)
| otherwise -> Just <$> fallback (showstatuscode resp)
Left (ConnectionError ex) -> case fromException ex of
Just (HttpExceptionRequest _ (ConnectionFailure err)) -> Just <$> fallback
("unable to connect to HTTP server: " ++ show err)
_ -> Just <$> fallback (show ex)
Left clienterror -> Just <$> fallback
("git-annex HTTP API server returned an unexpected response: " ++ show clienterror)
go _ _ _ _ [] = return Nothing
authrequired clientenv vs = do
cred <- prompt $
inRepo $ Git.getUrlCredential credentialbaseurl
go clientenv (Just cred) False (credauth cred) vs
showstatuscode resp =
show (statusCode (responseStatusCode resp))
++ " " ++
decodeBS (statusMessage (responseStatusCode resp))
credentialbaseurl = case p2pHttpUrlString <$> remoteAnnexP2PHttpUrl (gitconfig rmt) of
Nothing -> error "internal"
Just url -> url
credauth cred = do
ba <- Git.credentialBasicAuth cred
return $ Auth
(encodeBS (basicAuthUser ba))
(encodeBS (basicAuthPassword ba))
cachecred mcred = case mcred of
Just cred -> do
inRepo $ Git.approveUrlCredential cred
ccv <- Annex.getRead Annex.gitcredentialcache
liftIO $ atomically $ do
Git.CredentialCache cc <- takeTMVar ccv
putTMVar ccv $ Git.CredentialCache $
M.insert (Git.CredentialBaseURL credentialbaseurl) cred cc
Nothing -> noop
#else
p2pHttpClient _rmt fallback () = fallback
"This remote uses an annex+http url, but this version of git-annex is not build with support for that."
#endif
clientGet
:: Key
-> AssociatedFile
-> (L.ByteString -> IO BytesProcessed)
-- ^ Must consume the entire ByteString before returning its
-- total size.
-> Maybe FileSize
-- ^ Size of existing file, when resuming.
-> ClientAction Validity
#ifdef WITH_SERVANT
clientGet k af consumer startsz clientenv (ProtocolVersion ver) su cu bypass auth = liftIO $ do
let offset = fmap (Offset . fromIntegral) startsz
withClientM (cli (B64Key k) cu bypass baf offset auth) clientenv $ \case
Left err -> return (Left err)
Right respheaders -> do
b <- S.unSourceT (getResponse respheaders) gather
BytesProcessed len <- consumer b
let DataLength dl = case lookupResponseHeader @DataLengthHeader' respheaders of
Header hdr -> hdr
_ -> error "missing data length header"
return $ Right $
if dl == len then Valid else Invalid
where
cli =case ver of
3 -> v3 su V3
2 -> v2 su V2
1 -> v1 su V1
0 -> v0 su V0
_ -> error "unsupported protocol version"
v3 :<|> v2 :<|> v1 :<|> v0 :<|> _ = client p2pHttpAPI
gather = unsafeInterleaveIO . gather'
gather' S.Stop = return LI.Empty
gather' (S.Error err) = giveup err
gather' (S.Skip s) = gather' s
gather' (S.Effect ms) = ms >>= gather'
gather' (S.Yield v s) = LI.Chunk v <$> unsafeInterleaveIO (gather' s)
baf = associatedFileToB64FilePath af
#else
clientGet _ _ _ _ = ()
#endif
clientCheckPresent :: Key -> ClientAction Bool
#ifdef WITH_SERVANT
clientCheckPresent key clientenv (ProtocolVersion ver) su cu bypass auth =
liftIO $ withClientM (cli su (B64Key key) cu bypass auth) clientenv $ \case
Left err -> return (Left err)
Right (CheckPresentResult res) -> return (Right res)
where
cli = case ver of
3 -> flip v3 V3
2 -> flip v2 V2
1 -> flip v1 V1
0 -> flip v0 V0
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
v3 :<|> v2 :<|> v1 :<|> v0 :<|> _ = client p2pHttpAPI
#else
clientCheckPresent _ = ()
#endif
-- Similar to P2P.Protocol.remove.
clientRemoveWithProof
:: Maybe SafeDropProof
-> Key
-> Annex RemoveResultPlus
-> Remote
-> Annex RemoveResultPlus
clientRemoveWithProof proof k unabletoremove remote =
case safeDropProofEndTime =<< proof of
Nothing -> removeanytime
Just endtime -> removebefore endtime
where
removeanytime = p2pHttpClient remote giveup (clientRemove k)
removebefore endtime =
p2pHttpClientVersions useversion remote giveup clientGetTimestamp >>= \case
Just (GetTimestampResult (Timestamp remotetime)) ->
removebefore' endtime remotetime
-- Peer is too old to support REMOVE-BEFORE.
Nothing -> removeanytime
removebefore' endtime remotetime =
canRemoveBefore endtime remotetime (liftIO getPOSIXTime) >>= \case
Just remoteendtime -> p2pHttpClient remote giveup $
clientRemoveBefore k (Timestamp remoteendtime)
Nothing -> unabletoremove
useversion v = v >= ProtocolVersion 3
clientRemove :: Key -> ClientAction RemoveResultPlus
#ifdef WITH_SERVANT
clientRemove k clientenv (ProtocolVersion ver) su cu bypass auth =
liftIO $ withClientM cli clientenv return
where
bk = B64Key k
cli = case ver of
3 -> v3 su V3 bk cu bypass auth
2 -> v2 su V2 bk cu bypass auth
1 -> plus <$> v1 su V1 bk cu bypass auth
0 -> plus <$> v0 su V0 bk cu bypass auth
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
v3 :<|> v2 :<|> v1 :<|> v0 :<|> _ = client p2pHttpAPI
#else
clientRemove _ = ()
#endif
clientRemoveBefore
:: Key
-> Timestamp
-> ClientAction RemoveResultPlus
#ifdef WITH_SERVANT
clientRemoveBefore k ts clientenv (ProtocolVersion ver) su cu bypass auth =
liftIO $ withClientM (cli su (B64Key k) cu bypass ts auth) clientenv return
where
cli = case ver of
3 -> flip v3 V3
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
v3 :<|> _ = client p2pHttpAPI
#else
clientRemoveBefore _ _ = ()
#endif
clientGetTimestamp :: ClientAction GetTimestampResult
#ifdef WITH_SERVANT
clientGetTimestamp clientenv (ProtocolVersion ver) su cu bypass auth =
liftIO $ withClientM (cli su cu bypass auth) clientenv return
where
cli = case ver of
3 -> flip v3 V3
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|>
v3 :<|> _ = client p2pHttpAPI
#else
clientGetTimestamp = ()
#endif
clientPut
:: MeterUpdate
-> Key
-> Maybe Offset
-> AssociatedFile
-> FilePath
-> FileSize
-> Annex Bool
-- ^ Called after sending the file to check if it's valid.
-> ClientAction PutResultPlus
#ifdef WITH_SERVANT
clientPut meterupdate k moffset af contentfile contentfilesize validitycheck clientenv (ProtocolVersion ver) su cu bypass auth = do
checkv <- liftIO newEmptyTMVarIO
checkresultv <- liftIO newEmptyTMVarIO
let checker = do
liftIO $ atomically $ takeTMVar checkv
validitycheck >>= liftIO . atomically . putTMVar checkresultv
checkerthread <- liftIO . async =<< forkState checker
v <- liftIO $ withBinaryFile contentfile ReadMode $ \h -> do
when (offset /= 0) $
hSeek h AbsoluteSeek offset
withClientM (cli (stream h checkv checkresultv)) clientenv return
case v of
Left err -> do
void $ liftIO $ atomically $ tryPutTMVar checkv ()
join $ liftIO (wait checkerthread)
return (Left err)
Right res -> do
join $ liftIO (wait checkerthread)
return (Right res)
where
stream h checkv checkresultv = S.SourceT $ \a -> do
bl <- hGetContentsMetered h meterupdate
v <- newMVar (0, filter (not . B.null) (L.toChunks bl))
a (go v)
where
go v = S.fromActionStep B.null $ modifyMVar v $ \case
(n, (b:[])) -> do
let !n' = n + B.length b
ifM (checkvalid n')
( return ((n', []), b)
-- The key's content is invalid, but
-- the amount of data is the same as
-- the DataLengthHeader indicates.
-- Truncate the stream by one byte to
-- indicate to the server that it's
-- not valid.
, return
( (n' - 1, [])
, B.take (B.length b - 1) b
)
)
(n, []) -> do
void $ checkvalid n
return ((n, []), mempty)
(n, (b:bs)) ->
let !n' = n + B.length b
in return ((n', bs), b)
checkvalid n = do
void $ liftIO $ atomically $ tryPutTMVar checkv ()
valid <- liftIO $ atomically $ readTMVar checkresultv
if not valid
then return (n /= fromIntegral nlen)
else return True
baf = case af of
AssociatedFile Nothing -> Nothing
AssociatedFile (Just f) -> Just (B64FilePath f)
len = DataLength nlen
nlen = contentfilesize - offset
offset = case moffset of
Nothing -> 0
Just (Offset o) -> fromIntegral o
bk = B64Key k
cli src = case ver of
3 -> v3 su V3 len bk cu bypass baf moffset src auth
2 -> v2 su V2 len bk cu bypass baf moffset src auth
1 -> plus <$> v1 su V1 len bk cu bypass baf moffset src auth
0 -> plus <$> v0 su V0 len bk cu bypass baf moffset src auth
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|>
_ :<|>
v3 :<|> v2 :<|> v1 :<|> v0 :<|> _ = client p2pHttpAPI
#else
clientPut _ _ _ _ _ _ _ = ()
#endif
clientPutOffset
:: Key
-> ClientAction PutOffsetResultPlus
#ifdef WITH_SERVANT
clientPutOffset k clientenv (ProtocolVersion ver) su cu bypass auth
| ver == 0 = return (Right (PutOffsetResultPlus (Offset 0)))
| otherwise = liftIO $ withClientM cli clientenv return
where
bk = B64Key k
cli = case ver of
3 -> v3 su V3 bk cu bypass auth
2 -> v2 su V2 bk cu bypass auth
1 -> plus <$> v1 su V1 bk cu bypass auth
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|>
_ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
v3 :<|> v2 :<|> v1 :<|> _ = client p2pHttpAPI
#else
clientPutOffset _ = ()
#endif
clientLockContent
:: Key
-> ClientAction LockResult
#ifdef WITH_SERVANT
clientLockContent k clientenv (ProtocolVersion ver) su cu bypass auth =
liftIO $ withClientM (cli (B64Key k) cu bypass auth) clientenv return
where
cli = case ver of
3 -> v3 su V3
2 -> v2 su V2
1 -> v1 su V1
0 -> v0 su V0
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|>
_ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|>
v3 :<|> v2 :<|> v1 :<|> v0 :<|> _ = client p2pHttpAPI
#else
clientLockContent _ = ()
#endif
clientKeepLocked
:: LockID
-> UUID
-> a
-> (VerifiedCopy -> Annex a)
-- ^ Callback is run only after successfully connecting to the http
-- server. The lock will remain held until the callback returns,
-- and then will be dropped.
-> ClientAction a
#ifdef WITH_SERVANT
clientKeepLocked lckid remoteuuid unablelock callback clientenv (ProtocolVersion ver) su cu bypass auth = do
readyv <- liftIO newEmptyTMVarIO
keeplocked <- liftIO newEmptyTMVarIO
let cli' = cli lckid (Just cu) bypass auth
(Just connectionKeepAlive) (Just keepAlive)
(S.fromStepT (unlocksender readyv keeplocked))
starttime <- liftIO getPOSIXTime
tid <- liftIO $ async $ withClientM cli' clientenv $ \case
Right (LockResult _ _) ->
atomically $ writeTMVar readyv (Right False)
Left err ->
atomically $ writeTMVar readyv (Left err)
let releaselock = liftIO $ do
atomically $ putTMVar keeplocked False
wait tid
liftIO (atomically $ takeTMVar readyv) >>= \case
Left err -> do
liftIO $ wait tid
return (Left err)
Right False -> do
liftIO $ wait tid
return (Right unablelock)
Right True -> do
let checker = return $ Left $ starttime + retentionduration
Right
<$> withVerifiedCopy LockedCopy remoteuuid checker callback
`finally` releaselock
where
retentionduration = fromIntegral $
durationSeconds p2pDefaultLockContentRetentionDuration
unlocksender readyv keeplocked =
S.Yield (UnlockRequest False) $ S.Effect $ do
return $ S.Effect $ do
liftIO $ atomically $ void $
tryPutTMVar readyv (Right True)
stilllocked <- liftIO $ atomically $
takeTMVar keeplocked
return $ if stilllocked
then unlocksender readyv keeplocked
else S.Yield (UnlockRequest True) S.Stop
cli = case ver of
3 -> v3 su V3
2 -> v2 su V2
1 -> v1 su V1
0 -> v0 su V0
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|>
_ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
v3 :<|> v2 :<|> v1 :<|> v0 :<|> _ = client p2pHttpAPI
#else
clientKeepLocked _ _ _ _ = ()
#endif

478
P2P/Http/Server.hs Normal file
View file

@ -0,0 +1,478 @@
{- P2P protocol over HTTP, server
-
- https://git-annex.branchable.com/design/p2p_protocol_over_http/
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
module P2P.Http.Server (
module P2P.Http,
module P2P.Http.Server,
module P2P.Http.Types,
module P2P.Http.State,
) where
import Annex.Common
import P2P.Http
import P2P.Http.Types
import P2P.Http.State
import P2P.Protocol hiding (Offset, Bypass, auth)
import P2P.IO
import P2P.Annex
import Annex.WorkerPool
import Types.WorkerPool
import Types.Direction
import Utility.Metered
import Servant
import qualified Servant.Types.SourceT as S
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as LI
import Control.Concurrent.STM
import Control.Concurrent.Async
import Control.Concurrent
import System.IO.Unsafe
import Data.Either
p2pHttpApp :: P2PHttpServerState -> Application
p2pHttpApp = serve p2pHttpAPI . serveP2pHttp
serveP2pHttp :: P2PHttpServerState -> Server P2PHttpAPI
serveP2pHttp st
= serveGet st
:<|> serveGet st
:<|> serveGet st
:<|> serveGet st
:<|> serveCheckPresent st
:<|> serveCheckPresent st
:<|> serveCheckPresent st
:<|> serveCheckPresent st
:<|> serveRemove st id
:<|> serveRemove st id
:<|> serveRemove st dePlus
:<|> serveRemove st dePlus
:<|> serveRemoveBefore st
:<|> serveGetTimestamp st
:<|> servePut st id
:<|> servePut st id
:<|> servePut st dePlus
:<|> servePut st dePlus
:<|> servePutOffset st id
:<|> servePutOffset st id
:<|> servePutOffset st dePlus
:<|> serveLockContent st
:<|> serveLockContent st
:<|> serveLockContent st
:<|> serveLockContent st
:<|> serveKeepLocked st
:<|> serveKeepLocked st
:<|> serveKeepLocked st
:<|> serveKeepLocked st
:<|> serveGetGeneric st
serveGetGeneric
:: P2PHttpServerState
-> B64UUID ServerSide
-> B64Key
-> Maybe (B64UUID ClientSide)
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Handler (Headers '[DataLengthHeader] (S.SourceT IO B.ByteString))
serveGetGeneric st su@(B64UUID u) k mcu bypass =
-- Use V0 because it does not alter the returned data to indicate
-- Invalid content.
serveGet st su V0 k (fromMaybe scu mcu) bypass Nothing Nothing
where
-- Reuse server UUID as client UUID.
scu = B64UUID u :: B64UUID ClientSide
serveGet
:: APIVersion v
=> P2PHttpServerState
-> B64UUID ServerSide
-> v
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> Maybe B64FilePath
-> Maybe Offset
-> IsSecure
-> Maybe Auth
-> Handler (Headers '[DataLengthHeader] (S.SourceT IO B.ByteString))
serveGet st su apiver (B64Key k) cu bypass baf startat sec auth = do
conn <- getP2PConnection apiver st cu su bypass sec auth ReadAction id
bsv <- liftIO newEmptyTMVarIO
endv <- liftIO newEmptyTMVarIO
validityv <- liftIO newEmptyTMVarIO
finalv <- liftIO newEmptyTMVarIO
annexworker <- liftIO $ async $ inAnnexWorker st $ do
let storer _offset len = sendContentWith $ \bs -> liftIO $ do
atomically $ putTMVar bsv (len, bs)
atomically $ takeTMVar endv
signalFullyConsumedByteString $
connOhdl $ serverP2PConnection conn
return $ \v -> do
liftIO $ atomically $ putTMVar validityv v
return True
enteringStage (TransferStage Upload) $
runFullProto (clientRunState conn) (clientP2PConnection conn) $
void $ receiveContent Nothing nullMeterUpdate
sizer storer getreq
void $ liftIO $ forkIO $ waitfinal endv finalv conn annexworker
(Len len, bs) <- liftIO $ atomically $ takeTMVar bsv
bv <- liftIO $ newMVar (filter (not . B.null) (L.toChunks bs))
szv <- liftIO $ newMVar 0
let streamer = S.SourceT $ \s -> s =<< return
(stream (bv, szv, len, endv, validityv, finalv))
return $ addHeader (DataLength len) streamer
where
stream (bv, szv, len, endv, validityv, finalv) =
S.fromActionStep B.null $
modifyMVar bv $ nextchunk szv $
checkvalidity szv len endv validityv finalv
nextchunk szv checkvalid (b:[]) = do
updateszv szv b
ifM checkvalid
( return ([], b)
-- The key's content is invalid, but
-- the amount of data is the same as the
-- DataLengthHeader indicated. Truncate
-- the response by one byte to indicate
-- to the client that it's not valid.
, return ([], B.take (B.length b - 1) b)
)
nextchunk szv _checkvalid (b:bs) = do
updateszv szv b
return (bs, b)
nextchunk _szv checkvalid [] = do
void checkvalid
-- Result ignored because 0 bytes of data are sent,
-- so even if the key is invalid, if that's the
-- amount of data that the DataLengthHeader indicates,
-- we've successfully served an empty key.
return ([], mempty)
updateszv szv b = modifyMVar szv $ \sz ->
let !sz' = sz + fromIntegral (B.length b)
in return (sz', ())
-- Returns False when the key's content is invalid, but the
-- amount of data sent was the same as indicated by the
-- DataLengthHeader.
checkvalidity szv len endv validityv finalv =
ifM (atomically $ isEmptyTMVar endv)
( do
atomically $ putTMVar endv ()
validity <- atomically $ takeTMVar validityv
sz <- takeMVar szv
atomically $ putTMVar finalv ()
atomically $ putTMVar endv ()
return $ case validity of
Nothing -> True
Just Valid -> True
Just Invalid -> sz /= len
, pure True
)
waitfinal endv finalv conn annexworker = do
-- Wait for everything to be transferred before
-- stopping the annexworker. The finalv will usually
-- be written to at the end. If the client disconnects
-- early that does not happen, so catch STM exception.
alltransferred <- isRight
<$> tryNonAsync (liftIO $ atomically $ takeTMVar finalv)
-- Make sure the annexworker is not left blocked on endv
-- if the client disconnected early.
void $ liftIO $ atomically $ tryPutTMVar endv ()
void $ tryNonAsync $ if alltransferred
then releaseP2PConnection conn
else closeP2PConnection conn
void $ tryNonAsync $ wait annexworker
sizer = pure $ Len $ case startat of
Just (Offset o) -> fromIntegral o
Nothing -> 0
getreq offset = P2P.Protocol.GET offset af k
af = ProtoAssociatedFile $ b64FilePathToAssociatedFile baf
serveCheckPresent
:: APIVersion v
=> P2PHttpServerState
-> B64UUID ServerSide
-> v
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Handler CheckPresentResult
serveCheckPresent st su apiver (B64Key k) cu bypass sec auth = do
res <- withP2PConnection apiver st cu su bypass sec auth ReadAction id
$ \conn -> liftIO $ proxyClientNetProto conn $ checkPresent k
case res of
Right b -> return (CheckPresentResult b)
Left err -> throwError $ err500 { errBody = encodeBL err }
serveRemove
:: APIVersion v
=> P2PHttpServerState
-> (RemoveResultPlus -> t)
-> B64UUID ServerSide
-> v
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Handler t
serveRemove st resultmangle su apiver (B64Key k) cu bypass sec auth = do
res <- withP2PConnection apiver st cu su bypass sec auth RemoveAction id
$ \conn ->
liftIO $ proxyClientNetProto conn $ remove Nothing k
case res of
(Right b, plusuuids) -> return $ resultmangle $
RemoveResultPlus b (map B64UUID (fromMaybe [] plusuuids))
(Left err, _) -> throwError $
err500 { errBody = encodeBL err }
serveRemoveBefore
:: APIVersion v
=> P2PHttpServerState
-> B64UUID ServerSide
-> v
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> Timestamp
-> IsSecure
-> Maybe Auth
-> Handler RemoveResultPlus
serveRemoveBefore st su apiver (B64Key k) cu bypass (Timestamp ts) sec auth = do
res <- withP2PConnection apiver st cu su bypass sec auth RemoveAction id
$ \conn ->
liftIO $ proxyClientNetProto conn $
removeBeforeRemoteEndTime ts k
case res of
(Right b, plusuuids) -> return $
RemoveResultPlus b (map B64UUID (fromMaybe [] plusuuids))
(Left err, _) -> throwError $
err500 { errBody = encodeBL err }
serveGetTimestamp
:: APIVersion v
=> P2PHttpServerState
-> B64UUID ServerSide
-> v
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Handler GetTimestampResult
serveGetTimestamp st su apiver cu bypass sec auth = do
res <- withP2PConnection apiver st cu su bypass sec auth ReadAction id
$ \conn ->
liftIO $ proxyClientNetProto conn getTimestamp
case res of
Right ts -> return $ GetTimestampResult (Timestamp ts)
Left err -> throwError $
err500 { errBody = encodeBL err }
servePut
:: APIVersion v
=> P2PHttpServerState
-> (PutResultPlus -> t)
-> B64UUID ServerSide
-> v
-> DataLength
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> Maybe B64FilePath
-> Maybe Offset
-> S.SourceT IO B.ByteString
-> IsSecure
-> Maybe Auth
-> Handler t
servePut st resultmangle su apiver (DataLength len) (B64Key k) cu bypass baf moffset stream sec auth = do
validityv <- liftIO newEmptyTMVarIO
let validitycheck = local $ runValidityCheck $
liftIO $ atomically $ readTMVar validityv
tooshortv <- liftIO newEmptyTMVarIO
content <- liftIO $ S.unSourceT stream (gather validityv tooshortv)
res <- withP2PConnection' apiver st cu su bypass sec auth WriteAction
(\cst -> cst { connectionWaitVar = False }) $ \conn ->
liftIO (protoaction conn content validitycheck)
`finally` checktooshort conn tooshortv
case res of
Right (Right (Just plusuuids)) -> return $ resultmangle $
PutResultPlus True (map B64UUID plusuuids)
Right (Right Nothing) -> return $ resultmangle $
PutResultPlus False []
Right (Left protofail) -> throwError $
err500 { errBody = encodeBL (describeProtoFailure protofail) }
Left err -> throwError $
err500 { errBody = encodeBL (show err) }
where
protoaction conn content validitycheck = inAnnexWorker st $
enteringStage (TransferStage Download) $
runFullProto (clientRunState conn) (clientP2PConnection conn) $
protoaction' content validitycheck
protoaction' content validitycheck = put' k af $ \offset' ->
let offsetdelta = offset' - offset
in case compare offset' offset of
EQ -> sendContent' nullMeterUpdate (Len len)
content validitycheck
GT -> sendContent' nullMeterUpdate
(Len (len - fromIntegral offsetdelta))
(L.drop (fromIntegral offsetdelta) content)
validitycheck
LT -> sendContent' nullMeterUpdate
(Len len)
content
(validitycheck >>= \_ -> return Invalid)
offset = case moffset of
Just (Offset o) -> o
Nothing -> 0
af = b64FilePathToAssociatedFile baf
-- Streams the ByteString from the client. Avoids returning a longer
-- than expected ByteString by truncating to the expected length.
-- Returns a shorter than expected ByteString when the data is not
-- valid.
gather validityv tooshortv = unsafeInterleaveIO . go 0
where
go n S.Stop = do
atomically $ do
writeTMVar validityv $
if n == len then Valid else Invalid
writeTMVar tooshortv (n /= len)
return LI.Empty
go n (S.Error _err) = do
atomically $ do
writeTMVar validityv Invalid
writeTMVar tooshortv (n /= len)
return LI.Empty
go n (S.Skip s) = go n s
go n (S.Effect ms) = ms >>= go n
go n (S.Yield v s) =
let !n' = n + fromIntegral (B.length v)
in if n' > len
then do
atomically $ do
writeTMVar validityv Invalid
writeTMVar tooshortv True
return $ LI.Chunk
(B.take (fromIntegral (len - n')) v)
LI.Empty
else LI.Chunk v <$> unsafeInterleaveIO (go n' s)
-- The connection can no longer be used when too short a DATA has
-- been written to it.
checktooshort conn tooshortv =
liftIO $ whenM (atomically $ fromMaybe True <$> tryTakeTMVar tooshortv) $
closeP2PConnection conn
servePutOffset
:: APIVersion v
=> P2PHttpServerState
-> (PutOffsetResultPlus -> t)
-> B64UUID ServerSide
-> v
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Handler t
servePutOffset st resultmangle su apiver (B64Key k) cu bypass sec auth = do
res <- withP2PConnection apiver st cu su bypass sec auth WriteAction
(\cst -> cst { connectionWaitVar = False }) $ \conn ->
liftIO $ proxyClientNetProto conn $ getPutOffset k af
case res of
Right offset -> return $ resultmangle $
PutOffsetResultPlus (Offset offset)
Left plusuuids -> return $ resultmangle $
PutOffsetResultAlreadyHavePlus (map B64UUID plusuuids)
where
af = AssociatedFile Nothing
serveLockContent
:: APIVersion v
=> P2PHttpServerState
-> B64UUID ServerSide
-> v
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Handler LockResult
serveLockContent st su apiver (B64Key k) cu bypass sec auth = do
conn <- getP2PConnection apiver st cu su bypass sec auth WriteAction id
let lock = do
lockresv <- newEmptyTMVarIO
unlockv <- newEmptyTMVarIO
annexworker <- async $ inAnnexWorker st $ do
lockres <- runFullProto (clientRunState conn) (clientP2PConnection conn) $ do
net $ sendMessage (LOCKCONTENT k)
checkSuccess
liftIO $ atomically $ putTMVar lockresv lockres
liftIO $ atomically $ takeTMVar unlockv
void $ runFullProto (clientRunState conn) (clientP2PConnection conn) $ do
net $ sendMessage UNLOCKCONTENT
atomically (takeTMVar lockresv) >>= \case
Right True -> return (Just (annexworker, unlockv))
_ -> return Nothing
let unlock (annexworker, unlockv) = do
atomically $ putTMVar unlockv ()
void $ wait annexworker
releaseP2PConnection conn
liftIO $ mkLocker lock unlock >>= \case
Just (locker, lockid) -> do
liftIO $ storeLock lockid locker st
return $ LockResult True (Just lockid)
Nothing -> return $ LockResult False Nothing
serveKeepLocked
:: APIVersion v
=> P2PHttpServerState
-> B64UUID ServerSide
-> v
-> LockID
-> Maybe (B64UUID ClientSide)
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Maybe ConnectionKeepAlive
-> Maybe KeepAlive
-> S.SourceT IO UnlockRequest
-> Handler LockResult
serveKeepLocked st _su _apiver lckid _cu _bypass sec auth _ _ unlockrequeststream = do
checkAuthActionClass st sec auth WriteAction $ \_ -> do
liftIO $ keepingLocked lckid st
_ <- liftIO $ S.unSourceT unlockrequeststream go
return (LockResult False Nothing)
where
go S.Stop = dropLock lckid st
go (S.Error _err) = dropLock lckid st
go (S.Skip s) = go s
go (S.Effect ms) = ms >>= go
go (S.Yield (UnlockRequest False) s) = go s
go (S.Yield (UnlockRequest True) _) = dropLock lckid st

642
P2P/Http/State.hs Normal file
View file

@ -0,0 +1,642 @@
{- P2P protocol over HTTP, server state
-
- https://git-annex.branchable.com/design/p2p_protocol_over_http/
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-}
module P2P.Http.State where
import Annex.Common
import qualified Annex
import P2P.Http.Types
import qualified P2P.Protocol as P2P
import qualified P2P.IO as P2P
import P2P.IO
import P2P.Annex
import Annex.UUID
import Types.NumCopies
import Types.WorkerPool
import Annex.WorkerPool
import Annex.BranchState
import Types.Cluster
import CmdLine.Action (startConcurrency)
import Utility.ThreadScheduler
import Utility.HumanTime
import Logs.Proxy
import Annex.Proxy
import Annex.Cluster
import qualified P2P.Proxy as Proxy
import qualified Types.Remote as Remote
import Servant
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import Control.Concurrent.Async
import Control.Concurrent.STM
import Data.Time.Clock.POSIX
data P2PHttpServerState = P2PHttpServerState
{ acquireP2PConnection :: AcquireP2PConnection
, annexWorkerPool :: AnnexWorkerPool
, getServerMode :: GetServerMode
, openLocks :: TMVar (M.Map LockID Locker)
}
type AnnexWorkerPool = TMVar (WorkerPool (Annex.AnnexState, Annex.AnnexRead))
-- Nothing when the server is not allowed to serve any requests.
type GetServerMode = IsSecure -> Maybe Auth -> Maybe P2P.ServerMode
mkP2PHttpServerState :: AcquireP2PConnection -> AnnexWorkerPool -> GetServerMode -> IO P2PHttpServerState
mkP2PHttpServerState acquireconn annexworkerpool getservermode = P2PHttpServerState
<$> pure acquireconn
<*> pure annexworkerpool
<*> pure getservermode
<*> newTMVarIO mempty
data ActionClass = ReadAction | WriteAction | RemoveAction
deriving (Eq)
withP2PConnection
:: APIVersion v
=> v
-> P2PHttpServerState
-> B64UUID ClientSide
-> B64UUID ServerSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> ActionClass
-> (ConnectionParams -> ConnectionParams)
-> (P2PConnectionPair -> Handler (Either ProtoFailure a))
-> Handler a
withP2PConnection apiver st cu su bypass sec auth actionclass fconnparams connaction =
withP2PConnection' apiver st cu su bypass sec auth actionclass fconnparams connaction'
where
connaction' conn = connaction conn >>= \case
Right r -> return r
Left err -> throwError $
err500 { errBody = encodeBL (describeProtoFailure err) }
withP2PConnection'
:: APIVersion v
=> v
-> P2PHttpServerState
-> B64UUID ClientSide
-> B64UUID ServerSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> ActionClass
-> (ConnectionParams -> ConnectionParams)
-> (P2PConnectionPair -> Handler a)
-> Handler a
withP2PConnection' apiver st cu su bypass sec auth actionclass fconnparams connaction = do
conn <- getP2PConnection apiver st cu su bypass sec auth actionclass fconnparams
connaction conn
`finally` liftIO (releaseP2PConnection conn)
getP2PConnection
:: APIVersion v
=> v
-> P2PHttpServerState
-> B64UUID ClientSide
-> B64UUID ServerSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> ActionClass
-> (ConnectionParams -> ConnectionParams)
-> Handler P2PConnectionPair
getP2PConnection apiver st cu su bypass sec auth actionclass fconnparams =
checkAuthActionClass st sec auth actionclass go
where
go servermode = liftIO (acquireP2PConnection st cp) >>= \case
Left (ConnectionFailed err) ->
throwError err502 { errBody = encodeBL err }
Left TooManyConnections ->
throwError err503
Right v -> return v
where
cp = fconnparams $ ConnectionParams
{ connectionProtocolVersion = protocolVersion apiver
, connectionServerUUID = fromB64UUID su
, connectionClientUUID = fromB64UUID cu
, connectionBypass = map fromB64UUID bypass
, connectionServerMode = servermode
, connectionWaitVar = True
}
checkAuthActionClass
:: P2PHttpServerState
-> IsSecure
-> Maybe Auth
-> ActionClass
-> (P2P.ServerMode -> Handler a)
-> Handler a
checkAuthActionClass st sec auth actionclass go =
case (getServerMode st sec auth, actionclass) of
(Just P2P.ServeReadWrite, _) -> go P2P.ServeReadWrite
(Just P2P.ServeAppendOnly, RemoveAction) -> throwError err403
(Just P2P.ServeAppendOnly, _) -> go P2P.ServeAppendOnly
(Just P2P.ServeReadOnly, ReadAction) -> go P2P.ServeReadOnly
(Just P2P.ServeReadOnly, _) -> throwError err403
(Nothing, _) -> throwError basicAuthRequired
basicAuthRequired :: ServerError
basicAuthRequired = err401 { errHeaders = [(h, v)] }
where
h = "WWW-Authenticate"
v = "Basic realm=\"git-annex\", charset=\"UTF-8\""
data ConnectionParams = ConnectionParams
{ connectionProtocolVersion :: P2P.ProtocolVersion
, connectionServerUUID :: UUID
, connectionClientUUID :: UUID
, connectionBypass :: [UUID]
, connectionServerMode :: P2P.ServerMode
, connectionWaitVar :: Bool
}
deriving (Show, Eq, Ord)
data ConnectionProblem
= ConnectionFailed String
| TooManyConnections
deriving (Show, Eq)
proxyClientNetProto :: P2PConnectionPair -> P2P.Proto a -> IO (Either P2P.ProtoFailure a)
proxyClientNetProto conn = runNetProto
(clientRunState conn) (clientP2PConnection conn)
type AcquireP2PConnection
= ConnectionParams
-> IO (Either ConnectionProblem P2PConnectionPair)
withP2PConnections
:: AnnexWorkerPool
-> ProxyConnectionPoolSize
-> ClusterConcurrency
-> (AcquireP2PConnection -> Annex a)
-> Annex a
withP2PConnections workerpool proxyconnectionpoolsize clusterconcurrency a = do
enableInteractiveBranchAccess
myuuid <- getUUID
myproxies <- M.lookup myuuid <$> getProxies
reqv <- liftIO newEmptyTMVarIO
relv <- liftIO newEmptyTMVarIO
endv <- liftIO newEmptyTMVarIO
proxypool <- liftIO $ newTMVarIO (0, mempty)
asyncservicer <- liftIO $ async $
servicer myuuid myproxies proxypool reqv relv endv
let endit = do
liftIO $ atomically $ putTMVar endv ()
liftIO $ wait asyncservicer
a (acquireconn reqv) `finally` endit
where
acquireconn reqv connparams = do
respvar <- newEmptyTMVarIO
atomically $ putTMVar reqv (connparams, respvar)
atomically $ takeTMVar respvar
servicer myuuid myproxies proxypool reqv relv endv = do
reqrel <- liftIO $
atomically $
(Right <$> takeTMVar reqv)
`orElse`
(Left . Right <$> takeTMVar relv)
`orElse`
(Left . Left <$> takeTMVar endv)
case reqrel of
Right (connparams, respvar) -> do
servicereq myuuid myproxies proxypool relv connparams
>>= atomically . putTMVar respvar
servicer myuuid myproxies proxypool reqv relv endv
Left (Right releaseconn) -> do
releaseconn
servicer myuuid myproxies proxypool reqv relv endv
Left (Left ()) -> return ()
servicereq myuuid myproxies proxypool relv connparams
| connectionServerUUID connparams == myuuid =
localConnection relv connparams workerpool
| otherwise =
atomically (getProxyConnectionPool proxypool connparams) >>= \case
Just conn -> proxyConnection proxyconnectionpoolsize relv connparams workerpool proxypool conn
Nothing -> checkcanproxy myproxies proxypool relv connparams
checkcanproxy myproxies proxypool relv connparams =
inAnnexWorker' workerpool
(checkCanProxy' myproxies (connectionServerUUID connparams))
>>= \case
Right (Left reason) -> return $ Left $
ConnectionFailed $
fromMaybe "unknown uuid" reason
Right (Right (Right proxyremote)) -> proxyconnection $
openProxyConnectionToRemote workerpool
(connectionProtocolVersion connparams)
bypass proxyremote
Right (Right (Left clusteruuid)) -> proxyconnection $
openProxyConnectionToCluster workerpool
(connectionProtocolVersion connparams)
bypass clusteruuid clusterconcurrency
Left ex -> return $ Left $
ConnectionFailed $ show ex
where
bypass = P2P.Bypass $ S.fromList $ connectionBypass connparams
proxyconnection openconn = openconn >>= \case
Right conn -> proxyConnection proxyconnectionpoolsize
relv connparams workerpool proxypool conn
Left ex -> return $ Left $
ConnectionFailed $ show ex
data P2PConnectionPair = P2PConnectionPair
{ clientRunState :: RunState
, clientP2PConnection :: P2PConnection
, serverP2PConnection :: P2PConnection
, releaseP2PConnection :: IO ()
-- ^ Releases a P2P connection, which can be reused for other
-- requests.
, closeP2PConnection :: IO ()
-- ^ Closes a P2P connection, which is in a state where it is no
-- longer usable.
}
localConnection
:: TMVar (IO ())
-> ConnectionParams
-> AnnexWorkerPool
-> IO (Either ConnectionProblem P2PConnectionPair)
localConnection relv connparams workerpool =
localP2PConnectionPair connparams relv $ \serverrunst serverconn ->
inAnnexWorker' workerpool $
void $ runFullProto serverrunst serverconn $
P2P.serveOneCommandAuthed
(connectionServerMode connparams)
(connectionServerUUID connparams)
localP2PConnectionPair
:: ConnectionParams
-> TMVar (IO ())
-> (RunState -> P2PConnection -> IO (Either SomeException ()))
-> IO (Either ConnectionProblem P2PConnectionPair)
localP2PConnectionPair connparams relv startworker = do
(clientconn, serverconn) <- mkP2PConnectionPair connparams
("http client", "http server")
clientrunst <- mkClientRunState connparams
serverrunst <- mkServerRunState connparams
asyncworker <- async $
startworker serverrunst serverconn
let releaseconn = atomically $ void $ tryPutTMVar relv $
liftIO $ wait asyncworker
>>= either throwM return
return $ Right $ P2PConnectionPair
{ clientRunState = clientrunst
, clientP2PConnection = clientconn
, serverP2PConnection = serverconn
, releaseP2PConnection = releaseconn
, closeP2PConnection = releaseconn
}
mkP2PConnectionPair
:: ConnectionParams
-> (String, String)
-> IO (P2PConnection, P2PConnection)
mkP2PConnectionPair connparams (n1, n2) = do
hdl1 <- newEmptyTMVarIO
hdl2 <- newEmptyTMVarIO
wait1 <- newEmptyTMVarIO
wait2 <- newEmptyTMVarIO
closed1 <- newEmptyTMVarIO
closed2 <- newEmptyTMVarIO
let h1 = P2PHandleTMVar hdl1
(if connectionWaitVar connparams then Just wait1 else Nothing)
closed1
let h2 = P2PHandleTMVar hdl2
(if connectionWaitVar connparams then Just wait2 else Nothing)
closed2
let clientconn = P2PConnection Nothing
(const True) h2 h1
(ConnIdent (Just n1))
let serverconn = P2PConnection Nothing
(const True) h1 h2
(ConnIdent (Just n2))
return (clientconn, serverconn)
mkServerRunState :: ConnectionParams -> IO RunState
mkServerRunState connparams = do
prototvar <- newTVarIO $ connectionProtocolVersion connparams
mkRunState $ const $ Serving
(connectionClientUUID connparams)
Nothing
prototvar
mkClientRunState :: ConnectionParams -> IO RunState
mkClientRunState connparams = do
prototvar <- newTVarIO $ connectionProtocolVersion connparams
mkRunState $ const $ Client prototvar
proxyConnection
:: ProxyConnectionPoolSize
-> TMVar (IO ())
-> ConnectionParams
-> AnnexWorkerPool
-> TMVar ProxyConnectionPool
-> ProxyConnection
-> IO (Either ConnectionProblem P2PConnectionPair)
proxyConnection proxyconnectionpoolsize relv connparams workerpool proxypool proxyconn = do
(clientconn, proxyfromclientconn) <-
mkP2PConnectionPair connparams ("http client", "proxy")
clientrunst <- mkClientRunState connparams
proxyfromclientrunst <- mkClientRunState connparams
asyncworker <- async $
inAnnexWorker' workerpool $ do
proxystate <- liftIO Proxy.mkProxyState
let proxyparams = Proxy.ProxyParams
{ Proxy.proxyMethods = mkProxyMethods
, Proxy.proxyState = proxystate
, Proxy.proxyServerMode = connectionServerMode connparams
, Proxy.proxyClientSide = Proxy.ClientSide proxyfromclientrunst proxyfromclientconn
, Proxy.proxyUUID = proxyConnectionRemoteUUID proxyconn
, Proxy.proxySelector = proxyConnectionSelector proxyconn
, Proxy.proxyConcurrencyConfig = proxyConnectionConcurrency proxyconn
, Proxy.proxyClientProtocolVersion = connectionProtocolVersion connparams
}
let proxy mrequestmessage = case mrequestmessage of
Just requestmessage -> do
Proxy.proxyRequest proxydone proxyparams
requestcomplete requestmessage protoerrhandler
Nothing -> return ()
protoerrhandler proxy $
liftIO $ runNetProto proxyfromclientrunst proxyfromclientconn $
P2P.net P2P.receiveMessage
let releaseconn returntopool =
atomically $ void $ tryPutTMVar relv $ do
r <- liftIO $ wait asyncworker
liftIO $ closeConnection proxyfromclientconn
liftIO $ closeConnection clientconn
if returntopool
then liftIO $ do
now <- getPOSIXTime
evicted <- atomically $ putProxyConnectionPool proxypool proxyconnectionpoolsize connparams $
proxyconn { proxyConnectionLastUsed = now }
maybe noop closeproxyconnection evicted
else closeproxyconnection proxyconn
either throwM return r
return $ Right $ P2PConnectionPair
{ clientRunState = clientrunst
, clientP2PConnection = clientconn
, serverP2PConnection = proxyfromclientconn
, releaseP2PConnection = releaseconn True
, closeP2PConnection = releaseconn False
}
where
protoerrhandler cont a = a >>= \case
Left _ -> proxyConnectionCloser proxyconn
Right v -> cont v
proxydone = return ()
requestcomplete () = return ()
closeproxyconnection =
void . inAnnexWorker' workerpool . proxyConnectionCloser
data Locker = Locker
{ lockerThread :: Async ()
, lockerVar :: TMVar Bool
-- ^ Left empty until the thread has taken the lock
-- (or failed to do so), then True while the lock is held,
-- and setting to False causes the lock to be released.
, lockerTimeoutDisable :: TMVar ()
-- ^ Until this is filled, the lock will be subject to timeout.
-- Once filled the lock will remain held until explicitly dropped.
}
mkLocker :: (IO (Maybe a)) -> (a -> IO ()) -> IO (Maybe (Locker, LockID))
mkLocker lock unlock = do
lv <- newEmptyTMVarIO
timeoutdisablev <- newEmptyTMVarIO
let setlocked = putTMVar lv
locktid <- async $ lock >>= \case
Nothing ->
atomically $ setlocked False
Just st -> do
atomically $ setlocked True
atomically $ do
v <- takeTMVar lv
if v
then retry
else setlocked False
unlock st
locksuccess <- atomically $ readTMVar lv
if locksuccess
then do
timeouttid <- async $ do
threadDelaySeconds $ Seconds $ fromIntegral $
durationSeconds p2pDefaultLockContentRetentionDuration
atomically (tryReadTMVar timeoutdisablev) >>= \case
Nothing -> void $ atomically $
writeTMVar lv False
Just () -> noop
tid <- async $ do
wait locktid
cancel timeouttid
lckid <- B64UUID <$> genUUID
return (Just (Locker tid lv timeoutdisablev, lckid))
else do
wait locktid
return Nothing
storeLock :: LockID -> Locker -> P2PHttpServerState -> IO ()
storeLock lckid locker st = atomically $ do
m <- takeTMVar (openLocks st)
let !m' = M.insert lckid locker m
putTMVar (openLocks st) m'
keepingLocked :: LockID -> P2PHttpServerState -> IO ()
keepingLocked lckid st = do
m <- atomically $ readTMVar (openLocks st)
case M.lookup lckid m of
Nothing -> return ()
Just locker ->
atomically $ void $
tryPutTMVar (lockerTimeoutDisable locker) ()
dropLock :: LockID -> P2PHttpServerState -> IO ()
dropLock lckid st = do
v <- atomically $ do
m <- takeTMVar (openLocks st)
let (mlocker, !m') =
M.updateLookupWithKey (\_ _ -> Nothing) lckid m
putTMVar (openLocks st) m'
case mlocker of
Nothing -> return Nothing
-- Signal to the locker's thread that it can
-- release the lock.
Just locker -> do
_ <- swapTMVar (lockerVar locker) False
return (Just locker)
case v of
Nothing -> return ()
Just locker -> wait (lockerThread locker)
getAnnexWorkerPool :: (AnnexWorkerPool -> Annex a) -> Annex a
getAnnexWorkerPool a = startConcurrency transferStages $
Annex.getState Annex.workers >>= \case
Nothing -> giveup "Use -Jn or set annex.jobs to configure the number of worker threads."
Just wp -> a wp
inAnnexWorker :: P2PHttpServerState -> Annex a -> IO (Either SomeException a)
inAnnexWorker st = inAnnexWorker' (annexWorkerPool st)
inAnnexWorker' :: AnnexWorkerPool -> Annex a -> IO (Either SomeException a)
inAnnexWorker' poolv annexaction = do
(workerstrd, workerstage) <- atomically $ waitStartWorkerSlot poolv
resv <- newEmptyTMVarIO
aid <- async $ do
(res, strd) <- Annex.run workerstrd annexaction
atomically $ putTMVar resv res
return strd
atomically $ do
pool <- takeTMVar poolv
let !pool' = addWorkerPool (ActiveWorker aid workerstage) pool
putTMVar poolv pool'
(res, workerstrd') <- waitCatch aid >>= \case
Right strd -> do
r <- atomically $ takeTMVar resv
return (Right r, strd)
Left err -> return (Left err, workerstrd)
atomically $ do
pool <- takeTMVar poolv
let !pool' = deactivateWorker pool aid workerstrd'
putTMVar poolv pool'
return res
data ProxyConnection = ProxyConnection
{ proxyConnectionRemoteUUID :: UUID
, proxyConnectionSelector :: Proxy.ProxySelector
, proxyConnectionCloser :: Annex ()
, proxyConnectionConcurrency :: Proxy.ConcurrencyConfig
, proxyConnectionLastUsed :: POSIXTime
}
instance Show ProxyConnection where
show pc = unwords
[ "ProxyConnection"
, show (proxyConnectionRemoteUUID pc)
, show (proxyConnectionLastUsed pc)
]
openedProxyConnection
:: UUID
-> Proxy.ProxySelector
-> Annex ()
-> Proxy.ConcurrencyConfig
-> IO ProxyConnection
openedProxyConnection u selector closer concurrency = do
now <- getPOSIXTime
return $ ProxyConnection u selector closer concurrency now
openProxyConnectionToRemote
:: AnnexWorkerPool
-> P2P.ProtocolVersion
-> P2P.Bypass
-> Remote
-> IO (Either SomeException ProxyConnection)
openProxyConnectionToRemote workerpool clientmaxversion bypass remote =
inAnnexWorker' workerpool $ do
remoteside <- proxyRemoteSide clientmaxversion bypass remote
concurrencyconfig <- Proxy.noConcurrencyConfig
liftIO $ openedProxyConnection (Remote.uuid remote)
(Proxy.singleProxySelector remoteside)
(Proxy.closeRemoteSide remoteside)
concurrencyconfig
type ClusterConcurrency = Int
openProxyConnectionToCluster
:: AnnexWorkerPool
-> P2P.ProtocolVersion
-> P2P.Bypass
-> ClusterUUID
-> ClusterConcurrency
-> IO (Either SomeException ProxyConnection)
openProxyConnectionToCluster workerpool clientmaxversion bypass clusteruuid concurrency =
inAnnexWorker' workerpool $ do
(proxyselector, closenodes) <-
clusterProxySelector clusteruuid clientmaxversion bypass
concurrencyconfig <- Proxy.mkConcurrencyConfig concurrency
liftIO $ openedProxyConnection (fromClusterUUID clusteruuid)
proxyselector closenodes concurrencyconfig
type ProxyConnectionPool = (Integer, M.Map ProxyConnectionPoolKey [ProxyConnection])
type ProxyConnectionPoolSize = Integer
-- Returns any older ProxyConnection that was evicted from the pool.
putProxyConnectionPool
:: TMVar ProxyConnectionPool
-> ProxyConnectionPoolSize
-> ConnectionParams
-> ProxyConnection
-> STM (Maybe ProxyConnection)
putProxyConnectionPool proxypool maxsz connparams conn = do
(sz, m) <- takeTMVar proxypool
let ((sz', m'), evicted) = case M.lookup k m of
Nothing -> ((succ sz, M.insert k [conn] m), Nothing)
Just [] -> ((succ sz, M.insert k [conn] m), Nothing)
Just cs -> if sz >= maxsz
then ((sz, M.insert k (conn : dropFromEnd 1 cs) m), lastMaybe cs)
else ((sz, M.insert k (conn : cs) m), Nothing)
let ((sz'', m''), evicted') = if sz' > maxsz
then removeOldestProxyConnectionPool (sz', m')
else ((sz', m'), Nothing)
putTMVar proxypool (sz'', m'')
return (evicted <|> evicted')
where
k = proxyConnectionPoolKey connparams
removeOldestProxyConnectionPool :: ProxyConnectionPool -> (ProxyConnectionPool, Maybe ProxyConnection)
removeOldestProxyConnectionPool (sz, m) =
((pred sz, m'), snd <$> headMaybe l)
where
m' = M.fromListWith (++) $ map (\(k', v) -> (k', [v])) (drop 1 l)
l = sortOn (proxyConnectionLastUsed . snd) $
concatMap (\(k', pl) -> map (k', ) pl) $
M.toList m
getProxyConnectionPool
:: TMVar ProxyConnectionPool
-> ConnectionParams
-> STM (Maybe ProxyConnection)
getProxyConnectionPool proxypool connparams = do
(sz, m) <- takeTMVar proxypool
case M.lookup k m of
Just (c:cs) -> do
putTMVar proxypool (sz-1, M.insert k cs m)
return (Just c)
_ -> do
putTMVar proxypool (sz, m)
return Nothing
where
k = proxyConnectionPoolKey connparams
type ProxyConnectionPoolKey = (UUID, UUID, [UUID], P2P.ProtocolVersion)
proxyConnectionPoolKey :: ConnectionParams -> ProxyConnectionPoolKey
proxyConnectionPoolKey connparams =
( connectionServerUUID connparams
, connectionClientUUID connparams
, connectionBypass connparams
, connectionProtocolVersion connparams
)

398
P2P/Http/Types.hs Normal file
View file

@ -0,0 +1,398 @@
{- P2P protocol over HTTP,
- data types for servant not including the servant API
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE CPP #-}
module P2P.Http.Types where
import Annex.Common
import qualified P2P.Protocol as P2P
import Utility.MonotonicClock
#ifdef WITH_SERVANT
import Servant
import Data.Aeson hiding (Key)
import Text.Read (readMaybe)
#endif
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.ByteString as B
import Codec.Binary.Base64Url as B64
import Data.Char
import Control.DeepSeq
import GHC.Generics (Generic)
data V3 = V3 deriving (Show)
data V2 = V2 deriving (Show)
data V1 = V1 deriving (Show)
data V0 = V0 deriving (Show)
class APIVersion v where
protocolVersion :: v -> P2P.ProtocolVersion
instance APIVersion V3 where protocolVersion _ = P2P.ProtocolVersion 3
instance APIVersion V2 where protocolVersion _ = P2P.ProtocolVersion 2
instance APIVersion V1 where protocolVersion _ = P2P.ProtocolVersion 1
instance APIVersion V0 where protocolVersion _ = P2P.ProtocolVersion 0
-- Keys, UUIDs, and filenames can be base64 encoded since Servant uses
-- Text and so needs UTF-8.
newtype B64Key = B64Key Key
deriving (Show)
newtype B64FilePath = B64FilePath RawFilePath
deriving (Show)
associatedFileToB64FilePath :: AssociatedFile -> Maybe B64FilePath
associatedFileToB64FilePath (AssociatedFile Nothing) = Nothing
associatedFileToB64FilePath (AssociatedFile (Just f)) = Just (B64FilePath f)
b64FilePathToAssociatedFile :: Maybe B64FilePath -> AssociatedFile
b64FilePathToAssociatedFile Nothing = AssociatedFile Nothing
b64FilePathToAssociatedFile (Just (B64FilePath f)) = AssociatedFile (Just f)
newtype B64UUID t = B64UUID { fromB64UUID :: UUID }
deriving (Show, Ord, Eq, Generic, NFData)
encodeB64Text :: B.ByteString -> T.Text
encodeB64Text b = case TE.decodeUtf8' b of
Right t
| (snd <$> B.unsnoc b) == Just closebracket
&& (fst <$> B.uncons b) == Just openbracket ->
b64wrapped
| otherwise -> t
Left _ -> b64wrapped
where
b64wrapped = TE.decodeUtf8Lenient $ "[" <> B64.encode b <> "]"
openbracket = fromIntegral (ord '[')
closebracket = fromIntegral (ord ']')
decodeB64Text :: T.Text -> Either T.Text B.ByteString
decodeB64Text t =
case T.unsnoc t of
Just (t', lastc) | lastc == ']' ->
case T.uncons t' of
Just (firstc, t'') | firstc == '[' ->
case B64.decode (TE.encodeUtf8 t'') of
Right b -> Right b
Left _ -> Left "unable to base64 decode [] wrapped value"
_ -> Right (TE.encodeUtf8 t)
_ -> Right (TE.encodeUtf8 t)
-- Phantom types.
data ClientSide
data ServerSide
data Bypass
data Plus
data Lock
type LockID = B64UUID Lock
newtype DataLength = DataLength Integer
deriving (Show)
newtype CheckPresentResult = CheckPresentResult Bool
deriving (Show)
newtype RemoveResult = RemoveResult Bool
deriving (Show)
data RemoveResultPlus = RemoveResultPlus Bool [B64UUID Plus]
deriving (Show)
newtype GetTimestampResult = GetTimestampResult Timestamp
deriving (Show)
newtype PutResult = PutResult Bool
deriving (Eq, Show)
data PutResultPlus = PutResultPlus Bool [B64UUID Plus]
deriving (Show)
data PutOffsetResult
= PutOffsetResult Offset
| PutOffsetResultAlreadyHave
deriving (Show)
data PutOffsetResultPlus
= PutOffsetResultPlus Offset
| PutOffsetResultAlreadyHavePlus [B64UUID Plus]
deriving (Show, Generic, NFData)
newtype Offset = Offset P2P.Offset
deriving (Show, Generic, NFData)
newtype Timestamp = Timestamp MonotonicTimestamp
deriving (Show)
data LockResult = LockResult Bool (Maybe LockID)
deriving (Show, Generic, NFData)
newtype UnlockRequest = UnlockRequest Bool
deriving (Show, Generic, NFData)
-- Not using servant's built-in basic authentication support,
-- because whether authentication is needed depends on server
-- configuration.
data Auth = Auth B.ByteString B.ByteString
deriving (Show, Generic, NFData, Eq, Ord)
#ifdef WITH_SERVANT
instance ToHttpApiData Auth where
toHeader (Auth u p) = "Basic " <> B64.encode (u <> ":" <> p)
toUrlPiece = TE.decodeUtf8Lenient . toHeader
instance FromHttpApiData Auth where
parseHeader h =
let (b, rest) = B.break (isSpace . chr . fromIntegral) h
in if map toLower (decodeBS b) == "basic"
then case B64.decode (B.dropWhile (isSpace . chr . fromIntegral) rest) of
Right v -> case B.split (fromIntegral (ord ':')) v of
(u:ps) -> Right $
Auth u (B.intercalate ":" ps)
_ -> bad
Left _ -> bad
else bad
where
bad = Left "invalid basic auth header"
parseUrlPiece = parseHeader . encodeBS . T.unpack
newtype ConnectionKeepAlive = ConnectionKeepAlive T.Text
connectionKeepAlive :: ConnectionKeepAlive
connectionKeepAlive = ConnectionKeepAlive "Keep-Alive"
newtype KeepAlive = KeepAlive T.Text
keepAlive :: KeepAlive
keepAlive = KeepAlive "timeout=1200"
instance ToHttpApiData ConnectionKeepAlive where
toUrlPiece (ConnectionKeepAlive t) = t
instance FromHttpApiData ConnectionKeepAlive where
parseUrlPiece = Right . ConnectionKeepAlive
instance ToHttpApiData KeepAlive where
toUrlPiece (KeepAlive t) = t
instance FromHttpApiData KeepAlive where
parseUrlPiece = Right . KeepAlive
instance ToHttpApiData V3 where toUrlPiece _ = "v3"
instance ToHttpApiData V2 where toUrlPiece _ = "v2"
instance ToHttpApiData V1 where toUrlPiece _ = "v1"
instance ToHttpApiData V0 where toUrlPiece _ = "v0"
instance FromHttpApiData V3 where parseUrlPiece = parseAPIVersion V3 "v3"
instance FromHttpApiData V2 where parseUrlPiece = parseAPIVersion V2 "v2"
instance FromHttpApiData V1 where parseUrlPiece = parseAPIVersion V1 "v1"
instance FromHttpApiData V0 where parseUrlPiece = parseAPIVersion V0 "v0"
parseAPIVersion :: v -> T.Text -> T.Text -> Either T.Text v
parseAPIVersion v need t
| t == need = Right v
| otherwise = Left "bad version"
instance ToHttpApiData B64Key where
toUrlPiece (B64Key k) = encodeB64Text (serializeKey' k)
instance FromHttpApiData B64Key where
parseUrlPiece t = case decodeB64Text t of
Right b -> maybe (Left "key parse error") (Right . B64Key)
(deserializeKey' b)
Left err -> Left err
instance ToHttpApiData (B64UUID t) where
toUrlPiece (B64UUID u) = encodeB64Text (fromUUID u)
instance FromHttpApiData (B64UUID t) where
parseUrlPiece t = case decodeB64Text t of
Right b -> case toUUID b of
u@(UUID _) -> Right (B64UUID u)
NoUUID -> Left "empty UUID"
Left err -> Left err
instance ToHttpApiData B64FilePath where
toUrlPiece (B64FilePath f) = encodeB64Text f
instance FromHttpApiData B64FilePath where
parseUrlPiece t = case decodeB64Text t of
Right b -> Right (B64FilePath b)
Left err -> Left err
instance ToHttpApiData Offset where
toUrlPiece (Offset (P2P.Offset n)) = T.pack (show n)
instance FromHttpApiData Offset where
parseUrlPiece t = case readMaybe (T.unpack t) of
Nothing -> Left "offset parse error"
Just n -> Right (Offset (P2P.Offset n))
instance ToHttpApiData Timestamp where
toUrlPiece (Timestamp (MonotonicTimestamp n)) = T.pack (show n)
instance FromHttpApiData Timestamp where
parseUrlPiece t = case readMaybe (T.unpack t) of
Nothing -> Left "timestamp parse error"
Just n -> Right (Timestamp (MonotonicTimestamp n))
instance ToHttpApiData DataLength where
toUrlPiece (DataLength n) = T.pack (show n)
instance FromHttpApiData DataLength where
parseUrlPiece t = case readMaybe (T.unpack t) of
Nothing -> Left "X-git-annex-data-length parse error"
Just n -> Right (DataLength n)
instance ToJSON PutResult where
toJSON (PutResult b) =
object ["stored" .= b]
instance FromJSON PutResult where
parseJSON = withObject "PutResult" $ \v -> PutResult
<$> v .: "stored"
instance ToJSON PutResultPlus where
toJSON (PutResultPlus b us) = object
[ "stored" .= b
, "plusuuids" .= plusList us
]
instance FromJSON PutResultPlus where
parseJSON = withObject "PutResultPlus" $ \v -> PutResultPlus
<$> v .: "stored"
<*> v .: "plusuuids"
instance ToJSON CheckPresentResult where
toJSON (CheckPresentResult b) = object
["present" .= b]
instance FromJSON CheckPresentResult where
parseJSON = withObject "CheckPresentResult" $ \v -> CheckPresentResult
<$> v .: "present"
instance ToJSON RemoveResult where
toJSON (RemoveResult b) = object
["removed" .= b]
instance FromJSON RemoveResult where
parseJSON = withObject "RemoveResult" $ \v -> RemoveResult
<$> v .: "removed"
instance ToJSON RemoveResultPlus where
toJSON (RemoveResultPlus b us) = object
[ "removed" .= b
, "plusuuids" .= plusList us
]
instance FromJSON RemoveResultPlus where
parseJSON = withObject "RemoveResultPlus" $ \v -> RemoveResultPlus
<$> v .: "removed"
<*> v .: "plusuuids"
instance ToJSON GetTimestampResult where
toJSON (GetTimestampResult (Timestamp (MonotonicTimestamp t))) = object
["timestamp" .= t]
instance FromJSON GetTimestampResult where
parseJSON = withObject "GetTimestampResult" $ \v ->
GetTimestampResult . Timestamp . MonotonicTimestamp
<$> v .: "timestamp"
instance ToJSON PutOffsetResult where
toJSON (PutOffsetResult (Offset (P2P.Offset o))) = object
["offset" .= o]
toJSON PutOffsetResultAlreadyHave = object
["alreadyhave" .= True]
instance FromJSON PutOffsetResult where
parseJSON = withObject "PutOffsetResult" $ \v ->
(PutOffsetResult
<$> (Offset . P2P.Offset <$> v .: "offset"))
<|> (mkalreadyhave
<$> (v .: "alreadyhave"))
where
mkalreadyhave :: Bool -> PutOffsetResult
mkalreadyhave _ = PutOffsetResultAlreadyHave
instance ToJSON PutOffsetResultPlus where
toJSON (PutOffsetResultPlus (Offset (P2P.Offset o))) = object
[ "offset" .= o ]
toJSON (PutOffsetResultAlreadyHavePlus us) = object
[ "alreadyhave" .= True
, "plusuuids" .= plusList us
]
instance FromJSON PutOffsetResultPlus where
parseJSON = withObject "PutOffsetResultPlus" $ \v ->
(PutOffsetResultPlus
<$> (Offset . P2P.Offset <$> v .: "offset"))
<|> (mkalreadyhave
<$> (v .: "alreadyhave")
<*> (v .: "plusuuids"))
where
mkalreadyhave :: Bool -> [B64UUID Plus] -> PutOffsetResultPlus
mkalreadyhave _ us = PutOffsetResultAlreadyHavePlus us
instance FromJSON (B64UUID t) where
parseJSON (String t) = case decodeB64Text t of
Right s -> pure (B64UUID (toUUID s))
Left _ -> mempty
parseJSON _ = mempty
instance ToJSON LockResult where
toJSON (LockResult v (Just (B64UUID lck))) = object
[ "locked" .= v
, "lockid" .= encodeB64Text (fromUUID lck)
]
toJSON (LockResult v Nothing) = object
[ "locked" .= v
]
instance FromJSON LockResult where
parseJSON = withObject "LockResult" $ \v -> LockResult
<$> v .: "locked"
<*> v .:? "lockid"
instance ToJSON UnlockRequest where
toJSON (UnlockRequest v) = object
["unlock" .= v]
instance FromJSON UnlockRequest where
parseJSON = withObject "UnlockRequest" $ \v -> UnlockRequest
<$> v .: "unlock"
plusList :: [B64UUID Plus] -> [String]
plusList = map (\(B64UUID u) -> fromUUID u)
class PlusClass plus unplus where
dePlus :: plus -> unplus
plus :: unplus -> plus
instance PlusClass RemoveResultPlus RemoveResult where
dePlus (RemoveResultPlus b _) = RemoveResult b
plus (RemoveResult b) = RemoveResultPlus b mempty
instance PlusClass PutResultPlus PutResult where
dePlus (PutResultPlus b _) = PutResult b
plus (PutResult b) = PutResultPlus b mempty
instance PlusClass PutOffsetResultPlus PutOffsetResult where
dePlus (PutOffsetResultPlus o) = PutOffsetResult o
dePlus (PutOffsetResultAlreadyHavePlus _) = PutOffsetResultAlreadyHave
plus (PutOffsetResult o) = PutOffsetResultPlus o
plus PutOffsetResultAlreadyHave = PutOffsetResultAlreadyHavePlus []
#endif

85
P2P/Http/Url.hs Normal file
View file

@ -0,0 +1,85 @@
{- P2P protocol over HTTP, urls
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module P2P.Http.Url where
import Types.UUID
import Utility.FileSystemEncoding
import Utility.PartialPrelude
import Data.List
import Network.URI
import System.FilePath.Posix as P
import qualified Data.UUID as UUID
#ifdef WITH_SERVANT
import Servant.Client (BaseUrl(..), Scheme(..))
import Text.Read
#endif
defaultP2PHttpProtocolPort :: Int
defaultP2PHttpProtocolPort = 9417 -- Git protocol is 9418
isP2PHttpProtocolUrl :: String -> Bool
isP2PHttpProtocolUrl s =
"annex+http://" `isPrefixOf` s ||
"annex+https://" `isPrefixOf` s
data P2PHttpUrl = P2PHttpUrl
{ p2pHttpUrlString :: String
#ifdef WITH_SERVANT
, p2pHttpBaseUrl :: BaseUrl
#endif
}
deriving (Show)
parseP2PHttpUrl :: String -> Maybe P2PHttpUrl
parseP2PHttpUrl us
| isP2PHttpProtocolUrl us = case parseURI (drop prefixlen us) of
Nothing -> Nothing
Just u ->
#ifdef WITH_SERVANT
case uriScheme u of
"http:" -> mkbaseurl Http u
"https:" -> mkbaseurl Https u
_ -> Nothing
#else
Just $ P2PHttpUrl us
#endif
| otherwise = Nothing
where
prefixlen = length "annex+"
#ifdef WITH_SERVANT
mkbaseurl s u = do
auth <- uriAuthority u
port <- if null (uriPort auth)
then Just defaultP2PHttpProtocolPort
else readMaybe (dropWhile (== ':') (uriPort auth))
return $ P2PHttpUrl us $ BaseUrl
{ baseUrlScheme = s
, baseUrlHost = uriRegName auth
, baseUrlPath = basepath u
, baseUrlPort = port
}
-- The servant server uses urls that start with "/git-annex/",
-- and so the servant client adds that to the base url. So remove
-- it from the url that the user provided. However, it may not be
-- present, eg if some other server is speaking the git-annex
-- protocol. The UUID is also removed from the end of the url.
basepath u = case reverse $ P.splitDirectories (uriPath u) of
("git-annex":"/":rest) -> P.joinPath (reverse rest)
rest -> P.joinPath (reverse rest)
#endif
unavailableP2PHttpUrl :: P2PHttpUrl -> P2PHttpUrl
unavailableP2PHttpUrl p = p
#ifdef WITH_SERVANT
{ p2pHttpBaseUrl = (p2pHttpBaseUrl p) { baseUrlHost = "!dne!" } }
#endif

View file

@ -25,6 +25,7 @@ module P2P.IO
, describeProtoFailure
, runNetProto
, runNet
, signalFullyConsumedByteString
) where
import Common
@ -62,6 +63,7 @@ data ProtoFailure
= ProtoFailureMessage String
| ProtoFailureException SomeException
| ProtoFailureIOError IOError
deriving (Show)
describeProtoFailure :: ProtoFailure -> String
describeProtoFailure (ProtoFailureMessage s) = s
@ -79,7 +81,17 @@ mkRunState mk = do
data P2PHandle
= P2PHandle Handle
| P2PHandleTMVar (TMVar (Either L.ByteString Message)) (TMVar ())
| P2PHandleTMVar
(TMVar (Either L.ByteString Message))
(Maybe (TMVar ()))
(TMVar ())
signalFullyConsumedByteString :: P2PHandle -> IO ()
signalFullyConsumedByteString (P2PHandle _) = return ()
signalFullyConsumedByteString (P2PHandleTMVar _ Nothing _) = return ()
signalFullyConsumedByteString (P2PHandleTMVar _ (Just waitv) closedv) =
atomically $ putTMVar waitv ()
`orElse` readTMVar closedv
data P2PConnection = P2PConnection
{ connRepo :: Maybe Repo
@ -91,6 +103,7 @@ data P2PConnection = P2PConnection
-- Identifier for a connection, only used for debugging.
newtype ConnIdent = ConnIdent (Maybe String)
deriving (Show)
data ClosableConnection conn
= OpenConnection conn
@ -138,7 +151,8 @@ closeConnection conn = do
closehandle (connOhdl conn)
where
closehandle (P2PHandle h) = hClose h
closehandle (P2PHandleTMVar _ _) = return ()
closehandle (P2PHandleTMVar _ _ closedv) =
atomically $ void $ tryPutTMVar closedv ()
-- Serves the protocol on a unix socket.
--
@ -188,11 +202,6 @@ runNetProto runst conn = go
go (Free (Local _)) = return $ Left $
ProtoFailureMessage "unexpected annex operation attempted"
data P2PTMVarException = P2PTMVarException String
deriving (Show)
instance Exception P2PTMVarException
-- Interpreter of the Net part of Proto.
--
-- An interpreter of Proto has to be provided, to handle the rest of Proto
@ -206,18 +215,15 @@ runNet runst conn runner f = case f of
P2PHandle h -> tryNonAsync $ do
hPutStrLn h $ unwords (formatMessage m)
hFlush h
P2PHandleTMVar mv _ ->
ifM (atomically (tryPutTMVar mv (Right m)))
( return $ Right ()
, return $ Left $ toException $
P2PTMVarException "TMVar left full"
)
P2PHandleTMVar mv _ closedv -> tryNonAsync $
atomically $ putTMVar mv (Right m)
`orElse` readTMVar closedv
case v of
Left e -> return $ Left $ ProtoFailureException e
Right () -> runner next
ReceiveMessage next ->
let protoerr = return $ Left $
ProtoFailureMessage "protocol error 1"
ProtoFailureMessage "protocol error"
gotmessage m = do
liftIO $ debugMessage conn "P2P <" m
runner (next (Just m))
@ -230,10 +236,13 @@ runNet runst conn runner f = case f of
Right (Just l) -> case parseMessage l of
Just m -> gotmessage m
Nothing -> runner (next Nothing)
P2PHandleTMVar mv _ ->
liftIO (atomically (takeTMVar mv)) >>= \case
Right m -> gotmessage m
Left _b -> protoerr
P2PHandleTMVar mv _ closedv -> do
let recv = (Just <$> takeTMVar mv)
`orElse` (readTMVar closedv >> return Nothing)
liftIO (atomically recv) >>= \case
Just (Right m) -> gotmessage m
Just (Left _b) -> protoerr
Nothing -> runner (next Nothing)
SendBytes len b p next ->
case connOhdl conn of
P2PHandle h -> do
@ -246,11 +255,16 @@ runNet runst conn runner f = case f of
Right False -> return $ Left $
ProtoFailureMessage "short data write"
Left e -> return $ Left $ ProtoFailureException e
P2PHandleTMVar mv waitv -> do
P2PHandleTMVar mv waitv closedv -> do
liftIO $ atomically $ putTMVar mv (Left b)
-- Wait for the whole bytestring to be
-- processed. Necessary due to lazyiness.
liftIO $ atomically $ takeTMVar waitv
`orElse` readTMVar closedv
-- Wait for the whole bytestring to
-- be processed.
case waitv of
Nothing -> noop
Just v -> liftIO $ atomically $
takeTMVar v
`orElse` readTMVar closedv
runner next
ReceiveBytes len p next ->
case connIhdl conn of
@ -260,11 +274,15 @@ runNet runst conn runner f = case f of
Right b -> runner (next b)
Left e -> return $ Left $
ProtoFailureException e
P2PHandleTMVar mv _ ->
liftIO (atomically (takeTMVar mv)) >>= \case
Left b -> runner (next b)
Right _ -> return $ Left $
ProtoFailureMessage "protocol error 2"
P2PHandleTMVar mv _ closedv -> do
let recv = (Just <$> takeTMVar mv)
`orElse` (readTMVar closedv >> return Nothing)
liftIO (atomically recv) >>= \case
Just (Left b) -> runner (next b)
Just (Right _) -> return $ Left $
ProtoFailureMessage "protocol error"
Nothing -> return $ Left $
ProtoFailureMessage "connection closed"
CheckAuthToken _u t next -> do
let authed = connCheckAuth conn t
runner (next authed)
@ -317,12 +335,16 @@ debugMessage conn prefix m = do
-- Must avoid sending too many bytes as it would confuse the other end.
-- This is easily dealt with by truncating it.
--
-- However, the whole ByteString will be evaluated here, even if
-- the end of it does not get sent.
--
-- If too few bytes are sent, the only option is to give up on this
-- connection. False is returned to indicate this problem.
sendExactly :: Len -> L.ByteString -> Handle -> MeterUpdate -> IO Bool
sendExactly (Len n) b h p = do
sent <- meteredWrite' p (B.hPut h) (L.take (fromIntegral n) b)
return (fromBytesProcessed sent == n)
let (x, y) = L.splitAt (fromIntegral n) b
sent <- meteredWrite' p (B.hPut h) x
L.length y `seq` return (fromBytesProcessed sent == n)
receiveExactly :: Len -> Handle -> MeterUpdate -> IO L.ByteString
receiveExactly (Len n) h p = hGetMetered h (Just n) p

View file

@ -42,12 +42,14 @@ import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S
import Data.Char
import Data.Maybe
import Data.Time.Clock.POSIX
import Control.Applicative
import Control.DeepSeq
import Prelude
newtype Offset = Offset Integer
deriving (Show)
deriving (Show, Eq, NFData, Num, Real, Ord, Enum, Integral)
newtype Len = Len Integer
deriving (Show)
@ -61,6 +63,15 @@ defaultProtocolVersion = ProtocolVersion 0
maxProtocolVersion :: ProtocolVersion
maxProtocolVersion = ProtocolVersion 3
-- In order from newest to oldest.
allProtocolVersions :: [ProtocolVersion]
allProtocolVersions =
[ ProtocolVersion 3
, ProtocolVersion 2
, ProtocolVersion 1
, ProtocolVersion 0
]
newtype ProtoAssociatedFile = ProtoAssociatedFile AssociatedFile
deriving (Show)
@ -250,9 +261,9 @@ data NetF c
-- ^ Sends exactly Len bytes of data. (Any more or less will
-- confuse the receiver.)
| ReceiveBytes Len MeterUpdate (L.ByteString -> c)
-- ^ Lazily reads bytes from peer. Stops once Len are read,
-- or if connection is lost, and in either case returns the bytes
-- that were read. This allows resuming interrupted transfers.
-- ^ Streams bytes from peer. Stops once Len are read,
-- or if connection is lost. This allows resuming
-- interrupted transfers.
| CheckAuthToken UUID AuthToken (Bool -> c)
| RelayService Service c
-- ^ Runs a service, relays its output to the peer, and data
@ -308,6 +319,10 @@ data LocalF c
-- content been transferred.
| StoreContentTo FilePath (Maybe IncrementalVerifier) Offset Len (Proto L.ByteString) (Proto (Maybe Validity)) ((Bool, Verification) -> c)
-- ^ Like StoreContent, but stores the content to a temp file.
| SendContentWith (L.ByteString -> Annex (Maybe Validity -> Annex Bool)) (Proto L.ByteString) (Proto (Maybe Validity)) (Bool -> c)
-- ^ Reads content from the Proto L.ByteString and sends it to the
-- callback. The callback must consume the whole lazy ByteString,
-- before it returns a validity checker.
| SetPresent Key UUID c
| CheckContentPresent Key (Bool -> c)
-- ^ Checks if the whole content of the key is locally present.
@ -362,7 +377,7 @@ negotiateProtocolVersion preferredversion = do
case r of
Just (VERSION v) -> net $ setProtocolVersion v
-- Old server doesn't know about the VERSION command.
Just (ERROR _) -> return ()
Just (ERROR _) -> net $ setProtocolVersion (ProtocolVersion 0)
_ -> net $ sendMessage (ERROR "expected VERSION")
sendBypass :: Bypass -> Proto ()
@ -414,6 +429,26 @@ remove proof key =
net $ sendMessage (REMOVE key)
checkSuccessFailurePlus
getTimestamp :: Proto (Either String MonotonicTimestamp)
getTimestamp = do
net $ sendMessage GETTIMESTAMP
net receiveMessage >>= \case
Just (TIMESTAMP ts) -> return (Right ts)
Just (ERROR err) -> return (Left err)
_ -> do
net $ sendMessage (ERROR "expected TIMESTAMP")
return (Left "protocol error")
removeBefore :: POSIXTime -> Key -> Proto (Either String Bool, Maybe [UUID])
removeBefore endtime key = getTimestamp >>= \case
Right remotetime ->
canRemoveBefore endtime remotetime (local getLocalCurrentTime) >>= \case
Just remoteendtime ->
removeBeforeRemoteEndTime remoteendtime key
Nothing ->
return (Right False, Nothing)
Left err -> return (Left err, Nothing)
{- The endtime is the last local time at which the key can be removed.
- To tell the remote how long it has to remove the key, get its current
- timestamp, and add to it the number of seconds from the current local
@ -424,25 +459,21 @@ remove proof key =
- response from the remote, that is reflected in the local time, and so
- reduces the allowed time.
-}
removeBefore :: POSIXTime -> Key -> Proto (Either String Bool, Maybe [UUID])
removeBefore endtime key = do
net $ sendMessage GETTIMESTAMP
net receiveMessage >>= \case
Just (TIMESTAMP remotetime) -> do
localtime <- local getLocalCurrentTime
let timeleft = endtime - localtime
let timeleft' = MonotonicTimestamp (floor timeleft)
let remoteendtime = remotetime + timeleft'
if timeleft <= 0
then return (Right False, Nothing)
else do
net $ sendMessage $
REMOVE_BEFORE remoteendtime key
checkSuccessFailurePlus
Just (ERROR err) -> return (Left err, Nothing)
_ -> do
net $ sendMessage (ERROR "expected TIMESTAMP")
return (Right False, Nothing)
canRemoveBefore :: Monad m => POSIXTime -> MonotonicTimestamp -> m POSIXTime -> m (Maybe MonotonicTimestamp)
canRemoveBefore endtime remotetime getlocaltime = do
localtime <- getlocaltime
let timeleft = endtime - localtime
let timeleft' = MonotonicTimestamp (floor timeleft)
let remoteendtime = remotetime + timeleft'
return $ if timeleft <= 0
then Nothing
else Just remoteendtime
removeBeforeRemoteEndTime :: MonotonicTimestamp -> Key -> Proto (Either String Bool, Maybe [UUID])
removeBeforeRemoteEndTime remoteendtime key = do
net $ sendMessage $
REMOVE_BEFORE remoteendtime key
checkSuccessFailurePlus
get :: FilePath -> Key -> Maybe IncrementalVerifier -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification)
get dest key iv af m p =
@ -453,17 +484,39 @@ get dest key iv af m p =
storer = storeContentTo dest iv
put :: Key -> AssociatedFile -> MeterUpdate -> Proto (Maybe [UUID])
put key af p = do
put key af p = put' key af $ \offset ->
sendContent key af Nothing offset p
put' :: Key -> AssociatedFile -> (Offset -> Proto (Maybe [UUID])) -> Proto (Maybe [UUID])
put' key af sender = do
net $ sendMessage (PUT (ProtoAssociatedFile af) key)
r <- net receiveMessage
case r of
Just (PUT_FROM offset) -> sendContent key af Nothing offset p
Just (PUT_FROM offset) -> sender offset
Just ALREADY_HAVE -> return (Just [])
Just (ALREADY_HAVE_PLUS uuids) -> return (Just uuids)
_ -> do
net $ sendMessage (ERROR "expected PUT_FROM or ALREADY_HAVE")
return Nothing
-- The protocol does not have a way to get the PUT offset
-- without sending DATA, so send an empty bytestring and indicate
-- it is not valid.
getPutOffset :: Key -> AssociatedFile -> Proto (Either [UUID] Offset)
getPutOffset key af = do
net $ sendMessage (PUT (ProtoAssociatedFile af) key)
r <- net receiveMessage
case r of
Just (PUT_FROM offset) -> do
void $ sendContent' nullMeterUpdate (Len 0) L.empty $
return Invalid
return (Right offset)
Just ALREADY_HAVE -> return (Left [])
Just (ALREADY_HAVE_PLUS uuids) -> return (Left uuids)
_ -> do
net $ sendMessage (ERROR "expected PUT_FROM or ALREADY_HAVE")
return (Left [])
data ServerHandler a
= ServerGot a
| ServerContinue
@ -471,7 +524,14 @@ data ServerHandler a
-- Server loop, getting messages from the client and handling them
serverLoop :: (Message -> Proto (ServerHandler a)) -> Proto (Maybe a)
serverLoop a = do
serverLoop a = serveOneMessage a serverLoop
-- Get one message from the client and handle it.
serveOneMessage
:: (Message -> Proto (ServerHandler a))
-> ((Message -> Proto (ServerHandler a)) -> Proto (Maybe a))
-> Proto (Maybe a)
serveOneMessage a cont = do
mcmd <- net receiveMessage
case mcmd of
-- When the client sends ERROR to the server, the server
@ -479,16 +539,16 @@ serverLoop a = do
-- is in, and so not possible to recover.
Just (ERROR _) -> return Nothing
-- When the client sends an unparsable message, the server
-- responds with an error message, and loops. This allows
-- responds with an error message, and continues. This allows
-- expanding the protocol with new messages.
Nothing -> do
net $ sendMessage (ERROR "unknown command")
serverLoop a
cont a
Just cmd -> do
v <- a cmd
case v of
ServerGot r -> return (Just r)
ServerContinue -> serverLoop a
ServerContinue -> cont a
-- If the client sends an unexpected message,
-- the server will respond with ERROR, and
-- always continues processing messages.
@ -500,7 +560,7 @@ serverLoop a = do
-- support some new feature, and fall back.
ServerUnexpected -> do
net $ sendMessage (ERROR "unexpected command")
serverLoop a
cont a
-- | Serve the protocol, with an unauthenticated peer. Once the peer
-- successfully authenticates, returns their UUID.
@ -525,11 +585,22 @@ data ServerMode
-- ^ Allow reading, and storing new objects, but not deleting objects.
| ServeReadWrite
-- ^ Full read and write access.
deriving (Eq, Ord)
deriving (Show, Eq, Ord)
-- | Serve the protocol, with a peer that has authenticated.
serveAuthed :: ServerMode -> UUID -> Proto ()
serveAuthed servermode myuuid = void $ serverLoop handler
serveAuthed servermode myuuid = void $ serverLoop $
serverHandler servermode myuuid
-- | Serve a single command in the protocol, the same as serveAuthed,
-- but without looping to handle the next command.
serveOneCommandAuthed :: ServerMode -> UUID -> Proto ()
serveOneCommandAuthed servermode myuuid = fromMaybe () <$>
serveOneMessage (serverHandler servermode myuuid)
(const $ pure Nothing)
serverHandler :: ServerMode -> UUID -> Message -> Proto (ServerHandler ())
serverHandler servermode myuuid = handler
where
handler (VERSION theirversion) = do
let v = min theirversion maxProtocolVersion
@ -650,16 +721,21 @@ sendContent key af o offset@(Offset n) p = go =<< local (contentSize key)
else local $ readContent key af o offset $
sender (Len len)
-- Content not available to send. Indicate this by sending
-- empty data and indlicate it's invalid.
-- empty data and indicate it's invalid.
go Nothing = sender (Len 0) L.empty (return Invalid)
sender len content validitycheck = do
let p' = offsetMeterUpdate p (toBytesProcessed n)
net $ sendMessage (DATA len)
net $ sendBytes len content p'
ver <- net getProtocolVersion
when (ver >= ProtocolVersion 1) $
net . sendMessage . VALIDITY =<< validitycheck
checkSuccessPlus
sender = sendContent' p'
p' = offsetMeterUpdate p (toBytesProcessed n)
sendContent' :: MeterUpdate -> Len -> L.ByteString -> Proto Validity -> Proto (Maybe [UUID])
sendContent' p len content validitycheck = do
net $ sendMessage (DATA len)
net $ sendBytes len content p
ver <- net getProtocolVersion
when (ver >= ProtocolVersion 1) $
net . sendMessage . VALIDITY =<< validitycheck
checkSuccessPlus
receiveContent
:: Observable t

View file

@ -45,6 +45,9 @@ data RemoteSide = RemoteSide
, remoteSideId :: RemoteSideId
}
instance Show RemoteSide where
show rs = show (remote rs)
mkRemoteSide :: Remote -> Annex (Maybe (RunState, P2PConnection, ProtoCloser)) -> Annex RemoteSide
mkRemoteSide r remoteconnect = RemoteSide
<$> pure r
@ -76,7 +79,6 @@ closeRemoteSide remoteside =
data ProxySelector = ProxySelector
{ proxyCHECKPRESENT :: Key -> Annex (Maybe RemoteSide)
, proxyLOCKCONTENT :: Key -> Annex (Maybe RemoteSide)
, proxyUNLOCKCONTENT :: Annex (Maybe RemoteSide)
, proxyREMOVE :: Key -> Annex [RemoteSide]
-- ^ remove from all of these remotes
, proxyGETTIMESTAMP :: Annex [RemoteSide]
@ -91,7 +93,6 @@ singleProxySelector :: RemoteSide -> ProxySelector
singleProxySelector r = ProxySelector
{ proxyCHECKPRESENT = const (pure (Just r))
, proxyLOCKCONTENT = const (pure (Just r))
, proxyUNLOCKCONTENT = pure (Just r)
, proxyREMOVE = const (pure [r])
, proxyGETTIMESTAMP = pure [r]
, proxyGET = const (pure (Just r))
@ -200,86 +201,88 @@ mkProxyState = ProxyState
<$> newTVarIO mempty
<*> newTVarIO Nothing
data ProxyParams = ProxyParams
{ proxyMethods :: ProxyMethods
, proxyState :: ProxyState
, proxyServerMode :: ServerMode
, proxyClientSide :: ClientSide
, proxyUUID :: UUID
, proxySelector :: ProxySelector
, proxyConcurrencyConfig :: ConcurrencyConfig
, proxyClientProtocolVersion :: ProtocolVersion
-- ^ The remote(s) may speak an earlier version, or the same
-- version, but not a later version.
}
{- Proxy between the client and the remote. This picks up after
- sendClientProtocolVersion.
-}
proxy
:: Annex r
-> ProxyMethods
-> ProxyState
-> ServerMode
-> ClientSide
-> UUID
-> ProxySelector
-> ConcurrencyConfig
-> ProtocolVersion
-- ^ Protocol version being spoken between the proxy and the
-- client. When there are multiple remotes, some may speak an
-- earlier version.
-> ProxyParams
-> Maybe Message
-- ^ non-VERSION message that was received from the client when
-- negotiating protocol version, and has not been responded to yet
-> ProtoErrorHandled r
proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clientconn) remoteuuid proxyselector concurrencyconfig (ProtocolVersion protocolversion) othermsg protoerrhandler = do
proxy proxydone proxyparams othermsg protoerrhandler = do
case othermsg of
Nothing -> proxynextclientmessage ()
Just message -> proxyclientmessage (Just message)
where
client = liftIO . runNetProto clientrunst clientconn
proxyclientmessage Nothing = proxydone
proxyclientmessage (Just message) = proxyRequest
proxydone proxyparams proxynextclientmessage
message protoerrhandler
proxynextclientmessage () = protoerrhandler proxyclientmessage $
client (net receiveMessage)
client = liftIO . runNetProto clientrunst clientconn
ClientSide clientrunst clientconn = proxyClientSide proxyparams
servermodechecker c a = c servermode $ \case
Nothing -> a
Just notallowed ->
protoerrhandler proxynextclientmessage $
client notallowed
proxyclientmessage Nothing = proxydone
proxyclientmessage (Just message) = case message of
CHECKPRESENT k -> proxyCHECKPRESENT proxyselector k >>= \case
{- Handles proxying a single request between the client and remote. -}
proxyRequest
:: Annex r
-> ProxyParams
-> (() -> Annex r) -- ^ called once the request has been handled
-> Message
-> ProtoErrorHandled r
proxyRequest proxydone proxyparams requestcomplete requestmessage protoerrhandler =
case requestmessage of
CHECKPRESENT k -> proxyCHECKPRESENT (proxySelector proxyparams) k >>= \case
Just remoteside ->
proxyresponse remoteside message
(const proxynextclientmessage)
proxyresponse remoteside requestmessage
(const requestcomplete)
Nothing ->
protoerrhandler proxynextclientmessage $
protoerrhandler requestcomplete $
client $ net $ sendMessage FAILURE
LOCKCONTENT k -> proxyLOCKCONTENT proxyselector k >>= \case
LOCKCONTENT k -> proxyLOCKCONTENT (proxySelector proxyparams) k >>= \case
Just remoteside ->
proxyresponse remoteside message
(const proxynextclientmessage)
handleLOCKCONTENT remoteside requestmessage
Nothing ->
protoerrhandler proxynextclientmessage $
protoerrhandler requestcomplete $
client $ net $ sendMessage FAILURE
UNLOCKCONTENT -> proxyUNLOCKCONTENT proxyselector >>= \case
Just remoteside ->
proxynoresponse remoteside message
proxynextclientmessage
Nothing -> proxynextclientmessage ()
REMOVE k -> do
remotesides <- proxyREMOVE proxyselector k
remotesides <- proxyREMOVE (proxySelector proxyparams) k
servermodechecker checkREMOVEServerMode $
handleREMOVE remotesides k message
handleREMOVE remotesides k requestmessage
REMOVE_BEFORE _ k -> do
remotesides <- proxyREMOVE proxyselector k
remotesides <- proxyREMOVE (proxySelector proxyparams) k
servermodechecker checkREMOVEServerMode $
handleREMOVE remotesides k message
handleREMOVE remotesides k requestmessage
GETTIMESTAMP -> do
remotesides <- proxyGETTIMESTAMP proxyselector
remotesides <- proxyGETTIMESTAMP (proxySelector proxyparams)
handleGETTIMESTAMP remotesides
GET _ _ k -> proxyGET proxyselector k >>= \case
Just remoteside -> handleGET remoteside message
Nothing ->
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage $
ERROR "content not present"
GET _ _ k -> proxyGET (proxySelector proxyparams) k >>= \case
Just remoteside -> handleGET remoteside requestmessage
Nothing -> handleGETNoRemoteSide
PUT paf k -> do
af <- getassociatedfile paf
remotesides <- proxyPUT proxyselector af k
remotesides <- proxyPUT (proxySelector proxyparams) af k
servermodechecker checkPUTServerMode $
handlePUT remotesides k message
BYPASS _ -> proxynextclientmessage ()
handlePUT remotesides k requestmessage
BYPASS _ -> requestcomplete ()
-- These messages involve the git repository, not the
-- annex. So they affect the git repository of the proxy,
-- not the remote.
@ -298,6 +301,7 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
FAILURE_PLUS _ -> protoerr
DATA _ -> protoerr
VALIDITY _ -> protoerr
UNLOCKCONTENT -> protoerr
-- If the client errors out, give up.
ERROR msg -> giveup $ "client error: " ++ msg
-- Messages that only the server should send.
@ -312,6 +316,16 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
-- Early messages that the client should not send now.
AUTH _ _ -> protoerr
VERSION _ -> protoerr
where
client = liftIO . runNetProto clientrunst clientconn
ClientSide clientrunst clientconn = proxyClientSide proxyparams
servermodechecker c a = c (proxyServerMode proxyparams) $ \case
Nothing -> a
Just notallowed ->
protoerrhandler requestcomplete $
client notallowed
-- Send a message to the remote, send its response back to the
-- client, and pass it to the continuation.
@ -320,11 +334,6 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
protoerrhandler (a resp) $
client $ net $ sendMessage resp
-- Send a message to the remote, that it will not respond to.
proxynoresponse remoteside message a =
protoerrhandler a $
runRemoteSide remoteside $ net $ sendMessage message
-- Send a message to the endpoint and get back its response.
getresponse endpoint message handleresp =
protoerrhandler (withresp handleresp) $
@ -340,26 +349,33 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
-- Read a message from one party, send it to the other,
-- and then pass the message to the continuation.
relayonemessage from to cont =
flip protoerrhandler (from $ net $ receiveMessage) $
flip protoerrhandler (from $ net receiveMessage) $
withresp $ \message ->
protoerrhandler (cont message) $
to $ net $ sendMessage message
protoerr = do
_ <- client $ net $ sendMessage (ERROR "protocol error X")
giveup "protocol error M"
_ <- client $ net $ sendMessage (ERROR "protocol error")
giveup "protocol error"
handleLOCKCONTENT remoteside msg =
proxyresponse remoteside msg $ \r () -> case r of
SUCCESS -> relayonemessage client
(runRemoteSide remoteside)
(const requestcomplete)
FAILURE -> requestcomplete ()
_ -> requestcomplete ()
-- When there is a single remote, reply with its timestamp,
-- to avoid needing timestamp translation.
handleGETTIMESTAMP (remoteside:[]) = do
liftIO $ hPutStrLn stderr "!!!! single remote side"
liftIO $ atomically $ do
writeTVar (proxyRemoteLatestTimestamps proxystate)
writeTVar (proxyRemoteLatestTimestamps (proxyState proxyparams))
mempty
writeTVar (proxyRemoteLatestLocalTimestamp proxystate)
writeTVar (proxyRemoteLatestLocalTimestamp (proxyState proxyparams))
Nothing
proxyresponse remoteside GETTIMESTAMP
(const proxynextclientmessage)
(const requestcomplete)
-- When there are multiple remotes, reply with our local timestamp,
-- and do timestamp translation when sending REMOVE-FROM.
handleGETTIMESTAMP remotesides = do
@ -371,14 +387,14 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
remotetimes <- (M.fromList . mapMaybe join) <$> getremotetimes
localtime <- liftIO currentMonotonicTimestamp
liftIO $ atomically $ do
writeTVar (proxyRemoteLatestTimestamps proxystate)
writeTVar (proxyRemoteLatestTimestamps (proxyState proxyparams))
remotetimes
writeTVar (proxyRemoteLatestLocalTimestamp proxystate)
writeTVar (proxyRemoteLatestLocalTimestamp (proxyState proxyparams))
(Just localtime)
protoerrhandler proxynextclientmessage $
protoerrhandler requestcomplete $
client $ net $ sendMessage (TIMESTAMP localtime)
where
getremotetimes = forMC concurrencyconfig remotesides $ \r ->
getremotetimes = forMC (proxyConcurrencyConfig proxyparams) remotesides $ \r ->
runRemoteSideOrSkipFailed r $ do
net $ sendMessage GETTIMESTAMP
net receiveMessage >>= return . \case
@ -395,14 +411,14 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
handleREMOVE [] _ _ =
-- When no places are provided to remove from,
-- don't report a successful remote.
protoerrhandler proxynextclientmessage $
protoerrhandler requestcomplete $
client $ net $ sendMessage FAILURE
handleREMOVE remotesides k message = do
tsm <- liftIO $ readTVarIO $
proxyRemoteLatestTimestamps proxystate
proxyRemoteLatestTimestamps (proxyState proxyparams)
oldlocaltime <- liftIO $ readTVarIO $
proxyRemoteLatestLocalTimestamp proxystate
v <- forMC concurrencyconfig remotesides $ \r ->
proxyRemoteLatestLocalTimestamp (proxyState proxyparams)
v <- forMC (proxyConcurrencyConfig proxyparams) remotesides $ \r ->
runRemoteSideOrSkipFailed r $ do
case message of
REMOVE_BEFORE ts _ -> do
@ -427,11 +443,11 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
_ -> Nothing
let v' = map join v
let us = concatMap snd $ catMaybes v'
mapM_ (\u -> removedContent proxymethods u k) us
protoerrhandler proxynextclientmessage $
mapM_ (\u -> removedContent (proxyMethods proxyparams) u k) us
protoerrhandler requestcomplete $
client $ net $ sendMessage $
let nonplussed = all (== remoteuuid) us
|| protocolversion < 2
let nonplussed = all (== proxyUUID proxyparams) us
|| proxyClientProtocolVersion proxyparams < ProtocolVersion 2
in if all (maybe False (fst . fst)) v'
then if nonplussed
then SUCCESS
@ -441,19 +457,28 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
[] -> FAILURE
(err:_) -> ERROR err
else FAILURE_PLUS us
-- Send an empty DATA and indicate it was invalid.
handleGETNoRemoteSide = protoerrhandler requestcomplete $
client $ net $ do
sendMessage $ DATA (Len 0)
sendBytes (Len 0) mempty nullMeterUpdate
when (proxyClientProtocolVersion proxyparams /= ProtocolVersion 0) $
sendMessage (VALIDITY Invalid)
void $ receiveMessage
handleGET remoteside message = getresponse (runRemoteSide remoteside) message $
withDATA (relayGET remoteside) $ \case
ERROR err -> protoerrhandler proxynextclientmessage $
ERROR err -> protoerrhandler requestcomplete $
client $ net $ sendMessage (ERROR err)
_ -> protoerr
handlePUT (remoteside:[]) k message
| Remote.uuid (remote remoteside) == remoteuuid =
| Remote.uuid (remote remoteside) == proxyUUID proxyparams =
getresponse (runRemoteSide remoteside) message $ \resp -> case resp of
ALREADY_HAVE -> protoerrhandler proxynextclientmessage $
ALREADY_HAVE -> protoerrhandler requestcomplete $
client $ net $ sendMessage resp
ALREADY_HAVE_PLUS _ -> protoerrhandler proxynextclientmessage $
ALREADY_HAVE_PLUS _ -> protoerrhandler requestcomplete $
client $ net $ sendMessage resp
PUT_FROM _ ->
getresponse client resp $
@ -462,7 +487,7 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
(const protoerr)
_ -> protoerr
handlePUT [] _ _ =
protoerrhandler proxynextclientmessage $
protoerrhandler requestcomplete $
client $ net $ sendMessage ALREADY_HAVE
handlePUT remotesides k message =
handlePutMulti remotesides k message
@ -474,8 +499,8 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
relayDATACore len (runRemoteSide remoteside) client $
relayDATAFinish (runRemoteSide remoteside) client $
relayonemessage client (runRemoteSide remoteside) $
const proxynextclientmessage
const requestcomplete
relayPUT remoteside k len = relayDATAStart (runRemoteSide remoteside) $
relayDATACore len client (runRemoteSide remoteside) $
relayDATAFinish client (runRemoteSide remoteside) $
@ -483,15 +508,15 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
where
finished resp () = do
void $ relayPUTRecord k remoteside resp
proxynextclientmessage ()
requestcomplete ()
relayPUTRecord k remoteside SUCCESS = do
addedContent proxymethods (Remote.uuid (remote remoteside)) k
addedContent (proxyMethods proxyparams) (Remote.uuid (remote remoteside)) k
return $ Just [Remote.uuid (remote remoteside)]
relayPUTRecord k remoteside (SUCCESS_PLUS us) = do
let us' = (Remote.uuid (remote remoteside)) : us
forM_ us' $ \u ->
addedContent proxymethods u k
addedContent (proxyMethods proxyparams) u k
return $ Just us'
relayPUTRecord _ _ _ =
return Nothing
@ -513,14 +538,14 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
let alreadyhave = \case
Right (Left _) -> True
_ -> False
l <- forMC concurrencyconfig remotesides initiate
l <- forMC (proxyConcurrencyConfig proxyparams) remotesides initiate
if all alreadyhave l
then if protocolversion < 2
then protoerrhandler proxynextclientmessage $
then if proxyClientProtocolVersion proxyparams < ProtocolVersion 2
then protoerrhandler requestcomplete $
client $ net $ sendMessage ALREADY_HAVE
else protoerrhandler proxynextclientmessage $
else protoerrhandler requestcomplete $
client $ net $ sendMessage $ ALREADY_HAVE_PLUS $
filter (/= remoteuuid) $
filter (/= proxyUUID proxyparams) $
map (Remote.uuid . remote) (lefts (rights l))
else if null (rights l)
-- no response from any remote
@ -533,10 +558,9 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
(const protoerr)
relayPUTMulti minoffset remotes k (Len datalen) _ = do
let totallen = datalen + minoffset
-- Tell each remote how much data to expect, depending
-- on the remote's offset.
rs <- forMC concurrencyconfig remotes $ \r@(remoteside, remoteoffset) ->
rs <- forMC (proxyConcurrencyConfig proxyparams) remotes $ \r@(remoteside, remoteoffset) ->
runRemoteSideOrSkipFailed remoteside $ do
net $ sendMessage $ DATA $ Len $
totallen - remoteoffset
@ -544,6 +568,8 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
protoerrhandler (send (catMaybes rs) minoffset) $
client $ net $ receiveBytes (Len datalen) nullMeterUpdate
where
totallen = datalen + minoffset
chunksize = fromIntegral defaultChunkSize
-- Stream the lazy bytestring out to the remotes in chunks.
@ -553,7 +579,7 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
let (chunk, b') = L.splitAt chunksize b
let chunklen = fromIntegral (L.length chunk)
let !n' = n + chunklen
rs' <- forMC concurrencyconfig rs $ \r@(remoteside, remoteoffset) ->
rs' <- forMC (proxyConcurrencyConfig proxyparams) rs $ \r@(remoteside, remoteoffset) ->
if n >= remoteoffset
then runRemoteSideOrSkipFailed remoteside $ do
net $ sendBytes (Len chunklen) chunk nullMeterUpdate
@ -568,13 +594,21 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
return r
else return (Just r)
if L.null b'
then sent (catMaybes rs')
then do
-- If we didn't receive as much
-- data as expected, close
-- connections to all the remotes,
-- because they are still waiting
-- on the rest of the data.
when (n' /= totallen) $
mapM_ (closeRemoteSide . fst) rs
sent (catMaybes rs')
else send (catMaybes rs') n' b'
sent [] = proxydone
sent rs = relayDATAFinishMulti k (map fst rs)
runRemoteSideOrSkipFailed remoteside a =
runRemoteSideOrSkipFailed remoteside a =
runRemoteSide remoteside a >>= \case
Right v -> return (Just v)
Left _ -> do
@ -594,16 +628,16 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
y $ net $ sendBytes len b nullMeterUpdate
relayDATAFinish x y sendsuccessfailure ()
| protocolversion == 0 = sendsuccessfailure
| proxyClientProtocolVersion proxyparams == ProtocolVersion 0 = sendsuccessfailure
-- Protocol version 1 has a VALID or
-- INVALID message after the data.
| otherwise = relayonemessage x y (\_ () -> sendsuccessfailure)
relayDATAFinishMulti k rs
| protocolversion == 0 =
| proxyClientProtocolVersion proxyparams == ProtocolVersion 0 =
finish $ net receiveMessage
| otherwise =
flip protoerrhandler (client $ net $ receiveMessage) $
flip protoerrhandler (client $ net receiveMessage) $
withresp $ \message ->
finish $ do
-- Relay VALID or INVALID message
@ -615,17 +649,17 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
net receiveMessage
where
finish a = do
storeduuids <- forMC concurrencyconfig rs $ \r ->
storeduuids <- forMC (proxyConcurrencyConfig proxyparams) rs $ \r ->
runRemoteSideOrSkipFailed r a >>= \case
Just (Just resp) ->
relayPUTRecord k r resp
_ -> return Nothing
protoerrhandler proxynextclientmessage $
protoerrhandler requestcomplete $
client $ net $ sendMessage $
case concat (catMaybes storeduuids) of
[] -> FAILURE
us
| protocolversion < 2 -> SUCCESS
| proxyClientProtocolVersion proxyparams < ProtocolVersion 2 -> SUCCESS
| otherwise -> SUCCESS_PLUS us
-- The associated file received from the P2P protocol
@ -640,10 +674,13 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
data ConcurrencyConfig = ConcurrencyConfig Int (MSem.MSem Int)
noConcurrencyConfig :: Annex ConcurrencyConfig
noConcurrencyConfig = liftIO $ ConcurrencyConfig 1 <$> MSem.new 1
noConcurrencyConfig = mkConcurrencyConfig 1
getConcurrencyConfig :: Annex ConcurrencyConfig
getConcurrencyConfig = (annexJobs <$> Annex.getGitConfig) >>= \case
mkConcurrencyConfig :: Int -> Annex ConcurrencyConfig
mkConcurrencyConfig n = liftIO $ ConcurrencyConfig n <$> MSem.new n
concurrencyConfigJobs :: Annex ConcurrencyConfig
concurrencyConfigJobs = (annexJobs <$> Annex.getGitConfig) >>= \case
NonConcurrent -> noConcurrencyConfig
Concurrent n -> go n
ConcurrentPerCpu -> go =<< liftIO getNumProcessors
@ -653,8 +690,7 @@ getConcurrencyConfig = (annexJobs <$> Annex.getGitConfig) >>= \case
when (n > c) $
liftIO $ setNumCapabilities n
setConcurrency (ConcurrencyGitConfig (Concurrent n))
msem <- liftIO $ MSem.new n
return (ConcurrencyConfig n msem)
mkConcurrencyConfig n
forMC :: ConcurrencyConfig -> [a] -> (a -> Annex b) -> Annex [b]
forMC _ (x:[]) a = do

View file

@ -57,7 +57,10 @@ import qualified Remote.GCrypt
import qualified Remote.GitLFS
import qualified Remote.P2P
import qualified Remote.Helper.P2P as P2PHelper
import qualified P2P.Protocol as P2P
import P2P.Address
import P2P.Http.Url
import P2P.Http.Client
import Annex.Path
import Creds
import Types.NumCopies
@ -103,14 +106,20 @@ list autoinit = do
proxied <- listProxied proxies rs'
return (proxied++rs')
where
annexurl r = remoteConfig r "annexurl"
tweakurl c r = do
let n = fromJust $ Git.remoteName r
case M.lookup (annexurl r) c of
Nothing -> return r
Just url -> inRepo $ \g ->
Git.Construct.remoteNamed n $
Git.Construct.fromRemoteLocation (Git.fromConfigValue url) False g
case getAnnexUrl r c of
Just url | not (isP2PHttpProtocolUrl url) ->
inRepo $ \g -> Git.Construct.remoteNamed n $
Git.Construct.fromRemoteLocation url
False g
_ -> return r
getAnnexUrl :: Git.Repo -> M.Map Git.ConfigKey Git.ConfigValue -> Maybe String
getAnnexUrl r c = Git.fromConfigValue <$> M.lookup (annexUrlConfigKey r) c
annexUrlConfigKey :: Git.Repo -> Git.ConfigKey
annexUrlConfigKey r = remoteConfig r "annexurl"
isGitRemoteAnnex :: Git.Repo -> Bool
isGitRemoteAnnex r = "annex::" `isPrefixOf` Git.repoLocation r
@ -160,8 +169,9 @@ enableRemote Nothing _ = giveup "unable to enable git remote with no specified u
- done each time git-annex is run in a way that uses remotes, unless
- annex-checkuuid is false.
-
- Conversely, the config of an URL remote is only read when there is no
- cached UUID value. -}
- The config of other URL remotes is only read when there is no
- cached UUID value.
-}
configRead :: Bool -> Git.Repo -> Annex Git.Repo
configRead autoinit r = do
gc <- Annex.getRemoteGitConfig r
@ -177,7 +187,6 @@ configRead autoinit r = do
Just r' -> return r'
_ -> return r
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r u rc gc rs
-- Remote.GitLFS may be used with a repo that is also encrypted
@ -236,7 +245,7 @@ defaultRepoCost r
| otherwise = expensiveRemoteCost
unavailable :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
unavailable r = gen r'
unavailable r u c gc = gen r' u c gc'
where
r' = case Git.location r of
Git.Local { Git.gitdir = d } ->
@ -247,6 +256,10 @@ unavailable r = gen r'
in r { Git.location = Git.Url (url { uriAuthority = Just auth' })}
Nothing -> r { Git.location = Git.Unknown }
_ -> r -- already unavailable
gc' = gc
{ remoteAnnexP2PHttpUrl =
unavailableP2PHttpUrl <$> remoteAnnexP2PHttpUrl gc
}
{- Tries to read the config for a specified remote, updates state, and
- returns the updated repo. -}
@ -308,6 +321,12 @@ tryGitConfigRead autoinit r hasuuid
-- optimisation.
unless (fromMaybe False $ Git.Config.isBare r') $
setremote setRemoteBare False
-- When annex.url is set to a P2P http url,
-- store in remote.name.annexUrl
case Git.fromConfigValue <$> Git.Config.getMaybe (annexConfig "url") r' of
Just u | isP2PHttpProtocolUrl u ->
setremote (setConfig . annexUrlConfigKey) u
_ -> noop
return r'
Left err -> do
set_ignore "not usable by git-annex" False
@ -405,10 +424,12 @@ inAnnex rmt st key = do
inAnnex' :: Git.Repo -> Remote -> State -> Key -> Annex Bool
inAnnex' repo rmt st@(State connpool duc _ _ _) key
| isP2PHttp rmt = checkp2phttp
| Git.repoIsHttp repo = checkhttp
| Git.repoIsUrl repo = checkremote
| otherwise = checklocal
where
checkp2phttp = p2pHttpClient rmt giveup (clientCheckPresent key)
checkhttp = do
gc <- Annex.getGitConfig
Url.withUrlOptionsPromptingCreds $ \uo ->
@ -445,15 +466,24 @@ dropKey r st proof key = do
dropKey' :: Git.Repo -> Remote -> State -> Maybe SafeDropProof -> Key -> Annex ()
dropKey' repo r st@(State connpool duc _ _ _) proof key
| isP2PHttp r =
clientRemoveWithProof proof key unabletoremove r >>= \case
RemoveResultPlus True fanoutuuids ->
storefanout fanoutuuids
RemoveResultPlus False fanoutuuids -> do
storefanout fanoutuuids
unabletoremove
| not $ Git.repoIsUrl repo = ifM duc
( guardUsable repo (giveup "cannot access remote") removelocal
, giveup "remote does not have expected annex.uuid value"
)
| Git.repoIsHttp repo = giveup "dropping from http remote not supported"
| Git.repoIsHttp repo = giveup "dropping from this remote is not supported"
| otherwise = P2PHelper.remove (uuid r) p2prunner proof key
where
p2prunner = Ssh.runProto r connpool (return (Right False, Nothing))
unabletoremove = giveup "removing content from remote failed"
-- It could take a long time to eg, automount a drive containing
-- the repo, so check the proof for expiry again after locking the
-- content for removal.
@ -475,14 +505,24 @@ dropKey' repo r st@(State connpool duc _ _ _) proof key
)
unless proofunexpired
safeDropProofExpired
storefanout = P2PHelper.storeFanout key InfoMissing (uuid r) . map fromB64UUID
lockKey :: Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r
lockKey r st key callback = do
lockKey r st key callback = do
repo <- getRepo r
lockKey' repo r st key callback
lockKey' :: Git.Repo -> Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r
lockKey' repo r st@(State connpool duc _ _ _) key callback
| isP2PHttp r = do
showLocking r
p2pHttpClient r giveup (clientLockContent key) >>= \case
LockResult True (Just lckid) ->
p2pHttpClient r failedlock $
clientKeepLocked lckid (uuid r)
failedlock callback
_ -> failedlock
| not $ Git.repoIsUrl repo = ifM duc
( guardUsable repo failedlock $ do
inorigrepo <- Annex.makeRunner
@ -509,7 +549,8 @@ copyFromRemote r st key file dest meterupdate vc = do
copyFromRemote'' repo r st key file dest meterupdate vc
copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc
copyFromRemote'' repo r st@(State connpool _ _ _ _) key af dest meterupdate vc
| isP2PHttp r = copyp2phttp
| Git.repoIsHttp repo = verifyKeyContentIncrementally vc key $ \iv -> do
gc <- Annex.getGitConfig
ok <- Url.withUrlOptionsPromptingCreds $
@ -519,8 +560,6 @@ copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc
| not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ do
u <- getUUID
hardlink <- wantHardLink
let bwlimit = remoteAnnexBwLimitDownload (gitconfig r)
<|> remoteAnnexBwLimit (gitconfig r)
-- run copy from perspective of remote
onLocalFast st $ Annex.Content.prepSendAnnex' key Nothing >>= \case
Just (object, _sz, check) -> do
@ -529,7 +568,7 @@ copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc
Nothing -> return True
copier <- mkFileCopier hardlink st
(ok, v) <- runTransfer (Transfer Download u (fromKey id key))
Nothing file Nothing stdRetry $ \p ->
Nothing af Nothing stdRetry $ \p ->
metered (Just (combineMeterUpdate p meterupdate)) key bwlimit $ \_ p' ->
copier object dest key p' checksuccess vc
if ok
@ -540,8 +579,26 @@ copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc
P2PHelper.retrieve
(gitconfig r)
(Ssh.runProto r connpool (return (False, UnVerified)))
key file dest meterupdate vc
| otherwise = giveup "copying from non-ssh, non-http remote not supported"
key af dest meterupdate vc
| otherwise = giveup "copying from this remote is not supported"
where
bwlimit = remoteAnnexBwLimitDownload (gitconfig r)
<|> remoteAnnexBwLimit (gitconfig r)
copyp2phttp = verifyKeyContentIncrementally vc key $ \iv -> do
startsz <- liftIO $ tryWhenExists $
getFileSize (toRawFilePath dest)
bracketIO (openBinaryFile dest ReadWriteMode) (hClose) $ \h -> do
metered (Just meterupdate) key bwlimit $ \_ p -> do
p' <- case startsz of
Just startsz' -> liftIO $ do
resumeVerifyFromOffset startsz' iv p h
_ -> return p
let consumer = meteredWrite' p'
(writeVerifyChunk iv h)
p2pHttpClient r giveup (clientGet key af consumer startsz) >>= \case
Valid -> return ()
Invalid -> giveup "Transfer failed"
copyFromRemoteCheap :: State -> Git.Repo -> Maybe (Key -> AssociatedFile -> FilePath -> Annex ())
#ifndef mingw32_HOST_OS
@ -568,9 +625,10 @@ copyToRemote r st key af o meterupdate = do
copyToRemote' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> Maybe FilePath -> MeterUpdate -> Annex ()
copyToRemote' repo r st@(State connpool duc _ _ _) key af o meterupdate
| isP2PHttp r = prepsendwith copyp2phttp
| not $ Git.repoIsUrl repo = ifM duc
( guardUsable repo (giveup "cannot access remote") $ commitOnCleanup repo r st $
copylocal =<< Annex.Content.prepSendAnnex' key o
prepsendwith copylocal
, giveup "remote does not have expected annex.uuid value"
)
| Git.repoIsSsh repo =
@ -578,18 +636,24 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key af o meterupdate
(Ssh.runProto r connpool (return Nothing))
key af o meterupdate
| otherwise = giveup "copying to non-ssh repo not supported"
| otherwise = giveup "copying to this remote is not supported"
where
copylocal Nothing = giveup "content not available"
copylocal (Just (object, sz, check)) = do
prepsendwith a = Annex.Content.prepSendAnnex' key o >>= \case
Nothing -> giveup "content not available"
Just v -> a v
bwlimit = remoteAnnexBwLimitUpload (gitconfig r)
<|> remoteAnnexBwLimit (gitconfig r)
failedsend = giveup "failed to send content to remote"
copylocal (object, sz, check) = do
-- The check action is going to be run in
-- the remote's Annex, but it needs access to the local
-- Annex monad's state.
checkio <- Annex.withCurrentState check
u <- getUUID
hardlink <- wantHardLink
let bwlimit = remoteAnnexBwLimitUpload (gitconfig r)
<|> remoteAnnexBwLimit (gitconfig r)
-- run copy from perspective of remote
res <- onLocalFast st $ ifM (Annex.Content.inAnnex key)
( return True
@ -605,7 +669,30 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key af o meterupdate
copier object (fromRawFilePath dest) key p' checksuccess verify
)
unless res $
giveup "failed to send content to remote"
failedsend
copyp2phttp (object, sz, check) =
let check' = check >>= \case
Just s -> do
warning (UnquotedString s)
return False
Nothing -> return True
in p2pHttpClient r (const $ pure $ PutOffsetResultPlus (Offset 0)) (clientPutOffset key) >>= \case
PutOffsetResultPlus (offset@(Offset (P2P.Offset n))) ->
metered (Just meterupdate) key bwlimit $ \_ p -> do
let p' = offsetMeterUpdate p (BytesProcessed n)
res <- p2pHttpClient r giveup $
clientPut p' key (Just offset) af object sz check'
case res of
PutResultPlus False fanoutuuids -> do
storefanout fanoutuuids
failedsend
PutResultPlus True fanoutuuids ->
storefanout fanoutuuids
PutOffsetResultAlreadyHavePlus fanoutuuids ->
storefanout fanoutuuids
storefanout = P2PHelper.storeFanout key InfoPresent (uuid r) . map fromB64UUID
fsckOnRemote :: Git.Repo -> [CommandParam] -> Annex (IO Bool)
fsckOnRemote r params
@ -865,7 +952,7 @@ listProxied proxies rs = concat <$> mapM go rs
adduuid ck = M.insert ck
[Git.ConfigValue $ fromUUID $ proxyRemoteUUID p]
addurl = M.insert (remoteConfig renamedr (remoteGitConfigKey UrlField))
addurl = M.insert (mkRemoteConfigKey renamedr (remoteGitConfigKey UrlField))
[Git.ConfigValue $ encodeBS $ Git.repoLocation r]
addproxiedby = case remoteAnnexUUID gc of
@ -893,7 +980,7 @@ listProxied proxies rs = concat <$> mapM go rs
proxieduuids = S.map proxyRemoteUUID proxied
addremoteannexfield f = M.insert
(remoteAnnexConfig renamedr (remoteGitConfigKey f))
(mkRemoteConfigKey renamedr (remoteGitConfigKey f))
inheritconfigs c = foldl' inheritconfig c proxyInheritedFields
@ -901,8 +988,8 @@ listProxied proxies rs = concat <$> mapM go rs
(Nothing, Just v) -> M.insert dest v c
_ -> c
where
src = remoteAnnexConfig r k
dest = remoteAnnexConfig renamedr k
src = mkRemoteConfigKey r k
dest = mkRemoteConfigKey renamedr k
-- When the git config has anything set for a remote,
-- avoid making a proxied remote with the same name.
@ -919,7 +1006,15 @@ listProxied proxies rs = concat <$> mapM go rs
-- Proxing is also yet supported for remotes using P2P
-- addresses.
canproxy gc r
| isP2PHttp' gc = True
| remoteAnnexGitLFS gc = False
| Git.GCrypt.isEncrypted r = False
| Git.repoIsLocal r || Git.repoIsLocalUnknown r = False
| otherwise = isNothing (repoP2PAddress r)
isP2PHttp :: Remote -> Bool
isP2PHttp = isP2PHttp' . gitconfig
isP2PHttp' :: RemoteGitConfig -> Bool
isP2PHttp' = isJust . remoteAnnexP2PHttpUrl

View file

@ -42,16 +42,17 @@ store remoteuuid gc runner k af o p = do
let bwlimit = remoteAnnexBwLimitUpload gc <|> remoteAnnexBwLimit gc
metered (Just p) sizer bwlimit $ \_ p' ->
runner (P2P.put k af p') >>= \case
Just (Just fanoutuuids) -> do
-- Storing on the remote can cause it
-- to be stored on additional UUIDs,
-- so record those.
forM_ fanoutuuids $ \u ->
when (u /= remoteuuid) $
logChange k u InfoPresent
Just (Just fanoutuuids) ->
storeFanout k InfoPresent remoteuuid fanoutuuids
Just Nothing -> giveup "Transfer failed"
Nothing -> remoteUnavail
storeFanout :: Key -> LogStatus -> UUID -> [UUID] -> Annex ()
storeFanout k logstatus remoteuuid us =
forM_ us $ \u ->
when (u /= remoteuuid) $
logChange k u logstatus
retrieve :: RemoteGitConfig -> (ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
retrieve gc runner k af dest p verifyconfig = do
iv <- startVerifyKeyContentIncrementally verifyconfig k
@ -64,20 +65,16 @@ retrieve gc runner k af dest p verifyconfig = do
remove :: UUID -> ProtoRunner (Either String Bool, Maybe [UUID]) -> Maybe SafeDropProof -> Key -> Annex ()
remove remoteuuid runner proof k = runner (P2P.remove proof k) >>= \case
Just (Right True, alsoremoveduuids) -> note alsoremoveduuids
Just (Right True, alsoremoveduuids) ->
storeFanout k InfoMissing remoteuuid
(fromMaybe [] alsoremoveduuids)
Just (Right False, alsoremoveduuids) -> do
note alsoremoveduuids
storeFanout k InfoMissing remoteuuid
(fromMaybe [] alsoremoveduuids)
giveup "removing content from remote failed"
Just (Left err, _) -> do
giveup (safeOutput err)
Nothing -> remoteUnavail
where
-- The remote reports removal from other UUIDs than its own,
-- so record those.
note alsoremoveduuids =
forM_ (fromMaybe [] alsoremoveduuids) $ \u ->
when (u /= remoteuuid) $
logChange k u InfoMissing
checkpresent :: ProtoRunner (Either String Bool) -> Key -> Annex Bool
checkpresent runner k =

View file

@ -44,12 +44,13 @@ toRepo cs r gc remotecmd = do
git_annex_shell :: ConsumeStdin -> Git.Repo -> String -> [CommandParam] -> [(Field, String)] -> Annex (Maybe (FilePath, [CommandParam]))
git_annex_shell cs r command params fields
| not $ Git.repoIsUrl r = do
shellopts <- getshellopts
dir <- liftIO $ absPath (Git.repoPath r)
shellopts <- getshellopts dir
return $ Just (shellcmd, shellopts ++ fieldopts)
| Git.repoIsSsh r = do
gc <- Annex.getRemoteGitConfig r
u <- getRepoUUID r
shellopts <- getshellopts
shellopts <- getshellopts (Git.repoPath r)
let sshcmd = unwords $
fromMaybe shellcmd (remoteAnnexShell gc)
: map shellEscape (toCommand shellopts) ++
@ -58,9 +59,8 @@ git_annex_shell cs r command params fields
Just <$> toRepo cs r gc sshcmd
| otherwise = return Nothing
where
dir = Git.repoPath r
shellcmd = "git-annex-shell"
getshellopts = do
getshellopts dir = do
debugenabled <- Annex.getRead Annex.debugenabled
debugselector <- Annex.getRead Annex.debugselector
let params' = case (debugenabled, debugselector) of

View file

@ -25,8 +25,12 @@ module Types.GitConfig (
RemoteGitConfigField(..),
remoteGitConfigKey,
proxyInheritedFields,
MkRemoteConfigKey,
mkRemoteConfigKey,
) where
import Debug.Trace
import Common
import qualified Git
import qualified Git.Config
@ -55,6 +59,7 @@ import Utility.StatelessOpenPGP (SOPCmd(..), SOPProfile(..))
import Utility.ThreadScheduler (Seconds(..))
import Utility.Url (Scheme, mkScheme)
import Network.Socket (PortNumber)
import P2P.Http.Url
import Control.Concurrent.STM
import qualified Data.Set as S
@ -104,6 +109,7 @@ data GitConfig = GitConfig
, annexSyncMigrations :: Bool
, annexDebug :: Bool
, annexDebugFilter :: Maybe String
, annexUrl :: Maybe String
, annexWebOptions :: [String]
, annexYoutubeDlOptions :: [String]
, annexYoutubeDlCommand :: Maybe String
@ -199,6 +205,7 @@ extractGitConfig configsource r = GitConfig
, annexSyncMigrations = getbool (annexConfig "syncmigrations") True
, annexDebug = getbool (annexConfig "debug") False
, annexDebugFilter = getmaybe (annexConfig "debugfilter")
, annexUrl = getmaybe (annexConfig "url")
, annexWebOptions = getwords (annexConfig "web-options")
, annexYoutubeDlOptions = getwords (annexConfig "youtube-dl-options")
, annexYoutubeDlCommand = getmaybe (annexConfig "youtube-dl-command")
@ -395,6 +402,7 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexClusterNode :: Maybe [RemoteName]
, remoteAnnexClusterGateway :: [ClusterUUID]
, remoteUrl :: Maybe String
, remoteAnnexP2PHttpUrl :: Maybe P2PHttpUrl
{- These settings are specific to particular types of remotes
- including special remotes. -}
@ -487,12 +495,17 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexClusterGateway = fromMaybe [] $
(mapMaybe (mkClusterUUID . toUUID) . words)
<$> getmaybe ClusterGatewayField
, remoteUrl =
case Git.Config.getMaybe (remoteConfig remotename (remoteGitConfigKey UrlField)) r of
, remoteUrl = traceShow (mkRemoteConfigKey remotename (remoteGitConfigKey UrlField)) $
case Git.Config.getMaybe (mkRemoteConfigKey remotename (remoteGitConfigKey UrlField)) r of
Just (ConfigValue b)
| B.null b -> Nothing
| otherwise -> Just (decodeBS b)
_ -> Nothing
, remoteAnnexP2PHttpUrl =
case Git.Config.getMaybe (mkRemoteConfigKey remotename (remoteGitConfigKey AnnexUrlField)) r of
Just (ConfigValue b) ->
parseP2PHttpUrl (decodeBS b)
_ -> Nothing
, remoteAnnexShell = getmaybe ShellField
, remoteAnnexSshOptions = getoptions SshOptionsField
, remoteAnnexRsyncOptions = getoptions RsyncOptionsField
@ -527,8 +540,8 @@ extractRemoteGitConfig r remotename = do
getmaybe' :: RemoteGitConfigField -> Maybe ConfigValue
getmaybe' f =
let k = remoteGitConfigKey f
in Git.Config.getMaybe (remoteAnnexConfig remotename k) r
<|> Git.Config.getMaybe (annexConfig k) r
in Git.Config.getMaybe (mkRemoteConfigKey remotename k) r
<|> Git.Config.getMaybe (mkAnnexConfigKey k) r
getoptions k = fromMaybe [] $ words <$> getmaybe k
data RemoteGitConfigField
@ -569,6 +582,7 @@ data RemoteGitConfigField
| ClusterNodeField
| ClusterGatewayField
| UrlField
| AnnexUrlField
| ShellField
| SshOptionsField
| RsyncOptionsField
@ -594,86 +608,89 @@ data RemoteGitConfigField
| ExternalTypeField
deriving (Enum, Bounded)
remoteGitConfigField :: RemoteGitConfigField -> (UnqualifiedConfigKey, ProxyInherited)
remoteGitConfigField :: RemoteGitConfigField -> (MkRemoteConfigKey, ProxyInherited)
remoteGitConfigField = \case
-- Hard to know the true cost of accessing eg a slow special
-- remote via the proxy. The cost of the proxy is the best guess
-- so do inherit it.
CostField -> inherited "cost"
CostCommandField -> inherited "cost-command"
IgnoreField -> inherited "ignore"
IgnoreCommandField -> inherited "ignore-command"
SyncField -> inherited "sync"
SyncCommandField -> inherited "sync-command"
PullField -> inherited "pull"
PushField -> inherited "push"
ReadOnlyField -> inherited "readonly"
CheckUUIDField -> uninherited "checkuuid"
VerifyField -> inherited "verify"
TrackingBranchField -> uninherited "tracking-branch"
ExportTrackingField -> uninherited "export-tracking"
TrustLevelField -> uninherited "trustlevel"
StartCommandField -> uninherited "start-command"
StopCommandField -> uninherited "stop-command"
SpeculatePresentField -> inherited "speculate-present"
BareField -> inherited "bare"
RetryField -> inherited "retry"
ForwardRetryField -> inherited "forward-retry"
RetryDelayField -> inherited "retrydelay"
StallDetectionField -> inherited "stalldetection"
StallDetectionUploadField -> inherited "stalldetection-upload"
StallDetectionDownloadField -> inherited "stalldetection-download"
BWLimitField -> inherited "bwlimit"
BWLimitUploadField -> inherited "bwlimit-upload"
BWLimitDownloadField -> inherited "bwlimit-upload"
UUIDField -> uninherited "uuid"
ConfigUUIDField -> uninherited "config-uuid"
SecurityAllowUnverifiedDownloadsField -> inherited "security-allow-unverified-downloads"
MaxGitBundlesField -> inherited "max-git-bundles"
AllowEncryptedGitRepoField -> inherited "allow-encrypted-gitrepo"
CostField -> inherited True "cost"
CostCommandField -> inherited True "cost-command"
IgnoreField -> inherited True "ignore"
IgnoreCommandField -> inherited True "ignore-command"
SyncField -> inherited True "sync"
SyncCommandField -> inherited True "sync-command"
PullField -> inherited True "pull"
PushField -> inherited True "push"
ReadOnlyField -> inherited True "readonly"
CheckUUIDField -> uninherited True "checkuuid"
VerifyField -> inherited True "verify"
TrackingBranchField -> uninherited True "tracking-branch"
ExportTrackingField -> uninherited True "export-tracking"
TrustLevelField -> uninherited True "trustlevel"
StartCommandField -> uninherited True "start-command"
StopCommandField -> uninherited True "stop-command"
SpeculatePresentField -> inherited True "speculate-present"
BareField -> inherited True "bare"
RetryField -> inherited True "retry"
ForwardRetryField -> inherited True "forward-retry"
RetryDelayField -> inherited True "retrydelay"
StallDetectionField -> inherited True "stalldetection"
StallDetectionUploadField -> inherited True "stalldetection-upload"
StallDetectionDownloadField -> inherited True "stalldetection-download"
BWLimitField -> inherited True "bwlimit"
BWLimitUploadField -> inherited True "bwlimit-upload"
BWLimitDownloadField -> inherited True "bwlimit-upload"
UUIDField -> uninherited True "uuid"
ConfigUUIDField -> uninherited True "config-uuid"
SecurityAllowUnverifiedDownloadsField -> inherited True "security-allow-unverified-downloads"
MaxGitBundlesField -> inherited True "max-git-bundles"
AllowEncryptedGitRepoField -> inherited True "allow-encrypted-gitrepo"
-- Allow proxy chains.
ProxyField -> inherited "proxy"
ProxiedByField -> uninherited "proxied-by"
ClusterNodeField -> uninherited "cluster-node"
ClusterGatewayField -> uninherited "cluster-gateway"
UrlField -> uninherited "url"
ShellField -> inherited "shell"
SshOptionsField -> inherited "ssh-options"
RsyncOptionsField -> inherited "rsync-options"
RsyncDownloadOptionsField -> inherited "rsync-download-options"
RsyncUploadOptionsField -> inherited "rsync-upload-options"
RsyncTransportField -> inherited "rsync-transport"
GnupgOptionsField -> inherited "gnupg-options"
GnupgDecryptOptionsField -> inherited "gnupg-decrypt-options"
SharedSOPCommandField -> inherited "shared-sop-command"
SharedSOPProfileField -> inherited "shared-sop-profile"
RsyncUrlField -> uninherited "rsyncurl"
BupRepoField -> uninherited "buprepo"
BorgRepoField -> uninherited "borgrepo"
TahoeField -> uninherited "tahoe"
BupSplitOptionsField -> uninherited "bup-split-options"
DirectoryField -> uninherited "directory"
AndroidDirectoryField -> uninherited "androiddirectory"
AndroidSerialField -> uninherited "androidserial"
GCryptField -> uninherited "gcrypt"
GitLFSField -> uninherited "git-lfs"
DdarRepoField -> uninherited "ddarrepo"
HookTypeField -> uninherited "hooktype"
ExternalTypeField -> uninherited "externaltype"
ProxyField -> inherited True "proxy"
ProxiedByField -> uninherited True "proxied-by"
ClusterNodeField -> uninherited True "cluster-node"
ClusterGatewayField -> uninherited True "cluster-gateway"
UrlField -> uninherited False "url"
AnnexUrlField -> inherited False "annexurl"
ShellField -> inherited True "shell"
SshOptionsField -> inherited True "ssh-options"
RsyncOptionsField -> inherited True "rsync-options"
RsyncDownloadOptionsField -> inherited True "rsync-download-options"
RsyncUploadOptionsField -> inherited True "rsync-upload-options"
RsyncTransportField -> inherited True "rsync-transport"
GnupgOptionsField -> inherited True "gnupg-options"
GnupgDecryptOptionsField -> inherited True "gnupg-decrypt-options"
SharedSOPCommandField -> inherited True "shared-sop-command"
SharedSOPProfileField -> inherited True "shared-sop-profile"
RsyncUrlField -> uninherited True "rsyncurl"
BupRepoField -> uninherited True "buprepo"
BorgRepoField -> uninherited True "borgrepo"
TahoeField -> uninherited True "tahoe"
BupSplitOptionsField -> uninherited True "bup-split-options"
DirectoryField -> uninherited True "directory"
AndroidDirectoryField -> uninherited True "androiddirectory"
AndroidSerialField -> uninherited True "androidserial"
GCryptField -> uninherited True "gcrypt"
GitLFSField -> uninherited True "git-lfs"
DdarRepoField -> uninherited True "ddarrepo"
HookTypeField -> uninherited True "hooktype"
ExternalTypeField -> uninherited True "externaltype"
where
inherited f = (f, ProxyInherited True)
uninherited f = (f, ProxyInherited False)
inherited True f = (MkRemoteAnnexConfigKey f, ProxyInherited True)
inherited False f = (MkRemoteConfigKey f, ProxyInherited True)
uninherited True f = (MkRemoteAnnexConfigKey f, ProxyInherited False)
uninherited False f = (MkRemoteConfigKey f, ProxyInherited False)
newtype ProxyInherited = ProxyInherited Bool
-- All remote config fields that are inherited from a proxy.
proxyInheritedFields :: [UnqualifiedConfigKey]
proxyInheritedFields :: [MkRemoteConfigKey]
proxyInheritedFields =
map fst $
filter (\(_, ProxyInherited p) -> p) $
map remoteGitConfigField [minBound..maxBound]
remoteGitConfigKey :: RemoteGitConfigField -> UnqualifiedConfigKey
remoteGitConfigKey :: RemoteGitConfigField -> MkRemoteConfigKey
remoteGitConfigKey = fst . remoteGitConfigField
notempty :: Maybe String -> Maybe String
@ -685,13 +702,23 @@ dummyRemoteGitConfig :: IO RemoteGitConfig
dummyRemoteGitConfig = atomically $
extractRemoteGitConfig Git.Construct.fromUnknown "dummy"
type UnqualifiedConfigKey = B.ByteString
data MkRemoteConfigKey
= MkRemoteAnnexConfigKey B.ByteString
| MkRemoteConfigKey B.ByteString
mkRemoteConfigKey :: RemoteNameable r => r -> MkRemoteConfigKey -> ConfigKey
mkRemoteConfigKey r (MkRemoteAnnexConfigKey b) = remoteAnnexConfig r b
mkRemoteConfigKey r (MkRemoteConfigKey b) = remoteConfig r b
mkAnnexConfigKey :: MkRemoteConfigKey -> ConfigKey
mkAnnexConfigKey (MkRemoteAnnexConfigKey b) = annexConfig b
mkAnnexConfigKey (MkRemoteConfigKey b) = annexConfig b
annexConfigPrefix :: B.ByteString
annexConfigPrefix = "annex."
{- A global annex setting in git config. -}
annexConfig :: UnqualifiedConfigKey -> ConfigKey
annexConfig :: B.ByteString -> ConfigKey
annexConfig key = ConfigKey (annexConfigPrefix <> key)
class RemoteNameable r where
@ -704,13 +731,13 @@ instance RemoteNameable RemoteName where
getRemoteName = id
{- A per-remote annex setting in git config. -}
remoteAnnexConfig :: RemoteNameable r => r -> UnqualifiedConfigKey -> ConfigKey
remoteAnnexConfig :: RemoteNameable r => r -> B.ByteString -> ConfigKey
remoteAnnexConfig r = remoteConfig r . remoteAnnexConfigEnd
remoteAnnexConfigEnd :: UnqualifiedConfigKey -> UnqualifiedConfigKey
remoteAnnexConfigEnd :: B.ByteString -> B.ByteString
remoteAnnexConfigEnd key = "annex-" <> key
{- A per-remote setting in git config. -}
remoteConfig :: RemoteNameable r => r -> UnqualifiedConfigKey -> ConfigKey
remoteConfig :: RemoteNameable r => r -> B.ByteString -> ConfigKey
remoteConfig r key = ConfigKey $
"remote." <> encodeBS (getRemoteName r) <> "." <> key

View file

@ -16,6 +16,7 @@ import qualified Data.UUID as U
import Data.Maybe
import Data.String
import Data.ByteString.Builder
import Control.DeepSeq
import qualified Data.Semigroup as Sem
import Git.Types (ConfigValue(..))
@ -28,6 +29,10 @@ import qualified Utility.SimpleProtocol as Proto
data UUID = NoUUID | UUID B.ByteString
deriving (Eq, Ord, Show, Read)
instance NFData UUID where
rnf NoUUID = ()
rnf (UUID b) = rnf b
class FromUUID a where
fromUUID :: UUID -> a

3
debian/control vendored
View file

@ -85,6 +85,9 @@ Build-Depends:
libghc-git-lfs-dev (>= 1.2.0),
libghc-criterion-dev,
libghc-clock-dev,
libghc-servant-dev,
libghc-servant-server-dev,
libghc-servant-client-dev,
lsof [linux-any],
ikiwiki,
libimage-magick-perl,

View file

@ -114,8 +114,10 @@ the client sends:
LOCKCONTENT Key
The server responds with either SUCCESS or FAILURE.
The former indicates the content is locked. It will remain
locked until the client sends:
The former indicates the content is locked.
After SUCCESS, the content will remain locked until the
client sends its next message, which must be:
UNLOCKCONTENT Key
@ -182,7 +184,7 @@ whitespace.)
The server may respond with ALREADY-HAVE if it already
had the content of that key.
In protocol version 2, the server can optionally reply with
In protocol version 2 and above, the server can optionally reply with
ALREADY-HAVE-PLUS. The subsequent list of UUIDs are additional
UUIDs where the content is stored, in addition to the UUID where
the client was going to send it.
@ -197,9 +199,9 @@ the client to start. This allows resuming transfers.
The client then sends a DATA message with content of the file from
the offset to the end of file.
In protocol version 1, after the data, the client sends an additional
message, to indicate if the content of the file has changed while it
was being sent.
In protocol version 1 and above, after the data, the client sends an
additional message, to indicate if the content of the file has changed
while it was being sent.
INVALID
VALID
@ -207,8 +209,8 @@ was being sent.
If the server successfully receives the data and stores the content,
it replies with SUCCESS. Otherwise, FAILURE.
In protocol version 2, the server can optionally reply with SUCCESS-PLUS
and a list of UUIDs where the content was stored.
In protocol version 2 and above, the server can optionally reply with
SUCCESS-PLUS and a list of UUIDs where the content was stored.
## Getting content from the server
@ -223,7 +225,7 @@ See description of AssociatedFile above.
The server then sends a DATA message with the content of the file
from the offset to end of file.
In protocol version 1, after the data, the server sends an additional
In protocol version 1 and above, after the data, the server sends an additional
message, to indicate if the content of the file has changed while it
was being sent.

View file

@ -1,153 +1,437 @@
[[!toc ]]
## motivation
## introduction
The [[P2P protocol]] is a custom protocol that git-annex speaks over a ssh
connection (mostly). This is a design working on supporting the P2P
protocol over HTTP.
connection (mostly). This is a translation of that protocol to HTTP.
Upload of annex objects to git remotes that use http is currently not
supported by git-annex, and this would be a generally very useful addition.
[[git-annex-p2phttp]] serves this protocol.
For use cases such as OpenNeuro's javascript client, ssh is too difficult
to support, so they currently use a special remote that talks to a http
endpoint in order to upload objects. Implementing this would let them
talk to git-annex over http.
To indicate that an url uses this protocol, use
`annex+http` or `annex+https` as the url scheme. Such an url uses
port 9417 by default, although another port can be specified.
For example, "annex+http://example.com/git-annex/"
With the [[passthrough_proxy]], this would let clients configure a single
http remote that accesses a more complicated network of git-annex
repositories.
## base64 encoding of keys, uuids, and filenames
## integration with git
A git-annex key can contain text in any encoding. So can a filename,
and it's even possible, though unlikely, that the UUID of a git-annex
repository might.
A webserver that is configured to serve a git repository either serves the
files in the repository with dumb http, or uses the git-http-backend CGI
program for url paths under eg `/git/`.
But this protocol requires that UTF-8 be used throughout, except
where bodies use `Content-Type: application/octet-stream`.
To integrate with that, git-annex would need a git-annex-http-backend CGI
program, that the webserver is configured to run for url paths under
`/git/.*/annex/`.
So this protocol allows using
[base64url](https://datatracker.ietf.org/doc/html/rfc4648#section-5)
encoding for such values. Any key, filename, or UUID wrapped in square
brackets is a base64url encoded value.
For example, "[Zm9v]" is the same as "foo".
So, for a remote with an url `http://example.com/git/foo`, git-annex would
use paths under `http://example.com/git/foo/annex/` to run its CGI.
A filename like "[foo]" will need to itself be encoded that way: "[W2Zvb10=]"
But, the CGI interface is a poor match for the P2P protocol.
## authentication
A particular problem is that `LOCKCONTENT` would need to be in one CGI
request, followed by another request to `UNLOCKCONTENT`. Unless
git-annex-http-backend forked a daemon to keep the content locked, it would
not be able to retain a file lock across the 2 requests. While the 10
minute retention lock would paper over that, UNLOCKCONTENT would not be
able to delete the retention lock, because there is no way to know if
another LOCKCONTENT was received later. So LOCKCONTENT would always lock
content for 10 minutes. Which would result in some undesirable behaviors.
Some requests need authentication. Which requests do depends on the
configuration of the HTTP server. When a request needs authentication,
it will fail with 401 Unauthorized.
Another problem is with proxies and clusters. The CGI would need to open
ssh (or http) connections to the proxied repositories and cluster nodes
each time it is run. That would add a lot of latency to every request.
Authentication is done using HTTP basic auth. The realm to use when
authenticating is "git-annex". The charset is UTF-8.
And running a git-annex process once per CGI request also makes git-annex's
own startup speed, which is ok but not great, add latency. And each time
the CGI needed to change the git-annex branch, it would have to commit on
shutdown. Lots of time and space optimisations would be prevented by using
the CGI interface.
When authentication is successful but does not allow a request to be
performed, it will fail with 403 Forbidden.
So, rather than having the CGI program do anything in the repository
itself, have it pass each request through to a long-running server.
(This does have the downside that files would get double-copied
through the CGI, which adds some overhead.)
A reasonable way to do that would be to have a webserver speaking a
HTTP version of the git-annex P2P protocol and the CGI just talks to that.
Note that HTTP basic auth is not encrypted so is only secure when used
over HTTPS.
The CGI program then becomes tiny, and just needs to know the url to
connect to the git-annex HTTP server.
## protocol version
Alternatively, a remote's configuration could include that url, and
then we don't need the complication and overhead of the CGI program at all.
Eg:
Requests are versioned. The versions correspond to
P2P protocol versions. The version is part of the request path,
eg "v3"
git config remote.origin.annex-url http://example.com:8080/
If the server does not support a particular protocol version, the
request will fail with a 404, and the client should fall
back to an earlier protocol version.
So, the rest of this design will focus on implementing that. The CGI
program can be added later if desired, so avoid users needing to configure
an additional thing.
## common request parameters
Note that, one nice benefit of having a separate annex-url is it allows
having remote.origin.url on eg github, but with an annex-url configured
that remote can also be used as a git-annex repository.
Every request supports this parameter, and unless documented
otherwise, it is required to be included.
## approach 1: websockets
* `clientuuid`
The client connects to the server over a websocket. From there on,
the protocol is encapsulated in websockets.
The value is the UUID of the git-annex repository of the client.
This seems nice and simple to implement, but not very web native. Anyone
wanting to talk to this web server would need to understand the P2P
protocol. Just to upload a file would need to deal with AUTH,
AUTH-SUCCESS, AUTH-FAILURE, VERSION, PUT, ALREADY-HAVE, PUT-FROM, DATA,
INVALID, VALID, SUCCESS, and FAILURE messages. Seems like a lot.
Any request may also optionally include these parameters:
Some requests like `LOCKCONTENT` do need full duplex communication like
websockets provide. But, it might be more web native to only use websockets
for that request, and not for everything.
* `bypass`
## approach 2: web-native API
The value is the UUID of a cluster gateway, which the server should avoid
connecting to when serving a cluster. This is the equivilant of the
`BYPASS` message in the [[P2P_Protocol]].
Another approach is to define a web-native API with endpoints that
correspond to each action in the P2P protocol.
This parameter can be given multiple times to list several cluster
gateway UUIDs.
Something like this:
This parameter is only available for v2 and above.
> POST /git-annex/v1/AUTH?clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.0
< AUTH-SUCCESS ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6
[Internally, git-annex can use these common parameters, plus the protocol
version, and remote UUID, to create a P2P session. The P2P session is
driven through the AUTH, VERSION, and BYPASS messages, leaving the session
ready to service requests.]
> POST /git-annex/v1/CHECKPRESENT?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.0
> SUCCESS
## requests
> POST /git-annex/v1/PUT-FROM?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.0
< PUT-FROM 0
### GET /git-annex/$uuid/key/$key
> POST /git-annex/v1/PUT?key=SHA1--foo&associatedfile=bar&put-from=0&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.0
> Content-Type: application/octet-stream
> Content-Length: 20
> foo
> {"valid": true}
< {"stored": true}
This is a simple, unversioned interface to get the content of a key
from a repository.
(In the last example above "foo" is the content, it is followed by a line of json.
This seems better than needing an entire other request to indicate validitity.)
It is not part of the P2P protocol per se, but is provided to let
other clients than git-annex easily download the content of keys from the
http server.
This needs a more complex spec. But it's easier for others to implement,
especially since it does not need a session identifier, so the HTTP server can
be stateless.
When the key is not present on the server, it will respond
with 404 Not Found.
A full draft protocol for this is being developed at [[p2p_protocol_over_http/draft1]].
Note that the common parameters bypass and clientuuid, while
accepted, have no effect. Both are optional for this request.
## HTTP GET
### GET /git-annex/$uuid/v3/key/$key
It should be possible to support a regular HTTP get of a key, with
no additional parameters, so that annex objects can be served to other clients
from this web server.
Get the content of a key from the repository with the specified uuid.
> GET /git-annex/key/SHA1--foo HTTP/1.0
Example:
> GET /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/key/SHA1--foo&associatedfile=bar&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< X-git-annex-data-length: 3
< Content-Type: application/octet-stream
<
< foo
Although this would be a special case, not used by git-annex, because the P2P
protocol's GET has the complication of offsets, and of the server sending
VALID/INVALID after the content, and of needing to know the client's UUID in
order to update the location log.
All parameters are optional, including the common parameters, and these:
## Problem: CONNECT
* `associatedfile`
The CONNECT message allows both sides of the P2P protocol to send DATA
messages in any order. This seems difficult to encapsulate in HTTP.
The name of a file in the git repository, for informational purposes
only.
Probably this can be not implemented, it's probably not needed for a HTTP
remote? This is used to tunnel git protocol over the P2P protocol, but for
a HTTP remote the git repository can be accessed over HTTP as well.
* `offset`
## security
Number of bytes to skip sending from the beginning of the file.
Should support HTTPS and/or be limited to only HTTPS.
Request headers are currently ignored, so eg Range requests are
not supported. (This would be possible to implement, up to a point.)
Authentication via http basic auth?
The body of the request is empty.
The server's response will have a `Content-Type` header of
`application/octet-stream`.
The server's response will have a `X-git-annex-data-length`
header that indicates the number of bytes of content that are expected to
be sent. Note that there is no Content-Length header.
The body of the response is the content of the key.
If the length of the body is different than what the the
X-git-annex-data-length header indicated, then the data is invalid and
should not be used. This can happen when eg, the data was being sent from
an unlocked annexed file, which got modified while it was being sent.
When the content is not present, the server will respond with
422 Unprocessable Content.
### GET /git-annex/$uuid/v2/key/$key
Identical to v3.
### GET /git-annex/$uuid/v1/key/$key
Identical to v3.
### GET /git-annex/$uuid/v0/key/$key
Same as v3, except the X-git-annex-data-length header is not used.
Additional checking client-side will be required to validate the data.
### POST /git-annex/$uuid/v3/checkpresent
Checks if a key is currently present on the server.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/checkpresent?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< {"present": true}
There is one required additional parameter, `key`.
The body of the request is empty.
The server responds with a JSON object with a "present" field that is true
if the key is present, or false if it is not present.
### POST /git-annex/$uuid/v2/checkpresent
Identical to v3.
### POST /git-annex/$uuid/v1/checkpresent
Identical to v3.
### POST /git-annex/$uuid/v0/checkpresent
Identical to v3.
### POST /git-annex/$uuid/v3/lockcontent
Locks the content of a key on the server, preventing it from being removed.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/lockcontent?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< {"locked": true, "lockid": "foo"}
There is one required additional parameter, `key`.
The server will reply with `{"locked": true}` if it was able
to lock the key, or `{"locked": false}` if it was not.
The key will remain locked for 10 minutes. But, usually `keeplocked`
is used to control the lifetime of the lock, using the "lockid"
parameter from the server's reply. (See below.)
### POST /git-annex/$uuid/v2/lockcontent
Identical to v3.
### POST /git-annex/$uuid/v1/lockcontent
Identical to v3.
### POST /git-annex/$uuid/v0/lockcontent
Identical to v3.
### POST /git-annex/$uuid/v3/keeplocked
Controls the lifetime of a lock on a key that was earlier obtained
with `lockcontent`.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/keeplocked?lockid=foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
> Connection: Keep-Alive
> Keep-Alive: timeout=1200
[some time later]
> {"unlock": true}
< {"locked": false}
There is one required additional parameter, `lockid`.
This uses long polling. So it's important to use
Connection and Keep-Alive headers.
This keeps an active lock from expiring until the client sends
`{"unlock": true}`, and then it immediately unlocks it.
The client can send `{"unlock": false}` any number of times first.
This has no effect, but may be useful to keep the connection alive.
This must be called within ten minutes of `lockcontent`, otherwise
the lock will have already expired when this runs. Note that this
does not indicate if the lock expired, it always returns
`{"locked": false}`.
If the connection is closed before the client sends `{"unlock": true},
or even if the web server gets shut down, the content will remain
locked for 10 minutes from the time it was first locked.
Note that the common parameters bypass and clientuuid, while
accepted, have no effect.
### POST /git-annex/$uuid/v2/keeplocked
Identical to v3.
### POST /git-annex/$uuid/v1/keeplocked
Identical to v3.
### POST /git-annex/$uuid/v0/keeplocked
Identical to v3.
### POST /git-annex/$uuid/v3/remove
Remove a key's content from the server.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/remove?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< {"removed": true}
There is one required additional parameter, `key`.
The body of the request is empty.
The server responds with a JSON object with a "removed" field that is true
if the key was removed (or was not present on the server),
or false if the key was not able to be removed.
The JSON object can have an additional field "plusuuids" that is a list of
UUIDs of other repositories that the content was removed from.
### POST /git-annex/$uuid/v2/remove
Identical to v3.
### POST /git-annex/$uuid/v1/remove
Same as v3, except the JSON will not include "plusuuids".
### POST /git-annex/$uuid/v0/remove
Identical to v1.
## POST /git-annex/$uuid/v3/remove-before
Remove a key's content from the server, but only before a specified time.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/remove-before?timestamp=4949292929&key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< {"removed": true}
This is the same as the `remove` request, but with an additional parameter,
`timestamp`.
If the server's monotonic clock is past the specified timestamp, the
removal will fail and the server will respond with: `{"removed": false}`
This is used to avoid removing content after a point in
time where it is no longer locked in other repostitories.
## POST /git-annex/$uuid/v3/gettimestamp
Gets the current timestamp from the server.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/gettimestamp?clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< {"timestamp": 59459392}
The body of the request is empty.
The server responds with JSON object with a timestmap field that has the
current value of its monotonic clock, as a number of seconds.
Important: If multiple servers are serving this protocol for the same
repository, they MUST all use the same monotonic clock.
### POST /git-annex/$uuid/v3/put
Store content on the server.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/put?key=SHA1--foo&associatedfile=bar&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
> Content-Type: application/octet-stream
> X-git-annex-data-length: 3
>
> foo
< {"stored": true}
There is one required additional parameter, `key`.
There are are also these optional parameters:
* `associatedfile`
The name of a file in the git repository, for informational purposes
only.
* `offset`
Number of bytes that have been omitted from the beginning of the file.
Usually this will be determined by making a `putoffset` request.
The `Content-Type` header should be `application/octet-stream`.
The `X-git-annex-data-length` must be included. It indicates the number
of bytes of content that are expected to be sent.
Note that there is no need to send a Content-Length header.
If the length of the body is different than what the the
X-git-annex-data-length header indicated, then the data is invalid and
should not be used. This can happen when eg, the data was being sent from
an unlocked annexed file, which got modified while it was being sent.
The server responds with a JSON object with a field "stored"
that is true if it received the data and stored the content.
The JSON object can have an additional field "plusuuids" that is a list of
UUIDs of other repositories that the content was stored to.
### POST /git-annex/$uuid/v2/put
Identical to v3.
### POST /git-annex/$uuid/v1/put
Same as v3, except the JSON will not include "plusuuids".
### POST /git-annex/$uuid/v0/put
Same as v1, except additional checking is done to validate the data.
### POST /git-annex/$uuid/v3/putoffset
Asks the server what `offset` can be used in a `put` of a key.
This should usually be used right before sending a `put` request.
The offset may not be valid after some point in time, which could result in
the `put` request failing.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/putoffset?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< {"offset": 10}
There is one required additional parameter, `key`.
The body of the request is empty.
The server responds with a JSON object with an "offset" field that
is the largest allowable offset.
If the server already has the content of the key, it will respond instead
with a JSON object with an "alreadyhave" field that is set to true. This JSON
object may also have a field "plusuuids" that lists
the UUIDs of other repositories where the content is stored, in addition to
the serveruuid.
[Implementation note: This will be implemented by sending `PUT` and
returning the `PUT-FROM` offset. To avoid leaving the P2P protocol stuck
part way through a `PUT`, a synthetic empty `DATA` followed by `INVALID`
will be used to get the P2P protocol back into a state where it will accept
any request.]
### POST /git-annex/$uuid/v2/putoffset
Identical to v3.
### POST /git-annex/$uuid/v1/putoffset
Same as v3, except the JSON will not include "plusuuids".
## parts of P2P protocol that are not supported over HTTP
`NOTIFYCHANGE` is not supported, but it would be possible to extend
this HTTP protocol to support it.
`CONNECT` is not supported, and due to the bi-directional message passing
nature of it, it cannot easily be done over HTTP (would need websockets).
It should not be necessary anyway, because the git repository itself can be
accessed over HTTP.

View file

@ -1,389 +0,0 @@
[[!toc ]]
Draft 1 of a complete [[P2P_protocol]] over HTTP.
## authentication
A git-annex protocol endpoint can optionally operate in readonly mode without
authentication.
Authentication is required to make any changes.
Authentication is done using HTTP basic auth.
The user is recommended to only authenticate over HTTPS, since otherwise
HTTP basic auth (as well as git-annex data) can be snooped. But some users
may want git-annex to use HTTP in eg a LAN.
## protocol version
Each request in the protocol is versioned. The versions correspond
to P2P protocol versions.
The protocol version comes before the request. Eg: `/git-annex/v3/put`
If the server does not support a particular protocol version, the
request will fail with a 404, and the client should fall back to an earlier
protocol version.
## common request parameters
Every request supports these common parameters, and unless documented
otherwise, a request requires both of them to be included.
* `clientuuid`
The value is the UUID of the git-annex repository of the client.
* `serveruuid`
The value is the UUID of the git-annex repository that the server
should serve.
Any request may also optionally include these parameters:
* `bypass`
The value is the UUID of a cluster gateway, which the server should avoid
connecting to when serving a cluster. This is the equivilant of the
`BYPASS` message in the [[P2P_Protocol]].
This parameter can be given multiple times to list several cluster
gateway UUIDs.
This parameter is only available for v3 and above.
[Internally, git-annex can use these common parameters, plus the protocol
version, to create a P2P session. The P2P session is driven through
the AUTH, VERSION, and BYPASS messages, leaving the session ready to
service requests.]
## requests
### GET /git-annex/key/$key
This is a simple, unversioned interface to get a key from the server.
It is not part of the P2P protocol per se, but is provided to let
other clients than git-annex easily download the content of keys from the
http server.
This behaves almost the same as `GET /git-annex/v3/key/$key`, although its
behavior may change in later versions.
When the key is not present on the server, this returns a 404 Not Found.
### GET /git-annex/v3/key/$key
Get the content of a key from the server.
This is designed so it can be used both by a peer in the P2P protocol,
and by a regular HTTP client that just wants to download a file.
Example:
> GET /git-annex/v3/key/SHA1--foo&associatedfile=bar&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
< X-git-annex-data-length: 3
< Content-Type: application/octet-stream
<
< foo
The key to get is the part of the url after "/git-annex/vN/key/"
and before any url parameters.
All parameters are optional, including the common parameters, and these:
* `associatedfile`
The name of a file in the git repository, for informational purposes
only.
* `offset`
Number of bytes to skip sending from the beginning of the file.
Request headers are currently ignored, so eg Range requests are
not supported. (This would be possible to implement, up to a point.)
The body of the request is empty.
The server's response will have a `Content-Type` header of
`application/octet-stream`.
The server's response will have a `X-git-annex-data-length`
header that indicates the number of bytes of content that are expected to
be sent. Note that there is no Content-Length header.
The body of the response is the content of the key.
If the length of the body is different than what the the
X-git-annex-data-length header indicated, then the data is invalid and
should not be used. This can happen when eg, the data was being sent from
an unlocked annexed file, which got modified while it was being sent.
When the content is not present, the server will respond with
422 Unprocessable Content.
### GET /git-annex/v2/key/$key
Identical to v3.
### GET /git-annex/v1/key/$key
Identical to v3.
### GET /git-annex/v0/key/$key
Same as v3, except there is no X-git-annex-data-length header.
Additional checking client-side will be required to validate the data.
### POST /git-annex/v3/checkpresent
Checks if a key is currently present on the server.
Example:
> POST /git-annex/v3/checkpresent?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
< {"present": true}
There is one required additional parameter, `key`.
The body of the request is empty.
The server responds with a JSON object with a "present" field that is true
if the key is present, or false if it is not present.
### POST /git-annex/v2/checkpresent
Identical to v3.
### POST /git-annex/v1/checkpresent
Identical to v3.
### POST /git-annex/v0/checkpresent
Identical to v3.
### POST /git-annex/v3/lockcontent
Locks the content of a key on the server, preventing it from being removed.
Example:
> POST /git-annex/v3/lockcontent?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
[websocket protocol follows]
< SUCCESS
> UNLOCKCONTENT
There is one required additional parameter, `key`.
This request opens a websocket between the client and the server.
The server sends "SUCCESS" over the websocket once it has locked
the content. Or it sends "FAILURE" if it is unable to lock the content.
Once the server has sent "SUCCESS", the content remains locked
until the client sends "UNLOCKCONTENT" over the websocket.
If the client disconnects without sending "UNLOCKCONTENT", or the web
server gets shut down before it can receive that, the content will remain
locked for at least 10 minutes from when the server sent "SUCCESS".
### POST /git-annex/v2/lockcontent
Identical to v3.
### POST /git-annex/v1/lockcontent
Identical to v3.
### POST /git-annex/v0/lockcontent
Identical to v3.
### POST /git-annex/v3/remove
Remove a key's content from the server.
Example:
> POST /git-annex/v3/remove?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
< {"removed": true}
There is one required additional parameter, `key`.
The body of the request is empty.
The server responds with a JSON object with a "removed" field that is true
if the key was removed (or was not present on the server),
or false if the key was not able to be removed.
The JSON object can have an additional field "plusuuids" that is a list of
UUIDs of other repositories that the content was removed from.
If the server does not allow removing the key due to a policy
(eg due to being read-only or append-only), it will respond with a JSON
object with an "error" field that has an error message as its value.
### POST /git-annex/v2/remove
Identical to v3.
### POST /git-annex/v1/remove
Same as v3, except the JSON will not include "plusuuids".
### POST /git-annex/v0/remove
Identival to v1.
## POST /git-annex/v3/remove-before
Remove a key's content from the server, but only before a specified time.
Example:
> POST /git-annex/v3/remove-before?timestamp=4949292929&key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
< {"removed": true}
This is the same as the `remove` request, but with an additional parameter,
`timestamp`.
If the server's monotonic clock is past the specified timestamp, the
removal will fail and the server will respond with: `{"removed": false}`
This is used to avoid removing content after a point in
time where it is no longer locked in other repostitories.
## POST /git-annex/v3/gettimestamp
Gets the current timestamp from the server.
Example:
> POST /git-annex/v3/gettimestamp?clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
< {"timestamp": 59459392}
The body of the request is empty.
The server responds with JSON object with a timestmap field that has the
current value of its monotonic clock, as a number of seconds.
Important: If multiple servers are serving this protocol for the same
repository, they MUST all use the same monotonic clock.
### POST /git-annex/v3/put
Store content on the server.
Example:
> POST /git-annex/v3/put?key=SHA1--foo&associatedfile=bar&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
> Content-Type: application/octet-stream
> X-git-annex-object-size: 3
>
> foo
< {"stored": true}
There is one required additional parameter, `key`.
There are are also these optional parameters:
* `associatedfile`
The name of a file in the git repository, for informational purposes
only.
* `offset`
Number of bytes that have been omitted from the beginning of the file.
Usually this will be determined by making a `putoffset` request.
The `Content-Type` header should be `application/octet-stream`.
The `X-git-annex-data-length` must be included. It indicates the number
of bytes of content that are expected to be sent.
Note that there is no need to send a Content-Length header.
If the length of the body is different than what the the
X-git-annex-data-length header indicated, then the data is invalid and
should not be used. This can happen when eg, the data was being sent from
an unlocked annexed file, which got modified while it was being sent.
The server responds with a JSON object with a field "stored"
that is true if it received the data and stored the
content.
The JSON object can have an additional field "plusuuids" that is a list of
UUIDs of other repositories that the content was stored to.
If the server does not allow storing the key due eg to a policy
(eg due to being read-only or append-only), or due to the data being
invalid, or because it ran out of disk space, it will respond with a
JSON object with an "error" field that has an error message as its value.
### POST /git-annex/v2/put
Identical to v3.
### POST /git-annex/v1/put
Same as v3, except the JSON will not include "plusuuids".
### POST /git-annex/v0/put
Same as v1, except there is no X-git-annex-data-length header.
Additional checking client-side will be required to validate the data.
### POST /git-annex/v3/putoffset
Asks the server what `offset` can be used in a `put` of a key.
This should usually be used right before sending a `put` request.
The offset may not be valid after some point in time, which could result in
the `put` request failing.
Example:
> POST /git-annex/v3/putoffset?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
< {"offset": 10}
There is one required additional parameter, `key`.
The body of the request is empty.
The server responds with a JSON object with an "offset" field that
is the largest allowable offset.
If the server already has the content of the key, it will respond with a
JSON object with an "alreadyhave" field that is set to true. This JSON
object may also have a field "plusuuids" that lists
the UUIDs of other repositories where the content is stored, in addition to
the serveruuid.
If the server does not allow storing the key due to a policy
(eg due to being read-only or append-only), it will respond with a JSON
object with an "error" field that has an error message as its value.
[Implementation note: This will be implemented by sending `PUT` and
returning the `PUT-FROM` offset. To avoid leaving the P2P protocol stuck
part way through a `PUT`, a synthetic empty `DATA` followed by `INVALID`
will be used to get the P2P protocol back into a state where it will accept
any request.]
### POST /git-annex/v2/putoffset
Identical to v3.
### POST /git-annex/v1/putoffset
Same as v3, except the JSON will not include "plusuuids".
## parts of P2P protocol that are not supported over HTTP
`NOTIFYCHANGE` is not supported, but it would be possible to extend
this HTTP protocol to support it.
`CONNECT` is not supported, and due to the bi-directional message passing
nature of it, it cannot easily be done over HTTP (would need websockets).
It should not be necessary anyway, because the git repository itself can be
accessed over HTTP.

View file

@ -565,26 +565,41 @@ Tentative design for exporttree=yes with proxies:
* Configure annex-tracking-branch for the proxy in the git-annex branch.
(For the proxy as a whole, or for specific exporttree=yes repos behind
it?)
* Then the user's workflow is simply: `git-annex push proxy`
* Then the user's workflow is simply: `git-annex push`
* sync/push need to first push any updated annex-tracking-branch to the
proxy before sending content to it. (Currently sync only pushes at the
end.)
* If proxied remotes are all exporttree=yes, the proxy rejects any
transfers of a key that is not in the annex-tracking-branch that it
currently knows about. If there is any other proxied remote, the proxy
can direct such transfers to it.
puts of a key that is not in the annex-tracking-branch that it
currently knows about.
* Upon receiving a new annex-tracking-branch or any transfer of a key
used in the current annex-tracking-branch, the proxy can update
the exporttree=yes remotes. This needs to happen incrementally,
the exporttree=yes remote. This needs to happen incrementally,
eg upon receiving a key, just proxy it on to the exporttree=yes remote,
and update the export database. Once all keys are received, update
the git-annex branch to indicate a new tree has been exported.
* Upon receiving a git push of the annex-tracking-branch, a proxy might
be able to get all the changed objects from non-exporttree=yes proxied
remotes that contain them. If so it can update the exporttree=yes
remote automatically and inexpensively. At the same time, a
`git-annex push` will be attempting to send those same objects.
So somehow the proxy will need to manage this situation.
A difficulty is that a put of a key to a proxied exporttree=yes remote
can remove another key from it. Eg, a new version of a file. Consider a
case where two files swapped content. The put of key B would drop
key A that was stored in that file. Since the user's git-annex would not
realize that, it would not upload key A again. So this would leave the
exporttree=yes remote without a cooy of key A until the git-annex branch is
synced and then the situation can be noticed. While doing renames first
would avoid this, [[todo/export_paired_rename_innefficenctcy]] is a
situation where it could still be a problem.
A similar difficulty is that a push of the annex-tracking-branch can
remove a file from the proxied exporttree=yes remote. If a second push
of the annex-tracking-branch adds the file back, but the git-annex branch
has not been fetched, it won't know that the file was removed, so it won't
try to send it, leaving the export incomplete.
A possibile solution to all of these problems would be to have a
.git/annex/objects directory in the exporttree=yes remove. Rather than
deleting any key from it, the proxy can mode a key into that directory.
(git-remote-annex already uses such a directory for storing its keys on
exporttree=yes remotes).
## possible enhancement: indirect uploads

153
doc/git-annex-p2phttp.mdwn Normal file
View file

@ -0,0 +1,153 @@
# NAME
git-annex-p2phttp - HTTP server for the git-annex API
# SYNOPSIS
git-annex p2phttp
# DESCRIPTION
This is a HTTP server for the git-annex API.
It is the git-annex equivilant of git-http-backend(1), for serving
a repository over HTTP with write access for authenticated users.
This does not serve the git repository over HTTP, only the git-annex
API.
Typically a remote will have `remote.name.url` set to a http url
as usual, and `remote.name.annexUrl` set to an annex+http url such as
"annex+http://example.com/git-annex/". The annex+http url is
served by this server, and uses port 9417 by default.
As well as serving the git-annex HTTP API, this server provides a
convenient way to download the content of any key, by using the path
"/git-annex/$uuid/$key". For example:
$ curl http://example.com:9417/git-annex/f11773f0-11e1-45b2-9805-06db16768efe/key/SHA256E-s6--5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03
hello
# OPTIONS
* `--jobs=N` `-JN`
This or annex.jobs must be set to configure the number of worker
threads that serve connections to the webserver.
Since the webserver itself also uses one of these threads,
this needs to be set to 2 or more.
A good choice is often one worker per CPU core: `--jobs=cpus`
* `--proxyconnections=N`
When this command is run in a repository that is configured to act as a
proxy for some of its remotes, this is the maximum number of idle
connections to keep open to proxied remotes.
The default is 1.
* `--clusterjobs=N`
When this command is run in a repository that is a gateway for a cluster,
this is the number of concurrent jobs to use to access nodes of the
cluster, per connection to the webserver.
The default is 1.
A good choice for this will be a balance between the number of nodes
in the cluster and the value of `--jobs`.
For example, if the cluster has 4 nodes, and `--jobs=4`, using
`--clusterjobs=4` will make all nodes in the cluster be accessed
concurrently, which is often optimal. But around 20 cores can be needed
when the webserver is busy.
* `--port=N`
Port to listen on. The default is port 9417, which is the default
port used for an annex+http or annex+https url.
It is not recommended to run this command as root in order to
use a low port like port 80. It will not drop permissions when run as
root.
* `--bind=address`
What address to bind to. The default is to bind to all addresses.
* `--certfile=filename`
TLS certificate file to use. Combining this with `--privatekeyfile`
makes the server use HTTPS.
* `--privatekeyfile=filename`
TLS private key file to use. Combining this with `--certfile`
makes the server use HTTPS.
* `--chainfile=filename`
TLS chain file to use. This option can be repeated any number of times.
* `--authenv`
Allows users to be authenticated with a username and password.
For security, this only allows authentication when the user connects over
HTTPS.
To configure the passwords, set environment variables
like `GIT_ANNEX_P2PHTTP_PASSWORD_alice=foo123`
The permissions of users can also be configured by setting
environment variables like
`GIT_ANNEX_P2PHTTP_PERMISSIONS_alice=readonly`. The value
can be either "readonly" or "appendonly". When this is not set,
the default is to give the user full read+write+remove access.
* `--authenv-http`
Like `--authenv`, but allows authentication when the user connects
over HTTP. This is not secure, since HTTP basic authentication is not
encrypted.
* `--unauth-readonly`
Allows unauthenticated users to read the repository, but not make
modifications to it.
* `--unauth-appendonly`
Allows unauthenticated users to read the repository, and store data in
it, but not remove data from it.
* `--wideopen`
Gives unauthenticated users full read+write+remove access to the
repository.
Please think carefully before enabling this option.
# SEE ALSO
[[git-annex]](1)
git-http-backend(1)
[[git-annex-shell]](1)
[[git-annex-updateproxy]](1)
[[git-annex-initcluster]](1)
[[git-annex-updatecluster]](1)
<https://git-annex.branchable.com/design/p2p_protocol_over_http/>
# AUTHOR
Joey Hess <id@joeyh.name>
<http://git-annex.branchable.com/>
Warning: Automatically converted into a man page by mdwn2man. Edit with care

View file

@ -26,7 +26,7 @@ it. Then after pulling from "work", git-annex will know about an
additional remote, "work-foo". That remote will be accessed using "work" as
a proxy.
Proxies can only be accessed via ssh.
Proxies can only be accessed via ssh or by an annex+http url.
# OPTIONS

View file

@ -212,6 +212,13 @@ content from the key-value store.
See [[git-annex-webapp]](1) for details.
* `p2phttp`
Allows a git-annex repository to be accessed over HTTP using git-annex
p2p protocol.
See [[git-annex-p2phttp]](1) for details.
* `remotedaemon`
Persistant communication with remotes.
@ -1235,6 +1242,13 @@ repository, using [[git-annex-config]]. See its man page for a list.)
After changing this config, you need to re-run `git-annex init` for it
to take effect.
* `annex.url`
When a remote has a http url, the first time git-annex uses the remote
it will check if it can download its `.git/config` file. If it is able
to, and the file has this config set to an "annex+http" or "annex+https"
url, that url will be copied into `remote.<name>.annexUrl`.
* `annex.resolvemerge`
Set to false to prevent merge conflicts in the checked out branch
@ -1530,9 +1544,14 @@ Remotes are configured using these settings in `.git/config`.
* `remote.<name>.annexUrl`
Can be used to specify a different url than the regular `remote.<name>.url`
for git-annex to use when talking with the remote. Similar to the `pushUrl`
for git-annex to use for accessing the remote. Similar to the `pushUrl`
used by git-push.
When this is set to an annex+http or annex+https url, that url is used
for git-annex operations only, and the `remote.<name>.url` is used for
git operations. This allows using [[git-annex-p2phttp]] to serve a
git-annex repository over http.
* `remote.<name>.annex-uuid`
git-annex caches UUIDs of remote repositories here.

View file

@ -12,8 +12,8 @@ special remotes.
## using a cluster
To use a cluster, your repository needs to have its gateway configured as a
remote. Clusters can currently only be accessed via ssh. This gateway
remote is added the same as any other git remote:
remote. Clusters can currently only be accessed via ssh or by a annex+http
url. This gateway remote is added the same as any other git remote:
$ git remote add bigserver me@bigserver:annex
@ -121,6 +121,9 @@ in the git-annex branch. That tells other repositories about the cluster.
Started proxying for node2
Started proxying for node3
The cluster will now be accessible over ssh. To also let the cluster be
accessed over http, you would need to set up a [[tips/smart_http_server]].
Operations that affect multiple nodes of a cluster can often be sped up by
configuring annex.jobs in the gateway repository.
In the example above, the nodes are all disk bound, so operating

View file

@ -28,4 +28,14 @@ Here's how I set it up. --[[Joey]]
When users clone over http, and run git-annex, it will
automatically learn all about your repository and be able to download files
right out of it, also using http.
right out of it, also using http.
----
The above is a simple way to set that up, but it's not necessarily the
*best* way. Both git and git-annex will be accessing the repository using
dumb http, which can be innefficient. And it doesn't allow write access.
For something smarter, you may want to also set up
[git smart http](https://git-scm.com/book/en/v2/Git-on-the-Server-Smart-HTTP),
and the git-annex equivilant, a [[smart_http_server]].

View file

@ -0,0 +1,39 @@
git-annex can access a remote using any web server,
as shown in the tip [[setup_a_public_repository_on_a_web_site]].
That's limited to basic read-only repository access though. Git
has [smart HTTP](https://git-scm.com/book/en/v2/Git-on-the-Server-Smart-HTTP)
that can be used to allow pushes over http. And git-annex has an
equivilant, the [[git annex-p2phttp command|/git-annex-p2phttp]].
As well as allowing write access to authorized users over http,
`git-annex p2phttp` also allows accessing [[clusters]], and other proxied
remotes over http.
You will still need to run a web server to serve the git repository.
`git-annex p2phttp` only serves git-annex's own
[[API|design/p2p_protocol_over_http]], and it does it
on a different port (9417 by default).
You will need to arrange to run `git-annex p2phttp` in your repository as a
daemon or service. Note that it should not be run as root, but as whatever
user owns the repository. It has several options you can use to configure
it, including controlling who can access the repository.
So there are two web servers, and thus two different urls.
A remote will have `remote.name.url` set to the http url
that git will use, and also have `remote.name.annexUrl` set to the url
that git-annex will use to talk to `git-annex p2phttp`. That url
looks like this:
annex+http://example.com/git-annex/
The "annex+http" (or "annex+https") indicates that it's a git-annex API
url, which defaults to being on port 9417 unless a different port is set.
It would be annoying if every user who cloned your repository
had to set `remote.name.annexUrl` manually. So there's a way to automate it.
In the git config file of the repository, set `annex.url` to the "annex+http"
(or "annex+https") url. The first time it uses a http remote, git-annex
downloads its git config file, and sets `remote.name.annexUrl` to the value
of the remote's `annex.url`.

View file

@ -28,19 +28,60 @@ Planned schedule of work:
## work notes
* Next step: Ready to begin implementing in servant. I have a file
`servant.hs` in the httpproto branch that works through some of the
bytestring streaming issues.
* An interrupted PUT to cluster that has a node that is a special remote
over http leaves open the connection to the cluster, so the next request
opens another one.
* Perhaps: Support cgi program that proxies over to a webserver
speaking the http protocol.
So does an interrupted PUT directly to the proxied ;
special remote over http.
* When part of a file has been sent to a cluster via the http server,
the transfer interrupted, and another node is added to the cluster,
and the transfer of the file performed again, there is a failure
sending to the node that had an incomplete copy. It fails like this:
//home/joey/tmp/bench/c3/.git/annex/tmp/SHA256E-s1048576000--09d7e19983a65682138fa5944f135e4fc593330c2693c41d22cc7881443d6060: withBinaryFile: illegal operation
git-annex: transfer already in progress, or unable to take transfer lock
p2pstdio: 1 failed
When using ssh and not the http server, the node that had the incomplete
copy also doesn't get the file, altough no error is displayed.
* When proxying a PUT to a special remote, no verification of the received
content is done, it's just written to a file and that is sent to the
special remote. This violates a usual invariant that any data being
received into a repository gets verified in passing. Although on the
other hand, when sending data to a special remote normally, there is also
no verification.
## items deferred until later for p2p protocol over http
* `git-annex p2phttp` should support serving several repositories at the same
time (not as proxied remotes), so that eg, every git-annex repository
on a server can be served on the same port.
* Support proxying to git remotes that use annex+http urls.
* `git-annex p2phttp` could support systemd socket activation. This would
allow making a systemd unit that listens on port 80.
## completed items for July's work on p2p protocol over http
* HTTP P2P protocol design [[design/p2p_protocol_over_http]].
* addressed [[doc/todo/P2P_locking_connection_drop_safety]]
* finalized HTTP P2P protocol draft 1,
[[design/p2p_protocol_over_http/draft1]]
* implemented server and client for HTTP P2P protocol
* added git-annex p2phttp command to serve HTTP P2P protocol
* Make git-annex p2phttp support https.
* Allow using annex+http urls in remote.name.annexUrl
* Make http server support proxying.
* Make http server support serving a cluster.
## items deferred until later for [[design/passthrough_proxy]]
@ -86,6 +127,14 @@ Planned schedule of work:
* Optimise proxy speed. See design for ideas.
* Speed: A proxy to a local git repository spawns git-annex-shell
to communicate with it. It would be more efficient to operate
directly on the Remote. Especially when transferring content to/from it.
But: When a cluster has several nodes that are local git repositories,
and is sending data to all of them, this would need an alternate
interface than `storeKey`, which supports streaming, of chunks
of a ByteString.
* Use `sendfile()` to avoid data copying overhead when
`receiveBytes` is being fed right into `sendBytes`.
Library to use:

View file

@ -173,6 +173,9 @@ Flag MagicMime
Flag Crypton
Description: Use the crypton library rather than the no longer maintained cryptonite
Flag Servant
Description: Use the servant library, enabling using annex+http urls and git-annex p2phttp
Flag Benchmark
Description: Enable benchmarking
Default: True
@ -312,6 +315,21 @@ Executable git-annex
else
Build-Depends: cryptonite (>= 0.23)
if flag(Servant)
Build-Depends:
servant,
servant-server,
servant-client,
servant-client-core,
warp (>= 3.2.8),
warp-tls (>= 3.2.2)
CPP-Options: -DWITH_SERVANT
Other-Modules:
Command.P2PHttp
P2P.Http
P2P.Http.Server
P2P.Http.State
if (os(windows))
Build-Depends:
Win32 ((>= 2.6.1.0 && < 2.12.0.0) || >= 2.13.4.0),
@ -878,6 +896,9 @@ Executable git-annex
P2P.Address
P2P.Annex
P2P.Auth
P2P.Http.Types
P2P.Http.Client
P2P.Http.Url
P2P.IO
P2P.Protocol
P2P.Proxy

View file

@ -10,6 +10,7 @@ flags:
debuglocks: false
benchmark: true
crypton: true
servant: true
packages:
- '.'
resolver: lts-22.9