Merge remote-tracking branch 'origin/httpproto'

This commit is contained in:
Joey Hess 2024-07-29 11:25:27 -04:00
commit 74f81ebd04
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
46 changed files with 4090 additions and 1024 deletions

View file

@ -115,7 +115,8 @@ newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState, AnnexRead) IO a
-- Values that can be read, but not modified by an Annex action. -- Values that can be read, but not modified by an Annex action.
data AnnexRead = AnnexRead data AnnexRead = AnnexRead
{ activekeys :: TVar (M.Map Key ThreadId) { branchstate :: MVar BranchState
, activekeys :: TVar (M.Map Key ThreadId)
, activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer) , activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
, keysdbhandle :: Keys.DbHandle , keysdbhandle :: Keys.DbHandle
, sshstalecleaned :: TMVar Bool , sshstalecleaned :: TMVar Bool
@ -137,6 +138,7 @@ data AnnexRead = AnnexRead
newAnnexRead :: GitConfig -> IO AnnexRead newAnnexRead :: GitConfig -> IO AnnexRead
newAnnexRead c = do newAnnexRead c = do
bs <- newMVar startBranchState
emptyactivekeys <- newTVarIO M.empty emptyactivekeys <- newTVarIO M.empty
emptyactiveremotes <- newMVar M.empty emptyactiveremotes <- newMVar M.empty
kh <- Keys.newDbHandle kh <- Keys.newDbHandle
@ -146,7 +148,8 @@ newAnnexRead c = do
cm <- newTMVarIO M.empty cm <- newTMVarIO M.empty
cc <- newTMVarIO (CredentialCache M.empty) cc <- newTMVarIO (CredentialCache M.empty)
return $ AnnexRead return $ AnnexRead
{ activekeys = emptyactivekeys { branchstate = bs
, activekeys = emptyactivekeys
, activeremotes = emptyactiveremotes , activeremotes = emptyactiveremotes
, keysdbhandle = kh , keysdbhandle = kh
, sshstalecleaned = sc , sshstalecleaned = sc
@ -180,7 +183,6 @@ data AnnexState = AnnexState
, output :: MessageState , output :: MessageState
, concurrency :: ConcurrencySetting , concurrency :: ConcurrencySetting
, daemon :: Bool , daemon :: Bool
, branchstate :: BranchState
, repoqueue :: Maybe (Git.Queue.Queue Annex) , repoqueue :: Maybe (Git.Queue.Queue Annex)
, catfilehandles :: CatFileHandles , catfilehandles :: CatFileHandles
, hashobjecthandle :: Maybe (ResourcePool HashObjectHandle) , hashobjecthandle :: Maybe (ResourcePool HashObjectHandle)
@ -235,7 +237,6 @@ newAnnexState c r = do
, output = o , output = o
, concurrency = ConcurrencyCmdLine NonConcurrent , concurrency = ConcurrencyCmdLine NonConcurrent
, daemon = False , daemon = False
, branchstate = startBranchState
, repoqueue = Nothing , repoqueue = Nothing
, catfilehandles = catFileHandlesNonConcurrent , catfilehandles = catFileHandlesNonConcurrent
, hashobjecthandle = Nothing , hashobjecthandle = Nothing

View file

@ -262,7 +262,7 @@ updateTo' pairs = do
else commitIndex jl branchref merge_desc commitrefs else commitIndex jl branchref merge_desc commitrefs
) )
addMergedRefs tomerge addMergedRefs tomerge
invalidateCache invalidateCacheAll
stagejournalwhen dirty jl a stagejournalwhen dirty jl a
| dirty = stageJournal jl a | dirty = stageJournal jl a
@ -487,7 +487,7 @@ set jl ru f c = do
-- evaluating a Journalable Builder twice, which is not very -- evaluating a Journalable Builder twice, which is not very
-- efficient. Instead, assume that it's not common to need to read -- efficient. Instead, assume that it's not common to need to read
-- a log file immediately after writing it. -- a log file immediately after writing it.
invalidateCache invalidateCache f
{- Appends content to the journal file. -} {- Appends content to the journal file. -}
append :: Journalable content => JournalLocked -> RawFilePath -> AppendableJournalFile -> content -> Annex () append :: Journalable content => JournalLocked -> RawFilePath -> AppendableJournalFile -> content -> Annex ()
@ -495,7 +495,7 @@ append jl f appendable toappend = do
journalChanged journalChanged
appendJournalFile jl appendable toappend appendJournalFile jl appendable toappend
fastDebug "Annex.Branch" ("append " ++ fromRawFilePath f) fastDebug "Annex.Branch" ("append " ++ fromRawFilePath f)
invalidateCache invalidateCache f
{- Commit message used when making a commit of whatever data has changed {- Commit message used when making a commit of whatever data has changed
- to the git-annex branch. -} - to the git-annex branch. -}

View file

@ -2,7 +2,7 @@
- -
- Runtime state about the git-annex branch, and a small cache. - Runtime state about the git-annex branch, and a small cache.
- -
- Copyright 2011-2022 Joey Hess <id@joeyh.name> - Copyright 2011-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -16,14 +16,18 @@ import qualified Annex
import Logs import Logs
import qualified Git import qualified Git
import Control.Concurrent
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
getState :: Annex BranchState getState :: Annex BranchState
getState = Annex.getState Annex.branchstate getState = do
v <- Annex.getRead Annex.branchstate
liftIO $ readMVar v
changeState :: (BranchState -> BranchState) -> Annex () changeState :: (BranchState -> BranchState) -> Annex ()
changeState changer = Annex.changeState $ \s -> changeState changer = do
s { Annex.branchstate = changer (Annex.branchstate s) } v <- Annex.getRead Annex.branchstate
liftIO $ modifyMVar_ v $ return . changer
{- Runs an action to check that the index file exists, if it's not been {- Runs an action to check that the index file exists, if it's not been
- checked before in this run of git-annex. -} - checked before in this run of git-annex. -}
@ -130,5 +134,11 @@ getCache file state = go (cachedFileContents state)
| f == file && not (needInteractiveAccess state) = Just c | f == file && not (needInteractiveAccess state) = Just c
| otherwise = go rest | otherwise = go rest
invalidateCache :: Annex () invalidateCache :: RawFilePath -> Annex ()
invalidateCache = changeState $ \s -> s { cachedFileContents = [] } invalidateCache f = changeState $ \s -> s
{ cachedFileContents = filter (\(f', _) -> f' /= f)
(cachedFileContents s)
}
invalidateCacheAll :: Annex ()
invalidateCacheAll = changeState $ \s -> s { cachedFileContents = [] }

View file

@ -18,6 +18,7 @@ import P2P.Protocol
import P2P.IO import P2P.IO
import Annex.Proxy import Annex.Proxy
import Annex.UUID import Annex.UUID
import Annex.BranchState
import Logs.Location import Logs.Location
import Logs.PreferredContent import Logs.PreferredContent
import Types.Command import Types.Command
@ -38,22 +39,17 @@ proxyCluster
-> (forall a. Annex () -> ((a -> CommandPerform) -> Annex (Either ProtoFailure a) -> CommandPerform)) -> (forall a. Annex () -> ((a -> CommandPerform) -> Annex (Either ProtoFailure a) -> CommandPerform))
-> CommandPerform -> CommandPerform
proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do
enableInteractiveBranchAccess
getClientProtocolVersion (fromClusterUUID clusteruuid) clientside getClientProtocolVersion (fromClusterUUID clusteruuid) clientside
withclientversion (protoerrhandler noop) withclientversion (protoerrhandler noop)
where where
proxymethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing
, addedContent = \u k -> logChange k u InfoPresent
}
withclientversion (Just (clientmaxversion, othermsg)) = do withclientversion (Just (clientmaxversion, othermsg)) = do
-- The protocol versions supported by the nodes are not -- The protocol versions supported by the nodes are not
-- known at this point, and would be too expensive to -- known at this point, and would be too expensive to
-- determine. Instead, pick the newest protocol version -- determine. Instead, pick the newest protocol version
-- that we and the client both speak. The proxy code -- that we and the client both speak. The proxy code
-- checks protocol versions when operating on multiple -- checks protocol versions of remotes, so nodes can
-- nodes, and allows nodes to have different protocol -- have different protocol versions.
-- versions.
let protocolversion = min maxProtocolVersion clientmaxversion let protocolversion = min maxProtocolVersion clientmaxversion
sendClientProtocolVersion clientside othermsg protocolversion sendClientProtocolVersion clientside othermsg protocolversion
(getclientbypass protocolversion) (protoerrhandler noop) (getclientbypass protocolversion) (protoerrhandler noop)
@ -64,16 +60,29 @@ proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do
(withclientbypass protocolversion) (protoerrhandler noop) (withclientbypass protocolversion) (protoerrhandler noop)
withclientbypass protocolversion (bypassuuids, othermsg) = do withclientbypass protocolversion (bypassuuids, othermsg) = do
(selectnode, closenodes) <- clusterProxySelector clusteruuid (selectnode, closenodes) <-
protocolversion bypassuuids clusterProxySelector clusteruuid
concurrencyconfig <- getConcurrencyConfig protocolversion bypassuuids
proxystate <- liftIO mkProxyState proxystate <- liftIO mkProxyState
proxy proxydone proxymethods proxystate servermode clientside concurrencyconfig <- concurrencyConfigJobs
(fromClusterUUID clusteruuid) let proxyparams = ProxyParams
selectnode concurrencyconfig protocolversion { proxyMethods = mkProxyMethods
othermsg (protoerrhandler closenodes) , proxyState = proxystate
, proxyServerMode = servermode
, proxyClientSide = clientside
, proxyUUID = fromClusterUUID clusteruuid
, proxySelector = selectnode
, proxyConcurrencyConfig = concurrencyconfig
, proxyClientProtocolVersion = protocolversion
}
proxy proxydone proxyparams othermsg
(protoerrhandler closenodes)
clusterProxySelector :: ClusterUUID -> ProtocolVersion -> Bypass -> Annex (ProxySelector, Annex ()) clusterProxySelector
:: ClusterUUID
-> ProtocolVersion
-> Bypass
-> Annex (ProxySelector, Annex ())
clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do
nodeuuids <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs) nodeuuids <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs)
<$> getClusters <$> getClusters
@ -113,7 +122,6 @@ clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do
-- instead it can be locked on individual nodes that are -- instead it can be locked on individual nodes that are
-- proxied to the client. -- proxied to the client.
, proxyLOCKCONTENT = const (pure Nothing) , proxyLOCKCONTENT = const (pure Nothing)
, proxyUNLOCKCONTENT = pure Nothing
} }
return (proxyselector, closenodes) return (proxyselector, closenodes)
where where

View file

@ -181,11 +181,13 @@ data GetPrivate = GetPrivate Bool
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent
getJournalFileStale (GetPrivate getprivate) file = do getJournalFileStale (GetPrivate getprivate) file = do
st <- Annex.getState id st <- Annex.getState id
let repo = Annex.repo st
bs <- getState
liftIO $ liftIO $
if getprivate && privateUUIDsKnown' st if getprivate && privateUUIDsKnown' st
then do then do
x <- getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) x <- getfrom (gitAnnexJournalDir bs repo)
getfrom (gitAnnexPrivateJournalDir (Annex.branchstate st) (Annex.repo st)) >>= \case getfrom (gitAnnexPrivateJournalDir bs repo) >>= \case
Nothing -> return $ case x of Nothing -> return $ case x of
Nothing -> NoJournalledContent Nothing -> NoJournalledContent
Just b -> JournalledContent b Just b -> JournalledContent b
@ -195,7 +197,7 @@ getJournalFileStale (GetPrivate getprivate) file = do
-- happens in a merge of two -- happens in a merge of two
-- git-annex branches. -- git-annex branches.
Just x' -> x' <> y Just x' -> x' <> y
else getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) >>= return . \case else getfrom (gitAnnexJournalDir bs repo) >>= return . \case
Nothing -> NoJournalledContent Nothing -> NoJournalledContent
Just b -> JournalledContent b Just b -> JournalledContent b
where where
@ -223,8 +225,9 @@ discardIncompleteAppend v
- journal is staged as it is run. -} - journal is staged as it is run. -}
getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath] getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath]
getJournalledFilesStale getjournaldir = do getJournalledFilesStale getjournaldir = do
st <- Annex.getState id bs <- getState
let d = getjournaldir (Annex.branchstate st) (Annex.repo st) repo <- Annex.gitRepo
let d = getjournaldir bs repo
fs <- liftIO $ catchDefaultIO [] $ fs <- liftIO $ catchDefaultIO [] $
getDirectoryContents (fromRawFilePath d) getDirectoryContents (fromRawFilePath d)
return $ filter (`notElem` [".", ".."]) $ return $ filter (`notElem` [".", ".."]) $
@ -233,8 +236,9 @@ getJournalledFilesStale getjournaldir = do
{- Directory handle open on a journal directory. -} {- Directory handle open on a journal directory. -}
withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
withJournalHandle getjournaldir a = do withJournalHandle getjournaldir a = do
st <- Annex.getState id bs <- getState
let d = getjournaldir (Annex.branchstate st) (Annex.repo st) repo <- Annex.gitRepo
let d = getjournaldir bs repo
bracket (opendir d) (liftIO . closeDirectory) (liftIO . a) bracket (opendir d) (liftIO . closeDirectory) (liftIO . a)
where where
-- avoid overhead of creating the journal directory when it already -- avoid overhead of creating the journal directory when it already

View file

@ -8,23 +8,31 @@
module Annex.Proxy where module Annex.Proxy where
import Annex.Common import Annex.Common
import P2P.Proxy import qualified Annex
import P2P.Protocol
import P2P.IO
import qualified Remote import qualified Remote
import qualified Types.Remote as Remote import qualified Types.Remote as Remote
import qualified Remote.Git import qualified Remote.Git
import P2P.Proxy
import P2P.Protocol
import P2P.IO
import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection) import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
import Annex.Content import Annex.Content
import Annex.Concurrent import Annex.Concurrent
import Annex.Tmp import Annex.Tmp
import Logs.Proxy
import Logs.Cluster
import Logs.UUID
import Logs.Location
import Utility.Tmp.Dir import Utility.Tmp.Dir
import Utility.Metered import Utility.Metered
import Control.Concurrent.STM import Control.Concurrent.STM
import Control.Concurrent.Async import Control.Concurrent.Async
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified System.FilePath.ByteString as P import qualified System.FilePath.ByteString as P
import qualified Data.Map as M
import qualified Data.Set as S
proxyRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide proxyRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide
proxyRemoteSide clientmaxversion bypass r proxyRemoteSide clientmaxversion bypass r
@ -53,18 +61,19 @@ proxySpecialRemoteSide clientmaxversion r = mkRemoteSide r $ do
ohdl <- liftIO newEmptyTMVarIO ohdl <- liftIO newEmptyTMVarIO
iwaitv <- liftIO newEmptyTMVarIO iwaitv <- liftIO newEmptyTMVarIO
owaitv <- liftIO newEmptyTMVarIO owaitv <- liftIO newEmptyTMVarIO
endv <- liftIO newEmptyTMVarIO iclosedv <- liftIO newEmptyTMVarIO
oclosedv <- liftIO newEmptyTMVarIO
worker <- liftIO . async =<< forkState worker <- liftIO . async =<< forkState
(proxySpecialRemote protoversion r ihdl ohdl owaitv endv) (proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv)
let remoteconn = P2PConnection let remoteconn = P2PConnection
{ connRepo = Nothing { connRepo = Nothing
, connCheckAuth = const False , connCheckAuth = const False
, connIhdl = P2PHandleTMVar ihdl iwaitv , connIhdl = P2PHandleTMVar ihdl (Just iwaitv) iclosedv
, connOhdl = P2PHandleTMVar ohdl owaitv , connOhdl = P2PHandleTMVar ohdl (Just owaitv) oclosedv
, connIdent = ConnIdent (Just (Remote.name r)) , connIdent = ConnIdent (Just (Remote.name r))
} }
let closeremoteconn = do let closeremoteconn = do
liftIO $ atomically $ putTMVar endv () liftIO $ atomically $ putTMVar oclosedv ()
join $ liftIO (wait worker) join $ liftIO (wait worker)
return $ Just return $ Just
( remoterunst ( remoterunst
@ -81,7 +90,7 @@ proxySpecialRemote
-> TMVar () -> TMVar ()
-> TMVar () -> TMVar ()
-> Annex () -> Annex ()
proxySpecialRemote protoversion r ihdl ohdl owaitv endv = go proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
where where
go :: Annex () go :: Annex ()
go = liftIO receivemessage >>= \case go = liftIO receivemessage >>= \case
@ -114,23 +123,28 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv endv = go
liftIO $ sendmessage $ liftIO $ sendmessage $
ERROR "NOTIFYCHANGE unsupported for a special remote" ERROR "NOTIFYCHANGE unsupported for a special remote"
go go
Just _ -> giveup "protocol error M" Just _ -> giveup "protocol error"
Nothing -> return () Nothing -> return ()
getnextmessageorend = receivemessage = liftIO (atomically recv) >>= \case
liftIO $ atomically $
(Right <$> takeTMVar ohdl)
`orElse`
(Left <$> readTMVar endv)
receivemessage = getnextmessageorend >>= \case
Right (Right m) -> return (Just m) Right (Right m) -> return (Just m)
Right (Left _b) -> giveup "unexpected ByteString received from P2P MVar" Right (Left _b) -> giveup "unexpected ByteString received from P2P MVar"
Left () -> return Nothing Left () -> return Nothing
where
recv =
(Right <$> takeTMVar ohdl)
`orElse`
(Left <$> readTMVar oclosedv)
receivebytestring = atomically (takeTMVar ohdl) >>= \case receivebytestring = atomically recv >>= \case
Left b -> return b Right (Left b) -> return b
Right _m -> giveup "did not receive ByteString from P2P MVar" Right (Right _m) -> giveup "did not receive ByteString from P2P MVar"
Left () -> giveup "connection closed"
where
recv =
(Right <$> takeTMVar ohdl)
`orElse`
(Left <$> readTMVar oclosedv)
sendmessage m = atomically $ putTMVar ihdl (Right m) sendmessage m = atomically $ putTMVar ihdl (Right m)
@ -155,21 +169,40 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv endv = go
Right () -> liftIO $ sendmessage SUCCESS Right () -> liftIO $ sendmessage SUCCESS
Left err -> liftIO $ propagateerror err Left err -> liftIO $ propagateerror err
liftIO receivemessage >>= \case liftIO receivemessage >>= \case
Just (DATA (Len _)) -> do Just (DATA (Len len)) -> do
b <- liftIO receivebytestring h <- liftIO $ openFile (fromRawFilePath tmpfile) WriteMode
liftIO $ L.writeFile (fromRawFilePath tmpfile) b liftIO $ receivetofile h len
-- Signal that the whole bytestring liftIO $ hClose h
-- has been received.
liftIO $ atomically $ putTMVar owaitv ()
if protoversion > ProtocolVersion 1 if protoversion > ProtocolVersion 1
then liftIO receivemessage >>= \case then liftIO receivemessage >>= \case
Just (VALIDITY Valid) -> Just (VALIDITY Valid) ->
store store
Just (VALIDITY Invalid) -> Just (VALIDITY Invalid) ->
return () liftIO $ sendmessage FAILURE
_ -> giveup "protocol error N" _ -> giveup "protocol error"
else store else store
_ -> giveup "protocol error O" _ -> giveup "protocol error"
liftIO $ removeWhenExistsWith removeFile (fromRawFilePath tmpfile)
receivetofile h n = do
b <- liftIO receivebytestring
liftIO $ atomically $
putTMVar owaitv ()
`orElse`
readTMVar oclosedv
n' <- storetofile h n (L.toChunks b)
-- Normally all the data is sent in a single
-- lazy bytestring. However, when the special
-- remote is a node in a cluster, a PUT is
-- streamed to it in multiple chunks.
if n' == 0
then return ()
else receivetofile h n'
storetofile _ n [] = pure n
storetofile h n (b:bs) = do
B.hPut h b
storetofile h (n - fromIntegral (B.length b)) bs
proxyget offset af k = withproxytmpfile k $ \tmpfile -> do proxyget offset af k = withproxytmpfile k $ \tmpfile -> do
-- Don't verify the content from the remote, -- Don't verify the content from the remote,
@ -206,6 +239,70 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv endv = go
receivemessage >>= \case receivemessage >>= \case
Just SUCCESS -> return () Just SUCCESS -> return ()
Just FAILURE -> return () Just FAILURE -> return ()
Just _ -> giveup "protocol error P" Just _ -> giveup "protocol error"
Nothing -> return () Nothing -> return ()
{- Check if this repository can proxy for a specified remote uuid,
- and if so enable proxying for it. -}
checkCanProxy :: UUID -> UUID -> Annex Bool
checkCanProxy remoteuuid ouruuid = do
ourproxies <- M.lookup ouruuid <$> getProxies
checkCanProxy' ourproxies remoteuuid >>= \case
Right v -> do
Annex.changeState $ \st -> st { Annex.proxyremote = Just v }
return True
Left Nothing -> return False
Left (Just err) -> giveup err
checkCanProxy' :: Maybe (S.Set Proxy) -> UUID -> Annex (Either (Maybe String) (Either ClusterUUID Remote))
checkCanProxy' Nothing _ = return (Left Nothing)
checkCanProxy' (Just proxies) remoteuuid =
case filter (\p -> proxyRemoteUUID p == remoteuuid) (S.toList proxies) of
[] -> notconfigured
ps -> case mkClusterUUID remoteuuid of
Just cu -> proxyforcluster cu
Nothing -> proxyfor ps
where
-- This repository may have multiple remotes that access the same
-- repository. Proxy for the lowest cost one that is configured to
-- be used as a proxy.
proxyfor ps = do
rs <- concat . Remote.byCost <$> Remote.remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
let sameuuid r = Remote.uuid r == remoteuuid
let samename r p = Remote.name r == proxyRemoteName p
case headMaybe (filter (\r -> sameuuid r && proxyisconfigured rs myclusters r && any (samename r) ps) rs) of
Nothing -> notconfigured
Just r -> return (Right (Right r))
-- Only proxy for a remote when the git configuration
-- allows it. This is important to prevent changes to
-- the git-annex branch causing unexpected proxying for remotes.
proxyisconfigured rs myclusters r
| remoteAnnexProxy (Remote.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . Remote.gitconfig) $
filter (\p -> Remote.uuid p == proxyuuid) rs
Nothing -> False
proxyforcluster cu = do
clusters <- getClusters
if M.member cu (clusterUUIDs clusters)
then return (Right (Left cu))
else notconfigured
notconfigured = M.lookup remoteuuid <$> uuidDescMap >>= \case
Just desc -> return $ Left $ Just $
"not configured to proxy for repository " ++ fromUUIDDesc desc
Nothing -> return $ Left Nothing
mkProxyMethods :: ProxyMethods
mkProxyMethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing
, addedContent = \u k -> logChange k u InfoPresent
}

View file

@ -1,6 +1,6 @@
{- verification {- verification
- -
- Copyright 2010-2022 Joey Hess <id@joeyh.name> - Copyright 2010-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -21,6 +21,8 @@ module Annex.Verify (
finishVerifyKeyContentIncrementally, finishVerifyKeyContentIncrementally,
verifyKeyContentIncrementally, verifyKeyContentIncrementally,
IncrementalVerifier(..), IncrementalVerifier(..),
writeVerifyChunk,
resumeVerifyFromOffset,
tailVerify, tailVerify,
) where ) where
@ -32,6 +34,7 @@ import qualified Types.Backend
import qualified Backend import qualified Backend
import Types.Remote (unVerified, Verification(..), RetrievalSecurityPolicy(..)) import Types.Remote (unVerified, Verification(..), RetrievalSecurityPolicy(..))
import Utility.Hash (IncrementalVerifier(..)) import Utility.Hash (IncrementalVerifier(..))
import Utility.Metered
import Annex.WorkerPool import Annex.WorkerPool
import Types.WorkerPool import Types.WorkerPool
import Types.Key import Types.Key
@ -213,6 +216,44 @@ verifyKeyContentIncrementally verifyconfig k a = do
a miv a miv
snd <$> finishVerifyKeyContentIncrementally miv snd <$> finishVerifyKeyContentIncrementally miv
writeVerifyChunk :: Maybe IncrementalVerifier -> Handle -> S.ByteString -> IO ()
writeVerifyChunk (Just iv) h c = do
S.hPut h c
updateIncrementalVerifier iv c
writeVerifyChunk Nothing h c = S.hPut h c
{- Given a file handle that is open for reading (and likely also for writing),
- and an offset, feeds the current content of the file up to the offset to
- the IncrementalVerifier. Leaves the file seeked to the offset.
- Returns the meter with the offset applied. -}
resumeVerifyFromOffset
:: Integer
-> Maybe IncrementalVerifier
-> MeterUpdate
-> Handle
-> IO MeterUpdate
resumeVerifyFromOffset o incrementalverifier meterupdate h
| o /= 0 = do
maybe noop (`go` o) incrementalverifier
-- Make sure the handle is seeked to the offset.
-- (Reading the file probably left it there
-- when that was done, but let's be sure.)
hSeek h AbsoluteSeek o
return offsetmeterupdate
| otherwise = return meterupdate
where
offsetmeterupdate = offsetMeterUpdate meterupdate (toBytesProcessed o)
go iv n
| n == 0 = return ()
| otherwise = do
let c = if n > fromIntegral defaultChunkSize
then defaultChunkSize
else fromIntegral n
b <- S.hGet h c
updateIncrementalVerifier iv b
unless (b == S.empty) $
go iv (n - fromIntegral (S.length b))
-- | Runs a writer action that retrieves to a file. In another thread, -- | Runs a writer action that retrieves to a file. In another thread,
-- reads the file as it grows, and feeds it to the incremental verifier. -- reads the file as it grows, and feeds it to the incremental verifier.
-- --

View file

@ -97,13 +97,11 @@ changeStageTo mytid tv getnewstage = liftIO $
-- | Waits until there's an idle StartStage worker in the worker pool, -- | Waits until there's an idle StartStage worker in the worker pool,
-- removes it from the pool, and returns its state. -- removes it from the pool, and returns its state.
-- waitStartWorkerSlot :: TMVar (WorkerPool t) -> STM (t, WorkerStage)
-- If the worker pool is not already allocated, returns Nothing.
waitStartWorkerSlot :: TMVar (WorkerPool t) -> STM (Maybe (t, WorkerStage))
waitStartWorkerSlot tv = do waitStartWorkerSlot tv = do
pool <- takeTMVar tv pool <- takeTMVar tv
v <- go pool v <- go pool
return $ Just (v, StartStage) return (v, StartStage)
where where
go pool = case spareVals pool of go pool = case spareVals pool of
[] -> retry [] -> retry

View file

@ -52,6 +52,11 @@ buildFlags = filter (not . null)
#ifdef WITH_MAGICMIME #ifdef WITH_MAGICMIME
, "MagicMime" , "MagicMime"
#endif #endif
#ifdef WITH_SERVANT
, "Servant"
#else
#warning Building without servant, will not support annex+http urls or git-annex p2phttp.
#endif
#ifdef WITH_BENCHMARK #ifdef WITH_BENCHMARK
, "Benchmark" , "Benchmark"
#endif #endif

View file

@ -1,5 +1,13 @@
git-annex (10.20240702) UNRELEASED; urgency=medium git-annex (10.20240702) UNRELEASED; urgency=medium
* New HTTP API that is equivilant to the P2P protocol.
* annex+http and annex+https urls can be configured for
remote.name.annexUrl to use the HTTP API to communicate with a server.
This supports writable repositories, as well as accessing clusters
and proxied remotes over HTTP.
* New p2phttp command to serve the HTTP API.
* Added a build flag for servant, enabling annex+http urls and
git-annex p2phttp.
* assistant: Fix a race condition that could cause a pointer file to * assistant: Fix a race condition that could cause a pointer file to
get ingested into the annex. get ingested into the annex.
* Avoid potential data loss in situations where git-annex-shell or * Avoid potential data loss in situations where git-annex-shell or

View file

@ -87,9 +87,8 @@ commandAction start = do
runconcurrent sizelimit Nothing = runnonconcurrent sizelimit runconcurrent sizelimit Nothing = runnonconcurrent sizelimit
runconcurrent sizelimit (Just tv) = runconcurrent sizelimit (Just tv) =
liftIO (atomically (waitStartWorkerSlot tv)) >>= maybe liftIO (atomically (waitStartWorkerSlot tv))
(runnonconcurrent sizelimit) >>= runconcurrent' sizelimit tv
(runconcurrent' sizelimit tv)
runconcurrent' sizelimit tv (workerstrd, workerstage) = do runconcurrent' sizelimit tv (workerstrd, workerstage) = do
aid <- liftIO $ async $ snd aid <- liftIO $ async $ snd
<$> Annex.run workerstrd <$> Annex.run workerstrd

View file

@ -1,6 +1,6 @@
{- git-annex main program {- git-annex main program
- -
- Copyright 2010-2022 Joey Hess <id@joeyh.name> - Copyright 2010-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -118,6 +118,9 @@ import qualified Command.Upgrade
import qualified Command.Forget import qualified Command.Forget
import qualified Command.OldKeys import qualified Command.OldKeys
import qualified Command.P2P import qualified Command.P2P
#ifdef WITH_SERVANT
import qualified Command.P2PHttp
#endif
import qualified Command.Proxy import qualified Command.Proxy
import qualified Command.DiffDriver import qualified Command.DiffDriver
import qualified Command.Smudge import qualified Command.Smudge
@ -245,6 +248,9 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
, Command.Forget.cmd , Command.Forget.cmd
, Command.OldKeys.cmd , Command.OldKeys.cmd
, Command.P2P.cmd , Command.P2P.cmd
#ifdef WITH_SERVANT
, Command.P2PHttp.cmd
#endif
, Command.Proxy.cmd , Command.Proxy.cmd
, Command.DiffDriver.cmd , Command.DiffDriver.cmd
, Command.Smudge.cmd , Command.Smudge.cmd

View file

@ -8,7 +8,6 @@
module CmdLine.GitAnnexShell where module CmdLine.GitAnnexShell where
import Annex.Common import Annex.Common
import qualified Annex
import qualified Git.Construct import qualified Git.Construct
import qualified Git.Config import qualified Git.Config
import CmdLine import CmdLine
@ -20,11 +19,7 @@ import CmdLine.GitAnnexShell.Fields
import Remote.GCrypt (getGCryptUUID) import Remote.GCrypt (getGCryptUUID)
import P2P.Protocol (ServerMode(..)) import P2P.Protocol (ServerMode(..))
import Git.Types import Git.Types
import qualified Types.Remote as R import Annex.Proxy
import Logs.Proxy
import Logs.Cluster
import Logs.UUID
import Remote
import qualified Command.ConfigList import qualified Command.ConfigList
import qualified Command.NotifyChanges import qualified Command.NotifyChanges
@ -36,7 +31,6 @@ import qualified Command.SendKey
import qualified Command.DropKey import qualified Command.DropKey
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S
cmdsMap :: M.Map ServerMode [Command] cmdsMap :: M.Map ServerMode [Command]
cmdsMap = M.fromList $ map mk cmdsMap = M.fromList $ map mk
@ -90,7 +84,7 @@ commonShellOptions =
check u check u
| u == toUUID expected = noop | u == toUUID expected = noop
| otherwise = | otherwise =
unlessM (checkProxy (toUUID expected) u) $ unlessM (checkCanProxy (toUUID expected) u) $
unexpectedUUID expected u unexpectedUUID expected u
checkGCryptUUID expected = check =<< getGCryptUUID True =<< gitRepo checkGCryptUUID expected = check =<< getGCryptUUID True =<< gitRepo
@ -184,61 +178,3 @@ checkField (field, val)
| field == fieldName remoteUUID = fieldCheck remoteUUID val | field == fieldName remoteUUID = fieldCheck remoteUUID val
| field == fieldName autoInit = fieldCheck autoInit val | field == fieldName autoInit = fieldCheck autoInit val
| otherwise = False | otherwise = False
{- Check if this repository can proxy for a specified remote uuid,
- and if so enable proxying for it. -}
checkProxy :: UUID -> UUID -> Annex Bool
checkProxy remoteuuid ouruuid = M.lookup ouruuid <$> getProxies >>= \case
Nothing -> return False
-- This repository has (or had) proxying enabled. So it's
-- ok to display error messages that talk about proxies.
Just proxies ->
case filter (\p -> proxyRemoteUUID p == remoteuuid) (S.toList proxies) of
[] -> notconfigured
ps -> case mkClusterUUID remoteuuid of
Just cu -> proxyforcluster cu
Nothing -> proxyfor ps
where
-- This repository may have multiple remotes that access the same
-- repository. Proxy for the lowest cost one that is configured to
-- be used as a proxy.
proxyfor ps = do
rs <- concat . byCost <$> remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
let sameuuid r = uuid r == remoteuuid
let samename r p = name r == proxyRemoteName p
case headMaybe (filter (\r -> sameuuid r && proxyisconfigured rs myclusters r && any (samename r) ps) rs) of
Nothing -> notconfigured
Just r -> do
Annex.changeState $ \st ->
st { Annex.proxyremote = Just (Right r) }
return True
-- Only proxy for a remote when the git configuration
-- allows it. This is important to prevent changes to
-- the git-annex branch making git-annex-shell unexpectedly
-- proxy for remotes.
proxyisconfigured rs myclusters r
| remoteAnnexProxy (R.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ R.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (R.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . R.gitconfig) $
filter (\p -> R.uuid p == proxyuuid) rs
Nothing -> False
proxyforcluster cu = do
clusters <- getClusters
if M.member cu (clusterUUIDs clusters)
then do
Annex.changeState $ \st ->
st { Annex.proxyremote = Just (Left cu) }
return True
else notconfigured
notconfigured = M.lookup remoteuuid <$> uuidDescMap >>= \case
Just desc -> giveup $ "not configured to proxy for repository " ++ fromUUIDDesc desc
Nothing -> return False

View file

@ -50,7 +50,7 @@ start cu clustername gatewayremote = starting "extendcluster" ai si $ do
let setcus f = setConfig f (fromUUID (fromClusterUUID cu)) let setcus f = setConfig f (fromUUID (fromClusterUUID cu))
unless (M.member clustername myclusters) $ do unless (M.member clustername myclusters) $ do
setcus $ annexConfig ("cluster." <> encodeBS clustername) setcus $ annexConfig ("cluster." <> encodeBS clustername)
setcus $ remoteAnnexConfig gatewayremote $ setcus $ mkRemoteConfigKey gatewayremote $
remoteGitConfigKey ClusterGatewayField remoteGitConfigKey ClusterGatewayField
next $ return True next $ return True
where where

173
Command/P2PHttp.hs Normal file
View file

@ -0,0 +1,173 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
module Command.P2PHttp where
import Command
import P2P.Http.Server
import P2P.Http.Url
import qualified P2P.Protocol as P2P
import Utility.Env
import Servant
import qualified Network.Wai.Handler.Warp as Warp
import qualified Network.Wai.Handler.WarpTLS as Warp
import Network.Socket (PortNumber)
import qualified Data.Map as M
import Data.String
cmd :: Command
cmd = withAnnexOptions [jobsOption] $ command "p2phttp" SectionPlumbing
"communicate in P2P protocol over http"
paramNothing (seek <$$> optParser)
data Options = Options
{ portOption :: Maybe PortNumber
, bindOption :: Maybe String
, certFileOption :: Maybe FilePath
, privateKeyFileOption :: Maybe FilePath
, chainFileOption :: [FilePath]
, authEnvOption :: Bool
, authEnvHttpOption :: Bool
, unauthReadOnlyOption :: Bool
, unauthAppendOnlyOption :: Bool
, wideOpenOption :: Bool
, proxyConnectionsOption :: Maybe Integer
, clusterJobsOption :: Maybe Int
}
optParser :: CmdParamsDesc -> Parser Options
optParser _ = Options
<$> optional (option auto
( long "port" <> metavar paramNumber
<> help "specify port to listen on"
))
<*> optional (strOption
( long "bind" <> metavar paramAddress
<> help "specify address to bind to"
))
<*> optional (strOption
( long "certfile" <> metavar paramFile
<> help "TLS certificate file for HTTPS"
))
<*> optional (strOption
( long "privatekeyfile" <> metavar paramFile
<> help "TLS private key file for HTTPS"
))
<*> many (strOption
( long "chainfile" <> metavar paramFile
<> help "TLS chain file"
))
<*> switch
( long "authenv"
<> help "authenticate users from environment (https only)"
)
<*> switch
( long "authenv-http"
<> help "authenticate users from environment (including http)"
)
<*> switch
( long "unauth-readonly"
<> help "allow unauthenticated users to read the repository"
)
<*> switch
( long "unauth-appendonly"
<> help "allow unauthenticated users to read and append to the repository"
)
<*> switch
( long "wideopen"
<> help "give unauthenticated users full read+write access"
)
<*> optional (option auto
( long "proxyconnections" <> metavar paramNumber
<> help "maximum number of idle connections when proxying"
))
<*> optional (option auto
( long "clusterjobs" <> metavar paramNumber
<> help "number of concurrent node accesses per connection"
))
seek :: Options -> CommandSeek
seek o = getAnnexWorkerPool $ \workerpool ->
withP2PConnections workerpool
(fromMaybe 1 $ proxyConnectionsOption o)
(fromMaybe 1 $ clusterJobsOption o)
(go workerpool)
where
go workerpool acquireconn = liftIO $ do
authenv <- getAuthEnv
st <- mkP2PHttpServerState acquireconn workerpool $
mkGetServerMode authenv o
let settings = Warp.setPort port $ Warp.setHost host $
Warp.defaultSettings
case (certFileOption o, privateKeyFileOption o) of
(Nothing, Nothing) -> Warp.runSettings settings (p2pHttpApp st)
(Just certfile, Just privatekeyfile) -> do
let tlssettings = Warp.tlsSettingsChain
certfile (chainFileOption o) privatekeyfile
Warp.runTLS tlssettings settings (p2pHttpApp st)
_ -> giveup "You must use both --certfile and --privatekeyfile options to enable HTTPS."
port = maybe
(fromIntegral defaultP2PHttpProtocolPort)
fromIntegral
(portOption o)
host = maybe
(fromString "*") -- both ipv4 and ipv6
fromString
(bindOption o)
mkGetServerMode :: M.Map Auth P2P.ServerMode -> Options -> GetServerMode
mkGetServerMode _ o _ Nothing
| wideOpenOption o = Just P2P.ServeReadWrite
| unauthAppendOnlyOption o = Just P2P.ServeAppendOnly
| unauthReadOnlyOption o = Just P2P.ServeReadOnly
| otherwise = Nothing
mkGetServerMode authenv o issecure (Just auth) =
case (issecure, authEnvOption o, authEnvHttpOption o) of
(Secure, True, _) -> checkauth
(NotSecure, _, True) -> checkauth
_ -> noauth
where
checkauth = case M.lookup auth authenv of
Just servermode -> Just servermode
Nothing -> noauth
noauth = mkGetServerMode authenv o issecure Nothing
getAuthEnv :: IO (M.Map Auth P2P.ServerMode)
getAuthEnv = do
environ <- getEnvironment
let permmap = M.fromList (mapMaybe parseperms environ)
return $ M.fromList $
map (addperms permmap) $
mapMaybe parseusername environ
where
parseperms (k, v) = case deprefix "GIT_ANNEX_P2PHTTP_PERMISSIONS_" k of
Nothing -> Nothing
Just username -> case v of
"readonly" -> Just
(encodeBS username, P2P.ServeReadOnly)
"appendonly" -> Just
(encodeBS username, P2P.ServeAppendOnly)
_ -> Nothing
parseusername (k, v) = case deprefix "GIT_ANNEX_P2PHTTP_PASSWORD_" k of
Nothing -> Nothing
Just username -> Just $ Auth (encodeBS username) (encodeBS v)
deprefix prefix s
| prefix `isPrefixOf` s = Just (drop (length prefix) s)
| otherwise = Nothing
addperms permmap auth@(Auth user _) =
case M.lookup user permmap of
Nothing -> (auth, P2P.ServeReadWrite)
Just perms -> (auth, perms)

View file

@ -16,7 +16,6 @@ import qualified Annex
import Annex.Proxy import Annex.Proxy
import Annex.UUID import Annex.UUID
import qualified CmdLine.GitAnnexShell.Checks as Checks import qualified CmdLine.GitAnnexShell.Checks as Checks
import Logs.Location
import Logs.Cluster import Logs.Cluster
import Annex.Cluster import Annex.Cluster
import qualified Remote import qualified Remote
@ -61,7 +60,7 @@ performLocal theiruuid servermode = do
performProxy :: UUID -> P2P.ServerMode -> Remote -> CommandPerform performProxy :: UUID -> P2P.ServerMode -> Remote -> CommandPerform
performProxy clientuuid servermode r = do performProxy clientuuid servermode r = do
clientside <- proxyClientSide clientuuid clientside <- mkProxyClientSide clientuuid
getClientProtocolVersion (Remote.uuid r) clientside getClientProtocolVersion (Remote.uuid r) clientside
(withclientversion clientside) (withclientversion clientside)
(p2pErrHandler noop) (p2pErrHandler noop)
@ -77,29 +76,29 @@ performProxy clientuuid servermode r = do
p2pDone p2pDone
let errhandler = p2pErrHandler (closeRemoteSide remoteside) let errhandler = p2pErrHandler (closeRemoteSide remoteside)
proxystate <- liftIO mkProxyState proxystate <- liftIO mkProxyState
let runproxy othermsg' = proxy closer let proxyparams = ProxyParams
proxymethods proxystate { proxyMethods = mkProxyMethods
servermode clientside , proxyState = proxystate
(Remote.uuid r) , proxyServerMode = servermode
(singleProxySelector remoteside) , proxyClientSide = clientside
concurrencyconfig , proxyUUID = Remote.uuid r
protocolversion othermsg' errhandler , proxySelector = singleProxySelector remoteside
, proxyConcurrencyConfig = concurrencyconfig
, proxyClientProtocolVersion = protocolversion
}
let runproxy othermsg' = proxy closer proxyparams
othermsg' errhandler
sendClientProtocolVersion clientside othermsg protocolversion sendClientProtocolVersion clientside othermsg protocolversion
runproxy errhandler runproxy errhandler
withclientversion _ Nothing = p2pDone withclientversion _ Nothing = p2pDone
proxymethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing
, addedContent = \u k -> logChange k u InfoPresent
}
performProxyCluster :: UUID -> ClusterUUID -> P2P.ServerMode -> CommandPerform performProxyCluster :: UUID -> ClusterUUID -> P2P.ServerMode -> CommandPerform
performProxyCluster clientuuid clusteruuid servermode = do performProxyCluster clientuuid clusteruuid servermode = do
clientside <- proxyClientSide clientuuid clientside <- mkProxyClientSide clientuuid
proxyCluster clusteruuid p2pDone servermode clientside p2pErrHandler proxyCluster clusteruuid p2pDone servermode clientside p2pErrHandler
proxyClientSide :: UUID -> Annex ClientSide mkProxyClientSide :: UUID -> Annex ClientSide
proxyClientSide clientuuid = do mkProxyClientSide clientuuid = do
clientrunst <- liftIO (mkRunState $ Serving clientuuid Nothing) clientrunst <- liftIO (mkRunState $ Serving clientuuid Nothing)
ClientSide clientrunst <$> liftIO (stdioP2PConnectionDupped Nothing) ClientSide clientrunst <$> liftIO (stdioP2PConnectionDupped Nothing)

View file

@ -42,7 +42,7 @@ getBasicAuthFromCredential r ccv u = do
Just c -> go (const noop) c Just c -> go (const noop) c
Nothing -> do Nothing -> do
let storeincache = \c -> atomically $ do let storeincache = \c -> atomically $ do
(CredentialCache cc') <- takeTMVar ccv CredentialCache cc' <- takeTMVar ccv
putTMVar ccv (CredentialCache (M.insert bu c cc')) putTMVar ccv (CredentialCache (M.insert bu c cc'))
go storeincache =<< getUrlCredential u r go storeincache =<< getUrlCredential u r
Nothing -> go (const noop) =<< getUrlCredential u r Nothing -> go (const noop) =<< getUrlCredential u r
@ -113,7 +113,9 @@ data CredentialCache = CredentialCache (M.Map CredentialBaseURL Credential)
-- when credential.useHttpPath is false, one Credential is cached -- when credential.useHttpPath is false, one Credential is cached
-- for each git repo accessed, and there are a reasonably small number of -- for each git repo accessed, and there are a reasonably small number of
-- those, so the cache will not grow too large. -- those, so the cache will not grow too large.
data CredentialBaseURL = CredentialBaseURL URI data CredentialBaseURL
= CredentialBaseURI URI
| CredentialBaseURL String
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord)
mkCredentialBaseURL :: Repo -> URLString -> Maybe CredentialBaseURL mkCredentialBaseURL :: Repo -> URLString -> Maybe CredentialBaseURL
@ -123,4 +125,4 @@ mkCredentialBaseURL r s = do
Config.get (ConfigKey "credential.useHttpPath") (ConfigValue "") r Config.get (ConfigKey "credential.useHttpPath") (ConfigValue "") r
if usehttppath if usehttppath
then Nothing then Nothing
else Just $ CredentialBaseURL $ u { uriPath = "" } else Just $ CredentialBaseURI $ u { uriPath = "" }

View file

@ -29,7 +29,6 @@ import Annex.Verify
import Control.Monad.Free import Control.Monad.Free
import Control.Concurrent.STM import Control.Concurrent.STM
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
import qualified Data.ByteString as S
-- Full interpreter for Proto, that can receive and send objects. -- Full interpreter for Proto, that can receive and send objects.
runFullProto :: RunState -> P2PConnection -> Proto a -> Annex (Either ProtoFailure a) runFullProto :: RunState -> P2PConnection -> Proto a -> Annex (Either ProtoFailure a)
@ -101,6 +100,26 @@ runLocal runst runner a = case a of
Left e -> return $ Left $ ProtoFailureException e Left e -> return $ Left $ ProtoFailureException e
Right (Left e) -> return $ Left e Right (Left e) -> return $ Left e
Right (Right ok) -> runner (next ok) Right (Right ok) -> runner (next ok)
SendContentWith consumer getb validitycheck next -> do
v <- tryNonAsync $ do
let fallback = return $ Left $
ProtoFailureMessage "Transfer failed"
let consumer' b ti = do
validator <- consumer b
indicatetransferred ti
return validator
runner getb >>= \case
Left e -> giveup $ describeProtoFailure e
Right b -> checktransfer (\ti -> Right <$> consumer' b ti) fallback >>= \case
Left e -> return (Left e)
Right validator ->
runner validitycheck >>= \case
Right v -> Right <$> validator v
_ -> Right <$> validator Nothing
case v of
Left e -> return $ Left $ ProtoFailureException e
Right (Left e) -> return $ Left e
Right (Right ok) -> runner (next ok)
SetPresent k u next -> do SetPresent k u next -> do
v <- tryNonAsync $ logChange k u InfoPresent v <- tryNonAsync $ logChange k u InfoPresent
case v of case v of
@ -171,43 +190,13 @@ runLocal runst runner a = case a of
-- a client. -- a client.
Client _ -> ta nullMeterUpdate Client _ -> ta nullMeterUpdate
resumefromoffset o incrementalverifier p h
| o /= 0 = do
p' <- case incrementalverifier of
Just iv -> do
go iv o
return p
_ -> return $ offsetMeterUpdate p (toBytesProcessed o)
-- Make sure the handle is seeked to the offset.
-- (Reading the file probably left it there
-- when that was done, but let's be sure.)
hSeek h AbsoluteSeek o
return p'
| otherwise = return p
where
go iv n
| n == 0 = return ()
| otherwise = do
let c = if n > fromIntegral defaultChunkSize
then defaultChunkSize
else fromIntegral n
b <- S.hGet h c
updateIncrementalVerifier iv b
unless (b == S.empty) $
go iv (n - fromIntegral (S.length b))
storefile dest (Offset o) (Len l) getb incrementalverifier validitycheck p ti = do storefile dest (Offset o) (Len l) getb incrementalverifier validitycheck p ti = do
v <- runner getb v <- runner getb
case v of case v of
Right b -> do Right b -> do
liftIO $ withBinaryFile dest ReadWriteMode $ \h -> do liftIO $ withBinaryFile dest ReadWriteMode $ \h -> do
p' <- resumefromoffset o incrementalverifier p h p' <- resumeVerifyFromOffset o incrementalverifier p h
let writechunk = case incrementalverifier of meteredWrite p' (writeVerifyChunk incrementalverifier h) b
Nothing -> \c -> S.hPut h c
Just iv -> \c -> do
S.hPut h c
updateIncrementalVerifier iv c
meteredWrite p' writechunk b
indicatetransferred ti indicatetransferred ti
rightsize <- do rightsize <- do

184
P2P/Http.hs Normal file
View file

@ -0,0 +1,184 @@
{- P2P protocol over HTTP
-
- https://git-annex.branchable.com/design/p2p_protocol_over_http/
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
module P2P.Http (
module P2P.Http,
module P2P.Http.Types,
) where
import P2P.Http.Types
import Servant
import qualified Data.ByteString as B
type P2PHttpAPI
= "git-annex" :> SU :> PV3 :> "key" :> GetAPI
:<|> "git-annex" :> SU :> PV2 :> "key" :> GetAPI
:<|> "git-annex" :> SU :> PV1 :> "key" :> GetAPI
:<|> "git-annex" :> SU :> PV0 :> "key" :> GetAPI
:<|> "git-annex" :> SU :> PV3 :> "checkpresent" :> CheckPresentAPI
:<|> "git-annex" :> SU :> PV2 :> "checkpresent" :> CheckPresentAPI
:<|> "git-annex" :> SU :> PV1 :> "checkpresent" :> CheckPresentAPI
:<|> "git-annex" :> SU :> PV0 :> "checkpresent" :> CheckPresentAPI
:<|> "git-annex" :> SU :> PV3 :> "remove" :> RemoveAPI RemoveResultPlus
:<|> "git-annex" :> SU :> PV2 :> "remove" :> RemoveAPI RemoveResultPlus
:<|> "git-annex" :> SU :> PV1 :> "remove" :> RemoveAPI RemoveResult
:<|> "git-annex" :> SU :> PV0 :> "remove" :> RemoveAPI RemoveResult
:<|> "git-annex" :> SU :> PV3 :> "remove-before" :> RemoveBeforeAPI
:<|> "git-annex" :> SU :> PV3 :> "gettimestamp" :> GetTimestampAPI
:<|> "git-annex" :> SU :> PV3 :> "put" :> PutAPI PutResultPlus
:<|> "git-annex" :> SU :> PV2 :> "put" :> PutAPI PutResultPlus
:<|> "git-annex" :> SU :> PV1 :> "put" :> PutAPI PutResult
:<|> "git-annex" :> SU :> PV0 :> "put" :> PutAPI PutResult
:<|> "git-annex" :> SU :> PV3 :> "putoffset"
:> PutOffsetAPI PutOffsetResultPlus
:<|> "git-annex" :> SU :> PV2 :> "putoffset"
:> PutOffsetAPI PutOffsetResultPlus
:<|> "git-annex" :> SU :> PV1 :> "putoffset"
:> PutOffsetAPI PutOffsetResult
:<|> "git-annex" :> SU :> PV3 :> "lockcontent" :> LockContentAPI
:<|> "git-annex" :> SU :> PV2 :> "lockcontent" :> LockContentAPI
:<|> "git-annex" :> SU :> PV1 :> "lockcontent" :> LockContentAPI
:<|> "git-annex" :> SU :> PV0 :> "lockcontent" :> LockContentAPI
:<|> "git-annex" :> SU :> PV3 :> "keeplocked" :> KeepLockedAPI
:<|> "git-annex" :> SU :> PV2 :> "keeplocked" :> KeepLockedAPI
:<|> "git-annex" :> SU :> PV1 :> "keeplocked" :> KeepLockedAPI
:<|> "git-annex" :> SU :> PV0 :> "keeplocked" :> KeepLockedAPI
:<|> "git-annex" :> SU :> "key" :> GetGenericAPI
p2pHttpAPI :: Proxy P2PHttpAPI
p2pHttpAPI = Proxy
type GetGenericAPI
= CaptureKey
:> CU Optional
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> StreamGet NoFraming OctetStream
(Headers '[DataLengthHeader] (SourceIO B.ByteString))
type GetAPI
= CaptureKey
:> CU Required
:> BypassUUIDs
:> AssociatedFileParam
:> OffsetParam
:> IsSecure
:> AuthHeader
:> StreamGet NoFraming OctetStream
(Headers '[DataLengthHeader] (SourceIO B.ByteString))
type CheckPresentAPI
= KeyParam
:> CU Required
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> Post '[JSON] CheckPresentResult
type RemoveAPI result
= KeyParam
:> CU Required
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> Post '[JSON] result
type RemoveBeforeAPI
= KeyParam
:> CU Required
:> BypassUUIDs
:> QueryParam' '[Required] "timestamp" Timestamp
:> IsSecure
:> AuthHeader
:> Post '[JSON] RemoveResultPlus
type GetTimestampAPI
= CU Required
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> Post '[JSON] GetTimestampResult
type PutAPI result
= DataLengthHeaderRequired
:> KeyParam
:> CU Required
:> BypassUUIDs
:> AssociatedFileParam
:> OffsetParam
:> StreamBody NoFraming OctetStream (SourceIO B.ByteString)
:> IsSecure
:> AuthHeader
:> Post '[JSON] result
type PutOffsetAPI result
= KeyParam
:> CU Required
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> Post '[JSON] result
type LockContentAPI
= KeyParam
:> CU Required
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> Post '[JSON] LockResult
type KeepLockedAPI
= LockIDParam
:> CU Optional
:> BypassUUIDs
:> IsSecure
:> AuthHeader
:> Header "Connection" ConnectionKeepAlive
:> Header "Keep-Alive" KeepAlive
:> StreamBody NewlineFraming JSON (SourceIO UnlockRequest)
:> Post '[JSON] LockResult
type SU = Capture "serveruuid" (B64UUID ServerSide)
type CU req = QueryParam' '[req] "clientuuid" (B64UUID ClientSide)
type BypassUUIDs = QueryParams "bypass" (B64UUID Bypass)
type CaptureKey = Capture "key" B64Key
type KeyParam = QueryParam' '[Required] "key" B64Key
type AssociatedFileParam = QueryParam "associatedfile" B64FilePath
type OffsetParam = QueryParam "offset" Offset
type DataLengthHeader = Header DataLengthHeader' DataLength
type DataLengthHeaderRequired = Header' '[Required] DataLengthHeader' DataLength
type DataLengthHeader' = "X-git-annex-data-length"
type LockIDParam = QueryParam' '[Required] "lockid" LockID
type AuthHeader = Header "Authorization" Auth
type PV3 = Capture "v3" V3
type PV2 = Capture "v2" V2
type PV1 = Capture "v1" V1
type PV0 = Capture "v0" V0

535
P2P/Http/Client.hs Normal file
View file

@ -0,0 +1,535 @@
{- P2P protocol over HTTP, client
-
- https://git-annex.branchable.com/design/p2p_protocol_over_http/
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds, TypeApplications #-}
{-# LANGUAGE CPP #-}
module P2P.Http.Client (
module P2P.Http.Client,
module P2P.Http.Types,
Validity(..),
) where
import Types
import P2P.Http.Types
import P2P.Protocol hiding (Offset, Bypass, auth, FileSize)
import Utility.Metered
import Utility.FileSize
import Types.NumCopies
#ifdef WITH_SERVANT
import qualified Annex
import Annex.UUID
import Annex.Url
import Types.Remote
import P2P.Http
import P2P.Http.Url
import Annex.Common
import Annex.Concurrent
import Utility.Url (BasicAuth(..))
import Utility.HumanTime
import qualified Git.Credential as Git
import Servant hiding (BasicAuthData(..))
import Servant.Client.Streaming
import qualified Servant.Types.SourceT as S
import Network.HTTP.Types.Status
import Network.HTTP.Client
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy.Internal as LI
import qualified Data.Map as M
import Data.Time.Clock.POSIX
import Control.Concurrent.STM
import Control.Concurrent.Async
import Control.Concurrent
import System.IO.Unsafe
#endif
import qualified Data.ByteString.Lazy as L
type ClientAction a
#ifdef WITH_SERVANT
= ClientEnv
-> ProtocolVersion
-> B64UUID ServerSide
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> Maybe Auth
-> Annex (Either ClientError a)
#else
= ()
#endif
p2pHttpClient
:: Remote
-> (String -> Annex a)
-> ClientAction a
-> Annex a
p2pHttpClient rmt fallback clientaction =
p2pHttpClientVersions (const True) rmt fallback clientaction >>= \case
Just res -> return res
Nothing -> fallback "git-annex HTTP API server is missing an endpoint"
p2pHttpClientVersions
:: (ProtocolVersion -> Bool)
-> Remote
-> (String -> Annex a)
-> ClientAction a
-> Annex (Maybe a)
#ifdef WITH_SERVANT
p2pHttpClientVersions allowedversion rmt fallback clientaction =
case p2pHttpBaseUrl <$> remoteAnnexP2PHttpUrl (gitconfig rmt) of
Nothing -> error "internal"
Just baseurl -> do
mgr <- httpManager <$> getUrlOptions
let clientenv = mkClientEnv mgr baseurl
ccv <- Annex.getRead Annex.gitcredentialcache
Git.CredentialCache cc <- liftIO $ atomically $
readTMVar ccv
case M.lookup (Git.CredentialBaseURL credentialbaseurl) cc of
Nothing -> go clientenv Nothing False Nothing versions
Just cred -> go clientenv (Just cred) True (credauth cred) versions
where
versions = filter allowedversion allProtocolVersions
go clientenv mcred credcached mauth (v:vs) = do
myuuid <- getUUID
res <- clientaction clientenv v
(B64UUID (uuid rmt))
(B64UUID myuuid)
[]
mauth
case res of
Right resp -> do
unless credcached $ cachecred mcred
return (Just resp)
Left (FailureResponse _ resp)
| statusCode (responseStatusCode resp) == 404 && not (null vs) ->
go clientenv mcred credcached mauth vs
| statusCode (responseStatusCode resp) == 401 ->
case mcred of
Nothing -> authrequired clientenv (v:vs)
Just cred -> do
inRepo $ Git.rejectUrlCredential cred
Just <$> fallback (showstatuscode resp)
| otherwise -> Just <$> fallback (showstatuscode resp)
Left (ConnectionError ex) -> case fromException ex of
Just (HttpExceptionRequest _ (ConnectionFailure err)) -> Just <$> fallback
("unable to connect to HTTP server: " ++ show err)
_ -> Just <$> fallback (show ex)
Left clienterror -> Just <$> fallback
("git-annex HTTP API server returned an unexpected response: " ++ show clienterror)
go _ _ _ _ [] = return Nothing
authrequired clientenv vs = do
cred <- prompt $
inRepo $ Git.getUrlCredential credentialbaseurl
go clientenv (Just cred) False (credauth cred) vs
showstatuscode resp =
show (statusCode (responseStatusCode resp))
++ " " ++
decodeBS (statusMessage (responseStatusCode resp))
credentialbaseurl = case p2pHttpUrlString <$> remoteAnnexP2PHttpUrl (gitconfig rmt) of
Nothing -> error "internal"
Just url -> url
credauth cred = do
ba <- Git.credentialBasicAuth cred
return $ Auth
(encodeBS (basicAuthUser ba))
(encodeBS (basicAuthPassword ba))
cachecred mcred = case mcred of
Just cred -> do
inRepo $ Git.approveUrlCredential cred
ccv <- Annex.getRead Annex.gitcredentialcache
liftIO $ atomically $ do
Git.CredentialCache cc <- takeTMVar ccv
putTMVar ccv $ Git.CredentialCache $
M.insert (Git.CredentialBaseURL credentialbaseurl) cred cc
Nothing -> noop
#else
p2pHttpClient _rmt fallback () = fallback
"This remote uses an annex+http url, but this version of git-annex is not build with support for that."
#endif
clientGet
:: Key
-> AssociatedFile
-> (L.ByteString -> IO BytesProcessed)
-- ^ Must consume the entire ByteString before returning its
-- total size.
-> Maybe FileSize
-- ^ Size of existing file, when resuming.
-> ClientAction Validity
#ifdef WITH_SERVANT
clientGet k af consumer startsz clientenv (ProtocolVersion ver) su cu bypass auth = liftIO $ do
let offset = fmap (Offset . fromIntegral) startsz
withClientM (cli (B64Key k) cu bypass baf offset auth) clientenv $ \case
Left err -> return (Left err)
Right respheaders -> do
b <- S.unSourceT (getResponse respheaders) gather
BytesProcessed len <- consumer b
let DataLength dl = case lookupResponseHeader @DataLengthHeader' respheaders of
Header hdr -> hdr
_ -> error "missing data length header"
return $ Right $
if dl == len then Valid else Invalid
where
cli =case ver of
3 -> v3 su V3
2 -> v2 su V2
1 -> v1 su V1
0 -> v0 su V0
_ -> error "unsupported protocol version"
v3 :<|> v2 :<|> v1 :<|> v0 :<|> _ = client p2pHttpAPI
gather = unsafeInterleaveIO . gather'
gather' S.Stop = return LI.Empty
gather' (S.Error err) = giveup err
gather' (S.Skip s) = gather' s
gather' (S.Effect ms) = ms >>= gather'
gather' (S.Yield v s) = LI.Chunk v <$> unsafeInterleaveIO (gather' s)
baf = associatedFileToB64FilePath af
#else
clientGet _ _ _ _ = ()
#endif
clientCheckPresent :: Key -> ClientAction Bool
#ifdef WITH_SERVANT
clientCheckPresent key clientenv (ProtocolVersion ver) su cu bypass auth =
liftIO $ withClientM (cli su (B64Key key) cu bypass auth) clientenv $ \case
Left err -> return (Left err)
Right (CheckPresentResult res) -> return (Right res)
where
cli = case ver of
3 -> flip v3 V3
2 -> flip v2 V2
1 -> flip v1 V1
0 -> flip v0 V0
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
v3 :<|> v2 :<|> v1 :<|> v0 :<|> _ = client p2pHttpAPI
#else
clientCheckPresent _ = ()
#endif
-- Similar to P2P.Protocol.remove.
clientRemoveWithProof
:: Maybe SafeDropProof
-> Key
-> Annex RemoveResultPlus
-> Remote
-> Annex RemoveResultPlus
clientRemoveWithProof proof k unabletoremove remote =
case safeDropProofEndTime =<< proof of
Nothing -> removeanytime
Just endtime -> removebefore endtime
where
removeanytime = p2pHttpClient remote giveup (clientRemove k)
removebefore endtime =
p2pHttpClientVersions useversion remote giveup clientGetTimestamp >>= \case
Just (GetTimestampResult (Timestamp remotetime)) ->
removebefore' endtime remotetime
-- Peer is too old to support REMOVE-BEFORE.
Nothing -> removeanytime
removebefore' endtime remotetime =
canRemoveBefore endtime remotetime (liftIO getPOSIXTime) >>= \case
Just remoteendtime -> p2pHttpClient remote giveup $
clientRemoveBefore k (Timestamp remoteendtime)
Nothing -> unabletoremove
useversion v = v >= ProtocolVersion 3
clientRemove :: Key -> ClientAction RemoveResultPlus
#ifdef WITH_SERVANT
clientRemove k clientenv (ProtocolVersion ver) su cu bypass auth =
liftIO $ withClientM cli clientenv return
where
bk = B64Key k
cli = case ver of
3 -> v3 su V3 bk cu bypass auth
2 -> v2 su V2 bk cu bypass auth
1 -> plus <$> v1 su V1 bk cu bypass auth
0 -> plus <$> v0 su V0 bk cu bypass auth
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
v3 :<|> v2 :<|> v1 :<|> v0 :<|> _ = client p2pHttpAPI
#else
clientRemove _ = ()
#endif
clientRemoveBefore
:: Key
-> Timestamp
-> ClientAction RemoveResultPlus
#ifdef WITH_SERVANT
clientRemoveBefore k ts clientenv (ProtocolVersion ver) su cu bypass auth =
liftIO $ withClientM (cli su (B64Key k) cu bypass ts auth) clientenv return
where
cli = case ver of
3 -> flip v3 V3
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
v3 :<|> _ = client p2pHttpAPI
#else
clientRemoveBefore _ _ = ()
#endif
clientGetTimestamp :: ClientAction GetTimestampResult
#ifdef WITH_SERVANT
clientGetTimestamp clientenv (ProtocolVersion ver) su cu bypass auth =
liftIO $ withClientM (cli su cu bypass auth) clientenv return
where
cli = case ver of
3 -> flip v3 V3
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|>
v3 :<|> _ = client p2pHttpAPI
#else
clientGetTimestamp = ()
#endif
clientPut
:: MeterUpdate
-> Key
-> Maybe Offset
-> AssociatedFile
-> FilePath
-> FileSize
-> Annex Bool
-- ^ Called after sending the file to check if it's valid.
-> ClientAction PutResultPlus
#ifdef WITH_SERVANT
clientPut meterupdate k moffset af contentfile contentfilesize validitycheck clientenv (ProtocolVersion ver) su cu bypass auth = do
checkv <- liftIO newEmptyTMVarIO
checkresultv <- liftIO newEmptyTMVarIO
let checker = do
liftIO $ atomically $ takeTMVar checkv
validitycheck >>= liftIO . atomically . putTMVar checkresultv
checkerthread <- liftIO . async =<< forkState checker
v <- liftIO $ withBinaryFile contentfile ReadMode $ \h -> do
when (offset /= 0) $
hSeek h AbsoluteSeek offset
withClientM (cli (stream h checkv checkresultv)) clientenv return
case v of
Left err -> do
void $ liftIO $ atomically $ tryPutTMVar checkv ()
join $ liftIO (wait checkerthread)
return (Left err)
Right res -> do
join $ liftIO (wait checkerthread)
return (Right res)
where
stream h checkv checkresultv = S.SourceT $ \a -> do
bl <- hGetContentsMetered h meterupdate
v <- newMVar (0, filter (not . B.null) (L.toChunks bl))
a (go v)
where
go v = S.fromActionStep B.null $ modifyMVar v $ \case
(n, (b:[])) -> do
let !n' = n + B.length b
ifM (checkvalid n')
( return ((n', []), b)
-- The key's content is invalid, but
-- the amount of data is the same as
-- the DataLengthHeader indicates.
-- Truncate the stream by one byte to
-- indicate to the server that it's
-- not valid.
, return
( (n' - 1, [])
, B.take (B.length b - 1) b
)
)
(n, []) -> do
void $ checkvalid n
return ((n, []), mempty)
(n, (b:bs)) ->
let !n' = n + B.length b
in return ((n', bs), b)
checkvalid n = do
void $ liftIO $ atomically $ tryPutTMVar checkv ()
valid <- liftIO $ atomically $ readTMVar checkresultv
if not valid
then return (n /= fromIntegral nlen)
else return True
baf = case af of
AssociatedFile Nothing -> Nothing
AssociatedFile (Just f) -> Just (B64FilePath f)
len = DataLength nlen
nlen = contentfilesize - offset
offset = case moffset of
Nothing -> 0
Just (Offset o) -> fromIntegral o
bk = B64Key k
cli src = case ver of
3 -> v3 su V3 len bk cu bypass baf moffset src auth
2 -> v2 su V2 len bk cu bypass baf moffset src auth
1 -> plus <$> v1 su V1 len bk cu bypass baf moffset src auth
0 -> plus <$> v0 su V0 len bk cu bypass baf moffset src auth
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|>
_ :<|>
v3 :<|> v2 :<|> v1 :<|> v0 :<|> _ = client p2pHttpAPI
#else
clientPut _ _ _ _ _ _ _ = ()
#endif
clientPutOffset
:: Key
-> ClientAction PutOffsetResultPlus
#ifdef WITH_SERVANT
clientPutOffset k clientenv (ProtocolVersion ver) su cu bypass auth
| ver == 0 = return (Right (PutOffsetResultPlus (Offset 0)))
| otherwise = liftIO $ withClientM cli clientenv return
where
bk = B64Key k
cli = case ver of
3 -> v3 su V3 bk cu bypass auth
2 -> v2 su V2 bk cu bypass auth
1 -> plus <$> v1 su V1 bk cu bypass auth
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|>
_ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
v3 :<|> v2 :<|> v1 :<|> _ = client p2pHttpAPI
#else
clientPutOffset _ = ()
#endif
clientLockContent
:: Key
-> ClientAction LockResult
#ifdef WITH_SERVANT
clientLockContent k clientenv (ProtocolVersion ver) su cu bypass auth =
liftIO $ withClientM (cli (B64Key k) cu bypass auth) clientenv return
where
cli = case ver of
3 -> v3 su V3
2 -> v2 su V2
1 -> v1 su V1
0 -> v0 su V0
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|>
_ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|>
v3 :<|> v2 :<|> v1 :<|> v0 :<|> _ = client p2pHttpAPI
#else
clientLockContent _ = ()
#endif
clientKeepLocked
:: LockID
-> UUID
-> a
-> (VerifiedCopy -> Annex a)
-- ^ Callback is run only after successfully connecting to the http
-- server. The lock will remain held until the callback returns,
-- and then will be dropped.
-> ClientAction a
#ifdef WITH_SERVANT
clientKeepLocked lckid remoteuuid unablelock callback clientenv (ProtocolVersion ver) su cu bypass auth = do
readyv <- liftIO newEmptyTMVarIO
keeplocked <- liftIO newEmptyTMVarIO
let cli' = cli lckid (Just cu) bypass auth
(Just connectionKeepAlive) (Just keepAlive)
(S.fromStepT (unlocksender readyv keeplocked))
starttime <- liftIO getPOSIXTime
tid <- liftIO $ async $ withClientM cli' clientenv $ \case
Right (LockResult _ _) ->
atomically $ writeTMVar readyv (Right False)
Left err ->
atomically $ writeTMVar readyv (Left err)
let releaselock = liftIO $ do
atomically $ putTMVar keeplocked False
wait tid
liftIO (atomically $ takeTMVar readyv) >>= \case
Left err -> do
liftIO $ wait tid
return (Left err)
Right False -> do
liftIO $ wait tid
return (Right unablelock)
Right True -> do
let checker = return $ Left $ starttime + retentionduration
Right
<$> withVerifiedCopy LockedCopy remoteuuid checker callback
`finally` releaselock
where
retentionduration = fromIntegral $
durationSeconds p2pDefaultLockContentRetentionDuration
unlocksender readyv keeplocked =
S.Yield (UnlockRequest False) $ S.Effect $ do
return $ S.Effect $ do
liftIO $ atomically $ void $
tryPutTMVar readyv (Right True)
stilllocked <- liftIO $ atomically $
takeTMVar keeplocked
return $ if stilllocked
then unlocksender readyv keeplocked
else S.Yield (UnlockRequest True) S.Stop
cli = case ver of
3 -> v3 su V3
2 -> v2 su V2
1 -> v1 su V1
0 -> v0 su V0
_ -> error "unsupported protocol version"
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|>
_ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|>
_ :<|> _ :<|> _ :<|> _ :<|>
v3 :<|> v2 :<|> v1 :<|> v0 :<|> _ = client p2pHttpAPI
#else
clientKeepLocked _ _ _ _ = ()
#endif

478
P2P/Http/Server.hs Normal file
View file

@ -0,0 +1,478 @@
{- P2P protocol over HTTP, server
-
- https://git-annex.branchable.com/design/p2p_protocol_over_http/
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
module P2P.Http.Server (
module P2P.Http,
module P2P.Http.Server,
module P2P.Http.Types,
module P2P.Http.State,
) where
import Annex.Common
import P2P.Http
import P2P.Http.Types
import P2P.Http.State
import P2P.Protocol hiding (Offset, Bypass, auth)
import P2P.IO
import P2P.Annex
import Annex.WorkerPool
import Types.WorkerPool
import Types.Direction
import Utility.Metered
import Servant
import qualified Servant.Types.SourceT as S
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as LI
import Control.Concurrent.STM
import Control.Concurrent.Async
import Control.Concurrent
import System.IO.Unsafe
import Data.Either
p2pHttpApp :: P2PHttpServerState -> Application
p2pHttpApp = serve p2pHttpAPI . serveP2pHttp
serveP2pHttp :: P2PHttpServerState -> Server P2PHttpAPI
serveP2pHttp st
= serveGet st
:<|> serveGet st
:<|> serveGet st
:<|> serveGet st
:<|> serveCheckPresent st
:<|> serveCheckPresent st
:<|> serveCheckPresent st
:<|> serveCheckPresent st
:<|> serveRemove st id
:<|> serveRemove st id
:<|> serveRemove st dePlus
:<|> serveRemove st dePlus
:<|> serveRemoveBefore st
:<|> serveGetTimestamp st
:<|> servePut st id
:<|> servePut st id
:<|> servePut st dePlus
:<|> servePut st dePlus
:<|> servePutOffset st id
:<|> servePutOffset st id
:<|> servePutOffset st dePlus
:<|> serveLockContent st
:<|> serveLockContent st
:<|> serveLockContent st
:<|> serveLockContent st
:<|> serveKeepLocked st
:<|> serveKeepLocked st
:<|> serveKeepLocked st
:<|> serveKeepLocked st
:<|> serveGetGeneric st
serveGetGeneric
:: P2PHttpServerState
-> B64UUID ServerSide
-> B64Key
-> Maybe (B64UUID ClientSide)
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Handler (Headers '[DataLengthHeader] (S.SourceT IO B.ByteString))
serveGetGeneric st su@(B64UUID u) k mcu bypass =
-- Use V0 because it does not alter the returned data to indicate
-- Invalid content.
serveGet st su V0 k (fromMaybe scu mcu) bypass Nothing Nothing
where
-- Reuse server UUID as client UUID.
scu = B64UUID u :: B64UUID ClientSide
serveGet
:: APIVersion v
=> P2PHttpServerState
-> B64UUID ServerSide
-> v
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> Maybe B64FilePath
-> Maybe Offset
-> IsSecure
-> Maybe Auth
-> Handler (Headers '[DataLengthHeader] (S.SourceT IO B.ByteString))
serveGet st su apiver (B64Key k) cu bypass baf startat sec auth = do
conn <- getP2PConnection apiver st cu su bypass sec auth ReadAction id
bsv <- liftIO newEmptyTMVarIO
endv <- liftIO newEmptyTMVarIO
validityv <- liftIO newEmptyTMVarIO
finalv <- liftIO newEmptyTMVarIO
annexworker <- liftIO $ async $ inAnnexWorker st $ do
let storer _offset len = sendContentWith $ \bs -> liftIO $ do
atomically $ putTMVar bsv (len, bs)
atomically $ takeTMVar endv
signalFullyConsumedByteString $
connOhdl $ serverP2PConnection conn
return $ \v -> do
liftIO $ atomically $ putTMVar validityv v
return True
enteringStage (TransferStage Upload) $
runFullProto (clientRunState conn) (clientP2PConnection conn) $
void $ receiveContent Nothing nullMeterUpdate
sizer storer getreq
void $ liftIO $ forkIO $ waitfinal endv finalv conn annexworker
(Len len, bs) <- liftIO $ atomically $ takeTMVar bsv
bv <- liftIO $ newMVar (filter (not . B.null) (L.toChunks bs))
szv <- liftIO $ newMVar 0
let streamer = S.SourceT $ \s -> s =<< return
(stream (bv, szv, len, endv, validityv, finalv))
return $ addHeader (DataLength len) streamer
where
stream (bv, szv, len, endv, validityv, finalv) =
S.fromActionStep B.null $
modifyMVar bv $ nextchunk szv $
checkvalidity szv len endv validityv finalv
nextchunk szv checkvalid (b:[]) = do
updateszv szv b
ifM checkvalid
( return ([], b)
-- The key's content is invalid, but
-- the amount of data is the same as the
-- DataLengthHeader indicated. Truncate
-- the response by one byte to indicate
-- to the client that it's not valid.
, return ([], B.take (B.length b - 1) b)
)
nextchunk szv _checkvalid (b:bs) = do
updateszv szv b
return (bs, b)
nextchunk _szv checkvalid [] = do
void checkvalid
-- Result ignored because 0 bytes of data are sent,
-- so even if the key is invalid, if that's the
-- amount of data that the DataLengthHeader indicates,
-- we've successfully served an empty key.
return ([], mempty)
updateszv szv b = modifyMVar szv $ \sz ->
let !sz' = sz + fromIntegral (B.length b)
in return (sz', ())
-- Returns False when the key's content is invalid, but the
-- amount of data sent was the same as indicated by the
-- DataLengthHeader.
checkvalidity szv len endv validityv finalv =
ifM (atomically $ isEmptyTMVar endv)
( do
atomically $ putTMVar endv ()
validity <- atomically $ takeTMVar validityv
sz <- takeMVar szv
atomically $ putTMVar finalv ()
atomically $ putTMVar endv ()
return $ case validity of
Nothing -> True
Just Valid -> True
Just Invalid -> sz /= len
, pure True
)
waitfinal endv finalv conn annexworker = do
-- Wait for everything to be transferred before
-- stopping the annexworker. The finalv will usually
-- be written to at the end. If the client disconnects
-- early that does not happen, so catch STM exception.
alltransferred <- isRight
<$> tryNonAsync (liftIO $ atomically $ takeTMVar finalv)
-- Make sure the annexworker is not left blocked on endv
-- if the client disconnected early.
void $ liftIO $ atomically $ tryPutTMVar endv ()
void $ tryNonAsync $ if alltransferred
then releaseP2PConnection conn
else closeP2PConnection conn
void $ tryNonAsync $ wait annexworker
sizer = pure $ Len $ case startat of
Just (Offset o) -> fromIntegral o
Nothing -> 0
getreq offset = P2P.Protocol.GET offset af k
af = ProtoAssociatedFile $ b64FilePathToAssociatedFile baf
serveCheckPresent
:: APIVersion v
=> P2PHttpServerState
-> B64UUID ServerSide
-> v
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Handler CheckPresentResult
serveCheckPresent st su apiver (B64Key k) cu bypass sec auth = do
res <- withP2PConnection apiver st cu su bypass sec auth ReadAction id
$ \conn -> liftIO $ proxyClientNetProto conn $ checkPresent k
case res of
Right b -> return (CheckPresentResult b)
Left err -> throwError $ err500 { errBody = encodeBL err }
serveRemove
:: APIVersion v
=> P2PHttpServerState
-> (RemoveResultPlus -> t)
-> B64UUID ServerSide
-> v
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Handler t
serveRemove st resultmangle su apiver (B64Key k) cu bypass sec auth = do
res <- withP2PConnection apiver st cu su bypass sec auth RemoveAction id
$ \conn ->
liftIO $ proxyClientNetProto conn $ remove Nothing k
case res of
(Right b, plusuuids) -> return $ resultmangle $
RemoveResultPlus b (map B64UUID (fromMaybe [] plusuuids))
(Left err, _) -> throwError $
err500 { errBody = encodeBL err }
serveRemoveBefore
:: APIVersion v
=> P2PHttpServerState
-> B64UUID ServerSide
-> v
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> Timestamp
-> IsSecure
-> Maybe Auth
-> Handler RemoveResultPlus
serveRemoveBefore st su apiver (B64Key k) cu bypass (Timestamp ts) sec auth = do
res <- withP2PConnection apiver st cu su bypass sec auth RemoveAction id
$ \conn ->
liftIO $ proxyClientNetProto conn $
removeBeforeRemoteEndTime ts k
case res of
(Right b, plusuuids) -> return $
RemoveResultPlus b (map B64UUID (fromMaybe [] plusuuids))
(Left err, _) -> throwError $
err500 { errBody = encodeBL err }
serveGetTimestamp
:: APIVersion v
=> P2PHttpServerState
-> B64UUID ServerSide
-> v
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Handler GetTimestampResult
serveGetTimestamp st su apiver cu bypass sec auth = do
res <- withP2PConnection apiver st cu su bypass sec auth ReadAction id
$ \conn ->
liftIO $ proxyClientNetProto conn getTimestamp
case res of
Right ts -> return $ GetTimestampResult (Timestamp ts)
Left err -> throwError $
err500 { errBody = encodeBL err }
servePut
:: APIVersion v
=> P2PHttpServerState
-> (PutResultPlus -> t)
-> B64UUID ServerSide
-> v
-> DataLength
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> Maybe B64FilePath
-> Maybe Offset
-> S.SourceT IO B.ByteString
-> IsSecure
-> Maybe Auth
-> Handler t
servePut st resultmangle su apiver (DataLength len) (B64Key k) cu bypass baf moffset stream sec auth = do
validityv <- liftIO newEmptyTMVarIO
let validitycheck = local $ runValidityCheck $
liftIO $ atomically $ readTMVar validityv
tooshortv <- liftIO newEmptyTMVarIO
content <- liftIO $ S.unSourceT stream (gather validityv tooshortv)
res <- withP2PConnection' apiver st cu su bypass sec auth WriteAction
(\cst -> cst { connectionWaitVar = False }) $ \conn ->
liftIO (protoaction conn content validitycheck)
`finally` checktooshort conn tooshortv
case res of
Right (Right (Just plusuuids)) -> return $ resultmangle $
PutResultPlus True (map B64UUID plusuuids)
Right (Right Nothing) -> return $ resultmangle $
PutResultPlus False []
Right (Left protofail) -> throwError $
err500 { errBody = encodeBL (describeProtoFailure protofail) }
Left err -> throwError $
err500 { errBody = encodeBL (show err) }
where
protoaction conn content validitycheck = inAnnexWorker st $
enteringStage (TransferStage Download) $
runFullProto (clientRunState conn) (clientP2PConnection conn) $
protoaction' content validitycheck
protoaction' content validitycheck = put' k af $ \offset' ->
let offsetdelta = offset' - offset
in case compare offset' offset of
EQ -> sendContent' nullMeterUpdate (Len len)
content validitycheck
GT -> sendContent' nullMeterUpdate
(Len (len - fromIntegral offsetdelta))
(L.drop (fromIntegral offsetdelta) content)
validitycheck
LT -> sendContent' nullMeterUpdate
(Len len)
content
(validitycheck >>= \_ -> return Invalid)
offset = case moffset of
Just (Offset o) -> o
Nothing -> 0
af = b64FilePathToAssociatedFile baf
-- Streams the ByteString from the client. Avoids returning a longer
-- than expected ByteString by truncating to the expected length.
-- Returns a shorter than expected ByteString when the data is not
-- valid.
gather validityv tooshortv = unsafeInterleaveIO . go 0
where
go n S.Stop = do
atomically $ do
writeTMVar validityv $
if n == len then Valid else Invalid
writeTMVar tooshortv (n /= len)
return LI.Empty
go n (S.Error _err) = do
atomically $ do
writeTMVar validityv Invalid
writeTMVar tooshortv (n /= len)
return LI.Empty
go n (S.Skip s) = go n s
go n (S.Effect ms) = ms >>= go n
go n (S.Yield v s) =
let !n' = n + fromIntegral (B.length v)
in if n' > len
then do
atomically $ do
writeTMVar validityv Invalid
writeTMVar tooshortv True
return $ LI.Chunk
(B.take (fromIntegral (len - n')) v)
LI.Empty
else LI.Chunk v <$> unsafeInterleaveIO (go n' s)
-- The connection can no longer be used when too short a DATA has
-- been written to it.
checktooshort conn tooshortv =
liftIO $ whenM (atomically $ fromMaybe True <$> tryTakeTMVar tooshortv) $
closeP2PConnection conn
servePutOffset
:: APIVersion v
=> P2PHttpServerState
-> (PutOffsetResultPlus -> t)
-> B64UUID ServerSide
-> v
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Handler t
servePutOffset st resultmangle su apiver (B64Key k) cu bypass sec auth = do
res <- withP2PConnection apiver st cu su bypass sec auth WriteAction
(\cst -> cst { connectionWaitVar = False }) $ \conn ->
liftIO $ proxyClientNetProto conn $ getPutOffset k af
case res of
Right offset -> return $ resultmangle $
PutOffsetResultPlus (Offset offset)
Left plusuuids -> return $ resultmangle $
PutOffsetResultAlreadyHavePlus (map B64UUID plusuuids)
where
af = AssociatedFile Nothing
serveLockContent
:: APIVersion v
=> P2PHttpServerState
-> B64UUID ServerSide
-> v
-> B64Key
-> B64UUID ClientSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Handler LockResult
serveLockContent st su apiver (B64Key k) cu bypass sec auth = do
conn <- getP2PConnection apiver st cu su bypass sec auth WriteAction id
let lock = do
lockresv <- newEmptyTMVarIO
unlockv <- newEmptyTMVarIO
annexworker <- async $ inAnnexWorker st $ do
lockres <- runFullProto (clientRunState conn) (clientP2PConnection conn) $ do
net $ sendMessage (LOCKCONTENT k)
checkSuccess
liftIO $ atomically $ putTMVar lockresv lockres
liftIO $ atomically $ takeTMVar unlockv
void $ runFullProto (clientRunState conn) (clientP2PConnection conn) $ do
net $ sendMessage UNLOCKCONTENT
atomically (takeTMVar lockresv) >>= \case
Right True -> return (Just (annexworker, unlockv))
_ -> return Nothing
let unlock (annexworker, unlockv) = do
atomically $ putTMVar unlockv ()
void $ wait annexworker
releaseP2PConnection conn
liftIO $ mkLocker lock unlock >>= \case
Just (locker, lockid) -> do
liftIO $ storeLock lockid locker st
return $ LockResult True (Just lockid)
Nothing -> return $ LockResult False Nothing
serveKeepLocked
:: APIVersion v
=> P2PHttpServerState
-> B64UUID ServerSide
-> v
-> LockID
-> Maybe (B64UUID ClientSide)
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> Maybe ConnectionKeepAlive
-> Maybe KeepAlive
-> S.SourceT IO UnlockRequest
-> Handler LockResult
serveKeepLocked st _su _apiver lckid _cu _bypass sec auth _ _ unlockrequeststream = do
checkAuthActionClass st sec auth WriteAction $ \_ -> do
liftIO $ keepingLocked lckid st
_ <- liftIO $ S.unSourceT unlockrequeststream go
return (LockResult False Nothing)
where
go S.Stop = dropLock lckid st
go (S.Error _err) = dropLock lckid st
go (S.Skip s) = go s
go (S.Effect ms) = ms >>= go
go (S.Yield (UnlockRequest False) s) = go s
go (S.Yield (UnlockRequest True) _) = dropLock lckid st

642
P2P/Http/State.hs Normal file
View file

@ -0,0 +1,642 @@
{- P2P protocol over HTTP, server state
-
- https://git-annex.branchable.com/design/p2p_protocol_over_http/
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-}
module P2P.Http.State where
import Annex.Common
import qualified Annex
import P2P.Http.Types
import qualified P2P.Protocol as P2P
import qualified P2P.IO as P2P
import P2P.IO
import P2P.Annex
import Annex.UUID
import Types.NumCopies
import Types.WorkerPool
import Annex.WorkerPool
import Annex.BranchState
import Types.Cluster
import CmdLine.Action (startConcurrency)
import Utility.ThreadScheduler
import Utility.HumanTime
import Logs.Proxy
import Annex.Proxy
import Annex.Cluster
import qualified P2P.Proxy as Proxy
import qualified Types.Remote as Remote
import Servant
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import Control.Concurrent.Async
import Control.Concurrent.STM
import Data.Time.Clock.POSIX
data P2PHttpServerState = P2PHttpServerState
{ acquireP2PConnection :: AcquireP2PConnection
, annexWorkerPool :: AnnexWorkerPool
, getServerMode :: GetServerMode
, openLocks :: TMVar (M.Map LockID Locker)
}
type AnnexWorkerPool = TMVar (WorkerPool (Annex.AnnexState, Annex.AnnexRead))
-- Nothing when the server is not allowed to serve any requests.
type GetServerMode = IsSecure -> Maybe Auth -> Maybe P2P.ServerMode
mkP2PHttpServerState :: AcquireP2PConnection -> AnnexWorkerPool -> GetServerMode -> IO P2PHttpServerState
mkP2PHttpServerState acquireconn annexworkerpool getservermode = P2PHttpServerState
<$> pure acquireconn
<*> pure annexworkerpool
<*> pure getservermode
<*> newTMVarIO mempty
data ActionClass = ReadAction | WriteAction | RemoveAction
deriving (Eq)
withP2PConnection
:: APIVersion v
=> v
-> P2PHttpServerState
-> B64UUID ClientSide
-> B64UUID ServerSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> ActionClass
-> (ConnectionParams -> ConnectionParams)
-> (P2PConnectionPair -> Handler (Either ProtoFailure a))
-> Handler a
withP2PConnection apiver st cu su bypass sec auth actionclass fconnparams connaction =
withP2PConnection' apiver st cu su bypass sec auth actionclass fconnparams connaction'
where
connaction' conn = connaction conn >>= \case
Right r -> return r
Left err -> throwError $
err500 { errBody = encodeBL (describeProtoFailure err) }
withP2PConnection'
:: APIVersion v
=> v
-> P2PHttpServerState
-> B64UUID ClientSide
-> B64UUID ServerSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> ActionClass
-> (ConnectionParams -> ConnectionParams)
-> (P2PConnectionPair -> Handler a)
-> Handler a
withP2PConnection' apiver st cu su bypass sec auth actionclass fconnparams connaction = do
conn <- getP2PConnection apiver st cu su bypass sec auth actionclass fconnparams
connaction conn
`finally` liftIO (releaseP2PConnection conn)
getP2PConnection
:: APIVersion v
=> v
-> P2PHttpServerState
-> B64UUID ClientSide
-> B64UUID ServerSide
-> [B64UUID Bypass]
-> IsSecure
-> Maybe Auth
-> ActionClass
-> (ConnectionParams -> ConnectionParams)
-> Handler P2PConnectionPair
getP2PConnection apiver st cu su bypass sec auth actionclass fconnparams =
checkAuthActionClass st sec auth actionclass go
where
go servermode = liftIO (acquireP2PConnection st cp) >>= \case
Left (ConnectionFailed err) ->
throwError err502 { errBody = encodeBL err }
Left TooManyConnections ->
throwError err503
Right v -> return v
where
cp = fconnparams $ ConnectionParams
{ connectionProtocolVersion = protocolVersion apiver
, connectionServerUUID = fromB64UUID su
, connectionClientUUID = fromB64UUID cu
, connectionBypass = map fromB64UUID bypass
, connectionServerMode = servermode
, connectionWaitVar = True
}
checkAuthActionClass
:: P2PHttpServerState
-> IsSecure
-> Maybe Auth
-> ActionClass
-> (P2P.ServerMode -> Handler a)
-> Handler a
checkAuthActionClass st sec auth actionclass go =
case (getServerMode st sec auth, actionclass) of
(Just P2P.ServeReadWrite, _) -> go P2P.ServeReadWrite
(Just P2P.ServeAppendOnly, RemoveAction) -> throwError err403
(Just P2P.ServeAppendOnly, _) -> go P2P.ServeAppendOnly
(Just P2P.ServeReadOnly, ReadAction) -> go P2P.ServeReadOnly
(Just P2P.ServeReadOnly, _) -> throwError err403
(Nothing, _) -> throwError basicAuthRequired
basicAuthRequired :: ServerError
basicAuthRequired = err401 { errHeaders = [(h, v)] }
where
h = "WWW-Authenticate"
v = "Basic realm=\"git-annex\", charset=\"UTF-8\""
data ConnectionParams = ConnectionParams
{ connectionProtocolVersion :: P2P.ProtocolVersion
, connectionServerUUID :: UUID
, connectionClientUUID :: UUID
, connectionBypass :: [UUID]
, connectionServerMode :: P2P.ServerMode
, connectionWaitVar :: Bool
}
deriving (Show, Eq, Ord)
data ConnectionProblem
= ConnectionFailed String
| TooManyConnections
deriving (Show, Eq)
proxyClientNetProto :: P2PConnectionPair -> P2P.Proto a -> IO (Either P2P.ProtoFailure a)
proxyClientNetProto conn = runNetProto
(clientRunState conn) (clientP2PConnection conn)
type AcquireP2PConnection
= ConnectionParams
-> IO (Either ConnectionProblem P2PConnectionPair)
withP2PConnections
:: AnnexWorkerPool
-> ProxyConnectionPoolSize
-> ClusterConcurrency
-> (AcquireP2PConnection -> Annex a)
-> Annex a
withP2PConnections workerpool proxyconnectionpoolsize clusterconcurrency a = do
enableInteractiveBranchAccess
myuuid <- getUUID
myproxies <- M.lookup myuuid <$> getProxies
reqv <- liftIO newEmptyTMVarIO
relv <- liftIO newEmptyTMVarIO
endv <- liftIO newEmptyTMVarIO
proxypool <- liftIO $ newTMVarIO (0, mempty)
asyncservicer <- liftIO $ async $
servicer myuuid myproxies proxypool reqv relv endv
let endit = do
liftIO $ atomically $ putTMVar endv ()
liftIO $ wait asyncservicer
a (acquireconn reqv) `finally` endit
where
acquireconn reqv connparams = do
respvar <- newEmptyTMVarIO
atomically $ putTMVar reqv (connparams, respvar)
atomically $ takeTMVar respvar
servicer myuuid myproxies proxypool reqv relv endv = do
reqrel <- liftIO $
atomically $
(Right <$> takeTMVar reqv)
`orElse`
(Left . Right <$> takeTMVar relv)
`orElse`
(Left . Left <$> takeTMVar endv)
case reqrel of
Right (connparams, respvar) -> do
servicereq myuuid myproxies proxypool relv connparams
>>= atomically . putTMVar respvar
servicer myuuid myproxies proxypool reqv relv endv
Left (Right releaseconn) -> do
releaseconn
servicer myuuid myproxies proxypool reqv relv endv
Left (Left ()) -> return ()
servicereq myuuid myproxies proxypool relv connparams
| connectionServerUUID connparams == myuuid =
localConnection relv connparams workerpool
| otherwise =
atomically (getProxyConnectionPool proxypool connparams) >>= \case
Just conn -> proxyConnection proxyconnectionpoolsize relv connparams workerpool proxypool conn
Nothing -> checkcanproxy myproxies proxypool relv connparams
checkcanproxy myproxies proxypool relv connparams =
inAnnexWorker' workerpool
(checkCanProxy' myproxies (connectionServerUUID connparams))
>>= \case
Right (Left reason) -> return $ Left $
ConnectionFailed $
fromMaybe "unknown uuid" reason
Right (Right (Right proxyremote)) -> proxyconnection $
openProxyConnectionToRemote workerpool
(connectionProtocolVersion connparams)
bypass proxyremote
Right (Right (Left clusteruuid)) -> proxyconnection $
openProxyConnectionToCluster workerpool
(connectionProtocolVersion connparams)
bypass clusteruuid clusterconcurrency
Left ex -> return $ Left $
ConnectionFailed $ show ex
where
bypass = P2P.Bypass $ S.fromList $ connectionBypass connparams
proxyconnection openconn = openconn >>= \case
Right conn -> proxyConnection proxyconnectionpoolsize
relv connparams workerpool proxypool conn
Left ex -> return $ Left $
ConnectionFailed $ show ex
data P2PConnectionPair = P2PConnectionPair
{ clientRunState :: RunState
, clientP2PConnection :: P2PConnection
, serverP2PConnection :: P2PConnection
, releaseP2PConnection :: IO ()
-- ^ Releases a P2P connection, which can be reused for other
-- requests.
, closeP2PConnection :: IO ()
-- ^ Closes a P2P connection, which is in a state where it is no
-- longer usable.
}
localConnection
:: TMVar (IO ())
-> ConnectionParams
-> AnnexWorkerPool
-> IO (Either ConnectionProblem P2PConnectionPair)
localConnection relv connparams workerpool =
localP2PConnectionPair connparams relv $ \serverrunst serverconn ->
inAnnexWorker' workerpool $
void $ runFullProto serverrunst serverconn $
P2P.serveOneCommandAuthed
(connectionServerMode connparams)
(connectionServerUUID connparams)
localP2PConnectionPair
:: ConnectionParams
-> TMVar (IO ())
-> (RunState -> P2PConnection -> IO (Either SomeException ()))
-> IO (Either ConnectionProblem P2PConnectionPair)
localP2PConnectionPair connparams relv startworker = do
(clientconn, serverconn) <- mkP2PConnectionPair connparams
("http client", "http server")
clientrunst <- mkClientRunState connparams
serverrunst <- mkServerRunState connparams
asyncworker <- async $
startworker serverrunst serverconn
let releaseconn = atomically $ void $ tryPutTMVar relv $
liftIO $ wait asyncworker
>>= either throwM return
return $ Right $ P2PConnectionPair
{ clientRunState = clientrunst
, clientP2PConnection = clientconn
, serverP2PConnection = serverconn
, releaseP2PConnection = releaseconn
, closeP2PConnection = releaseconn
}
mkP2PConnectionPair
:: ConnectionParams
-> (String, String)
-> IO (P2PConnection, P2PConnection)
mkP2PConnectionPair connparams (n1, n2) = do
hdl1 <- newEmptyTMVarIO
hdl2 <- newEmptyTMVarIO
wait1 <- newEmptyTMVarIO
wait2 <- newEmptyTMVarIO
closed1 <- newEmptyTMVarIO
closed2 <- newEmptyTMVarIO
let h1 = P2PHandleTMVar hdl1
(if connectionWaitVar connparams then Just wait1 else Nothing)
closed1
let h2 = P2PHandleTMVar hdl2
(if connectionWaitVar connparams then Just wait2 else Nothing)
closed2
let clientconn = P2PConnection Nothing
(const True) h2 h1
(ConnIdent (Just n1))
let serverconn = P2PConnection Nothing
(const True) h1 h2
(ConnIdent (Just n2))
return (clientconn, serverconn)
mkServerRunState :: ConnectionParams -> IO RunState
mkServerRunState connparams = do
prototvar <- newTVarIO $ connectionProtocolVersion connparams
mkRunState $ const $ Serving
(connectionClientUUID connparams)
Nothing
prototvar
mkClientRunState :: ConnectionParams -> IO RunState
mkClientRunState connparams = do
prototvar <- newTVarIO $ connectionProtocolVersion connparams
mkRunState $ const $ Client prototvar
proxyConnection
:: ProxyConnectionPoolSize
-> TMVar (IO ())
-> ConnectionParams
-> AnnexWorkerPool
-> TMVar ProxyConnectionPool
-> ProxyConnection
-> IO (Either ConnectionProblem P2PConnectionPair)
proxyConnection proxyconnectionpoolsize relv connparams workerpool proxypool proxyconn = do
(clientconn, proxyfromclientconn) <-
mkP2PConnectionPair connparams ("http client", "proxy")
clientrunst <- mkClientRunState connparams
proxyfromclientrunst <- mkClientRunState connparams
asyncworker <- async $
inAnnexWorker' workerpool $ do
proxystate <- liftIO Proxy.mkProxyState
let proxyparams = Proxy.ProxyParams
{ Proxy.proxyMethods = mkProxyMethods
, Proxy.proxyState = proxystate
, Proxy.proxyServerMode = connectionServerMode connparams
, Proxy.proxyClientSide = Proxy.ClientSide proxyfromclientrunst proxyfromclientconn
, Proxy.proxyUUID = proxyConnectionRemoteUUID proxyconn
, Proxy.proxySelector = proxyConnectionSelector proxyconn
, Proxy.proxyConcurrencyConfig = proxyConnectionConcurrency proxyconn
, Proxy.proxyClientProtocolVersion = connectionProtocolVersion connparams
}
let proxy mrequestmessage = case mrequestmessage of
Just requestmessage -> do
Proxy.proxyRequest proxydone proxyparams
requestcomplete requestmessage protoerrhandler
Nothing -> return ()
protoerrhandler proxy $
liftIO $ runNetProto proxyfromclientrunst proxyfromclientconn $
P2P.net P2P.receiveMessage
let releaseconn returntopool =
atomically $ void $ tryPutTMVar relv $ do
r <- liftIO $ wait asyncworker
liftIO $ closeConnection proxyfromclientconn
liftIO $ closeConnection clientconn
if returntopool
then liftIO $ do
now <- getPOSIXTime
evicted <- atomically $ putProxyConnectionPool proxypool proxyconnectionpoolsize connparams $
proxyconn { proxyConnectionLastUsed = now }
maybe noop closeproxyconnection evicted
else closeproxyconnection proxyconn
either throwM return r
return $ Right $ P2PConnectionPair
{ clientRunState = clientrunst
, clientP2PConnection = clientconn
, serverP2PConnection = proxyfromclientconn
, releaseP2PConnection = releaseconn True
, closeP2PConnection = releaseconn False
}
where
protoerrhandler cont a = a >>= \case
Left _ -> proxyConnectionCloser proxyconn
Right v -> cont v
proxydone = return ()
requestcomplete () = return ()
closeproxyconnection =
void . inAnnexWorker' workerpool . proxyConnectionCloser
data Locker = Locker
{ lockerThread :: Async ()
, lockerVar :: TMVar Bool
-- ^ Left empty until the thread has taken the lock
-- (or failed to do so), then True while the lock is held,
-- and setting to False causes the lock to be released.
, lockerTimeoutDisable :: TMVar ()
-- ^ Until this is filled, the lock will be subject to timeout.
-- Once filled the lock will remain held until explicitly dropped.
}
mkLocker :: (IO (Maybe a)) -> (a -> IO ()) -> IO (Maybe (Locker, LockID))
mkLocker lock unlock = do
lv <- newEmptyTMVarIO
timeoutdisablev <- newEmptyTMVarIO
let setlocked = putTMVar lv
locktid <- async $ lock >>= \case
Nothing ->
atomically $ setlocked False
Just st -> do
atomically $ setlocked True
atomically $ do
v <- takeTMVar lv
if v
then retry
else setlocked False
unlock st
locksuccess <- atomically $ readTMVar lv
if locksuccess
then do
timeouttid <- async $ do
threadDelaySeconds $ Seconds $ fromIntegral $
durationSeconds p2pDefaultLockContentRetentionDuration
atomically (tryReadTMVar timeoutdisablev) >>= \case
Nothing -> void $ atomically $
writeTMVar lv False
Just () -> noop
tid <- async $ do
wait locktid
cancel timeouttid
lckid <- B64UUID <$> genUUID
return (Just (Locker tid lv timeoutdisablev, lckid))
else do
wait locktid
return Nothing
storeLock :: LockID -> Locker -> P2PHttpServerState -> IO ()
storeLock lckid locker st = atomically $ do
m <- takeTMVar (openLocks st)
let !m' = M.insert lckid locker m
putTMVar (openLocks st) m'
keepingLocked :: LockID -> P2PHttpServerState -> IO ()
keepingLocked lckid st = do
m <- atomically $ readTMVar (openLocks st)
case M.lookup lckid m of
Nothing -> return ()
Just locker ->
atomically $ void $
tryPutTMVar (lockerTimeoutDisable locker) ()
dropLock :: LockID -> P2PHttpServerState -> IO ()
dropLock lckid st = do
v <- atomically $ do
m <- takeTMVar (openLocks st)
let (mlocker, !m') =
M.updateLookupWithKey (\_ _ -> Nothing) lckid m
putTMVar (openLocks st) m'
case mlocker of
Nothing -> return Nothing
-- Signal to the locker's thread that it can
-- release the lock.
Just locker -> do
_ <- swapTMVar (lockerVar locker) False
return (Just locker)
case v of
Nothing -> return ()
Just locker -> wait (lockerThread locker)
getAnnexWorkerPool :: (AnnexWorkerPool -> Annex a) -> Annex a
getAnnexWorkerPool a = startConcurrency transferStages $
Annex.getState Annex.workers >>= \case
Nothing -> giveup "Use -Jn or set annex.jobs to configure the number of worker threads."
Just wp -> a wp
inAnnexWorker :: P2PHttpServerState -> Annex a -> IO (Either SomeException a)
inAnnexWorker st = inAnnexWorker' (annexWorkerPool st)
inAnnexWorker' :: AnnexWorkerPool -> Annex a -> IO (Either SomeException a)
inAnnexWorker' poolv annexaction = do
(workerstrd, workerstage) <- atomically $ waitStartWorkerSlot poolv
resv <- newEmptyTMVarIO
aid <- async $ do
(res, strd) <- Annex.run workerstrd annexaction
atomically $ putTMVar resv res
return strd
atomically $ do
pool <- takeTMVar poolv
let !pool' = addWorkerPool (ActiveWorker aid workerstage) pool
putTMVar poolv pool'
(res, workerstrd') <- waitCatch aid >>= \case
Right strd -> do
r <- atomically $ takeTMVar resv
return (Right r, strd)
Left err -> return (Left err, workerstrd)
atomically $ do
pool <- takeTMVar poolv
let !pool' = deactivateWorker pool aid workerstrd'
putTMVar poolv pool'
return res
data ProxyConnection = ProxyConnection
{ proxyConnectionRemoteUUID :: UUID
, proxyConnectionSelector :: Proxy.ProxySelector
, proxyConnectionCloser :: Annex ()
, proxyConnectionConcurrency :: Proxy.ConcurrencyConfig
, proxyConnectionLastUsed :: POSIXTime
}
instance Show ProxyConnection where
show pc = unwords
[ "ProxyConnection"
, show (proxyConnectionRemoteUUID pc)
, show (proxyConnectionLastUsed pc)
]
openedProxyConnection
:: UUID
-> Proxy.ProxySelector
-> Annex ()
-> Proxy.ConcurrencyConfig
-> IO ProxyConnection
openedProxyConnection u selector closer concurrency = do
now <- getPOSIXTime
return $ ProxyConnection u selector closer concurrency now
openProxyConnectionToRemote
:: AnnexWorkerPool
-> P2P.ProtocolVersion
-> P2P.Bypass
-> Remote
-> IO (Either SomeException ProxyConnection)
openProxyConnectionToRemote workerpool clientmaxversion bypass remote =
inAnnexWorker' workerpool $ do
remoteside <- proxyRemoteSide clientmaxversion bypass remote
concurrencyconfig <- Proxy.noConcurrencyConfig
liftIO $ openedProxyConnection (Remote.uuid remote)
(Proxy.singleProxySelector remoteside)
(Proxy.closeRemoteSide remoteside)
concurrencyconfig
type ClusterConcurrency = Int
openProxyConnectionToCluster
:: AnnexWorkerPool
-> P2P.ProtocolVersion
-> P2P.Bypass
-> ClusterUUID
-> ClusterConcurrency
-> IO (Either SomeException ProxyConnection)
openProxyConnectionToCluster workerpool clientmaxversion bypass clusteruuid concurrency =
inAnnexWorker' workerpool $ do
(proxyselector, closenodes) <-
clusterProxySelector clusteruuid clientmaxversion bypass
concurrencyconfig <- Proxy.mkConcurrencyConfig concurrency
liftIO $ openedProxyConnection (fromClusterUUID clusteruuid)
proxyselector closenodes concurrencyconfig
type ProxyConnectionPool = (Integer, M.Map ProxyConnectionPoolKey [ProxyConnection])
type ProxyConnectionPoolSize = Integer
-- Returns any older ProxyConnection that was evicted from the pool.
putProxyConnectionPool
:: TMVar ProxyConnectionPool
-> ProxyConnectionPoolSize
-> ConnectionParams
-> ProxyConnection
-> STM (Maybe ProxyConnection)
putProxyConnectionPool proxypool maxsz connparams conn = do
(sz, m) <- takeTMVar proxypool
let ((sz', m'), evicted) = case M.lookup k m of
Nothing -> ((succ sz, M.insert k [conn] m), Nothing)
Just [] -> ((succ sz, M.insert k [conn] m), Nothing)
Just cs -> if sz >= maxsz
then ((sz, M.insert k (conn : dropFromEnd 1 cs) m), lastMaybe cs)
else ((sz, M.insert k (conn : cs) m), Nothing)
let ((sz'', m''), evicted') = if sz' > maxsz
then removeOldestProxyConnectionPool (sz', m')
else ((sz', m'), Nothing)
putTMVar proxypool (sz'', m'')
return (evicted <|> evicted')
where
k = proxyConnectionPoolKey connparams
removeOldestProxyConnectionPool :: ProxyConnectionPool -> (ProxyConnectionPool, Maybe ProxyConnection)
removeOldestProxyConnectionPool (sz, m) =
((pred sz, m'), snd <$> headMaybe l)
where
m' = M.fromListWith (++) $ map (\(k', v) -> (k', [v])) (drop 1 l)
l = sortOn (proxyConnectionLastUsed . snd) $
concatMap (\(k', pl) -> map (k', ) pl) $
M.toList m
getProxyConnectionPool
:: TMVar ProxyConnectionPool
-> ConnectionParams
-> STM (Maybe ProxyConnection)
getProxyConnectionPool proxypool connparams = do
(sz, m) <- takeTMVar proxypool
case M.lookup k m of
Just (c:cs) -> do
putTMVar proxypool (sz-1, M.insert k cs m)
return (Just c)
_ -> do
putTMVar proxypool (sz, m)
return Nothing
where
k = proxyConnectionPoolKey connparams
type ProxyConnectionPoolKey = (UUID, UUID, [UUID], P2P.ProtocolVersion)
proxyConnectionPoolKey :: ConnectionParams -> ProxyConnectionPoolKey
proxyConnectionPoolKey connparams =
( connectionServerUUID connparams
, connectionClientUUID connparams
, connectionBypass connparams
, connectionProtocolVersion connparams
)

398
P2P/Http/Types.hs Normal file
View file

@ -0,0 +1,398 @@
{- P2P protocol over HTTP,
- data types for servant not including the servant API
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE CPP #-}
module P2P.Http.Types where
import Annex.Common
import qualified P2P.Protocol as P2P
import Utility.MonotonicClock
#ifdef WITH_SERVANT
import Servant
import Data.Aeson hiding (Key)
import Text.Read (readMaybe)
#endif
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.ByteString as B
import Codec.Binary.Base64Url as B64
import Data.Char
import Control.DeepSeq
import GHC.Generics (Generic)
data V3 = V3 deriving (Show)
data V2 = V2 deriving (Show)
data V1 = V1 deriving (Show)
data V0 = V0 deriving (Show)
class APIVersion v where
protocolVersion :: v -> P2P.ProtocolVersion
instance APIVersion V3 where protocolVersion _ = P2P.ProtocolVersion 3
instance APIVersion V2 where protocolVersion _ = P2P.ProtocolVersion 2
instance APIVersion V1 where protocolVersion _ = P2P.ProtocolVersion 1
instance APIVersion V0 where protocolVersion _ = P2P.ProtocolVersion 0
-- Keys, UUIDs, and filenames can be base64 encoded since Servant uses
-- Text and so needs UTF-8.
newtype B64Key = B64Key Key
deriving (Show)
newtype B64FilePath = B64FilePath RawFilePath
deriving (Show)
associatedFileToB64FilePath :: AssociatedFile -> Maybe B64FilePath
associatedFileToB64FilePath (AssociatedFile Nothing) = Nothing
associatedFileToB64FilePath (AssociatedFile (Just f)) = Just (B64FilePath f)
b64FilePathToAssociatedFile :: Maybe B64FilePath -> AssociatedFile
b64FilePathToAssociatedFile Nothing = AssociatedFile Nothing
b64FilePathToAssociatedFile (Just (B64FilePath f)) = AssociatedFile (Just f)
newtype B64UUID t = B64UUID { fromB64UUID :: UUID }
deriving (Show, Ord, Eq, Generic, NFData)
encodeB64Text :: B.ByteString -> T.Text
encodeB64Text b = case TE.decodeUtf8' b of
Right t
| (snd <$> B.unsnoc b) == Just closebracket
&& (fst <$> B.uncons b) == Just openbracket ->
b64wrapped
| otherwise -> t
Left _ -> b64wrapped
where
b64wrapped = TE.decodeUtf8Lenient $ "[" <> B64.encode b <> "]"
openbracket = fromIntegral (ord '[')
closebracket = fromIntegral (ord ']')
decodeB64Text :: T.Text -> Either T.Text B.ByteString
decodeB64Text t =
case T.unsnoc t of
Just (t', lastc) | lastc == ']' ->
case T.uncons t' of
Just (firstc, t'') | firstc == '[' ->
case B64.decode (TE.encodeUtf8 t'') of
Right b -> Right b
Left _ -> Left "unable to base64 decode [] wrapped value"
_ -> Right (TE.encodeUtf8 t)
_ -> Right (TE.encodeUtf8 t)
-- Phantom types.
data ClientSide
data ServerSide
data Bypass
data Plus
data Lock
type LockID = B64UUID Lock
newtype DataLength = DataLength Integer
deriving (Show)
newtype CheckPresentResult = CheckPresentResult Bool
deriving (Show)
newtype RemoveResult = RemoveResult Bool
deriving (Show)
data RemoveResultPlus = RemoveResultPlus Bool [B64UUID Plus]
deriving (Show)
newtype GetTimestampResult = GetTimestampResult Timestamp
deriving (Show)
newtype PutResult = PutResult Bool
deriving (Eq, Show)
data PutResultPlus = PutResultPlus Bool [B64UUID Plus]
deriving (Show)
data PutOffsetResult
= PutOffsetResult Offset
| PutOffsetResultAlreadyHave
deriving (Show)
data PutOffsetResultPlus
= PutOffsetResultPlus Offset
| PutOffsetResultAlreadyHavePlus [B64UUID Plus]
deriving (Show, Generic, NFData)
newtype Offset = Offset P2P.Offset
deriving (Show, Generic, NFData)
newtype Timestamp = Timestamp MonotonicTimestamp
deriving (Show)
data LockResult = LockResult Bool (Maybe LockID)
deriving (Show, Generic, NFData)
newtype UnlockRequest = UnlockRequest Bool
deriving (Show, Generic, NFData)
-- Not using servant's built-in basic authentication support,
-- because whether authentication is needed depends on server
-- configuration.
data Auth = Auth B.ByteString B.ByteString
deriving (Show, Generic, NFData, Eq, Ord)
#ifdef WITH_SERVANT
instance ToHttpApiData Auth where
toHeader (Auth u p) = "Basic " <> B64.encode (u <> ":" <> p)
toUrlPiece = TE.decodeUtf8Lenient . toHeader
instance FromHttpApiData Auth where
parseHeader h =
let (b, rest) = B.break (isSpace . chr . fromIntegral) h
in if map toLower (decodeBS b) == "basic"
then case B64.decode (B.dropWhile (isSpace . chr . fromIntegral) rest) of
Right v -> case B.split (fromIntegral (ord ':')) v of
(u:ps) -> Right $
Auth u (B.intercalate ":" ps)
_ -> bad
Left _ -> bad
else bad
where
bad = Left "invalid basic auth header"
parseUrlPiece = parseHeader . encodeBS . T.unpack
newtype ConnectionKeepAlive = ConnectionKeepAlive T.Text
connectionKeepAlive :: ConnectionKeepAlive
connectionKeepAlive = ConnectionKeepAlive "Keep-Alive"
newtype KeepAlive = KeepAlive T.Text
keepAlive :: KeepAlive
keepAlive = KeepAlive "timeout=1200"
instance ToHttpApiData ConnectionKeepAlive where
toUrlPiece (ConnectionKeepAlive t) = t
instance FromHttpApiData ConnectionKeepAlive where
parseUrlPiece = Right . ConnectionKeepAlive
instance ToHttpApiData KeepAlive where
toUrlPiece (KeepAlive t) = t
instance FromHttpApiData KeepAlive where
parseUrlPiece = Right . KeepAlive
instance ToHttpApiData V3 where toUrlPiece _ = "v3"
instance ToHttpApiData V2 where toUrlPiece _ = "v2"
instance ToHttpApiData V1 where toUrlPiece _ = "v1"
instance ToHttpApiData V0 where toUrlPiece _ = "v0"
instance FromHttpApiData V3 where parseUrlPiece = parseAPIVersion V3 "v3"
instance FromHttpApiData V2 where parseUrlPiece = parseAPIVersion V2 "v2"
instance FromHttpApiData V1 where parseUrlPiece = parseAPIVersion V1 "v1"
instance FromHttpApiData V0 where parseUrlPiece = parseAPIVersion V0 "v0"
parseAPIVersion :: v -> T.Text -> T.Text -> Either T.Text v
parseAPIVersion v need t
| t == need = Right v
| otherwise = Left "bad version"
instance ToHttpApiData B64Key where
toUrlPiece (B64Key k) = encodeB64Text (serializeKey' k)
instance FromHttpApiData B64Key where
parseUrlPiece t = case decodeB64Text t of
Right b -> maybe (Left "key parse error") (Right . B64Key)
(deserializeKey' b)
Left err -> Left err
instance ToHttpApiData (B64UUID t) where
toUrlPiece (B64UUID u) = encodeB64Text (fromUUID u)
instance FromHttpApiData (B64UUID t) where
parseUrlPiece t = case decodeB64Text t of
Right b -> case toUUID b of
u@(UUID _) -> Right (B64UUID u)
NoUUID -> Left "empty UUID"
Left err -> Left err
instance ToHttpApiData B64FilePath where
toUrlPiece (B64FilePath f) = encodeB64Text f
instance FromHttpApiData B64FilePath where
parseUrlPiece t = case decodeB64Text t of
Right b -> Right (B64FilePath b)
Left err -> Left err
instance ToHttpApiData Offset where
toUrlPiece (Offset (P2P.Offset n)) = T.pack (show n)
instance FromHttpApiData Offset where
parseUrlPiece t = case readMaybe (T.unpack t) of
Nothing -> Left "offset parse error"
Just n -> Right (Offset (P2P.Offset n))
instance ToHttpApiData Timestamp where
toUrlPiece (Timestamp (MonotonicTimestamp n)) = T.pack (show n)
instance FromHttpApiData Timestamp where
parseUrlPiece t = case readMaybe (T.unpack t) of
Nothing -> Left "timestamp parse error"
Just n -> Right (Timestamp (MonotonicTimestamp n))
instance ToHttpApiData DataLength where
toUrlPiece (DataLength n) = T.pack (show n)
instance FromHttpApiData DataLength where
parseUrlPiece t = case readMaybe (T.unpack t) of
Nothing -> Left "X-git-annex-data-length parse error"
Just n -> Right (DataLength n)
instance ToJSON PutResult where
toJSON (PutResult b) =
object ["stored" .= b]
instance FromJSON PutResult where
parseJSON = withObject "PutResult" $ \v -> PutResult
<$> v .: "stored"
instance ToJSON PutResultPlus where
toJSON (PutResultPlus b us) = object
[ "stored" .= b
, "plusuuids" .= plusList us
]
instance FromJSON PutResultPlus where
parseJSON = withObject "PutResultPlus" $ \v -> PutResultPlus
<$> v .: "stored"
<*> v .: "plusuuids"
instance ToJSON CheckPresentResult where
toJSON (CheckPresentResult b) = object
["present" .= b]
instance FromJSON CheckPresentResult where
parseJSON = withObject "CheckPresentResult" $ \v -> CheckPresentResult
<$> v .: "present"
instance ToJSON RemoveResult where
toJSON (RemoveResult b) = object
["removed" .= b]
instance FromJSON RemoveResult where
parseJSON = withObject "RemoveResult" $ \v -> RemoveResult
<$> v .: "removed"
instance ToJSON RemoveResultPlus where
toJSON (RemoveResultPlus b us) = object
[ "removed" .= b
, "plusuuids" .= plusList us
]
instance FromJSON RemoveResultPlus where
parseJSON = withObject "RemoveResultPlus" $ \v -> RemoveResultPlus
<$> v .: "removed"
<*> v .: "plusuuids"
instance ToJSON GetTimestampResult where
toJSON (GetTimestampResult (Timestamp (MonotonicTimestamp t))) = object
["timestamp" .= t]
instance FromJSON GetTimestampResult where
parseJSON = withObject "GetTimestampResult" $ \v ->
GetTimestampResult . Timestamp . MonotonicTimestamp
<$> v .: "timestamp"
instance ToJSON PutOffsetResult where
toJSON (PutOffsetResult (Offset (P2P.Offset o))) = object
["offset" .= o]
toJSON PutOffsetResultAlreadyHave = object
["alreadyhave" .= True]
instance FromJSON PutOffsetResult where
parseJSON = withObject "PutOffsetResult" $ \v ->
(PutOffsetResult
<$> (Offset . P2P.Offset <$> v .: "offset"))
<|> (mkalreadyhave
<$> (v .: "alreadyhave"))
where
mkalreadyhave :: Bool -> PutOffsetResult
mkalreadyhave _ = PutOffsetResultAlreadyHave
instance ToJSON PutOffsetResultPlus where
toJSON (PutOffsetResultPlus (Offset (P2P.Offset o))) = object
[ "offset" .= o ]
toJSON (PutOffsetResultAlreadyHavePlus us) = object
[ "alreadyhave" .= True
, "plusuuids" .= plusList us
]
instance FromJSON PutOffsetResultPlus where
parseJSON = withObject "PutOffsetResultPlus" $ \v ->
(PutOffsetResultPlus
<$> (Offset . P2P.Offset <$> v .: "offset"))
<|> (mkalreadyhave
<$> (v .: "alreadyhave")
<*> (v .: "plusuuids"))
where
mkalreadyhave :: Bool -> [B64UUID Plus] -> PutOffsetResultPlus
mkalreadyhave _ us = PutOffsetResultAlreadyHavePlus us
instance FromJSON (B64UUID t) where
parseJSON (String t) = case decodeB64Text t of
Right s -> pure (B64UUID (toUUID s))
Left _ -> mempty
parseJSON _ = mempty
instance ToJSON LockResult where
toJSON (LockResult v (Just (B64UUID lck))) = object
[ "locked" .= v
, "lockid" .= encodeB64Text (fromUUID lck)
]
toJSON (LockResult v Nothing) = object
[ "locked" .= v
]
instance FromJSON LockResult where
parseJSON = withObject "LockResult" $ \v -> LockResult
<$> v .: "locked"
<*> v .:? "lockid"
instance ToJSON UnlockRequest where
toJSON (UnlockRequest v) = object
["unlock" .= v]
instance FromJSON UnlockRequest where
parseJSON = withObject "UnlockRequest" $ \v -> UnlockRequest
<$> v .: "unlock"
plusList :: [B64UUID Plus] -> [String]
plusList = map (\(B64UUID u) -> fromUUID u)
class PlusClass plus unplus where
dePlus :: plus -> unplus
plus :: unplus -> plus
instance PlusClass RemoveResultPlus RemoveResult where
dePlus (RemoveResultPlus b _) = RemoveResult b
plus (RemoveResult b) = RemoveResultPlus b mempty
instance PlusClass PutResultPlus PutResult where
dePlus (PutResultPlus b _) = PutResult b
plus (PutResult b) = PutResultPlus b mempty
instance PlusClass PutOffsetResultPlus PutOffsetResult where
dePlus (PutOffsetResultPlus o) = PutOffsetResult o
dePlus (PutOffsetResultAlreadyHavePlus _) = PutOffsetResultAlreadyHave
plus (PutOffsetResult o) = PutOffsetResultPlus o
plus PutOffsetResultAlreadyHave = PutOffsetResultAlreadyHavePlus []
#endif

85
P2P/Http/Url.hs Normal file
View file

@ -0,0 +1,85 @@
{- P2P protocol over HTTP, urls
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module P2P.Http.Url where
import Types.UUID
import Utility.FileSystemEncoding
import Utility.PartialPrelude
import Data.List
import Network.URI
import System.FilePath.Posix as P
import qualified Data.UUID as UUID
#ifdef WITH_SERVANT
import Servant.Client (BaseUrl(..), Scheme(..))
import Text.Read
#endif
defaultP2PHttpProtocolPort :: Int
defaultP2PHttpProtocolPort = 9417 -- Git protocol is 9418
isP2PHttpProtocolUrl :: String -> Bool
isP2PHttpProtocolUrl s =
"annex+http://" `isPrefixOf` s ||
"annex+https://" `isPrefixOf` s
data P2PHttpUrl = P2PHttpUrl
{ p2pHttpUrlString :: String
#ifdef WITH_SERVANT
, p2pHttpBaseUrl :: BaseUrl
#endif
}
deriving (Show)
parseP2PHttpUrl :: String -> Maybe P2PHttpUrl
parseP2PHttpUrl us
| isP2PHttpProtocolUrl us = case parseURI (drop prefixlen us) of
Nothing -> Nothing
Just u ->
#ifdef WITH_SERVANT
case uriScheme u of
"http:" -> mkbaseurl Http u
"https:" -> mkbaseurl Https u
_ -> Nothing
#else
Just $ P2PHttpUrl us
#endif
| otherwise = Nothing
where
prefixlen = length "annex+"
#ifdef WITH_SERVANT
mkbaseurl s u = do
auth <- uriAuthority u
port <- if null (uriPort auth)
then Just defaultP2PHttpProtocolPort
else readMaybe (dropWhile (== ':') (uriPort auth))
return $ P2PHttpUrl us $ BaseUrl
{ baseUrlScheme = s
, baseUrlHost = uriRegName auth
, baseUrlPath = basepath u
, baseUrlPort = port
}
-- The servant server uses urls that start with "/git-annex/",
-- and so the servant client adds that to the base url. So remove
-- it from the url that the user provided. However, it may not be
-- present, eg if some other server is speaking the git-annex
-- protocol. The UUID is also removed from the end of the url.
basepath u = case reverse $ P.splitDirectories (uriPath u) of
("git-annex":"/":rest) -> P.joinPath (reverse rest)
rest -> P.joinPath (reverse rest)
#endif
unavailableP2PHttpUrl :: P2PHttpUrl -> P2PHttpUrl
unavailableP2PHttpUrl p = p
#ifdef WITH_SERVANT
{ p2pHttpBaseUrl = (p2pHttpBaseUrl p) { baseUrlHost = "!dne!" } }
#endif

View file

@ -25,6 +25,7 @@ module P2P.IO
, describeProtoFailure , describeProtoFailure
, runNetProto , runNetProto
, runNet , runNet
, signalFullyConsumedByteString
) where ) where
import Common import Common
@ -62,6 +63,7 @@ data ProtoFailure
= ProtoFailureMessage String = ProtoFailureMessage String
| ProtoFailureException SomeException | ProtoFailureException SomeException
| ProtoFailureIOError IOError | ProtoFailureIOError IOError
deriving (Show)
describeProtoFailure :: ProtoFailure -> String describeProtoFailure :: ProtoFailure -> String
describeProtoFailure (ProtoFailureMessage s) = s describeProtoFailure (ProtoFailureMessage s) = s
@ -79,7 +81,17 @@ mkRunState mk = do
data P2PHandle data P2PHandle
= P2PHandle Handle = P2PHandle Handle
| P2PHandleTMVar (TMVar (Either L.ByteString Message)) (TMVar ()) | P2PHandleTMVar
(TMVar (Either L.ByteString Message))
(Maybe (TMVar ()))
(TMVar ())
signalFullyConsumedByteString :: P2PHandle -> IO ()
signalFullyConsumedByteString (P2PHandle _) = return ()
signalFullyConsumedByteString (P2PHandleTMVar _ Nothing _) = return ()
signalFullyConsumedByteString (P2PHandleTMVar _ (Just waitv) closedv) =
atomically $ putTMVar waitv ()
`orElse` readTMVar closedv
data P2PConnection = P2PConnection data P2PConnection = P2PConnection
{ connRepo :: Maybe Repo { connRepo :: Maybe Repo
@ -91,6 +103,7 @@ data P2PConnection = P2PConnection
-- Identifier for a connection, only used for debugging. -- Identifier for a connection, only used for debugging.
newtype ConnIdent = ConnIdent (Maybe String) newtype ConnIdent = ConnIdent (Maybe String)
deriving (Show)
data ClosableConnection conn data ClosableConnection conn
= OpenConnection conn = OpenConnection conn
@ -138,7 +151,8 @@ closeConnection conn = do
closehandle (connOhdl conn) closehandle (connOhdl conn)
where where
closehandle (P2PHandle h) = hClose h closehandle (P2PHandle h) = hClose h
closehandle (P2PHandleTMVar _ _) = return () closehandle (P2PHandleTMVar _ _ closedv) =
atomically $ void $ tryPutTMVar closedv ()
-- Serves the protocol on a unix socket. -- Serves the protocol on a unix socket.
-- --
@ -188,11 +202,6 @@ runNetProto runst conn = go
go (Free (Local _)) = return $ Left $ go (Free (Local _)) = return $ Left $
ProtoFailureMessage "unexpected annex operation attempted" ProtoFailureMessage "unexpected annex operation attempted"
data P2PTMVarException = P2PTMVarException String
deriving (Show)
instance Exception P2PTMVarException
-- Interpreter of the Net part of Proto. -- Interpreter of the Net part of Proto.
-- --
-- An interpreter of Proto has to be provided, to handle the rest of Proto -- An interpreter of Proto has to be provided, to handle the rest of Proto
@ -206,18 +215,15 @@ runNet runst conn runner f = case f of
P2PHandle h -> tryNonAsync $ do P2PHandle h -> tryNonAsync $ do
hPutStrLn h $ unwords (formatMessage m) hPutStrLn h $ unwords (formatMessage m)
hFlush h hFlush h
P2PHandleTMVar mv _ -> P2PHandleTMVar mv _ closedv -> tryNonAsync $
ifM (atomically (tryPutTMVar mv (Right m))) atomically $ putTMVar mv (Right m)
( return $ Right () `orElse` readTMVar closedv
, return $ Left $ toException $
P2PTMVarException "TMVar left full"
)
case v of case v of
Left e -> return $ Left $ ProtoFailureException e Left e -> return $ Left $ ProtoFailureException e
Right () -> runner next Right () -> runner next
ReceiveMessage next -> ReceiveMessage next ->
let protoerr = return $ Left $ let protoerr = return $ Left $
ProtoFailureMessage "protocol error 1" ProtoFailureMessage "protocol error"
gotmessage m = do gotmessage m = do
liftIO $ debugMessage conn "P2P <" m liftIO $ debugMessage conn "P2P <" m
runner (next (Just m)) runner (next (Just m))
@ -230,10 +236,13 @@ runNet runst conn runner f = case f of
Right (Just l) -> case parseMessage l of Right (Just l) -> case parseMessage l of
Just m -> gotmessage m Just m -> gotmessage m
Nothing -> runner (next Nothing) Nothing -> runner (next Nothing)
P2PHandleTMVar mv _ -> P2PHandleTMVar mv _ closedv -> do
liftIO (atomically (takeTMVar mv)) >>= \case let recv = (Just <$> takeTMVar mv)
Right m -> gotmessage m `orElse` (readTMVar closedv >> return Nothing)
Left _b -> protoerr liftIO (atomically recv) >>= \case
Just (Right m) -> gotmessage m
Just (Left _b) -> protoerr
Nothing -> runner (next Nothing)
SendBytes len b p next -> SendBytes len b p next ->
case connOhdl conn of case connOhdl conn of
P2PHandle h -> do P2PHandle h -> do
@ -246,11 +255,16 @@ runNet runst conn runner f = case f of
Right False -> return $ Left $ Right False -> return $ Left $
ProtoFailureMessage "short data write" ProtoFailureMessage "short data write"
Left e -> return $ Left $ ProtoFailureException e Left e -> return $ Left $ ProtoFailureException e
P2PHandleTMVar mv waitv -> do P2PHandleTMVar mv waitv closedv -> do
liftIO $ atomically $ putTMVar mv (Left b) liftIO $ atomically $ putTMVar mv (Left b)
-- Wait for the whole bytestring to be `orElse` readTMVar closedv
-- processed. Necessary due to lazyiness. -- Wait for the whole bytestring to
liftIO $ atomically $ takeTMVar waitv -- be processed.
case waitv of
Nothing -> noop
Just v -> liftIO $ atomically $
takeTMVar v
`orElse` readTMVar closedv
runner next runner next
ReceiveBytes len p next -> ReceiveBytes len p next ->
case connIhdl conn of case connIhdl conn of
@ -260,11 +274,15 @@ runNet runst conn runner f = case f of
Right b -> runner (next b) Right b -> runner (next b)
Left e -> return $ Left $ Left e -> return $ Left $
ProtoFailureException e ProtoFailureException e
P2PHandleTMVar mv _ -> P2PHandleTMVar mv _ closedv -> do
liftIO (atomically (takeTMVar mv)) >>= \case let recv = (Just <$> takeTMVar mv)
Left b -> runner (next b) `orElse` (readTMVar closedv >> return Nothing)
Right _ -> return $ Left $ liftIO (atomically recv) >>= \case
ProtoFailureMessage "protocol error 2" Just (Left b) -> runner (next b)
Just (Right _) -> return $ Left $
ProtoFailureMessage "protocol error"
Nothing -> return $ Left $
ProtoFailureMessage "connection closed"
CheckAuthToken _u t next -> do CheckAuthToken _u t next -> do
let authed = connCheckAuth conn t let authed = connCheckAuth conn t
runner (next authed) runner (next authed)
@ -317,12 +335,16 @@ debugMessage conn prefix m = do
-- Must avoid sending too many bytes as it would confuse the other end. -- Must avoid sending too many bytes as it would confuse the other end.
-- This is easily dealt with by truncating it. -- This is easily dealt with by truncating it.
-- --
-- However, the whole ByteString will be evaluated here, even if
-- the end of it does not get sent.
--
-- If too few bytes are sent, the only option is to give up on this -- If too few bytes are sent, the only option is to give up on this
-- connection. False is returned to indicate this problem. -- connection. False is returned to indicate this problem.
sendExactly :: Len -> L.ByteString -> Handle -> MeterUpdate -> IO Bool sendExactly :: Len -> L.ByteString -> Handle -> MeterUpdate -> IO Bool
sendExactly (Len n) b h p = do sendExactly (Len n) b h p = do
sent <- meteredWrite' p (B.hPut h) (L.take (fromIntegral n) b) let (x, y) = L.splitAt (fromIntegral n) b
return (fromBytesProcessed sent == n) sent <- meteredWrite' p (B.hPut h) x
L.length y `seq` return (fromBytesProcessed sent == n)
receiveExactly :: Len -> Handle -> MeterUpdate -> IO L.ByteString receiveExactly :: Len -> Handle -> MeterUpdate -> IO L.ByteString
receiveExactly (Len n) h p = hGetMetered h (Just n) p receiveExactly (Len n) h p = hGetMetered h (Just n) p

View file

@ -42,12 +42,14 @@ import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S import qualified Data.Set as S
import Data.Char import Data.Char
import Data.Maybe
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
import Control.Applicative import Control.Applicative
import Control.DeepSeq
import Prelude import Prelude
newtype Offset = Offset Integer newtype Offset = Offset Integer
deriving (Show) deriving (Show, Eq, NFData, Num, Real, Ord, Enum, Integral)
newtype Len = Len Integer newtype Len = Len Integer
deriving (Show) deriving (Show)
@ -61,6 +63,15 @@ defaultProtocolVersion = ProtocolVersion 0
maxProtocolVersion :: ProtocolVersion maxProtocolVersion :: ProtocolVersion
maxProtocolVersion = ProtocolVersion 3 maxProtocolVersion = ProtocolVersion 3
-- In order from newest to oldest.
allProtocolVersions :: [ProtocolVersion]
allProtocolVersions =
[ ProtocolVersion 3
, ProtocolVersion 2
, ProtocolVersion 1
, ProtocolVersion 0
]
newtype ProtoAssociatedFile = ProtoAssociatedFile AssociatedFile newtype ProtoAssociatedFile = ProtoAssociatedFile AssociatedFile
deriving (Show) deriving (Show)
@ -250,9 +261,9 @@ data NetF c
-- ^ Sends exactly Len bytes of data. (Any more or less will -- ^ Sends exactly Len bytes of data. (Any more or less will
-- confuse the receiver.) -- confuse the receiver.)
| ReceiveBytes Len MeterUpdate (L.ByteString -> c) | ReceiveBytes Len MeterUpdate (L.ByteString -> c)
-- ^ Lazily reads bytes from peer. Stops once Len are read, -- ^ Streams bytes from peer. Stops once Len are read,
-- or if connection is lost, and in either case returns the bytes -- or if connection is lost. This allows resuming
-- that were read. This allows resuming interrupted transfers. -- interrupted transfers.
| CheckAuthToken UUID AuthToken (Bool -> c) | CheckAuthToken UUID AuthToken (Bool -> c)
| RelayService Service c | RelayService Service c
-- ^ Runs a service, relays its output to the peer, and data -- ^ Runs a service, relays its output to the peer, and data
@ -308,6 +319,10 @@ data LocalF c
-- content been transferred. -- content been transferred.
| StoreContentTo FilePath (Maybe IncrementalVerifier) Offset Len (Proto L.ByteString) (Proto (Maybe Validity)) ((Bool, Verification) -> c) | StoreContentTo FilePath (Maybe IncrementalVerifier) Offset Len (Proto L.ByteString) (Proto (Maybe Validity)) ((Bool, Verification) -> c)
-- ^ Like StoreContent, but stores the content to a temp file. -- ^ Like StoreContent, but stores the content to a temp file.
| SendContentWith (L.ByteString -> Annex (Maybe Validity -> Annex Bool)) (Proto L.ByteString) (Proto (Maybe Validity)) (Bool -> c)
-- ^ Reads content from the Proto L.ByteString and sends it to the
-- callback. The callback must consume the whole lazy ByteString,
-- before it returns a validity checker.
| SetPresent Key UUID c | SetPresent Key UUID c
| CheckContentPresent Key (Bool -> c) | CheckContentPresent Key (Bool -> c)
-- ^ Checks if the whole content of the key is locally present. -- ^ Checks if the whole content of the key is locally present.
@ -362,7 +377,7 @@ negotiateProtocolVersion preferredversion = do
case r of case r of
Just (VERSION v) -> net $ setProtocolVersion v Just (VERSION v) -> net $ setProtocolVersion v
-- Old server doesn't know about the VERSION command. -- Old server doesn't know about the VERSION command.
Just (ERROR _) -> return () Just (ERROR _) -> net $ setProtocolVersion (ProtocolVersion 0)
_ -> net $ sendMessage (ERROR "expected VERSION") _ -> net $ sendMessage (ERROR "expected VERSION")
sendBypass :: Bypass -> Proto () sendBypass :: Bypass -> Proto ()
@ -414,6 +429,26 @@ remove proof key =
net $ sendMessage (REMOVE key) net $ sendMessage (REMOVE key)
checkSuccessFailurePlus checkSuccessFailurePlus
getTimestamp :: Proto (Either String MonotonicTimestamp)
getTimestamp = do
net $ sendMessage GETTIMESTAMP
net receiveMessage >>= \case
Just (TIMESTAMP ts) -> return (Right ts)
Just (ERROR err) -> return (Left err)
_ -> do
net $ sendMessage (ERROR "expected TIMESTAMP")
return (Left "protocol error")
removeBefore :: POSIXTime -> Key -> Proto (Either String Bool, Maybe [UUID])
removeBefore endtime key = getTimestamp >>= \case
Right remotetime ->
canRemoveBefore endtime remotetime (local getLocalCurrentTime) >>= \case
Just remoteendtime ->
removeBeforeRemoteEndTime remoteendtime key
Nothing ->
return (Right False, Nothing)
Left err -> return (Left err, Nothing)
{- The endtime is the last local time at which the key can be removed. {- The endtime is the last local time at which the key can be removed.
- To tell the remote how long it has to remove the key, get its current - To tell the remote how long it has to remove the key, get its current
- timestamp, and add to it the number of seconds from the current local - timestamp, and add to it the number of seconds from the current local
@ -424,25 +459,21 @@ remove proof key =
- response from the remote, that is reflected in the local time, and so - response from the remote, that is reflected in the local time, and so
- reduces the allowed time. - reduces the allowed time.
-} -}
removeBefore :: POSIXTime -> Key -> Proto (Either String Bool, Maybe [UUID]) canRemoveBefore :: Monad m => POSIXTime -> MonotonicTimestamp -> m POSIXTime -> m (Maybe MonotonicTimestamp)
removeBefore endtime key = do canRemoveBefore endtime remotetime getlocaltime = do
net $ sendMessage GETTIMESTAMP localtime <- getlocaltime
net receiveMessage >>= \case let timeleft = endtime - localtime
Just (TIMESTAMP remotetime) -> do let timeleft' = MonotonicTimestamp (floor timeleft)
localtime <- local getLocalCurrentTime let remoteendtime = remotetime + timeleft'
let timeleft = endtime - localtime return $ if timeleft <= 0
let timeleft' = MonotonicTimestamp (floor timeleft) then Nothing
let remoteendtime = remotetime + timeleft' else Just remoteendtime
if timeleft <= 0
then return (Right False, Nothing) removeBeforeRemoteEndTime :: MonotonicTimestamp -> Key -> Proto (Either String Bool, Maybe [UUID])
else do removeBeforeRemoteEndTime remoteendtime key = do
net $ sendMessage $ net $ sendMessage $
REMOVE_BEFORE remoteendtime key REMOVE_BEFORE remoteendtime key
checkSuccessFailurePlus checkSuccessFailurePlus
Just (ERROR err) -> return (Left err, Nothing)
_ -> do
net $ sendMessage (ERROR "expected TIMESTAMP")
return (Right False, Nothing)
get :: FilePath -> Key -> Maybe IncrementalVerifier -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification) get :: FilePath -> Key -> Maybe IncrementalVerifier -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification)
get dest key iv af m p = get dest key iv af m p =
@ -453,17 +484,39 @@ get dest key iv af m p =
storer = storeContentTo dest iv storer = storeContentTo dest iv
put :: Key -> AssociatedFile -> MeterUpdate -> Proto (Maybe [UUID]) put :: Key -> AssociatedFile -> MeterUpdate -> Proto (Maybe [UUID])
put key af p = do put key af p = put' key af $ \offset ->
sendContent key af Nothing offset p
put' :: Key -> AssociatedFile -> (Offset -> Proto (Maybe [UUID])) -> Proto (Maybe [UUID])
put' key af sender = do
net $ sendMessage (PUT (ProtoAssociatedFile af) key) net $ sendMessage (PUT (ProtoAssociatedFile af) key)
r <- net receiveMessage r <- net receiveMessage
case r of case r of
Just (PUT_FROM offset) -> sendContent key af Nothing offset p Just (PUT_FROM offset) -> sender offset
Just ALREADY_HAVE -> return (Just []) Just ALREADY_HAVE -> return (Just [])
Just (ALREADY_HAVE_PLUS uuids) -> return (Just uuids) Just (ALREADY_HAVE_PLUS uuids) -> return (Just uuids)
_ -> do _ -> do
net $ sendMessage (ERROR "expected PUT_FROM or ALREADY_HAVE") net $ sendMessage (ERROR "expected PUT_FROM or ALREADY_HAVE")
return Nothing return Nothing
-- The protocol does not have a way to get the PUT offset
-- without sending DATA, so send an empty bytestring and indicate
-- it is not valid.
getPutOffset :: Key -> AssociatedFile -> Proto (Either [UUID] Offset)
getPutOffset key af = do
net $ sendMessage (PUT (ProtoAssociatedFile af) key)
r <- net receiveMessage
case r of
Just (PUT_FROM offset) -> do
void $ sendContent' nullMeterUpdate (Len 0) L.empty $
return Invalid
return (Right offset)
Just ALREADY_HAVE -> return (Left [])
Just (ALREADY_HAVE_PLUS uuids) -> return (Left uuids)
_ -> do
net $ sendMessage (ERROR "expected PUT_FROM or ALREADY_HAVE")
return (Left [])
data ServerHandler a data ServerHandler a
= ServerGot a = ServerGot a
| ServerContinue | ServerContinue
@ -471,7 +524,14 @@ data ServerHandler a
-- Server loop, getting messages from the client and handling them -- Server loop, getting messages from the client and handling them
serverLoop :: (Message -> Proto (ServerHandler a)) -> Proto (Maybe a) serverLoop :: (Message -> Proto (ServerHandler a)) -> Proto (Maybe a)
serverLoop a = do serverLoop a = serveOneMessage a serverLoop
-- Get one message from the client and handle it.
serveOneMessage
:: (Message -> Proto (ServerHandler a))
-> ((Message -> Proto (ServerHandler a)) -> Proto (Maybe a))
-> Proto (Maybe a)
serveOneMessage a cont = do
mcmd <- net receiveMessage mcmd <- net receiveMessage
case mcmd of case mcmd of
-- When the client sends ERROR to the server, the server -- When the client sends ERROR to the server, the server
@ -479,16 +539,16 @@ serverLoop a = do
-- is in, and so not possible to recover. -- is in, and so not possible to recover.
Just (ERROR _) -> return Nothing Just (ERROR _) -> return Nothing
-- When the client sends an unparsable message, the server -- When the client sends an unparsable message, the server
-- responds with an error message, and loops. This allows -- responds with an error message, and continues. This allows
-- expanding the protocol with new messages. -- expanding the protocol with new messages.
Nothing -> do Nothing -> do
net $ sendMessage (ERROR "unknown command") net $ sendMessage (ERROR "unknown command")
serverLoop a cont a
Just cmd -> do Just cmd -> do
v <- a cmd v <- a cmd
case v of case v of
ServerGot r -> return (Just r) ServerGot r -> return (Just r)
ServerContinue -> serverLoop a ServerContinue -> cont a
-- If the client sends an unexpected message, -- If the client sends an unexpected message,
-- the server will respond with ERROR, and -- the server will respond with ERROR, and
-- always continues processing messages. -- always continues processing messages.
@ -500,7 +560,7 @@ serverLoop a = do
-- support some new feature, and fall back. -- support some new feature, and fall back.
ServerUnexpected -> do ServerUnexpected -> do
net $ sendMessage (ERROR "unexpected command") net $ sendMessage (ERROR "unexpected command")
serverLoop a cont a
-- | Serve the protocol, with an unauthenticated peer. Once the peer -- | Serve the protocol, with an unauthenticated peer. Once the peer
-- successfully authenticates, returns their UUID. -- successfully authenticates, returns their UUID.
@ -525,11 +585,22 @@ data ServerMode
-- ^ Allow reading, and storing new objects, but not deleting objects. -- ^ Allow reading, and storing new objects, but not deleting objects.
| ServeReadWrite | ServeReadWrite
-- ^ Full read and write access. -- ^ Full read and write access.
deriving (Eq, Ord) deriving (Show, Eq, Ord)
-- | Serve the protocol, with a peer that has authenticated. -- | Serve the protocol, with a peer that has authenticated.
serveAuthed :: ServerMode -> UUID -> Proto () serveAuthed :: ServerMode -> UUID -> Proto ()
serveAuthed servermode myuuid = void $ serverLoop handler serveAuthed servermode myuuid = void $ serverLoop $
serverHandler servermode myuuid
-- | Serve a single command in the protocol, the same as serveAuthed,
-- but without looping to handle the next command.
serveOneCommandAuthed :: ServerMode -> UUID -> Proto ()
serveOneCommandAuthed servermode myuuid = fromMaybe () <$>
serveOneMessage (serverHandler servermode myuuid)
(const $ pure Nothing)
serverHandler :: ServerMode -> UUID -> Message -> Proto (ServerHandler ())
serverHandler servermode myuuid = handler
where where
handler (VERSION theirversion) = do handler (VERSION theirversion) = do
let v = min theirversion maxProtocolVersion let v = min theirversion maxProtocolVersion
@ -650,16 +721,21 @@ sendContent key af o offset@(Offset n) p = go =<< local (contentSize key)
else local $ readContent key af o offset $ else local $ readContent key af o offset $
sender (Len len) sender (Len len)
-- Content not available to send. Indicate this by sending -- Content not available to send. Indicate this by sending
-- empty data and indlicate it's invalid. -- empty data and indicate it's invalid.
go Nothing = sender (Len 0) L.empty (return Invalid) go Nothing = sender (Len 0) L.empty (return Invalid)
sender len content validitycheck = do
let p' = offsetMeterUpdate p (toBytesProcessed n) sender = sendContent' p'
net $ sendMessage (DATA len)
net $ sendBytes len content p' p' = offsetMeterUpdate p (toBytesProcessed n)
ver <- net getProtocolVersion
when (ver >= ProtocolVersion 1) $ sendContent' :: MeterUpdate -> Len -> L.ByteString -> Proto Validity -> Proto (Maybe [UUID])
net . sendMessage . VALIDITY =<< validitycheck sendContent' p len content validitycheck = do
checkSuccessPlus net $ sendMessage (DATA len)
net $ sendBytes len content p
ver <- net getProtocolVersion
when (ver >= ProtocolVersion 1) $
net . sendMessage . VALIDITY =<< validitycheck
checkSuccessPlus
receiveContent receiveContent
:: Observable t :: Observable t

View file

@ -45,6 +45,9 @@ data RemoteSide = RemoteSide
, remoteSideId :: RemoteSideId , remoteSideId :: RemoteSideId
} }
instance Show RemoteSide where
show rs = show (remote rs)
mkRemoteSide :: Remote -> Annex (Maybe (RunState, P2PConnection, ProtoCloser)) -> Annex RemoteSide mkRemoteSide :: Remote -> Annex (Maybe (RunState, P2PConnection, ProtoCloser)) -> Annex RemoteSide
mkRemoteSide r remoteconnect = RemoteSide mkRemoteSide r remoteconnect = RemoteSide
<$> pure r <$> pure r
@ -76,7 +79,6 @@ closeRemoteSide remoteside =
data ProxySelector = ProxySelector data ProxySelector = ProxySelector
{ proxyCHECKPRESENT :: Key -> Annex (Maybe RemoteSide) { proxyCHECKPRESENT :: Key -> Annex (Maybe RemoteSide)
, proxyLOCKCONTENT :: Key -> Annex (Maybe RemoteSide) , proxyLOCKCONTENT :: Key -> Annex (Maybe RemoteSide)
, proxyUNLOCKCONTENT :: Annex (Maybe RemoteSide)
, proxyREMOVE :: Key -> Annex [RemoteSide] , proxyREMOVE :: Key -> Annex [RemoteSide]
-- ^ remove from all of these remotes -- ^ remove from all of these remotes
, proxyGETTIMESTAMP :: Annex [RemoteSide] , proxyGETTIMESTAMP :: Annex [RemoteSide]
@ -91,7 +93,6 @@ singleProxySelector :: RemoteSide -> ProxySelector
singleProxySelector r = ProxySelector singleProxySelector r = ProxySelector
{ proxyCHECKPRESENT = const (pure (Just r)) { proxyCHECKPRESENT = const (pure (Just r))
, proxyLOCKCONTENT = const (pure (Just r)) , proxyLOCKCONTENT = const (pure (Just r))
, proxyUNLOCKCONTENT = pure (Just r)
, proxyREMOVE = const (pure [r]) , proxyREMOVE = const (pure [r])
, proxyGETTIMESTAMP = pure [r] , proxyGETTIMESTAMP = pure [r]
, proxyGET = const (pure (Just r)) , proxyGET = const (pure (Just r))
@ -200,86 +201,88 @@ mkProxyState = ProxyState
<$> newTVarIO mempty <$> newTVarIO mempty
<*> newTVarIO Nothing <*> newTVarIO Nothing
data ProxyParams = ProxyParams
{ proxyMethods :: ProxyMethods
, proxyState :: ProxyState
, proxyServerMode :: ServerMode
, proxyClientSide :: ClientSide
, proxyUUID :: UUID
, proxySelector :: ProxySelector
, proxyConcurrencyConfig :: ConcurrencyConfig
, proxyClientProtocolVersion :: ProtocolVersion
-- ^ The remote(s) may speak an earlier version, or the same
-- version, but not a later version.
}
{- Proxy between the client and the remote. This picks up after {- Proxy between the client and the remote. This picks up after
- sendClientProtocolVersion. - sendClientProtocolVersion.
-} -}
proxy proxy
:: Annex r :: Annex r
-> ProxyMethods -> ProxyParams
-> ProxyState
-> ServerMode
-> ClientSide
-> UUID
-> ProxySelector
-> ConcurrencyConfig
-> ProtocolVersion
-- ^ Protocol version being spoken between the proxy and the
-- client. When there are multiple remotes, some may speak an
-- earlier version.
-> Maybe Message -> Maybe Message
-- ^ non-VERSION message that was received from the client when -- ^ non-VERSION message that was received from the client when
-- negotiating protocol version, and has not been responded to yet -- negotiating protocol version, and has not been responded to yet
-> ProtoErrorHandled r -> ProtoErrorHandled r
proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clientconn) remoteuuid proxyselector concurrencyconfig (ProtocolVersion protocolversion) othermsg protoerrhandler = do proxy proxydone proxyparams othermsg protoerrhandler = do
case othermsg of case othermsg of
Nothing -> proxynextclientmessage () Nothing -> proxynextclientmessage ()
Just message -> proxyclientmessage (Just message) Just message -> proxyclientmessage (Just message)
where where
client = liftIO . runNetProto clientrunst clientconn proxyclientmessage Nothing = proxydone
proxyclientmessage (Just message) = proxyRequest
proxydone proxyparams proxynextclientmessage
message protoerrhandler
proxynextclientmessage () = protoerrhandler proxyclientmessage $ proxynextclientmessage () = protoerrhandler proxyclientmessage $
client (net receiveMessage) client (net receiveMessage)
client = liftIO . runNetProto clientrunst clientconn
ClientSide clientrunst clientconn = proxyClientSide proxyparams
servermodechecker c a = c servermode $ \case {- Handles proxying a single request between the client and remote. -}
Nothing -> a proxyRequest
Just notallowed -> :: Annex r
protoerrhandler proxynextclientmessage $ -> ProxyParams
client notallowed -> (() -> Annex r) -- ^ called once the request has been handled
-> Message
proxyclientmessage Nothing = proxydone -> ProtoErrorHandled r
proxyclientmessage (Just message) = case message of proxyRequest proxydone proxyparams requestcomplete requestmessage protoerrhandler =
CHECKPRESENT k -> proxyCHECKPRESENT proxyselector k >>= \case case requestmessage of
CHECKPRESENT k -> proxyCHECKPRESENT (proxySelector proxyparams) k >>= \case
Just remoteside -> Just remoteside ->
proxyresponse remoteside message proxyresponse remoteside requestmessage
(const proxynextclientmessage) (const requestcomplete)
Nothing -> Nothing ->
protoerrhandler proxynextclientmessage $ protoerrhandler requestcomplete $
client $ net $ sendMessage FAILURE client $ net $ sendMessage FAILURE
LOCKCONTENT k -> proxyLOCKCONTENT proxyselector k >>= \case LOCKCONTENT k -> proxyLOCKCONTENT (proxySelector proxyparams) k >>= \case
Just remoteside -> Just remoteside ->
proxyresponse remoteside message handleLOCKCONTENT remoteside requestmessage
(const proxynextclientmessage)
Nothing -> Nothing ->
protoerrhandler proxynextclientmessage $ protoerrhandler requestcomplete $
client $ net $ sendMessage FAILURE client $ net $ sendMessage FAILURE
UNLOCKCONTENT -> proxyUNLOCKCONTENT proxyselector >>= \case
Just remoteside ->
proxynoresponse remoteside message
proxynextclientmessage
Nothing -> proxynextclientmessage ()
REMOVE k -> do REMOVE k -> do
remotesides <- proxyREMOVE proxyselector k remotesides <- proxyREMOVE (proxySelector proxyparams) k
servermodechecker checkREMOVEServerMode $ servermodechecker checkREMOVEServerMode $
handleREMOVE remotesides k message handleREMOVE remotesides k requestmessage
REMOVE_BEFORE _ k -> do REMOVE_BEFORE _ k -> do
remotesides <- proxyREMOVE proxyselector k remotesides <- proxyREMOVE (proxySelector proxyparams) k
servermodechecker checkREMOVEServerMode $ servermodechecker checkREMOVEServerMode $
handleREMOVE remotesides k message handleREMOVE remotesides k requestmessage
GETTIMESTAMP -> do GETTIMESTAMP -> do
remotesides <- proxyGETTIMESTAMP proxyselector remotesides <- proxyGETTIMESTAMP (proxySelector proxyparams)
handleGETTIMESTAMP remotesides handleGETTIMESTAMP remotesides
GET _ _ k -> proxyGET proxyselector k >>= \case GET _ _ k -> proxyGET (proxySelector proxyparams) k >>= \case
Just remoteside -> handleGET remoteside message Just remoteside -> handleGET remoteside requestmessage
Nothing -> Nothing -> handleGETNoRemoteSide
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage $
ERROR "content not present"
PUT paf k -> do PUT paf k -> do
af <- getassociatedfile paf af <- getassociatedfile paf
remotesides <- proxyPUT proxyselector af k remotesides <- proxyPUT (proxySelector proxyparams) af k
servermodechecker checkPUTServerMode $ servermodechecker checkPUTServerMode $
handlePUT remotesides k message handlePUT remotesides k requestmessage
BYPASS _ -> proxynextclientmessage () BYPASS _ -> requestcomplete ()
-- These messages involve the git repository, not the -- These messages involve the git repository, not the
-- annex. So they affect the git repository of the proxy, -- annex. So they affect the git repository of the proxy,
-- not the remote. -- not the remote.
@ -298,6 +301,7 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
FAILURE_PLUS _ -> protoerr FAILURE_PLUS _ -> protoerr
DATA _ -> protoerr DATA _ -> protoerr
VALIDITY _ -> protoerr VALIDITY _ -> protoerr
UNLOCKCONTENT -> protoerr
-- If the client errors out, give up. -- If the client errors out, give up.
ERROR msg -> giveup $ "client error: " ++ msg ERROR msg -> giveup $ "client error: " ++ msg
-- Messages that only the server should send. -- Messages that only the server should send.
@ -312,6 +316,16 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
-- Early messages that the client should not send now. -- Early messages that the client should not send now.
AUTH _ _ -> protoerr AUTH _ _ -> protoerr
VERSION _ -> protoerr VERSION _ -> protoerr
where
client = liftIO . runNetProto clientrunst clientconn
ClientSide clientrunst clientconn = proxyClientSide proxyparams
servermodechecker c a = c (proxyServerMode proxyparams) $ \case
Nothing -> a
Just notallowed ->
protoerrhandler requestcomplete $
client notallowed
-- Send a message to the remote, send its response back to the -- Send a message to the remote, send its response back to the
-- client, and pass it to the continuation. -- client, and pass it to the continuation.
@ -320,11 +334,6 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
protoerrhandler (a resp) $ protoerrhandler (a resp) $
client $ net $ sendMessage resp client $ net $ sendMessage resp
-- Send a message to the remote, that it will not respond to.
proxynoresponse remoteside message a =
protoerrhandler a $
runRemoteSide remoteside $ net $ sendMessage message
-- Send a message to the endpoint and get back its response. -- Send a message to the endpoint and get back its response.
getresponse endpoint message handleresp = getresponse endpoint message handleresp =
protoerrhandler (withresp handleresp) $ protoerrhandler (withresp handleresp) $
@ -340,26 +349,33 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
-- Read a message from one party, send it to the other, -- Read a message from one party, send it to the other,
-- and then pass the message to the continuation. -- and then pass the message to the continuation.
relayonemessage from to cont = relayonemessage from to cont =
flip protoerrhandler (from $ net $ receiveMessage) $ flip protoerrhandler (from $ net receiveMessage) $
withresp $ \message -> withresp $ \message ->
protoerrhandler (cont message) $ protoerrhandler (cont message) $
to $ net $ sendMessage message to $ net $ sendMessage message
protoerr = do protoerr = do
_ <- client $ net $ sendMessage (ERROR "protocol error X") _ <- client $ net $ sendMessage (ERROR "protocol error")
giveup "protocol error M" giveup "protocol error"
handleLOCKCONTENT remoteside msg =
proxyresponse remoteside msg $ \r () -> case r of
SUCCESS -> relayonemessage client
(runRemoteSide remoteside)
(const requestcomplete)
FAILURE -> requestcomplete ()
_ -> requestcomplete ()
-- When there is a single remote, reply with its timestamp, -- When there is a single remote, reply with its timestamp,
-- to avoid needing timestamp translation. -- to avoid needing timestamp translation.
handleGETTIMESTAMP (remoteside:[]) = do handleGETTIMESTAMP (remoteside:[]) = do
liftIO $ hPutStrLn stderr "!!!! single remote side"
liftIO $ atomically $ do liftIO $ atomically $ do
writeTVar (proxyRemoteLatestTimestamps proxystate) writeTVar (proxyRemoteLatestTimestamps (proxyState proxyparams))
mempty mempty
writeTVar (proxyRemoteLatestLocalTimestamp proxystate) writeTVar (proxyRemoteLatestLocalTimestamp (proxyState proxyparams))
Nothing Nothing
proxyresponse remoteside GETTIMESTAMP proxyresponse remoteside GETTIMESTAMP
(const proxynextclientmessage) (const requestcomplete)
-- When there are multiple remotes, reply with our local timestamp, -- When there are multiple remotes, reply with our local timestamp,
-- and do timestamp translation when sending REMOVE-FROM. -- and do timestamp translation when sending REMOVE-FROM.
handleGETTIMESTAMP remotesides = do handleGETTIMESTAMP remotesides = do
@ -371,14 +387,14 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
remotetimes <- (M.fromList . mapMaybe join) <$> getremotetimes remotetimes <- (M.fromList . mapMaybe join) <$> getremotetimes
localtime <- liftIO currentMonotonicTimestamp localtime <- liftIO currentMonotonicTimestamp
liftIO $ atomically $ do liftIO $ atomically $ do
writeTVar (proxyRemoteLatestTimestamps proxystate) writeTVar (proxyRemoteLatestTimestamps (proxyState proxyparams))
remotetimes remotetimes
writeTVar (proxyRemoteLatestLocalTimestamp proxystate) writeTVar (proxyRemoteLatestLocalTimestamp (proxyState proxyparams))
(Just localtime) (Just localtime)
protoerrhandler proxynextclientmessage $ protoerrhandler requestcomplete $
client $ net $ sendMessage (TIMESTAMP localtime) client $ net $ sendMessage (TIMESTAMP localtime)
where where
getremotetimes = forMC concurrencyconfig remotesides $ \r -> getremotetimes = forMC (proxyConcurrencyConfig proxyparams) remotesides $ \r ->
runRemoteSideOrSkipFailed r $ do runRemoteSideOrSkipFailed r $ do
net $ sendMessage GETTIMESTAMP net $ sendMessage GETTIMESTAMP
net receiveMessage >>= return . \case net receiveMessage >>= return . \case
@ -395,14 +411,14 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
handleREMOVE [] _ _ = handleREMOVE [] _ _ =
-- When no places are provided to remove from, -- When no places are provided to remove from,
-- don't report a successful remote. -- don't report a successful remote.
protoerrhandler proxynextclientmessage $ protoerrhandler requestcomplete $
client $ net $ sendMessage FAILURE client $ net $ sendMessage FAILURE
handleREMOVE remotesides k message = do handleREMOVE remotesides k message = do
tsm <- liftIO $ readTVarIO $ tsm <- liftIO $ readTVarIO $
proxyRemoteLatestTimestamps proxystate proxyRemoteLatestTimestamps (proxyState proxyparams)
oldlocaltime <- liftIO $ readTVarIO $ oldlocaltime <- liftIO $ readTVarIO $
proxyRemoteLatestLocalTimestamp proxystate proxyRemoteLatestLocalTimestamp (proxyState proxyparams)
v <- forMC concurrencyconfig remotesides $ \r -> v <- forMC (proxyConcurrencyConfig proxyparams) remotesides $ \r ->
runRemoteSideOrSkipFailed r $ do runRemoteSideOrSkipFailed r $ do
case message of case message of
REMOVE_BEFORE ts _ -> do REMOVE_BEFORE ts _ -> do
@ -427,11 +443,11 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
_ -> Nothing _ -> Nothing
let v' = map join v let v' = map join v
let us = concatMap snd $ catMaybes v' let us = concatMap snd $ catMaybes v'
mapM_ (\u -> removedContent proxymethods u k) us mapM_ (\u -> removedContent (proxyMethods proxyparams) u k) us
protoerrhandler proxynextclientmessage $ protoerrhandler requestcomplete $
client $ net $ sendMessage $ client $ net $ sendMessage $
let nonplussed = all (== remoteuuid) us let nonplussed = all (== proxyUUID proxyparams) us
|| protocolversion < 2 || proxyClientProtocolVersion proxyparams < ProtocolVersion 2
in if all (maybe False (fst . fst)) v' in if all (maybe False (fst . fst)) v'
then if nonplussed then if nonplussed
then SUCCESS then SUCCESS
@ -441,19 +457,28 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
[] -> FAILURE [] -> FAILURE
(err:_) -> ERROR err (err:_) -> ERROR err
else FAILURE_PLUS us else FAILURE_PLUS us
-- Send an empty DATA and indicate it was invalid.
handleGETNoRemoteSide = protoerrhandler requestcomplete $
client $ net $ do
sendMessage $ DATA (Len 0)
sendBytes (Len 0) mempty nullMeterUpdate
when (proxyClientProtocolVersion proxyparams /= ProtocolVersion 0) $
sendMessage (VALIDITY Invalid)
void $ receiveMessage
handleGET remoteside message = getresponse (runRemoteSide remoteside) message $ handleGET remoteside message = getresponse (runRemoteSide remoteside) message $
withDATA (relayGET remoteside) $ \case withDATA (relayGET remoteside) $ \case
ERROR err -> protoerrhandler proxynextclientmessage $ ERROR err -> protoerrhandler requestcomplete $
client $ net $ sendMessage (ERROR err) client $ net $ sendMessage (ERROR err)
_ -> protoerr _ -> protoerr
handlePUT (remoteside:[]) k message handlePUT (remoteside:[]) k message
| Remote.uuid (remote remoteside) == remoteuuid = | Remote.uuid (remote remoteside) == proxyUUID proxyparams =
getresponse (runRemoteSide remoteside) message $ \resp -> case resp of getresponse (runRemoteSide remoteside) message $ \resp -> case resp of
ALREADY_HAVE -> protoerrhandler proxynextclientmessage $ ALREADY_HAVE -> protoerrhandler requestcomplete $
client $ net $ sendMessage resp client $ net $ sendMessage resp
ALREADY_HAVE_PLUS _ -> protoerrhandler proxynextclientmessage $ ALREADY_HAVE_PLUS _ -> protoerrhandler requestcomplete $
client $ net $ sendMessage resp client $ net $ sendMessage resp
PUT_FROM _ -> PUT_FROM _ ->
getresponse client resp $ getresponse client resp $
@ -462,7 +487,7 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
(const protoerr) (const protoerr)
_ -> protoerr _ -> protoerr
handlePUT [] _ _ = handlePUT [] _ _ =
protoerrhandler proxynextclientmessage $ protoerrhandler requestcomplete $
client $ net $ sendMessage ALREADY_HAVE client $ net $ sendMessage ALREADY_HAVE
handlePUT remotesides k message = handlePUT remotesides k message =
handlePutMulti remotesides k message handlePutMulti remotesides k message
@ -474,8 +499,8 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
relayDATACore len (runRemoteSide remoteside) client $ relayDATACore len (runRemoteSide remoteside) client $
relayDATAFinish (runRemoteSide remoteside) client $ relayDATAFinish (runRemoteSide remoteside) client $
relayonemessage client (runRemoteSide remoteside) $ relayonemessage client (runRemoteSide remoteside) $
const proxynextclientmessage const requestcomplete
relayPUT remoteside k len = relayDATAStart (runRemoteSide remoteside) $ relayPUT remoteside k len = relayDATAStart (runRemoteSide remoteside) $
relayDATACore len client (runRemoteSide remoteside) $ relayDATACore len client (runRemoteSide remoteside) $
relayDATAFinish client (runRemoteSide remoteside) $ relayDATAFinish client (runRemoteSide remoteside) $
@ -483,15 +508,15 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
where where
finished resp () = do finished resp () = do
void $ relayPUTRecord k remoteside resp void $ relayPUTRecord k remoteside resp
proxynextclientmessage () requestcomplete ()
relayPUTRecord k remoteside SUCCESS = do relayPUTRecord k remoteside SUCCESS = do
addedContent proxymethods (Remote.uuid (remote remoteside)) k addedContent (proxyMethods proxyparams) (Remote.uuid (remote remoteside)) k
return $ Just [Remote.uuid (remote remoteside)] return $ Just [Remote.uuid (remote remoteside)]
relayPUTRecord k remoteside (SUCCESS_PLUS us) = do relayPUTRecord k remoteside (SUCCESS_PLUS us) = do
let us' = (Remote.uuid (remote remoteside)) : us let us' = (Remote.uuid (remote remoteside)) : us
forM_ us' $ \u -> forM_ us' $ \u ->
addedContent proxymethods u k addedContent (proxyMethods proxyparams) u k
return $ Just us' return $ Just us'
relayPUTRecord _ _ _ = relayPUTRecord _ _ _ =
return Nothing return Nothing
@ -513,14 +538,14 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
let alreadyhave = \case let alreadyhave = \case
Right (Left _) -> True Right (Left _) -> True
_ -> False _ -> False
l <- forMC concurrencyconfig remotesides initiate l <- forMC (proxyConcurrencyConfig proxyparams) remotesides initiate
if all alreadyhave l if all alreadyhave l
then if protocolversion < 2 then if proxyClientProtocolVersion proxyparams < ProtocolVersion 2
then protoerrhandler proxynextclientmessage $ then protoerrhandler requestcomplete $
client $ net $ sendMessage ALREADY_HAVE client $ net $ sendMessage ALREADY_HAVE
else protoerrhandler proxynextclientmessage $ else protoerrhandler requestcomplete $
client $ net $ sendMessage $ ALREADY_HAVE_PLUS $ client $ net $ sendMessage $ ALREADY_HAVE_PLUS $
filter (/= remoteuuid) $ filter (/= proxyUUID proxyparams) $
map (Remote.uuid . remote) (lefts (rights l)) map (Remote.uuid . remote) (lefts (rights l))
else if null (rights l) else if null (rights l)
-- no response from any remote -- no response from any remote
@ -533,10 +558,9 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
(const protoerr) (const protoerr)
relayPUTMulti minoffset remotes k (Len datalen) _ = do relayPUTMulti minoffset remotes k (Len datalen) _ = do
let totallen = datalen + minoffset
-- Tell each remote how much data to expect, depending -- Tell each remote how much data to expect, depending
-- on the remote's offset. -- on the remote's offset.
rs <- forMC concurrencyconfig remotes $ \r@(remoteside, remoteoffset) -> rs <- forMC (proxyConcurrencyConfig proxyparams) remotes $ \r@(remoteside, remoteoffset) ->
runRemoteSideOrSkipFailed remoteside $ do runRemoteSideOrSkipFailed remoteside $ do
net $ sendMessage $ DATA $ Len $ net $ sendMessage $ DATA $ Len $
totallen - remoteoffset totallen - remoteoffset
@ -544,6 +568,8 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
protoerrhandler (send (catMaybes rs) minoffset) $ protoerrhandler (send (catMaybes rs) minoffset) $
client $ net $ receiveBytes (Len datalen) nullMeterUpdate client $ net $ receiveBytes (Len datalen) nullMeterUpdate
where where
totallen = datalen + minoffset
chunksize = fromIntegral defaultChunkSize chunksize = fromIntegral defaultChunkSize
-- Stream the lazy bytestring out to the remotes in chunks. -- Stream the lazy bytestring out to the remotes in chunks.
@ -553,7 +579,7 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
let (chunk, b') = L.splitAt chunksize b let (chunk, b') = L.splitAt chunksize b
let chunklen = fromIntegral (L.length chunk) let chunklen = fromIntegral (L.length chunk)
let !n' = n + chunklen let !n' = n + chunklen
rs' <- forMC concurrencyconfig rs $ \r@(remoteside, remoteoffset) -> rs' <- forMC (proxyConcurrencyConfig proxyparams) rs $ \r@(remoteside, remoteoffset) ->
if n >= remoteoffset if n >= remoteoffset
then runRemoteSideOrSkipFailed remoteside $ do then runRemoteSideOrSkipFailed remoteside $ do
net $ sendBytes (Len chunklen) chunk nullMeterUpdate net $ sendBytes (Len chunklen) chunk nullMeterUpdate
@ -568,13 +594,21 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
return r return r
else return (Just r) else return (Just r)
if L.null b' if L.null b'
then sent (catMaybes rs') then do
-- If we didn't receive as much
-- data as expected, close
-- connections to all the remotes,
-- because they are still waiting
-- on the rest of the data.
when (n' /= totallen) $
mapM_ (closeRemoteSide . fst) rs
sent (catMaybes rs')
else send (catMaybes rs') n' b' else send (catMaybes rs') n' b'
sent [] = proxydone sent [] = proxydone
sent rs = relayDATAFinishMulti k (map fst rs) sent rs = relayDATAFinishMulti k (map fst rs)
runRemoteSideOrSkipFailed remoteside a = runRemoteSideOrSkipFailed remoteside a =
runRemoteSide remoteside a >>= \case runRemoteSide remoteside a >>= \case
Right v -> return (Just v) Right v -> return (Just v)
Left _ -> do Left _ -> do
@ -594,16 +628,16 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
y $ net $ sendBytes len b nullMeterUpdate y $ net $ sendBytes len b nullMeterUpdate
relayDATAFinish x y sendsuccessfailure () relayDATAFinish x y sendsuccessfailure ()
| protocolversion == 0 = sendsuccessfailure | proxyClientProtocolVersion proxyparams == ProtocolVersion 0 = sendsuccessfailure
-- Protocol version 1 has a VALID or -- Protocol version 1 has a VALID or
-- INVALID message after the data. -- INVALID message after the data.
| otherwise = relayonemessage x y (\_ () -> sendsuccessfailure) | otherwise = relayonemessage x y (\_ () -> sendsuccessfailure)
relayDATAFinishMulti k rs relayDATAFinishMulti k rs
| protocolversion == 0 = | proxyClientProtocolVersion proxyparams == ProtocolVersion 0 =
finish $ net receiveMessage finish $ net receiveMessage
| otherwise = | otherwise =
flip protoerrhandler (client $ net $ receiveMessage) $ flip protoerrhandler (client $ net receiveMessage) $
withresp $ \message -> withresp $ \message ->
finish $ do finish $ do
-- Relay VALID or INVALID message -- Relay VALID or INVALID message
@ -615,17 +649,17 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
net receiveMessage net receiveMessage
where where
finish a = do finish a = do
storeduuids <- forMC concurrencyconfig rs $ \r -> storeduuids <- forMC (proxyConcurrencyConfig proxyparams) rs $ \r ->
runRemoteSideOrSkipFailed r a >>= \case runRemoteSideOrSkipFailed r a >>= \case
Just (Just resp) -> Just (Just resp) ->
relayPUTRecord k r resp relayPUTRecord k r resp
_ -> return Nothing _ -> return Nothing
protoerrhandler proxynextclientmessage $ protoerrhandler requestcomplete $
client $ net $ sendMessage $ client $ net $ sendMessage $
case concat (catMaybes storeduuids) of case concat (catMaybes storeduuids) of
[] -> FAILURE [] -> FAILURE
us us
| protocolversion < 2 -> SUCCESS | proxyClientProtocolVersion proxyparams < ProtocolVersion 2 -> SUCCESS
| otherwise -> SUCCESS_PLUS us | otherwise -> SUCCESS_PLUS us
-- The associated file received from the P2P protocol -- The associated file received from the P2P protocol
@ -640,10 +674,13 @@ proxy proxydone proxymethods proxystate servermode (ClientSide clientrunst clien
data ConcurrencyConfig = ConcurrencyConfig Int (MSem.MSem Int) data ConcurrencyConfig = ConcurrencyConfig Int (MSem.MSem Int)
noConcurrencyConfig :: Annex ConcurrencyConfig noConcurrencyConfig :: Annex ConcurrencyConfig
noConcurrencyConfig = liftIO $ ConcurrencyConfig 1 <$> MSem.new 1 noConcurrencyConfig = mkConcurrencyConfig 1
getConcurrencyConfig :: Annex ConcurrencyConfig mkConcurrencyConfig :: Int -> Annex ConcurrencyConfig
getConcurrencyConfig = (annexJobs <$> Annex.getGitConfig) >>= \case mkConcurrencyConfig n = liftIO $ ConcurrencyConfig n <$> MSem.new n
concurrencyConfigJobs :: Annex ConcurrencyConfig
concurrencyConfigJobs = (annexJobs <$> Annex.getGitConfig) >>= \case
NonConcurrent -> noConcurrencyConfig NonConcurrent -> noConcurrencyConfig
Concurrent n -> go n Concurrent n -> go n
ConcurrentPerCpu -> go =<< liftIO getNumProcessors ConcurrentPerCpu -> go =<< liftIO getNumProcessors
@ -653,8 +690,7 @@ getConcurrencyConfig = (annexJobs <$> Annex.getGitConfig) >>= \case
when (n > c) $ when (n > c) $
liftIO $ setNumCapabilities n liftIO $ setNumCapabilities n
setConcurrency (ConcurrencyGitConfig (Concurrent n)) setConcurrency (ConcurrencyGitConfig (Concurrent n))
msem <- liftIO $ MSem.new n mkConcurrencyConfig n
return (ConcurrencyConfig n msem)
forMC :: ConcurrencyConfig -> [a] -> (a -> Annex b) -> Annex [b] forMC :: ConcurrencyConfig -> [a] -> (a -> Annex b) -> Annex [b]
forMC _ (x:[]) a = do forMC _ (x:[]) a = do

View file

@ -57,7 +57,10 @@ import qualified Remote.GCrypt
import qualified Remote.GitLFS import qualified Remote.GitLFS
import qualified Remote.P2P import qualified Remote.P2P
import qualified Remote.Helper.P2P as P2PHelper import qualified Remote.Helper.P2P as P2PHelper
import qualified P2P.Protocol as P2P
import P2P.Address import P2P.Address
import P2P.Http.Url
import P2P.Http.Client
import Annex.Path import Annex.Path
import Creds import Creds
import Types.NumCopies import Types.NumCopies
@ -103,14 +106,20 @@ list autoinit = do
proxied <- listProxied proxies rs' proxied <- listProxied proxies rs'
return (proxied++rs') return (proxied++rs')
where where
annexurl r = remoteConfig r "annexurl"
tweakurl c r = do tweakurl c r = do
let n = fromJust $ Git.remoteName r let n = fromJust $ Git.remoteName r
case M.lookup (annexurl r) c of case getAnnexUrl r c of
Nothing -> return r Just url | not (isP2PHttpProtocolUrl url) ->
Just url -> inRepo $ \g -> inRepo $ \g -> Git.Construct.remoteNamed n $
Git.Construct.remoteNamed n $ Git.Construct.fromRemoteLocation url
Git.Construct.fromRemoteLocation (Git.fromConfigValue url) False g False g
_ -> return r
getAnnexUrl :: Git.Repo -> M.Map Git.ConfigKey Git.ConfigValue -> Maybe String
getAnnexUrl r c = Git.fromConfigValue <$> M.lookup (annexUrlConfigKey r) c
annexUrlConfigKey :: Git.Repo -> Git.ConfigKey
annexUrlConfigKey r = remoteConfig r "annexurl"
isGitRemoteAnnex :: Git.Repo -> Bool isGitRemoteAnnex :: Git.Repo -> Bool
isGitRemoteAnnex r = "annex::" `isPrefixOf` Git.repoLocation r isGitRemoteAnnex r = "annex::" `isPrefixOf` Git.repoLocation r
@ -160,8 +169,9 @@ enableRemote Nothing _ = giveup "unable to enable git remote with no specified u
- done each time git-annex is run in a way that uses remotes, unless - done each time git-annex is run in a way that uses remotes, unless
- annex-checkuuid is false. - annex-checkuuid is false.
- -
- Conversely, the config of an URL remote is only read when there is no - The config of other URL remotes is only read when there is no
- cached UUID value. -} - cached UUID value.
-}
configRead :: Bool -> Git.Repo -> Annex Git.Repo configRead :: Bool -> Git.Repo -> Annex Git.Repo
configRead autoinit r = do configRead autoinit r = do
gc <- Annex.getRemoteGitConfig r gc <- Annex.getRemoteGitConfig r
@ -177,7 +187,6 @@ configRead autoinit r = do
Just r' -> return r' Just r' -> return r'
_ -> return r _ -> return r
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r u rc gc rs gen r u rc gc rs
-- Remote.GitLFS may be used with a repo that is also encrypted -- Remote.GitLFS may be used with a repo that is also encrypted
@ -236,7 +245,7 @@ defaultRepoCost r
| otherwise = expensiveRemoteCost | otherwise = expensiveRemoteCost
unavailable :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) unavailable :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
unavailable r = gen r' unavailable r u c gc = gen r' u c gc'
where where
r' = case Git.location r of r' = case Git.location r of
Git.Local { Git.gitdir = d } -> Git.Local { Git.gitdir = d } ->
@ -247,6 +256,10 @@ unavailable r = gen r'
in r { Git.location = Git.Url (url { uriAuthority = Just auth' })} in r { Git.location = Git.Url (url { uriAuthority = Just auth' })}
Nothing -> r { Git.location = Git.Unknown } Nothing -> r { Git.location = Git.Unknown }
_ -> r -- already unavailable _ -> r -- already unavailable
gc' = gc
{ remoteAnnexP2PHttpUrl =
unavailableP2PHttpUrl <$> remoteAnnexP2PHttpUrl gc
}
{- Tries to read the config for a specified remote, updates state, and {- Tries to read the config for a specified remote, updates state, and
- returns the updated repo. -} - returns the updated repo. -}
@ -308,6 +321,12 @@ tryGitConfigRead autoinit r hasuuid
-- optimisation. -- optimisation.
unless (fromMaybe False $ Git.Config.isBare r') $ unless (fromMaybe False $ Git.Config.isBare r') $
setremote setRemoteBare False setremote setRemoteBare False
-- When annex.url is set to a P2P http url,
-- store in remote.name.annexUrl
case Git.fromConfigValue <$> Git.Config.getMaybe (annexConfig "url") r' of
Just u | isP2PHttpProtocolUrl u ->
setremote (setConfig . annexUrlConfigKey) u
_ -> noop
return r' return r'
Left err -> do Left err -> do
set_ignore "not usable by git-annex" False set_ignore "not usable by git-annex" False
@ -405,10 +424,12 @@ inAnnex rmt st key = do
inAnnex' :: Git.Repo -> Remote -> State -> Key -> Annex Bool inAnnex' :: Git.Repo -> Remote -> State -> Key -> Annex Bool
inAnnex' repo rmt st@(State connpool duc _ _ _) key inAnnex' repo rmt st@(State connpool duc _ _ _) key
| isP2PHttp rmt = checkp2phttp
| Git.repoIsHttp repo = checkhttp | Git.repoIsHttp repo = checkhttp
| Git.repoIsUrl repo = checkremote | Git.repoIsUrl repo = checkremote
| otherwise = checklocal | otherwise = checklocal
where where
checkp2phttp = p2pHttpClient rmt giveup (clientCheckPresent key)
checkhttp = do checkhttp = do
gc <- Annex.getGitConfig gc <- Annex.getGitConfig
Url.withUrlOptionsPromptingCreds $ \uo -> Url.withUrlOptionsPromptingCreds $ \uo ->
@ -445,15 +466,24 @@ dropKey r st proof key = do
dropKey' :: Git.Repo -> Remote -> State -> Maybe SafeDropProof -> Key -> Annex () dropKey' :: Git.Repo -> Remote -> State -> Maybe SafeDropProof -> Key -> Annex ()
dropKey' repo r st@(State connpool duc _ _ _) proof key dropKey' repo r st@(State connpool duc _ _ _) proof key
| isP2PHttp r =
clientRemoveWithProof proof key unabletoremove r >>= \case
RemoveResultPlus True fanoutuuids ->
storefanout fanoutuuids
RemoveResultPlus False fanoutuuids -> do
storefanout fanoutuuids
unabletoremove
| not $ Git.repoIsUrl repo = ifM duc | not $ Git.repoIsUrl repo = ifM duc
( guardUsable repo (giveup "cannot access remote") removelocal ( guardUsable repo (giveup "cannot access remote") removelocal
, giveup "remote does not have expected annex.uuid value" , giveup "remote does not have expected annex.uuid value"
) )
| Git.repoIsHttp repo = giveup "dropping from http remote not supported" | Git.repoIsHttp repo = giveup "dropping from this remote is not supported"
| otherwise = P2PHelper.remove (uuid r) p2prunner proof key | otherwise = P2PHelper.remove (uuid r) p2prunner proof key
where where
p2prunner = Ssh.runProto r connpool (return (Right False, Nothing)) p2prunner = Ssh.runProto r connpool (return (Right False, Nothing))
unabletoremove = giveup "removing content from remote failed"
-- It could take a long time to eg, automount a drive containing -- It could take a long time to eg, automount a drive containing
-- the repo, so check the proof for expiry again after locking the -- the repo, so check the proof for expiry again after locking the
-- content for removal. -- content for removal.
@ -475,14 +505,24 @@ dropKey' repo r st@(State connpool duc _ _ _) proof key
) )
unless proofunexpired unless proofunexpired
safeDropProofExpired safeDropProofExpired
storefanout = P2PHelper.storeFanout key InfoMissing (uuid r) . map fromB64UUID
lockKey :: Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r lockKey :: Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r
lockKey r st key callback = do lockKey r st key callback = do
repo <- getRepo r repo <- getRepo r
lockKey' repo r st key callback lockKey' repo r st key callback
lockKey' :: Git.Repo -> Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r lockKey' :: Git.Repo -> Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r
lockKey' repo r st@(State connpool duc _ _ _) key callback lockKey' repo r st@(State connpool duc _ _ _) key callback
| isP2PHttp r = do
showLocking r
p2pHttpClient r giveup (clientLockContent key) >>= \case
LockResult True (Just lckid) ->
p2pHttpClient r failedlock $
clientKeepLocked lckid (uuid r)
failedlock callback
_ -> failedlock
| not $ Git.repoIsUrl repo = ifM duc | not $ Git.repoIsUrl repo = ifM duc
( guardUsable repo failedlock $ do ( guardUsable repo failedlock $ do
inorigrepo <- Annex.makeRunner inorigrepo <- Annex.makeRunner
@ -509,7 +549,8 @@ copyFromRemote r st key file dest meterupdate vc = do
copyFromRemote'' repo r st key file dest meterupdate vc copyFromRemote'' repo r st key file dest meterupdate vc
copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc copyFromRemote'' repo r st@(State connpool _ _ _ _) key af dest meterupdate vc
| isP2PHttp r = copyp2phttp
| Git.repoIsHttp repo = verifyKeyContentIncrementally vc key $ \iv -> do | Git.repoIsHttp repo = verifyKeyContentIncrementally vc key $ \iv -> do
gc <- Annex.getGitConfig gc <- Annex.getGitConfig
ok <- Url.withUrlOptionsPromptingCreds $ ok <- Url.withUrlOptionsPromptingCreds $
@ -519,8 +560,6 @@ copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc
| not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ do | not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ do
u <- getUUID u <- getUUID
hardlink <- wantHardLink hardlink <- wantHardLink
let bwlimit = remoteAnnexBwLimitDownload (gitconfig r)
<|> remoteAnnexBwLimit (gitconfig r)
-- run copy from perspective of remote -- run copy from perspective of remote
onLocalFast st $ Annex.Content.prepSendAnnex' key Nothing >>= \case onLocalFast st $ Annex.Content.prepSendAnnex' key Nothing >>= \case
Just (object, _sz, check) -> do Just (object, _sz, check) -> do
@ -529,7 +568,7 @@ copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc
Nothing -> return True Nothing -> return True
copier <- mkFileCopier hardlink st copier <- mkFileCopier hardlink st
(ok, v) <- runTransfer (Transfer Download u (fromKey id key)) (ok, v) <- runTransfer (Transfer Download u (fromKey id key))
Nothing file Nothing stdRetry $ \p -> Nothing af Nothing stdRetry $ \p ->
metered (Just (combineMeterUpdate p meterupdate)) key bwlimit $ \_ p' -> metered (Just (combineMeterUpdate p meterupdate)) key bwlimit $ \_ p' ->
copier object dest key p' checksuccess vc copier object dest key p' checksuccess vc
if ok if ok
@ -540,8 +579,26 @@ copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc
P2PHelper.retrieve P2PHelper.retrieve
(gitconfig r) (gitconfig r)
(Ssh.runProto r connpool (return (False, UnVerified))) (Ssh.runProto r connpool (return (False, UnVerified)))
key file dest meterupdate vc key af dest meterupdate vc
| otherwise = giveup "copying from non-ssh, non-http remote not supported" | otherwise = giveup "copying from this remote is not supported"
where
bwlimit = remoteAnnexBwLimitDownload (gitconfig r)
<|> remoteAnnexBwLimit (gitconfig r)
copyp2phttp = verifyKeyContentIncrementally vc key $ \iv -> do
startsz <- liftIO $ tryWhenExists $
getFileSize (toRawFilePath dest)
bracketIO (openBinaryFile dest ReadWriteMode) (hClose) $ \h -> do
metered (Just meterupdate) key bwlimit $ \_ p -> do
p' <- case startsz of
Just startsz' -> liftIO $ do
resumeVerifyFromOffset startsz' iv p h
_ -> return p
let consumer = meteredWrite' p'
(writeVerifyChunk iv h)
p2pHttpClient r giveup (clientGet key af consumer startsz) >>= \case
Valid -> return ()
Invalid -> giveup "Transfer failed"
copyFromRemoteCheap :: State -> Git.Repo -> Maybe (Key -> AssociatedFile -> FilePath -> Annex ()) copyFromRemoteCheap :: State -> Git.Repo -> Maybe (Key -> AssociatedFile -> FilePath -> Annex ())
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
@ -568,9 +625,10 @@ copyToRemote r st key af o meterupdate = do
copyToRemote' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> Maybe FilePath -> MeterUpdate -> Annex () copyToRemote' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> Maybe FilePath -> MeterUpdate -> Annex ()
copyToRemote' repo r st@(State connpool duc _ _ _) key af o meterupdate copyToRemote' repo r st@(State connpool duc _ _ _) key af o meterupdate
| isP2PHttp r = prepsendwith copyp2phttp
| not $ Git.repoIsUrl repo = ifM duc | not $ Git.repoIsUrl repo = ifM duc
( guardUsable repo (giveup "cannot access remote") $ commitOnCleanup repo r st $ ( guardUsable repo (giveup "cannot access remote") $ commitOnCleanup repo r st $
copylocal =<< Annex.Content.prepSendAnnex' key o prepsendwith copylocal
, giveup "remote does not have expected annex.uuid value" , giveup "remote does not have expected annex.uuid value"
) )
| Git.repoIsSsh repo = | Git.repoIsSsh repo =
@ -578,18 +636,24 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key af o meterupdate
(Ssh.runProto r connpool (return Nothing)) (Ssh.runProto r connpool (return Nothing))
key af o meterupdate key af o meterupdate
| otherwise = giveup "copying to non-ssh repo not supported" | otherwise = giveup "copying to this remote is not supported"
where where
copylocal Nothing = giveup "content not available" prepsendwith a = Annex.Content.prepSendAnnex' key o >>= \case
copylocal (Just (object, sz, check)) = do Nothing -> giveup "content not available"
Just v -> a v
bwlimit = remoteAnnexBwLimitUpload (gitconfig r)
<|> remoteAnnexBwLimit (gitconfig r)
failedsend = giveup "failed to send content to remote"
copylocal (object, sz, check) = do
-- The check action is going to be run in -- The check action is going to be run in
-- the remote's Annex, but it needs access to the local -- the remote's Annex, but it needs access to the local
-- Annex monad's state. -- Annex monad's state.
checkio <- Annex.withCurrentState check checkio <- Annex.withCurrentState check
u <- getUUID u <- getUUID
hardlink <- wantHardLink hardlink <- wantHardLink
let bwlimit = remoteAnnexBwLimitUpload (gitconfig r)
<|> remoteAnnexBwLimit (gitconfig r)
-- run copy from perspective of remote -- run copy from perspective of remote
res <- onLocalFast st $ ifM (Annex.Content.inAnnex key) res <- onLocalFast st $ ifM (Annex.Content.inAnnex key)
( return True ( return True
@ -605,7 +669,30 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key af o meterupdate
copier object (fromRawFilePath dest) key p' checksuccess verify copier object (fromRawFilePath dest) key p' checksuccess verify
) )
unless res $ unless res $
giveup "failed to send content to remote" failedsend
copyp2phttp (object, sz, check) =
let check' = check >>= \case
Just s -> do
warning (UnquotedString s)
return False
Nothing -> return True
in p2pHttpClient r (const $ pure $ PutOffsetResultPlus (Offset 0)) (clientPutOffset key) >>= \case
PutOffsetResultPlus (offset@(Offset (P2P.Offset n))) ->
metered (Just meterupdate) key bwlimit $ \_ p -> do
let p' = offsetMeterUpdate p (BytesProcessed n)
res <- p2pHttpClient r giveup $
clientPut p' key (Just offset) af object sz check'
case res of
PutResultPlus False fanoutuuids -> do
storefanout fanoutuuids
failedsend
PutResultPlus True fanoutuuids ->
storefanout fanoutuuids
PutOffsetResultAlreadyHavePlus fanoutuuids ->
storefanout fanoutuuids
storefanout = P2PHelper.storeFanout key InfoPresent (uuid r) . map fromB64UUID
fsckOnRemote :: Git.Repo -> [CommandParam] -> Annex (IO Bool) fsckOnRemote :: Git.Repo -> [CommandParam] -> Annex (IO Bool)
fsckOnRemote r params fsckOnRemote r params
@ -865,7 +952,7 @@ listProxied proxies rs = concat <$> mapM go rs
adduuid ck = M.insert ck adduuid ck = M.insert ck
[Git.ConfigValue $ fromUUID $ proxyRemoteUUID p] [Git.ConfigValue $ fromUUID $ proxyRemoteUUID p]
addurl = M.insert (remoteConfig renamedr (remoteGitConfigKey UrlField)) addurl = M.insert (mkRemoteConfigKey renamedr (remoteGitConfigKey UrlField))
[Git.ConfigValue $ encodeBS $ Git.repoLocation r] [Git.ConfigValue $ encodeBS $ Git.repoLocation r]
addproxiedby = case remoteAnnexUUID gc of addproxiedby = case remoteAnnexUUID gc of
@ -893,7 +980,7 @@ listProxied proxies rs = concat <$> mapM go rs
proxieduuids = S.map proxyRemoteUUID proxied proxieduuids = S.map proxyRemoteUUID proxied
addremoteannexfield f = M.insert addremoteannexfield f = M.insert
(remoteAnnexConfig renamedr (remoteGitConfigKey f)) (mkRemoteConfigKey renamedr (remoteGitConfigKey f))
inheritconfigs c = foldl' inheritconfig c proxyInheritedFields inheritconfigs c = foldl' inheritconfig c proxyInheritedFields
@ -901,8 +988,8 @@ listProxied proxies rs = concat <$> mapM go rs
(Nothing, Just v) -> M.insert dest v c (Nothing, Just v) -> M.insert dest v c
_ -> c _ -> c
where where
src = remoteAnnexConfig r k src = mkRemoteConfigKey r k
dest = remoteAnnexConfig renamedr k dest = mkRemoteConfigKey renamedr k
-- When the git config has anything set for a remote, -- When the git config has anything set for a remote,
-- avoid making a proxied remote with the same name. -- avoid making a proxied remote with the same name.
@ -919,7 +1006,15 @@ listProxied proxies rs = concat <$> mapM go rs
-- Proxing is also yet supported for remotes using P2P -- Proxing is also yet supported for remotes using P2P
-- addresses. -- addresses.
canproxy gc r canproxy gc r
| isP2PHttp' gc = True
| remoteAnnexGitLFS gc = False | remoteAnnexGitLFS gc = False
| Git.GCrypt.isEncrypted r = False | Git.GCrypt.isEncrypted r = False
| Git.repoIsLocal r || Git.repoIsLocalUnknown r = False | Git.repoIsLocal r || Git.repoIsLocalUnknown r = False
| otherwise = isNothing (repoP2PAddress r) | otherwise = isNothing (repoP2PAddress r)
isP2PHttp :: Remote -> Bool
isP2PHttp = isP2PHttp' . gitconfig
isP2PHttp' :: RemoteGitConfig -> Bool
isP2PHttp' = isJust . remoteAnnexP2PHttpUrl

View file

@ -42,16 +42,17 @@ store remoteuuid gc runner k af o p = do
let bwlimit = remoteAnnexBwLimitUpload gc <|> remoteAnnexBwLimit gc let bwlimit = remoteAnnexBwLimitUpload gc <|> remoteAnnexBwLimit gc
metered (Just p) sizer bwlimit $ \_ p' -> metered (Just p) sizer bwlimit $ \_ p' ->
runner (P2P.put k af p') >>= \case runner (P2P.put k af p') >>= \case
Just (Just fanoutuuids) -> do Just (Just fanoutuuids) ->
-- Storing on the remote can cause it storeFanout k InfoPresent remoteuuid fanoutuuids
-- to be stored on additional UUIDs,
-- so record those.
forM_ fanoutuuids $ \u ->
when (u /= remoteuuid) $
logChange k u InfoPresent
Just Nothing -> giveup "Transfer failed" Just Nothing -> giveup "Transfer failed"
Nothing -> remoteUnavail Nothing -> remoteUnavail
storeFanout :: Key -> LogStatus -> UUID -> [UUID] -> Annex ()
storeFanout k logstatus remoteuuid us =
forM_ us $ \u ->
when (u /= remoteuuid) $
logChange k u logstatus
retrieve :: RemoteGitConfig -> (ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification retrieve :: RemoteGitConfig -> (ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
retrieve gc runner k af dest p verifyconfig = do retrieve gc runner k af dest p verifyconfig = do
iv <- startVerifyKeyContentIncrementally verifyconfig k iv <- startVerifyKeyContentIncrementally verifyconfig k
@ -64,20 +65,16 @@ retrieve gc runner k af dest p verifyconfig = do
remove :: UUID -> ProtoRunner (Either String Bool, Maybe [UUID]) -> Maybe SafeDropProof -> Key -> Annex () remove :: UUID -> ProtoRunner (Either String Bool, Maybe [UUID]) -> Maybe SafeDropProof -> Key -> Annex ()
remove remoteuuid runner proof k = runner (P2P.remove proof k) >>= \case remove remoteuuid runner proof k = runner (P2P.remove proof k) >>= \case
Just (Right True, alsoremoveduuids) -> note alsoremoveduuids Just (Right True, alsoremoveduuids) ->
storeFanout k InfoMissing remoteuuid
(fromMaybe [] alsoremoveduuids)
Just (Right False, alsoremoveduuids) -> do Just (Right False, alsoremoveduuids) -> do
note alsoremoveduuids storeFanout k InfoMissing remoteuuid
(fromMaybe [] alsoremoveduuids)
giveup "removing content from remote failed" giveup "removing content from remote failed"
Just (Left err, _) -> do Just (Left err, _) -> do
giveup (safeOutput err) giveup (safeOutput err)
Nothing -> remoteUnavail Nothing -> remoteUnavail
where
-- The remote reports removal from other UUIDs than its own,
-- so record those.
note alsoremoveduuids =
forM_ (fromMaybe [] alsoremoveduuids) $ \u ->
when (u /= remoteuuid) $
logChange k u InfoMissing
checkpresent :: ProtoRunner (Either String Bool) -> Key -> Annex Bool checkpresent :: ProtoRunner (Either String Bool) -> Key -> Annex Bool
checkpresent runner k = checkpresent runner k =

View file

@ -44,12 +44,13 @@ toRepo cs r gc remotecmd = do
git_annex_shell :: ConsumeStdin -> Git.Repo -> String -> [CommandParam] -> [(Field, String)] -> Annex (Maybe (FilePath, [CommandParam])) git_annex_shell :: ConsumeStdin -> Git.Repo -> String -> [CommandParam] -> [(Field, String)] -> Annex (Maybe (FilePath, [CommandParam]))
git_annex_shell cs r command params fields git_annex_shell cs r command params fields
| not $ Git.repoIsUrl r = do | not $ Git.repoIsUrl r = do
shellopts <- getshellopts dir <- liftIO $ absPath (Git.repoPath r)
shellopts <- getshellopts dir
return $ Just (shellcmd, shellopts ++ fieldopts) return $ Just (shellcmd, shellopts ++ fieldopts)
| Git.repoIsSsh r = do | Git.repoIsSsh r = do
gc <- Annex.getRemoteGitConfig r gc <- Annex.getRemoteGitConfig r
u <- getRepoUUID r u <- getRepoUUID r
shellopts <- getshellopts shellopts <- getshellopts (Git.repoPath r)
let sshcmd = unwords $ let sshcmd = unwords $
fromMaybe shellcmd (remoteAnnexShell gc) fromMaybe shellcmd (remoteAnnexShell gc)
: map shellEscape (toCommand shellopts) ++ : map shellEscape (toCommand shellopts) ++
@ -58,9 +59,8 @@ git_annex_shell cs r command params fields
Just <$> toRepo cs r gc sshcmd Just <$> toRepo cs r gc sshcmd
| otherwise = return Nothing | otherwise = return Nothing
where where
dir = Git.repoPath r
shellcmd = "git-annex-shell" shellcmd = "git-annex-shell"
getshellopts = do getshellopts dir = do
debugenabled <- Annex.getRead Annex.debugenabled debugenabled <- Annex.getRead Annex.debugenabled
debugselector <- Annex.getRead Annex.debugselector debugselector <- Annex.getRead Annex.debugselector
let params' = case (debugenabled, debugselector) of let params' = case (debugenabled, debugselector) of

View file

@ -25,8 +25,12 @@ module Types.GitConfig (
RemoteGitConfigField(..), RemoteGitConfigField(..),
remoteGitConfigKey, remoteGitConfigKey,
proxyInheritedFields, proxyInheritedFields,
MkRemoteConfigKey,
mkRemoteConfigKey,
) where ) where
import Debug.Trace
import Common import Common
import qualified Git import qualified Git
import qualified Git.Config import qualified Git.Config
@ -55,6 +59,7 @@ import Utility.StatelessOpenPGP (SOPCmd(..), SOPProfile(..))
import Utility.ThreadScheduler (Seconds(..)) import Utility.ThreadScheduler (Seconds(..))
import Utility.Url (Scheme, mkScheme) import Utility.Url (Scheme, mkScheme)
import Network.Socket (PortNumber) import Network.Socket (PortNumber)
import P2P.Http.Url
import Control.Concurrent.STM import Control.Concurrent.STM
import qualified Data.Set as S import qualified Data.Set as S
@ -104,6 +109,7 @@ data GitConfig = GitConfig
, annexSyncMigrations :: Bool , annexSyncMigrations :: Bool
, annexDebug :: Bool , annexDebug :: Bool
, annexDebugFilter :: Maybe String , annexDebugFilter :: Maybe String
, annexUrl :: Maybe String
, annexWebOptions :: [String] , annexWebOptions :: [String]
, annexYoutubeDlOptions :: [String] , annexYoutubeDlOptions :: [String]
, annexYoutubeDlCommand :: Maybe String , annexYoutubeDlCommand :: Maybe String
@ -199,6 +205,7 @@ extractGitConfig configsource r = GitConfig
, annexSyncMigrations = getbool (annexConfig "syncmigrations") True , annexSyncMigrations = getbool (annexConfig "syncmigrations") True
, annexDebug = getbool (annexConfig "debug") False , annexDebug = getbool (annexConfig "debug") False
, annexDebugFilter = getmaybe (annexConfig "debugfilter") , annexDebugFilter = getmaybe (annexConfig "debugfilter")
, annexUrl = getmaybe (annexConfig "url")
, annexWebOptions = getwords (annexConfig "web-options") , annexWebOptions = getwords (annexConfig "web-options")
, annexYoutubeDlOptions = getwords (annexConfig "youtube-dl-options") , annexYoutubeDlOptions = getwords (annexConfig "youtube-dl-options")
, annexYoutubeDlCommand = getmaybe (annexConfig "youtube-dl-command") , annexYoutubeDlCommand = getmaybe (annexConfig "youtube-dl-command")
@ -395,6 +402,7 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexClusterNode :: Maybe [RemoteName] , remoteAnnexClusterNode :: Maybe [RemoteName]
, remoteAnnexClusterGateway :: [ClusterUUID] , remoteAnnexClusterGateway :: [ClusterUUID]
, remoteUrl :: Maybe String , remoteUrl :: Maybe String
, remoteAnnexP2PHttpUrl :: Maybe P2PHttpUrl
{- These settings are specific to particular types of remotes {- These settings are specific to particular types of remotes
- including special remotes. -} - including special remotes. -}
@ -487,12 +495,17 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexClusterGateway = fromMaybe [] $ , remoteAnnexClusterGateway = fromMaybe [] $
(mapMaybe (mkClusterUUID . toUUID) . words) (mapMaybe (mkClusterUUID . toUUID) . words)
<$> getmaybe ClusterGatewayField <$> getmaybe ClusterGatewayField
, remoteUrl = , remoteUrl = traceShow (mkRemoteConfigKey remotename (remoteGitConfigKey UrlField)) $
case Git.Config.getMaybe (remoteConfig remotename (remoteGitConfigKey UrlField)) r of case Git.Config.getMaybe (mkRemoteConfigKey remotename (remoteGitConfigKey UrlField)) r of
Just (ConfigValue b) Just (ConfigValue b)
| B.null b -> Nothing | B.null b -> Nothing
| otherwise -> Just (decodeBS b) | otherwise -> Just (decodeBS b)
_ -> Nothing _ -> Nothing
, remoteAnnexP2PHttpUrl =
case Git.Config.getMaybe (mkRemoteConfigKey remotename (remoteGitConfigKey AnnexUrlField)) r of
Just (ConfigValue b) ->
parseP2PHttpUrl (decodeBS b)
_ -> Nothing
, remoteAnnexShell = getmaybe ShellField , remoteAnnexShell = getmaybe ShellField
, remoteAnnexSshOptions = getoptions SshOptionsField , remoteAnnexSshOptions = getoptions SshOptionsField
, remoteAnnexRsyncOptions = getoptions RsyncOptionsField , remoteAnnexRsyncOptions = getoptions RsyncOptionsField
@ -527,8 +540,8 @@ extractRemoteGitConfig r remotename = do
getmaybe' :: RemoteGitConfigField -> Maybe ConfigValue getmaybe' :: RemoteGitConfigField -> Maybe ConfigValue
getmaybe' f = getmaybe' f =
let k = remoteGitConfigKey f let k = remoteGitConfigKey f
in Git.Config.getMaybe (remoteAnnexConfig remotename k) r in Git.Config.getMaybe (mkRemoteConfigKey remotename k) r
<|> Git.Config.getMaybe (annexConfig k) r <|> Git.Config.getMaybe (mkAnnexConfigKey k) r
getoptions k = fromMaybe [] $ words <$> getmaybe k getoptions k = fromMaybe [] $ words <$> getmaybe k
data RemoteGitConfigField data RemoteGitConfigField
@ -569,6 +582,7 @@ data RemoteGitConfigField
| ClusterNodeField | ClusterNodeField
| ClusterGatewayField | ClusterGatewayField
| UrlField | UrlField
| AnnexUrlField
| ShellField | ShellField
| SshOptionsField | SshOptionsField
| RsyncOptionsField | RsyncOptionsField
@ -594,86 +608,89 @@ data RemoteGitConfigField
| ExternalTypeField | ExternalTypeField
deriving (Enum, Bounded) deriving (Enum, Bounded)
remoteGitConfigField :: RemoteGitConfigField -> (UnqualifiedConfigKey, ProxyInherited) remoteGitConfigField :: RemoteGitConfigField -> (MkRemoteConfigKey, ProxyInherited)
remoteGitConfigField = \case remoteGitConfigField = \case
-- Hard to know the true cost of accessing eg a slow special -- Hard to know the true cost of accessing eg a slow special
-- remote via the proxy. The cost of the proxy is the best guess -- remote via the proxy. The cost of the proxy is the best guess
-- so do inherit it. -- so do inherit it.
CostField -> inherited "cost" CostField -> inherited True "cost"
CostCommandField -> inherited "cost-command" CostCommandField -> inherited True "cost-command"
IgnoreField -> inherited "ignore" IgnoreField -> inherited True "ignore"
IgnoreCommandField -> inherited "ignore-command" IgnoreCommandField -> inherited True "ignore-command"
SyncField -> inherited "sync" SyncField -> inherited True "sync"
SyncCommandField -> inherited "sync-command" SyncCommandField -> inherited True "sync-command"
PullField -> inherited "pull" PullField -> inherited True "pull"
PushField -> inherited "push" PushField -> inherited True "push"
ReadOnlyField -> inherited "readonly" ReadOnlyField -> inherited True "readonly"
CheckUUIDField -> uninherited "checkuuid" CheckUUIDField -> uninherited True "checkuuid"
VerifyField -> inherited "verify" VerifyField -> inherited True "verify"
TrackingBranchField -> uninherited "tracking-branch" TrackingBranchField -> uninherited True "tracking-branch"
ExportTrackingField -> uninherited "export-tracking" ExportTrackingField -> uninherited True "export-tracking"
TrustLevelField -> uninherited "trustlevel" TrustLevelField -> uninherited True "trustlevel"
StartCommandField -> uninherited "start-command" StartCommandField -> uninherited True "start-command"
StopCommandField -> uninherited "stop-command" StopCommandField -> uninherited True "stop-command"
SpeculatePresentField -> inherited "speculate-present" SpeculatePresentField -> inherited True "speculate-present"
BareField -> inherited "bare" BareField -> inherited True "bare"
RetryField -> inherited "retry" RetryField -> inherited True "retry"
ForwardRetryField -> inherited "forward-retry" ForwardRetryField -> inherited True "forward-retry"
RetryDelayField -> inherited "retrydelay" RetryDelayField -> inherited True "retrydelay"
StallDetectionField -> inherited "stalldetection" StallDetectionField -> inherited True "stalldetection"
StallDetectionUploadField -> inherited "stalldetection-upload" StallDetectionUploadField -> inherited True "stalldetection-upload"
StallDetectionDownloadField -> inherited "stalldetection-download" StallDetectionDownloadField -> inherited True "stalldetection-download"
BWLimitField -> inherited "bwlimit" BWLimitField -> inherited True "bwlimit"
BWLimitUploadField -> inherited "bwlimit-upload" BWLimitUploadField -> inherited True "bwlimit-upload"
BWLimitDownloadField -> inherited "bwlimit-upload" BWLimitDownloadField -> inherited True "bwlimit-upload"
UUIDField -> uninherited "uuid" UUIDField -> uninherited True "uuid"
ConfigUUIDField -> uninherited "config-uuid" ConfigUUIDField -> uninherited True "config-uuid"
SecurityAllowUnverifiedDownloadsField -> inherited "security-allow-unverified-downloads" SecurityAllowUnverifiedDownloadsField -> inherited True "security-allow-unverified-downloads"
MaxGitBundlesField -> inherited "max-git-bundles" MaxGitBundlesField -> inherited True "max-git-bundles"
AllowEncryptedGitRepoField -> inherited "allow-encrypted-gitrepo" AllowEncryptedGitRepoField -> inherited True "allow-encrypted-gitrepo"
-- Allow proxy chains. -- Allow proxy chains.
ProxyField -> inherited "proxy" ProxyField -> inherited True "proxy"
ProxiedByField -> uninherited "proxied-by" ProxiedByField -> uninherited True "proxied-by"
ClusterNodeField -> uninherited "cluster-node" ClusterNodeField -> uninherited True "cluster-node"
ClusterGatewayField -> uninherited "cluster-gateway" ClusterGatewayField -> uninherited True "cluster-gateway"
UrlField -> uninherited "url" UrlField -> uninherited False "url"
ShellField -> inherited "shell" AnnexUrlField -> inherited False "annexurl"
SshOptionsField -> inherited "ssh-options" ShellField -> inherited True "shell"
RsyncOptionsField -> inherited "rsync-options" SshOptionsField -> inherited True "ssh-options"
RsyncDownloadOptionsField -> inherited "rsync-download-options" RsyncOptionsField -> inherited True "rsync-options"
RsyncUploadOptionsField -> inherited "rsync-upload-options" RsyncDownloadOptionsField -> inherited True "rsync-download-options"
RsyncTransportField -> inherited "rsync-transport" RsyncUploadOptionsField -> inherited True "rsync-upload-options"
GnupgOptionsField -> inherited "gnupg-options" RsyncTransportField -> inherited True "rsync-transport"
GnupgDecryptOptionsField -> inherited "gnupg-decrypt-options" GnupgOptionsField -> inherited True "gnupg-options"
SharedSOPCommandField -> inherited "shared-sop-command" GnupgDecryptOptionsField -> inherited True "gnupg-decrypt-options"
SharedSOPProfileField -> inherited "shared-sop-profile" SharedSOPCommandField -> inherited True "shared-sop-command"
RsyncUrlField -> uninherited "rsyncurl" SharedSOPProfileField -> inherited True "shared-sop-profile"
BupRepoField -> uninherited "buprepo" RsyncUrlField -> uninherited True "rsyncurl"
BorgRepoField -> uninherited "borgrepo" BupRepoField -> uninherited True "buprepo"
TahoeField -> uninherited "tahoe" BorgRepoField -> uninherited True "borgrepo"
BupSplitOptionsField -> uninherited "bup-split-options" TahoeField -> uninherited True "tahoe"
DirectoryField -> uninherited "directory" BupSplitOptionsField -> uninherited True "bup-split-options"
AndroidDirectoryField -> uninherited "androiddirectory" DirectoryField -> uninherited True "directory"
AndroidSerialField -> uninherited "androidserial" AndroidDirectoryField -> uninherited True "androiddirectory"
GCryptField -> uninherited "gcrypt" AndroidSerialField -> uninherited True "androidserial"
GitLFSField -> uninherited "git-lfs" GCryptField -> uninherited True "gcrypt"
DdarRepoField -> uninherited "ddarrepo" GitLFSField -> uninherited True "git-lfs"
HookTypeField -> uninherited "hooktype" DdarRepoField -> uninherited True "ddarrepo"
ExternalTypeField -> uninherited "externaltype" HookTypeField -> uninherited True "hooktype"
ExternalTypeField -> uninherited True "externaltype"
where where
inherited f = (f, ProxyInherited True) inherited True f = (MkRemoteAnnexConfigKey f, ProxyInherited True)
uninherited f = (f, ProxyInherited False) inherited False f = (MkRemoteConfigKey f, ProxyInherited True)
uninherited True f = (MkRemoteAnnexConfigKey f, ProxyInherited False)
uninherited False f = (MkRemoteConfigKey f, ProxyInherited False)
newtype ProxyInherited = ProxyInherited Bool newtype ProxyInherited = ProxyInherited Bool
-- All remote config fields that are inherited from a proxy. -- All remote config fields that are inherited from a proxy.
proxyInheritedFields :: [UnqualifiedConfigKey] proxyInheritedFields :: [MkRemoteConfigKey]
proxyInheritedFields = proxyInheritedFields =
map fst $ map fst $
filter (\(_, ProxyInherited p) -> p) $ filter (\(_, ProxyInherited p) -> p) $
map remoteGitConfigField [minBound..maxBound] map remoteGitConfigField [minBound..maxBound]
remoteGitConfigKey :: RemoteGitConfigField -> UnqualifiedConfigKey remoteGitConfigKey :: RemoteGitConfigField -> MkRemoteConfigKey
remoteGitConfigKey = fst . remoteGitConfigField remoteGitConfigKey = fst . remoteGitConfigField
notempty :: Maybe String -> Maybe String notempty :: Maybe String -> Maybe String
@ -685,13 +702,23 @@ dummyRemoteGitConfig :: IO RemoteGitConfig
dummyRemoteGitConfig = atomically $ dummyRemoteGitConfig = atomically $
extractRemoteGitConfig Git.Construct.fromUnknown "dummy" extractRemoteGitConfig Git.Construct.fromUnknown "dummy"
type UnqualifiedConfigKey = B.ByteString data MkRemoteConfigKey
= MkRemoteAnnexConfigKey B.ByteString
| MkRemoteConfigKey B.ByteString
mkRemoteConfigKey :: RemoteNameable r => r -> MkRemoteConfigKey -> ConfigKey
mkRemoteConfigKey r (MkRemoteAnnexConfigKey b) = remoteAnnexConfig r b
mkRemoteConfigKey r (MkRemoteConfigKey b) = remoteConfig r b
mkAnnexConfigKey :: MkRemoteConfigKey -> ConfigKey
mkAnnexConfigKey (MkRemoteAnnexConfigKey b) = annexConfig b
mkAnnexConfigKey (MkRemoteConfigKey b) = annexConfig b
annexConfigPrefix :: B.ByteString annexConfigPrefix :: B.ByteString
annexConfigPrefix = "annex." annexConfigPrefix = "annex."
{- A global annex setting in git config. -} {- A global annex setting in git config. -}
annexConfig :: UnqualifiedConfigKey -> ConfigKey annexConfig :: B.ByteString -> ConfigKey
annexConfig key = ConfigKey (annexConfigPrefix <> key) annexConfig key = ConfigKey (annexConfigPrefix <> key)
class RemoteNameable r where class RemoteNameable r where
@ -704,13 +731,13 @@ instance RemoteNameable RemoteName where
getRemoteName = id getRemoteName = id
{- A per-remote annex setting in git config. -} {- A per-remote annex setting in git config. -}
remoteAnnexConfig :: RemoteNameable r => r -> UnqualifiedConfigKey -> ConfigKey remoteAnnexConfig :: RemoteNameable r => r -> B.ByteString -> ConfigKey
remoteAnnexConfig r = remoteConfig r . remoteAnnexConfigEnd remoteAnnexConfig r = remoteConfig r . remoteAnnexConfigEnd
remoteAnnexConfigEnd :: UnqualifiedConfigKey -> UnqualifiedConfigKey remoteAnnexConfigEnd :: B.ByteString -> B.ByteString
remoteAnnexConfigEnd key = "annex-" <> key remoteAnnexConfigEnd key = "annex-" <> key
{- A per-remote setting in git config. -} {- A per-remote setting in git config. -}
remoteConfig :: RemoteNameable r => r -> UnqualifiedConfigKey -> ConfigKey remoteConfig :: RemoteNameable r => r -> B.ByteString -> ConfigKey
remoteConfig r key = ConfigKey $ remoteConfig r key = ConfigKey $
"remote." <> encodeBS (getRemoteName r) <> "." <> key "remote." <> encodeBS (getRemoteName r) <> "." <> key

View file

@ -16,6 +16,7 @@ import qualified Data.UUID as U
import Data.Maybe import Data.Maybe
import Data.String import Data.String
import Data.ByteString.Builder import Data.ByteString.Builder
import Control.DeepSeq
import qualified Data.Semigroup as Sem import qualified Data.Semigroup as Sem
import Git.Types (ConfigValue(..)) import Git.Types (ConfigValue(..))
@ -28,6 +29,10 @@ import qualified Utility.SimpleProtocol as Proto
data UUID = NoUUID | UUID B.ByteString data UUID = NoUUID | UUID B.ByteString
deriving (Eq, Ord, Show, Read) deriving (Eq, Ord, Show, Read)
instance NFData UUID where
rnf NoUUID = ()
rnf (UUID b) = rnf b
class FromUUID a where class FromUUID a where
fromUUID :: UUID -> a fromUUID :: UUID -> a

3
debian/control vendored
View file

@ -85,6 +85,9 @@ Build-Depends:
libghc-git-lfs-dev (>= 1.2.0), libghc-git-lfs-dev (>= 1.2.0),
libghc-criterion-dev, libghc-criterion-dev,
libghc-clock-dev, libghc-clock-dev,
libghc-servant-dev,
libghc-servant-server-dev,
libghc-servant-client-dev,
lsof [linux-any], lsof [linux-any],
ikiwiki, ikiwiki,
libimage-magick-perl, libimage-magick-perl,

View file

@ -114,8 +114,10 @@ the client sends:
LOCKCONTENT Key LOCKCONTENT Key
The server responds with either SUCCESS or FAILURE. The server responds with either SUCCESS or FAILURE.
The former indicates the content is locked. It will remain The former indicates the content is locked.
locked until the client sends:
After SUCCESS, the content will remain locked until the
client sends its next message, which must be:
UNLOCKCONTENT Key UNLOCKCONTENT Key
@ -182,7 +184,7 @@ whitespace.)
The server may respond with ALREADY-HAVE if it already The server may respond with ALREADY-HAVE if it already
had the content of that key. had the content of that key.
In protocol version 2, the server can optionally reply with In protocol version 2 and above, the server can optionally reply with
ALREADY-HAVE-PLUS. The subsequent list of UUIDs are additional ALREADY-HAVE-PLUS. The subsequent list of UUIDs are additional
UUIDs where the content is stored, in addition to the UUID where UUIDs where the content is stored, in addition to the UUID where
the client was going to send it. the client was going to send it.
@ -197,9 +199,9 @@ the client to start. This allows resuming transfers.
The client then sends a DATA message with content of the file from The client then sends a DATA message with content of the file from
the offset to the end of file. the offset to the end of file.
In protocol version 1, after the data, the client sends an additional In protocol version 1 and above, after the data, the client sends an
message, to indicate if the content of the file has changed while it additional message, to indicate if the content of the file has changed
was being sent. while it was being sent.
INVALID INVALID
VALID VALID
@ -207,8 +209,8 @@ was being sent.
If the server successfully receives the data and stores the content, If the server successfully receives the data and stores the content,
it replies with SUCCESS. Otherwise, FAILURE. it replies with SUCCESS. Otherwise, FAILURE.
In protocol version 2, the server can optionally reply with SUCCESS-PLUS In protocol version 2 and above, the server can optionally reply with
and a list of UUIDs where the content was stored. SUCCESS-PLUS and a list of UUIDs where the content was stored.
## Getting content from the server ## Getting content from the server
@ -223,7 +225,7 @@ See description of AssociatedFile above.
The server then sends a DATA message with the content of the file The server then sends a DATA message with the content of the file
from the offset to end of file. from the offset to end of file.
In protocol version 1, after the data, the server sends an additional In protocol version 1 and above, after the data, the server sends an additional
message, to indicate if the content of the file has changed while it message, to indicate if the content of the file has changed while it
was being sent. was being sent.

View file

@ -1,153 +1,437 @@
[[!toc ]] [[!toc ]]
## motivation ## introduction
The [[P2P protocol]] is a custom protocol that git-annex speaks over a ssh The [[P2P protocol]] is a custom protocol that git-annex speaks over a ssh
connection (mostly). This is a design working on supporting the P2P connection (mostly). This is a translation of that protocol to HTTP.
protocol over HTTP.
Upload of annex objects to git remotes that use http is currently not [[git-annex-p2phttp]] serves this protocol.
supported by git-annex, and this would be a generally very useful addition.
For use cases such as OpenNeuro's javascript client, ssh is too difficult To indicate that an url uses this protocol, use
to support, so they currently use a special remote that talks to a http `annex+http` or `annex+https` as the url scheme. Such an url uses
endpoint in order to upload objects. Implementing this would let them port 9417 by default, although another port can be specified.
talk to git-annex over http. For example, "annex+http://example.com/git-annex/"
With the [[passthrough_proxy]], this would let clients configure a single ## base64 encoding of keys, uuids, and filenames
http remote that accesses a more complicated network of git-annex
repositories.
## integration with git A git-annex key can contain text in any encoding. So can a filename,
and it's even possible, though unlikely, that the UUID of a git-annex
repository might.
A webserver that is configured to serve a git repository either serves the But this protocol requires that UTF-8 be used throughout, except
files in the repository with dumb http, or uses the git-http-backend CGI where bodies use `Content-Type: application/octet-stream`.
program for url paths under eg `/git/`.
To integrate with that, git-annex would need a git-annex-http-backend CGI So this protocol allows using
program, that the webserver is configured to run for url paths under [base64url](https://datatracker.ietf.org/doc/html/rfc4648#section-5)
`/git/.*/annex/`. encoding for such values. Any key, filename, or UUID wrapped in square
brackets is a base64url encoded value.
For example, "[Zm9v]" is the same as "foo".
So, for a remote with an url `http://example.com/git/foo`, git-annex would A filename like "[foo]" will need to itself be encoded that way: "[W2Zvb10=]"
use paths under `http://example.com/git/foo/annex/` to run its CGI.
But, the CGI interface is a poor match for the P2P protocol. ## authentication
A particular problem is that `LOCKCONTENT` would need to be in one CGI Some requests need authentication. Which requests do depends on the
request, followed by another request to `UNLOCKCONTENT`. Unless configuration of the HTTP server. When a request needs authentication,
git-annex-http-backend forked a daemon to keep the content locked, it would it will fail with 401 Unauthorized.
not be able to retain a file lock across the 2 requests. While the 10
minute retention lock would paper over that, UNLOCKCONTENT would not be
able to delete the retention lock, because there is no way to know if
another LOCKCONTENT was received later. So LOCKCONTENT would always lock
content for 10 minutes. Which would result in some undesirable behaviors.
Another problem is with proxies and clusters. The CGI would need to open Authentication is done using HTTP basic auth. The realm to use when
ssh (or http) connections to the proxied repositories and cluster nodes authenticating is "git-annex". The charset is UTF-8.
each time it is run. That would add a lot of latency to every request.
And running a git-annex process once per CGI request also makes git-annex's When authentication is successful but does not allow a request to be
own startup speed, which is ok but not great, add latency. And each time performed, it will fail with 403 Forbidden.
the CGI needed to change the git-annex branch, it would have to commit on
shutdown. Lots of time and space optimisations would be prevented by using
the CGI interface.
So, rather than having the CGI program do anything in the repository Note that HTTP basic auth is not encrypted so is only secure when used
itself, have it pass each request through to a long-running server. over HTTPS.
(This does have the downside that files would get double-copied
through the CGI, which adds some overhead.)
A reasonable way to do that would be to have a webserver speaking a
HTTP version of the git-annex P2P protocol and the CGI just talks to that.
The CGI program then becomes tiny, and just needs to know the url to ## protocol version
connect to the git-annex HTTP server.
Alternatively, a remote's configuration could include that url, and Requests are versioned. The versions correspond to
then we don't need the complication and overhead of the CGI program at all. P2P protocol versions. The version is part of the request path,
Eg: eg "v3"
git config remote.origin.annex-url http://example.com:8080/ If the server does not support a particular protocol version, the
request will fail with a 404, and the client should fall
back to an earlier protocol version.
So, the rest of this design will focus on implementing that. The CGI ## common request parameters
program can be added later if desired, so avoid users needing to configure
an additional thing.
Note that, one nice benefit of having a separate annex-url is it allows Every request supports this parameter, and unless documented
having remote.origin.url on eg github, but with an annex-url configured otherwise, it is required to be included.
that remote can also be used as a git-annex repository.
## approach 1: websockets * `clientuuid`
The client connects to the server over a websocket. From there on, The value is the UUID of the git-annex repository of the client.
the protocol is encapsulated in websockets.
This seems nice and simple to implement, but not very web native. Anyone Any request may also optionally include these parameters:
wanting to talk to this web server would need to understand the P2P
protocol. Just to upload a file would need to deal with AUTH,
AUTH-SUCCESS, AUTH-FAILURE, VERSION, PUT, ALREADY-HAVE, PUT-FROM, DATA,
INVALID, VALID, SUCCESS, and FAILURE messages. Seems like a lot.
Some requests like `LOCKCONTENT` do need full duplex communication like * `bypass`
websockets provide. But, it might be more web native to only use websockets
for that request, and not for everything.
## approach 2: web-native API The value is the UUID of a cluster gateway, which the server should avoid
connecting to when serving a cluster. This is the equivilant of the
`BYPASS` message in the [[P2P_Protocol]].
Another approach is to define a web-native API with endpoints that This parameter can be given multiple times to list several cluster
correspond to each action in the P2P protocol. gateway UUIDs.
Something like this: This parameter is only available for v2 and above.
> POST /git-annex/v1/AUTH?clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.0 [Internally, git-annex can use these common parameters, plus the protocol
< AUTH-SUCCESS ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 version, and remote UUID, to create a P2P session. The P2P session is
driven through the AUTH, VERSION, and BYPASS messages, leaving the session
ready to service requests.]
> POST /git-annex/v1/CHECKPRESENT?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.0 ## requests
> SUCCESS
> POST /git-annex/v1/PUT-FROM?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.0 ### GET /git-annex/$uuid/key/$key
< PUT-FROM 0
> POST /git-annex/v1/PUT?key=SHA1--foo&associatedfile=bar&put-from=0&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.0 This is a simple, unversioned interface to get the content of a key
> Content-Type: application/octet-stream from a repository.
> Content-Length: 20
> foo
> {"valid": true}
< {"stored": true}
(In the last example above "foo" is the content, it is followed by a line of json. It is not part of the P2P protocol per se, but is provided to let
This seems better than needing an entire other request to indicate validitity.) other clients than git-annex easily download the content of keys from the
http server.
This needs a more complex spec. But it's easier for others to implement, When the key is not present on the server, it will respond
especially since it does not need a session identifier, so the HTTP server can with 404 Not Found.
be stateless.
A full draft protocol for this is being developed at [[p2p_protocol_over_http/draft1]]. Note that the common parameters bypass and clientuuid, while
accepted, have no effect. Both are optional for this request.
## HTTP GET ### GET /git-annex/$uuid/v3/key/$key
It should be possible to support a regular HTTP get of a key, with Get the content of a key from the repository with the specified uuid.
no additional parameters, so that annex objects can be served to other clients
from this web server.
> GET /git-annex/key/SHA1--foo HTTP/1.0 Example:
> GET /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/key/SHA1--foo&associatedfile=bar&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< X-git-annex-data-length: 3
< Content-Type: application/octet-stream
<
< foo < foo
Although this would be a special case, not used by git-annex, because the P2P All parameters are optional, including the common parameters, and these:
protocol's GET has the complication of offsets, and of the server sending
VALID/INVALID after the content, and of needing to know the client's UUID in
order to update the location log.
## Problem: CONNECT * `associatedfile`
The CONNECT message allows both sides of the P2P protocol to send DATA The name of a file in the git repository, for informational purposes
messages in any order. This seems difficult to encapsulate in HTTP. only.
Probably this can be not implemented, it's probably not needed for a HTTP * `offset`
remote? This is used to tunnel git protocol over the P2P protocol, but for
a HTTP remote the git repository can be accessed over HTTP as well.
## security Number of bytes to skip sending from the beginning of the file.
Should support HTTPS and/or be limited to only HTTPS. Request headers are currently ignored, so eg Range requests are
not supported. (This would be possible to implement, up to a point.)
Authentication via http basic auth? The body of the request is empty.
The server's response will have a `Content-Type` header of
`application/octet-stream`.
The server's response will have a `X-git-annex-data-length`
header that indicates the number of bytes of content that are expected to
be sent. Note that there is no Content-Length header.
The body of the response is the content of the key.
If the length of the body is different than what the the
X-git-annex-data-length header indicated, then the data is invalid and
should not be used. This can happen when eg, the data was being sent from
an unlocked annexed file, which got modified while it was being sent.
When the content is not present, the server will respond with
422 Unprocessable Content.
### GET /git-annex/$uuid/v2/key/$key
Identical to v3.
### GET /git-annex/$uuid/v1/key/$key
Identical to v3.
### GET /git-annex/$uuid/v0/key/$key
Same as v3, except the X-git-annex-data-length header is not used.
Additional checking client-side will be required to validate the data.
### POST /git-annex/$uuid/v3/checkpresent
Checks if a key is currently present on the server.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/checkpresent?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< {"present": true}
There is one required additional parameter, `key`.
The body of the request is empty.
The server responds with a JSON object with a "present" field that is true
if the key is present, or false if it is not present.
### POST /git-annex/$uuid/v2/checkpresent
Identical to v3.
### POST /git-annex/$uuid/v1/checkpresent
Identical to v3.
### POST /git-annex/$uuid/v0/checkpresent
Identical to v3.
### POST /git-annex/$uuid/v3/lockcontent
Locks the content of a key on the server, preventing it from being removed.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/lockcontent?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< {"locked": true, "lockid": "foo"}
There is one required additional parameter, `key`.
The server will reply with `{"locked": true}` if it was able
to lock the key, or `{"locked": false}` if it was not.
The key will remain locked for 10 minutes. But, usually `keeplocked`
is used to control the lifetime of the lock, using the "lockid"
parameter from the server's reply. (See below.)
### POST /git-annex/$uuid/v2/lockcontent
Identical to v3.
### POST /git-annex/$uuid/v1/lockcontent
Identical to v3.
### POST /git-annex/$uuid/v0/lockcontent
Identical to v3.
### POST /git-annex/$uuid/v3/keeplocked
Controls the lifetime of a lock on a key that was earlier obtained
with `lockcontent`.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/keeplocked?lockid=foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
> Connection: Keep-Alive
> Keep-Alive: timeout=1200
[some time later]
> {"unlock": true}
< {"locked": false}
There is one required additional parameter, `lockid`.
This uses long polling. So it's important to use
Connection and Keep-Alive headers.
This keeps an active lock from expiring until the client sends
`{"unlock": true}`, and then it immediately unlocks it.
The client can send `{"unlock": false}` any number of times first.
This has no effect, but may be useful to keep the connection alive.
This must be called within ten minutes of `lockcontent`, otherwise
the lock will have already expired when this runs. Note that this
does not indicate if the lock expired, it always returns
`{"locked": false}`.
If the connection is closed before the client sends `{"unlock": true},
or even if the web server gets shut down, the content will remain
locked for 10 minutes from the time it was first locked.
Note that the common parameters bypass and clientuuid, while
accepted, have no effect.
### POST /git-annex/$uuid/v2/keeplocked
Identical to v3.
### POST /git-annex/$uuid/v1/keeplocked
Identical to v3.
### POST /git-annex/$uuid/v0/keeplocked
Identical to v3.
### POST /git-annex/$uuid/v3/remove
Remove a key's content from the server.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/remove?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< {"removed": true}
There is one required additional parameter, `key`.
The body of the request is empty.
The server responds with a JSON object with a "removed" field that is true
if the key was removed (or was not present on the server),
or false if the key was not able to be removed.
The JSON object can have an additional field "plusuuids" that is a list of
UUIDs of other repositories that the content was removed from.
### POST /git-annex/$uuid/v2/remove
Identical to v3.
### POST /git-annex/$uuid/v1/remove
Same as v3, except the JSON will not include "plusuuids".
### POST /git-annex/$uuid/v0/remove
Identical to v1.
## POST /git-annex/$uuid/v3/remove-before
Remove a key's content from the server, but only before a specified time.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/remove-before?timestamp=4949292929&key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< {"removed": true}
This is the same as the `remove` request, but with an additional parameter,
`timestamp`.
If the server's monotonic clock is past the specified timestamp, the
removal will fail and the server will respond with: `{"removed": false}`
This is used to avoid removing content after a point in
time where it is no longer locked in other repostitories.
## POST /git-annex/$uuid/v3/gettimestamp
Gets the current timestamp from the server.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/gettimestamp?clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< {"timestamp": 59459392}
The body of the request is empty.
The server responds with JSON object with a timestmap field that has the
current value of its monotonic clock, as a number of seconds.
Important: If multiple servers are serving this protocol for the same
repository, they MUST all use the same monotonic clock.
### POST /git-annex/$uuid/v3/put
Store content on the server.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/put?key=SHA1--foo&associatedfile=bar&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
> Content-Type: application/octet-stream
> X-git-annex-data-length: 3
>
> foo
< {"stored": true}
There is one required additional parameter, `key`.
There are are also these optional parameters:
* `associatedfile`
The name of a file in the git repository, for informational purposes
only.
* `offset`
Number of bytes that have been omitted from the beginning of the file.
Usually this will be determined by making a `putoffset` request.
The `Content-Type` header should be `application/octet-stream`.
The `X-git-annex-data-length` must be included. It indicates the number
of bytes of content that are expected to be sent.
Note that there is no need to send a Content-Length header.
If the length of the body is different than what the the
X-git-annex-data-length header indicated, then the data is invalid and
should not be used. This can happen when eg, the data was being sent from
an unlocked annexed file, which got modified while it was being sent.
The server responds with a JSON object with a field "stored"
that is true if it received the data and stored the content.
The JSON object can have an additional field "plusuuids" that is a list of
UUIDs of other repositories that the content was stored to.
### POST /git-annex/$uuid/v2/put
Identical to v3.
### POST /git-annex/$uuid/v1/put
Same as v3, except the JSON will not include "plusuuids".
### POST /git-annex/$uuid/v0/put
Same as v1, except additional checking is done to validate the data.
### POST /git-annex/$uuid/v3/putoffset
Asks the server what `offset` can be used in a `put` of a key.
This should usually be used right before sending a `put` request.
The offset may not be valid after some point in time, which could result in
the `put` request failing.
Example:
> POST /git-annex/ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6/v3/putoffset?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.1
< {"offset": 10}
There is one required additional parameter, `key`.
The body of the request is empty.
The server responds with a JSON object with an "offset" field that
is the largest allowable offset.
If the server already has the content of the key, it will respond instead
with a JSON object with an "alreadyhave" field that is set to true. This JSON
object may also have a field "plusuuids" that lists
the UUIDs of other repositories where the content is stored, in addition to
the serveruuid.
[Implementation note: This will be implemented by sending `PUT` and
returning the `PUT-FROM` offset. To avoid leaving the P2P protocol stuck
part way through a `PUT`, a synthetic empty `DATA` followed by `INVALID`
will be used to get the P2P protocol back into a state where it will accept
any request.]
### POST /git-annex/$uuid/v2/putoffset
Identical to v3.
### POST /git-annex/$uuid/v1/putoffset
Same as v3, except the JSON will not include "plusuuids".
## parts of P2P protocol that are not supported over HTTP
`NOTIFYCHANGE` is not supported, but it would be possible to extend
this HTTP protocol to support it.
`CONNECT` is not supported, and due to the bi-directional message passing
nature of it, it cannot easily be done over HTTP (would need websockets).
It should not be necessary anyway, because the git repository itself can be
accessed over HTTP.

View file

@ -1,389 +0,0 @@
[[!toc ]]
Draft 1 of a complete [[P2P_protocol]] over HTTP.
## authentication
A git-annex protocol endpoint can optionally operate in readonly mode without
authentication.
Authentication is required to make any changes.
Authentication is done using HTTP basic auth.
The user is recommended to only authenticate over HTTPS, since otherwise
HTTP basic auth (as well as git-annex data) can be snooped. But some users
may want git-annex to use HTTP in eg a LAN.
## protocol version
Each request in the protocol is versioned. The versions correspond
to P2P protocol versions.
The protocol version comes before the request. Eg: `/git-annex/v3/put`
If the server does not support a particular protocol version, the
request will fail with a 404, and the client should fall back to an earlier
protocol version.
## common request parameters
Every request supports these common parameters, and unless documented
otherwise, a request requires both of them to be included.
* `clientuuid`
The value is the UUID of the git-annex repository of the client.
* `serveruuid`
The value is the UUID of the git-annex repository that the server
should serve.
Any request may also optionally include these parameters:
* `bypass`
The value is the UUID of a cluster gateway, which the server should avoid
connecting to when serving a cluster. This is the equivilant of the
`BYPASS` message in the [[P2P_Protocol]].
This parameter can be given multiple times to list several cluster
gateway UUIDs.
This parameter is only available for v3 and above.
[Internally, git-annex can use these common parameters, plus the protocol
version, to create a P2P session. The P2P session is driven through
the AUTH, VERSION, and BYPASS messages, leaving the session ready to
service requests.]
## requests
### GET /git-annex/key/$key
This is a simple, unversioned interface to get a key from the server.
It is not part of the P2P protocol per se, but is provided to let
other clients than git-annex easily download the content of keys from the
http server.
This behaves almost the same as `GET /git-annex/v3/key/$key`, although its
behavior may change in later versions.
When the key is not present on the server, this returns a 404 Not Found.
### GET /git-annex/v3/key/$key
Get the content of a key from the server.
This is designed so it can be used both by a peer in the P2P protocol,
and by a regular HTTP client that just wants to download a file.
Example:
> GET /git-annex/v3/key/SHA1--foo&associatedfile=bar&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
< X-git-annex-data-length: 3
< Content-Type: application/octet-stream
<
< foo
The key to get is the part of the url after "/git-annex/vN/key/"
and before any url parameters.
All parameters are optional, including the common parameters, and these:
* `associatedfile`
The name of a file in the git repository, for informational purposes
only.
* `offset`
Number of bytes to skip sending from the beginning of the file.
Request headers are currently ignored, so eg Range requests are
not supported. (This would be possible to implement, up to a point.)
The body of the request is empty.
The server's response will have a `Content-Type` header of
`application/octet-stream`.
The server's response will have a `X-git-annex-data-length`
header that indicates the number of bytes of content that are expected to
be sent. Note that there is no Content-Length header.
The body of the response is the content of the key.
If the length of the body is different than what the the
X-git-annex-data-length header indicated, then the data is invalid and
should not be used. This can happen when eg, the data was being sent from
an unlocked annexed file, which got modified while it was being sent.
When the content is not present, the server will respond with
422 Unprocessable Content.
### GET /git-annex/v2/key/$key
Identical to v3.
### GET /git-annex/v1/key/$key
Identical to v3.
### GET /git-annex/v0/key/$key
Same as v3, except there is no X-git-annex-data-length header.
Additional checking client-side will be required to validate the data.
### POST /git-annex/v3/checkpresent
Checks if a key is currently present on the server.
Example:
> POST /git-annex/v3/checkpresent?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
< {"present": true}
There is one required additional parameter, `key`.
The body of the request is empty.
The server responds with a JSON object with a "present" field that is true
if the key is present, or false if it is not present.
### POST /git-annex/v2/checkpresent
Identical to v3.
### POST /git-annex/v1/checkpresent
Identical to v3.
### POST /git-annex/v0/checkpresent
Identical to v3.
### POST /git-annex/v3/lockcontent
Locks the content of a key on the server, preventing it from being removed.
Example:
> POST /git-annex/v3/lockcontent?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
[websocket protocol follows]
< SUCCESS
> UNLOCKCONTENT
There is one required additional parameter, `key`.
This request opens a websocket between the client and the server.
The server sends "SUCCESS" over the websocket once it has locked
the content. Or it sends "FAILURE" if it is unable to lock the content.
Once the server has sent "SUCCESS", the content remains locked
until the client sends "UNLOCKCONTENT" over the websocket.
If the client disconnects without sending "UNLOCKCONTENT", or the web
server gets shut down before it can receive that, the content will remain
locked for at least 10 minutes from when the server sent "SUCCESS".
### POST /git-annex/v2/lockcontent
Identical to v3.
### POST /git-annex/v1/lockcontent
Identical to v3.
### POST /git-annex/v0/lockcontent
Identical to v3.
### POST /git-annex/v3/remove
Remove a key's content from the server.
Example:
> POST /git-annex/v3/remove?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
< {"removed": true}
There is one required additional parameter, `key`.
The body of the request is empty.
The server responds with a JSON object with a "removed" field that is true
if the key was removed (or was not present on the server),
or false if the key was not able to be removed.
The JSON object can have an additional field "plusuuids" that is a list of
UUIDs of other repositories that the content was removed from.
If the server does not allow removing the key due to a policy
(eg due to being read-only or append-only), it will respond with a JSON
object with an "error" field that has an error message as its value.
### POST /git-annex/v2/remove
Identical to v3.
### POST /git-annex/v1/remove
Same as v3, except the JSON will not include "plusuuids".
### POST /git-annex/v0/remove
Identival to v1.
## POST /git-annex/v3/remove-before
Remove a key's content from the server, but only before a specified time.
Example:
> POST /git-annex/v3/remove-before?timestamp=4949292929&key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
< {"removed": true}
This is the same as the `remove` request, but with an additional parameter,
`timestamp`.
If the server's monotonic clock is past the specified timestamp, the
removal will fail and the server will respond with: `{"removed": false}`
This is used to avoid removing content after a point in
time where it is no longer locked in other repostitories.
## POST /git-annex/v3/gettimestamp
Gets the current timestamp from the server.
Example:
> POST /git-annex/v3/gettimestamp?clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
< {"timestamp": 59459392}
The body of the request is empty.
The server responds with JSON object with a timestmap field that has the
current value of its monotonic clock, as a number of seconds.
Important: If multiple servers are serving this protocol for the same
repository, they MUST all use the same monotonic clock.
### POST /git-annex/v3/put
Store content on the server.
Example:
> POST /git-annex/v3/put?key=SHA1--foo&associatedfile=bar&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
> Content-Type: application/octet-stream
> X-git-annex-object-size: 3
>
> foo
< {"stored": true}
There is one required additional parameter, `key`.
There are are also these optional parameters:
* `associatedfile`
The name of a file in the git repository, for informational purposes
only.
* `offset`
Number of bytes that have been omitted from the beginning of the file.
Usually this will be determined by making a `putoffset` request.
The `Content-Type` header should be `application/octet-stream`.
The `X-git-annex-data-length` must be included. It indicates the number
of bytes of content that are expected to be sent.
Note that there is no need to send a Content-Length header.
If the length of the body is different than what the the
X-git-annex-data-length header indicated, then the data is invalid and
should not be used. This can happen when eg, the data was being sent from
an unlocked annexed file, which got modified while it was being sent.
The server responds with a JSON object with a field "stored"
that is true if it received the data and stored the
content.
The JSON object can have an additional field "plusuuids" that is a list of
UUIDs of other repositories that the content was stored to.
If the server does not allow storing the key due eg to a policy
(eg due to being read-only or append-only), or due to the data being
invalid, or because it ran out of disk space, it will respond with a
JSON object with an "error" field that has an error message as its value.
### POST /git-annex/v2/put
Identical to v3.
### POST /git-annex/v1/put
Same as v3, except the JSON will not include "plusuuids".
### POST /git-annex/v0/put
Same as v1, except there is no X-git-annex-data-length header.
Additional checking client-side will be required to validate the data.
### POST /git-annex/v3/putoffset
Asks the server what `offset` can be used in a `put` of a key.
This should usually be used right before sending a `put` request.
The offset may not be valid after some point in time, which could result in
the `put` request failing.
Example:
> POST /git-annex/v3/putoffset?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.1
< {"offset": 10}
There is one required additional parameter, `key`.
The body of the request is empty.
The server responds with a JSON object with an "offset" field that
is the largest allowable offset.
If the server already has the content of the key, it will respond with a
JSON object with an "alreadyhave" field that is set to true. This JSON
object may also have a field "plusuuids" that lists
the UUIDs of other repositories where the content is stored, in addition to
the serveruuid.
If the server does not allow storing the key due to a policy
(eg due to being read-only or append-only), it will respond with a JSON
object with an "error" field that has an error message as its value.
[Implementation note: This will be implemented by sending `PUT` and
returning the `PUT-FROM` offset. To avoid leaving the P2P protocol stuck
part way through a `PUT`, a synthetic empty `DATA` followed by `INVALID`
will be used to get the P2P protocol back into a state where it will accept
any request.]
### POST /git-annex/v2/putoffset
Identical to v3.
### POST /git-annex/v1/putoffset
Same as v3, except the JSON will not include "plusuuids".
## parts of P2P protocol that are not supported over HTTP
`NOTIFYCHANGE` is not supported, but it would be possible to extend
this HTTP protocol to support it.
`CONNECT` is not supported, and due to the bi-directional message passing
nature of it, it cannot easily be done over HTTP (would need websockets).
It should not be necessary anyway, because the git repository itself can be
accessed over HTTP.

View file

@ -565,26 +565,41 @@ Tentative design for exporttree=yes with proxies:
* Configure annex-tracking-branch for the proxy in the git-annex branch. * Configure annex-tracking-branch for the proxy in the git-annex branch.
(For the proxy as a whole, or for specific exporttree=yes repos behind (For the proxy as a whole, or for specific exporttree=yes repos behind
it?) it?)
* Then the user's workflow is simply: `git-annex push proxy` * Then the user's workflow is simply: `git-annex push`
* sync/push need to first push any updated annex-tracking-branch to the * sync/push need to first push any updated annex-tracking-branch to the
proxy before sending content to it. (Currently sync only pushes at the proxy before sending content to it. (Currently sync only pushes at the
end.) end.)
* If proxied remotes are all exporttree=yes, the proxy rejects any * If proxied remotes are all exporttree=yes, the proxy rejects any
transfers of a key that is not in the annex-tracking-branch that it puts of a key that is not in the annex-tracking-branch that it
currently knows about. If there is any other proxied remote, the proxy currently knows about.
can direct such transfers to it.
* Upon receiving a new annex-tracking-branch or any transfer of a key * Upon receiving a new annex-tracking-branch or any transfer of a key
used in the current annex-tracking-branch, the proxy can update used in the current annex-tracking-branch, the proxy can update
the exporttree=yes remotes. This needs to happen incrementally, the exporttree=yes remote. This needs to happen incrementally,
eg upon receiving a key, just proxy it on to the exporttree=yes remote, eg upon receiving a key, just proxy it on to the exporttree=yes remote,
and update the export database. Once all keys are received, update and update the export database. Once all keys are received, update
the git-annex branch to indicate a new tree has been exported. the git-annex branch to indicate a new tree has been exported.
* Upon receiving a git push of the annex-tracking-branch, a proxy might
be able to get all the changed objects from non-exporttree=yes proxied A difficulty is that a put of a key to a proxied exporttree=yes remote
remotes that contain them. If so it can update the exporttree=yes can remove another key from it. Eg, a new version of a file. Consider a
remote automatically and inexpensively. At the same time, a case where two files swapped content. The put of key B would drop
`git-annex push` will be attempting to send those same objects. key A that was stored in that file. Since the user's git-annex would not
So somehow the proxy will need to manage this situation. realize that, it would not upload key A again. So this would leave the
exporttree=yes remote without a cooy of key A until the git-annex branch is
synced and then the situation can be noticed. While doing renames first
would avoid this, [[todo/export_paired_rename_innefficenctcy]] is a
situation where it could still be a problem.
A similar difficulty is that a push of the annex-tracking-branch can
remove a file from the proxied exporttree=yes remote. If a second push
of the annex-tracking-branch adds the file back, but the git-annex branch
has not been fetched, it won't know that the file was removed, so it won't
try to send it, leaving the export incomplete.
A possibile solution to all of these problems would be to have a
.git/annex/objects directory in the exporttree=yes remove. Rather than
deleting any key from it, the proxy can mode a key into that directory.
(git-remote-annex already uses such a directory for storing its keys on
exporttree=yes remotes).
## possible enhancement: indirect uploads ## possible enhancement: indirect uploads

153
doc/git-annex-p2phttp.mdwn Normal file
View file

@ -0,0 +1,153 @@
# NAME
git-annex-p2phttp - HTTP server for the git-annex API
# SYNOPSIS
git-annex p2phttp
# DESCRIPTION
This is a HTTP server for the git-annex API.
It is the git-annex equivilant of git-http-backend(1), for serving
a repository over HTTP with write access for authenticated users.
This does not serve the git repository over HTTP, only the git-annex
API.
Typically a remote will have `remote.name.url` set to a http url
as usual, and `remote.name.annexUrl` set to an annex+http url such as
"annex+http://example.com/git-annex/". The annex+http url is
served by this server, and uses port 9417 by default.
As well as serving the git-annex HTTP API, this server provides a
convenient way to download the content of any key, by using the path
"/git-annex/$uuid/$key". For example:
$ curl http://example.com:9417/git-annex/f11773f0-11e1-45b2-9805-06db16768efe/key/SHA256E-s6--5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03
hello
# OPTIONS
* `--jobs=N` `-JN`
This or annex.jobs must be set to configure the number of worker
threads that serve connections to the webserver.
Since the webserver itself also uses one of these threads,
this needs to be set to 2 or more.
A good choice is often one worker per CPU core: `--jobs=cpus`
* `--proxyconnections=N`
When this command is run in a repository that is configured to act as a
proxy for some of its remotes, this is the maximum number of idle
connections to keep open to proxied remotes.
The default is 1.
* `--clusterjobs=N`
When this command is run in a repository that is a gateway for a cluster,
this is the number of concurrent jobs to use to access nodes of the
cluster, per connection to the webserver.
The default is 1.
A good choice for this will be a balance between the number of nodes
in the cluster and the value of `--jobs`.
For example, if the cluster has 4 nodes, and `--jobs=4`, using
`--clusterjobs=4` will make all nodes in the cluster be accessed
concurrently, which is often optimal. But around 20 cores can be needed
when the webserver is busy.
* `--port=N`
Port to listen on. The default is port 9417, which is the default
port used for an annex+http or annex+https url.
It is not recommended to run this command as root in order to
use a low port like port 80. It will not drop permissions when run as
root.
* `--bind=address`
What address to bind to. The default is to bind to all addresses.
* `--certfile=filename`
TLS certificate file to use. Combining this with `--privatekeyfile`
makes the server use HTTPS.
* `--privatekeyfile=filename`
TLS private key file to use. Combining this with `--certfile`
makes the server use HTTPS.
* `--chainfile=filename`
TLS chain file to use. This option can be repeated any number of times.
* `--authenv`
Allows users to be authenticated with a username and password.
For security, this only allows authentication when the user connects over
HTTPS.
To configure the passwords, set environment variables
like `GIT_ANNEX_P2PHTTP_PASSWORD_alice=foo123`
The permissions of users can also be configured by setting
environment variables like
`GIT_ANNEX_P2PHTTP_PERMISSIONS_alice=readonly`. The value
can be either "readonly" or "appendonly". When this is not set,
the default is to give the user full read+write+remove access.
* `--authenv-http`
Like `--authenv`, but allows authentication when the user connects
over HTTP. This is not secure, since HTTP basic authentication is not
encrypted.
* `--unauth-readonly`
Allows unauthenticated users to read the repository, but not make
modifications to it.
* `--unauth-appendonly`
Allows unauthenticated users to read the repository, and store data in
it, but not remove data from it.
* `--wideopen`
Gives unauthenticated users full read+write+remove access to the
repository.
Please think carefully before enabling this option.
# SEE ALSO
[[git-annex]](1)
git-http-backend(1)
[[git-annex-shell]](1)
[[git-annex-updateproxy]](1)
[[git-annex-initcluster]](1)
[[git-annex-updatecluster]](1)
<https://git-annex.branchable.com/design/p2p_protocol_over_http/>
# AUTHOR
Joey Hess <id@joeyh.name>
<http://git-annex.branchable.com/>
Warning: Automatically converted into a man page by mdwn2man. Edit with care

View file

@ -26,7 +26,7 @@ it. Then after pulling from "work", git-annex will know about an
additional remote, "work-foo". That remote will be accessed using "work" as additional remote, "work-foo". That remote will be accessed using "work" as
a proxy. a proxy.
Proxies can only be accessed via ssh. Proxies can only be accessed via ssh or by an annex+http url.
# OPTIONS # OPTIONS

View file

@ -212,6 +212,13 @@ content from the key-value store.
See [[git-annex-webapp]](1) for details. See [[git-annex-webapp]](1) for details.
* `p2phttp`
Allows a git-annex repository to be accessed over HTTP using git-annex
p2p protocol.
See [[git-annex-p2phttp]](1) for details.
* `remotedaemon` * `remotedaemon`
Persistant communication with remotes. Persistant communication with remotes.
@ -1235,6 +1242,13 @@ repository, using [[git-annex-config]]. See its man page for a list.)
After changing this config, you need to re-run `git-annex init` for it After changing this config, you need to re-run `git-annex init` for it
to take effect. to take effect.
* `annex.url`
When a remote has a http url, the first time git-annex uses the remote
it will check if it can download its `.git/config` file. If it is able
to, and the file has this config set to an "annex+http" or "annex+https"
url, that url will be copied into `remote.<name>.annexUrl`.
* `annex.resolvemerge` * `annex.resolvemerge`
Set to false to prevent merge conflicts in the checked out branch Set to false to prevent merge conflicts in the checked out branch
@ -1530,9 +1544,14 @@ Remotes are configured using these settings in `.git/config`.
* `remote.<name>.annexUrl` * `remote.<name>.annexUrl`
Can be used to specify a different url than the regular `remote.<name>.url` Can be used to specify a different url than the regular `remote.<name>.url`
for git-annex to use when talking with the remote. Similar to the `pushUrl` for git-annex to use for accessing the remote. Similar to the `pushUrl`
used by git-push. used by git-push.
When this is set to an annex+http or annex+https url, that url is used
for git-annex operations only, and the `remote.<name>.url` is used for
git operations. This allows using [[git-annex-p2phttp]] to serve a
git-annex repository over http.
* `remote.<name>.annex-uuid` * `remote.<name>.annex-uuid`
git-annex caches UUIDs of remote repositories here. git-annex caches UUIDs of remote repositories here.

View file

@ -12,8 +12,8 @@ special remotes.
## using a cluster ## using a cluster
To use a cluster, your repository needs to have its gateway configured as a To use a cluster, your repository needs to have its gateway configured as a
remote. Clusters can currently only be accessed via ssh. This gateway remote. Clusters can currently only be accessed via ssh or by a annex+http
remote is added the same as any other git remote: url. This gateway remote is added the same as any other git remote:
$ git remote add bigserver me@bigserver:annex $ git remote add bigserver me@bigserver:annex
@ -121,6 +121,9 @@ in the git-annex branch. That tells other repositories about the cluster.
Started proxying for node2 Started proxying for node2
Started proxying for node3 Started proxying for node3
The cluster will now be accessible over ssh. To also let the cluster be
accessed over http, you would need to set up a [[tips/smart_http_server]].
Operations that affect multiple nodes of a cluster can often be sped up by Operations that affect multiple nodes of a cluster can often be sped up by
configuring annex.jobs in the gateway repository. configuring annex.jobs in the gateway repository.
In the example above, the nodes are all disk bound, so operating In the example above, the nodes are all disk bound, so operating

View file

@ -28,4 +28,14 @@ Here's how I set it up. --[[Joey]]
When users clone over http, and run git-annex, it will When users clone over http, and run git-annex, it will
automatically learn all about your repository and be able to download files automatically learn all about your repository and be able to download files
right out of it, also using http. right out of it, also using http.
----
The above is a simple way to set that up, but it's not necessarily the
*best* way. Both git and git-annex will be accessing the repository using
dumb http, which can be innefficient. And it doesn't allow write access.
For something smarter, you may want to also set up
[git smart http](https://git-scm.com/book/en/v2/Git-on-the-Server-Smart-HTTP),
and the git-annex equivilant, a [[smart_http_server]].

View file

@ -0,0 +1,39 @@
git-annex can access a remote using any web server,
as shown in the tip [[setup_a_public_repository_on_a_web_site]].
That's limited to basic read-only repository access though. Git
has [smart HTTP](https://git-scm.com/book/en/v2/Git-on-the-Server-Smart-HTTP)
that can be used to allow pushes over http. And git-annex has an
equivilant, the [[git annex-p2phttp command|/git-annex-p2phttp]].
As well as allowing write access to authorized users over http,
`git-annex p2phttp` also allows accessing [[clusters]], and other proxied
remotes over http.
You will still need to run a web server to serve the git repository.
`git-annex p2phttp` only serves git-annex's own
[[API|design/p2p_protocol_over_http]], and it does it
on a different port (9417 by default).
You will need to arrange to run `git-annex p2phttp` in your repository as a
daemon or service. Note that it should not be run as root, but as whatever
user owns the repository. It has several options you can use to configure
it, including controlling who can access the repository.
So there are two web servers, and thus two different urls.
A remote will have `remote.name.url` set to the http url
that git will use, and also have `remote.name.annexUrl` set to the url
that git-annex will use to talk to `git-annex p2phttp`. That url
looks like this:
annex+http://example.com/git-annex/
The "annex+http" (or "annex+https") indicates that it's a git-annex API
url, which defaults to being on port 9417 unless a different port is set.
It would be annoying if every user who cloned your repository
had to set `remote.name.annexUrl` manually. So there's a way to automate it.
In the git config file of the repository, set `annex.url` to the "annex+http"
(or "annex+https") url. The first time it uses a http remote, git-annex
downloads its git config file, and sets `remote.name.annexUrl` to the value
of the remote's `annex.url`.

View file

@ -28,19 +28,60 @@ Planned schedule of work:
## work notes ## work notes
* Next step: Ready to begin implementing in servant. I have a file * An interrupted PUT to cluster that has a node that is a special remote
`servant.hs` in the httpproto branch that works through some of the over http leaves open the connection to the cluster, so the next request
bytestring streaming issues. opens another one.
* Perhaps: Support cgi program that proxies over to a webserver So does an interrupted PUT directly to the proxied ;
speaking the http protocol. special remote over http.
* When part of a file has been sent to a cluster via the http server,
the transfer interrupted, and another node is added to the cluster,
and the transfer of the file performed again, there is a failure
sending to the node that had an incomplete copy. It fails like this:
//home/joey/tmp/bench/c3/.git/annex/tmp/SHA256E-s1048576000--09d7e19983a65682138fa5944f135e4fc593330c2693c41d22cc7881443d6060: withBinaryFile: illegal operation
git-annex: transfer already in progress, or unable to take transfer lock
p2pstdio: 1 failed
When using ssh and not the http server, the node that had the incomplete
copy also doesn't get the file, altough no error is displayed.
* When proxying a PUT to a special remote, no verification of the received
content is done, it's just written to a file and that is sent to the
special remote. This violates a usual invariant that any data being
received into a repository gets verified in passing. Although on the
other hand, when sending data to a special remote normally, there is also
no verification.
## items deferred until later for p2p protocol over http
* `git-annex p2phttp` should support serving several repositories at the same
time (not as proxied remotes), so that eg, every git-annex repository
on a server can be served on the same port.
* Support proxying to git remotes that use annex+http urls.
* `git-annex p2phttp` could support systemd socket activation. This would
allow making a systemd unit that listens on port 80.
## completed items for July's work on p2p protocol over http ## completed items for July's work on p2p protocol over http
* HTTP P2P protocol design [[design/p2p_protocol_over_http]].
* addressed [[doc/todo/P2P_locking_connection_drop_safety]] * addressed [[doc/todo/P2P_locking_connection_drop_safety]]
* finalized HTTP P2P protocol draft 1, * implemented server and client for HTTP P2P protocol
[[design/p2p_protocol_over_http/draft1]]
* added git-annex p2phttp command to serve HTTP P2P protocol
* Make git-annex p2phttp support https.
* Allow using annex+http urls in remote.name.annexUrl
* Make http server support proxying.
* Make http server support serving a cluster.
## items deferred until later for [[design/passthrough_proxy]] ## items deferred until later for [[design/passthrough_proxy]]
@ -86,6 +127,14 @@ Planned schedule of work:
* Optimise proxy speed. See design for ideas. * Optimise proxy speed. See design for ideas.
* Speed: A proxy to a local git repository spawns git-annex-shell
to communicate with it. It would be more efficient to operate
directly on the Remote. Especially when transferring content to/from it.
But: When a cluster has several nodes that are local git repositories,
and is sending data to all of them, this would need an alternate
interface than `storeKey`, which supports streaming, of chunks
of a ByteString.
* Use `sendfile()` to avoid data copying overhead when * Use `sendfile()` to avoid data copying overhead when
`receiveBytes` is being fed right into `sendBytes`. `receiveBytes` is being fed right into `sendBytes`.
Library to use: Library to use:

View file

@ -173,6 +173,9 @@ Flag MagicMime
Flag Crypton Flag Crypton
Description: Use the crypton library rather than the no longer maintained cryptonite Description: Use the crypton library rather than the no longer maintained cryptonite
Flag Servant
Description: Use the servant library, enabling using annex+http urls and git-annex p2phttp
Flag Benchmark Flag Benchmark
Description: Enable benchmarking Description: Enable benchmarking
Default: True Default: True
@ -312,6 +315,21 @@ Executable git-annex
else else
Build-Depends: cryptonite (>= 0.23) Build-Depends: cryptonite (>= 0.23)
if flag(Servant)
Build-Depends:
servant,
servant-server,
servant-client,
servant-client-core,
warp (>= 3.2.8),
warp-tls (>= 3.2.2)
CPP-Options: -DWITH_SERVANT
Other-Modules:
Command.P2PHttp
P2P.Http
P2P.Http.Server
P2P.Http.State
if (os(windows)) if (os(windows))
Build-Depends: Build-Depends:
Win32 ((>= 2.6.1.0 && < 2.12.0.0) || >= 2.13.4.0), Win32 ((>= 2.6.1.0 && < 2.12.0.0) || >= 2.13.4.0),
@ -878,6 +896,9 @@ Executable git-annex
P2P.Address P2P.Address
P2P.Annex P2P.Annex
P2P.Auth P2P.Auth
P2P.Http.Types
P2P.Http.Client
P2P.Http.Url
P2P.IO P2P.IO
P2P.Protocol P2P.Protocol
P2P.Proxy P2P.Proxy

View file

@ -10,6 +10,7 @@ flags:
debuglocks: false debuglocks: false
benchmark: true benchmark: true
crypton: true crypton: true
servant: true
packages: packages:
- '.' - '.'
resolver: lts-22.9 resolver: lts-22.9