Merge branch 'proxy'

This commit is contained in:
Joey Hess 2024-06-27 15:43:45 -04:00
commit c3f88923c0
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
78 changed files with 3145 additions and 448 deletions

View file

@ -74,6 +74,7 @@ import Types.CatFileHandles
import Types.RemoteConfig
import Types.TransferrerPool
import Types.VectorClock
import Types.Cluster
import Annex.VectorClock.Utility
import Annex.Debug.Utility
import qualified Database.Keys.Handle as Keys
@ -194,6 +195,7 @@ data AnnexState = AnnexState
, preferredcontentmap :: Maybe (FileMatcherMap Annex)
, requiredcontentmap :: Maybe (FileMatcherMap Annex)
, remoteconfigmap :: Maybe (M.Map UUID RemoteConfig)
, clusters :: Maybe Clusters
, forcetrust :: TrustMap
, trustmap :: Maybe TrustMap
, groupmap :: Maybe GroupMap
@ -213,6 +215,7 @@ data AnnexState = AnnexState
, urloptions :: Maybe UrlOptions
, insmudgecleanfilter :: Bool
, getvectorclock :: IO CandidateVectorClock
, proxyremote :: Maybe (Either ClusterUUID (Types.Remote.RemoteA Annex))
}
newAnnexState :: GitConfig -> Git.Repo -> IO AnnexState
@ -247,6 +250,7 @@ newAnnexState c r = do
, preferredcontentmap = Nothing
, requiredcontentmap = Nothing
, remoteconfigmap = Nothing
, clusters = Nothing
, forcetrust = M.empty
, trustmap = Nothing
, groupmap = Nothing
@ -266,6 +270,7 @@ newAnnexState c r = do
, urloptions = Nothing
, insmudgecleanfilter = False
, getvectorclock = vc
, proxyremote = Nothing
}
{- Makes an Annex state object for the specified git repo.
@ -423,6 +428,7 @@ changeGitRepo r = do
{ repo = r'
, gitconfig = gitconfigadjuster $
extractGitConfig FromGitConfig r'
, gitremotes = Nothing
}
{- Gets the RemoteGitConfig from a remote, given the Git.Repo for that

View file

@ -5,12 +5,9 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Action (
action,
verifiedAction,
startup,
quiesce,
stopCoProcesses,
) where
@ -27,11 +24,6 @@ import Annex.CheckIgnore
import Annex.TransferrerPool
import qualified Database.Keys
#ifndef mingw32_HOST_OS
import Control.Concurrent.STM
import System.Posix.Signals
#endif
{- Runs an action that may throw exceptions, catching and displaying them. -}
action :: Annex () -> Annex Bool
action a = tryNonAsync a >>= \case
@ -47,34 +39,6 @@ verifiedAction a = tryNonAsync a >>= \case
warning (UnquotedString (show e))
return (False, UnVerified)
{- Actions to perform each time ran. -}
startup :: Annex ()
startup = do
#ifndef mingw32_HOST_OS
av <- Annex.getRead Annex.signalactions
let propagate sig = liftIO $ installhandleronce sig av
propagate sigINT
propagate sigQUIT
propagate sigTERM
propagate sigTSTP
propagate sigCONT
propagate sigHUP
-- sigWINCH is not propagated; it should not be needed,
-- and the concurrent-output library installs its own signal
-- handler for it.
-- sigSTOP and sigKILL cannot be caught, so will not be propagated.
where
installhandleronce sig av = void $
installHandler sig (CatchOnce (gotsignal sig av)) Nothing
gotsignal sig av = do
mapM_ (\a -> a (fromIntegral sig)) =<< atomically (readTVar av)
raiseSignal sig
installhandleronce sig av
#else
return ()
#endif
{- Rn all cleanup actions, save all state, stop all long-running child
- processes.
-

167
Annex/Cluster.hs Normal file
View file

@ -0,0 +1,167 @@
{- clusters
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE RankNTypes, OverloadedStrings #-}
module Annex.Cluster where
import Annex.Common
import qualified Annex
import Types.Cluster
import Logs.Cluster
import P2P.Proxy
import P2P.Protocol
import P2P.IO
import Annex.Proxy
import Annex.UUID
import Logs.Location
import Logs.PreferredContent
import Types.Command
import Remote.List
import qualified Remote
import qualified Types.Remote as Remote
import qualified Data.Map as M
import qualified Data.Set as S
import System.Random
{- Proxy to a cluster. -}
proxyCluster
:: ClusterUUID
-> CommandPerform
-> ServerMode
-> ClientSide
-> (forall a. ((a -> CommandPerform) -> Annex (Either ProtoFailure a) -> CommandPerform))
-> CommandPerform
proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do
getClientProtocolVersion (fromClusterUUID clusteruuid) clientside
withclientversion protoerrhandler
where
proxymethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing
, addedContent = \u k -> logChange k u InfoPresent
}
withclientversion (Just (clientmaxversion, othermsg)) = do
-- The protocol versions supported by the nodes are not
-- known at this point, and would be too expensive to
-- determine. Instead, pick the newest protocol version
-- that we and the client both speak. The proxy code
-- checks protocol versions when operating on multiple
-- nodes, and allows nodes to have different protocol
-- versions.
let protocolversion = min maxProtocolVersion clientmaxversion
sendClientProtocolVersion clientside othermsg protocolversion
(getclientbypass protocolversion) protoerrhandler
withclientversion Nothing = proxydone
getclientbypass protocolversion othermsg =
getClientBypass clientside protocolversion othermsg
(withclientbypass protocolversion) protoerrhandler
withclientbypass protocolversion (bypassuuids, othermsg) = do
selectnode <- clusterProxySelector clusteruuid protocolversion bypassuuids
concurrencyconfig <- getConcurrencyConfig
proxy proxydone proxymethods servermode clientside
(fromClusterUUID clusteruuid)
selectnode concurrencyconfig protocolversion
othermsg protoerrhandler
clusterProxySelector :: ClusterUUID -> ProtocolVersion -> Bypass -> Annex ProxySelector
clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do
nodeuuids <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs)
<$> getClusters
myclusters <- annexClusters <$> Annex.getGitConfig
allremotes <- concat . Remote.byCost <$> remoteList
hereu <- getUUID
let bypass' = S.insert hereu bypass
let clusterremotes = filter (isnode bypass' allremotes nodeuuids myclusters) allremotes
fastDebug "Annex.Cluster" $ unwords
[ "cluster gateway at", fromUUID hereu
, "connecting to", show (map Remote.name clusterremotes)
, "bypass", show (S.toList bypass)
]
nodes <- mapM (proxySshRemoteSide protocolversion (Bypass bypass')) clusterremotes
return $ ProxySelector
{ proxyCHECKPRESENT = nodecontaining nodes
, proxyGET = nodecontaining nodes
-- The key is sent to multiple nodes at the same time,
-- skipping nodes where it's known/expected to already be
-- present to avoid needing to connect to those, and
-- skipping nodes where it's not preferred content.
, proxyPUT = \af k -> do
locs <- S.fromList <$> loggedLocations k
let l = filter (flip S.notMember locs . Remote.uuid . remote) nodes
l' <- filterM (\n -> isPreferredContent (Just (Remote.uuid (remote n))) mempty (Just k) af True) l
-- PUT to no nodes doesn't work, so fall
-- back to all nodes.
return $ nonempty [l', l] nodes
-- Remove the key from every node that contains it.
-- But, since it's possible the location log for some nodes
-- could be out of date, actually try to remove from every
-- node.
, proxyREMOVE = const (pure nodes)
-- Content is not locked on the cluster as a whole,
-- instead it can be locked on individual nodes that are
-- proxied to the client.
, proxyLOCKCONTENT = const (pure Nothing)
, proxyUNLOCKCONTENT = pure Nothing
}
where
-- Nodes of the cluster have remote.name.annex-cluster-node
-- containing its name.
--
-- Or, a node can be the cluster proxied by another gateway.
isnode bypass' rs nodeuuids myclusters r =
case remoteAnnexClusterNode (Remote.gitconfig r) of
Just names
| any (isclustername myclusters) names ->
flip S.member nodeuuids $
ClusterNodeUUID $ Remote.uuid r
| otherwise -> False
Nothing -> isclusterviagateway bypass' rs r
-- Is this remote the same cluster, proxied via another gateway?
--
-- Must avoid bypassed gateways to prevent cycles.
isclusterviagateway bypass' rs r =
case mkClusterUUID (Remote.uuid r) of
Just cu | cu == clusteruuid ->
case remoteAnnexProxiedBy (Remote.gitconfig r) of
Just proxyuuid | proxyuuid `S.notMember` bypass' ->
not $ null $
filter isclustergateway $
filter (\p -> Remote.uuid p == proxyuuid) rs
_ -> False
_ -> False
isclustergateway r = any (== clusteruuid) $
remoteAnnexClusterGateway $ Remote.gitconfig r
isclustername myclusters name =
M.lookup name myclusters == Just clusteruuid
nodecontaining nodes k = do
locs <- S.fromList <$> loggedLocations k
case filter (flip S.member locs . Remote.uuid . remote) nodes of
[] -> return Nothing
(node:[]) -> return (Just node)
(node:rest) ->
-- The list of nodes is ordered by cost.
-- Use any of the ones with equally low
-- cost.
let lowestcost = Remote.cost (remote node)
samecost = node : takeWhile (\n -> Remote.cost (remote n) == lowestcost) rest
in do
n <- getStdRandom $
randomR (0, length samecost - 1)
return (Just (samecost !! n))
nonempty (l:ls) fallback
| null l = nonempty ls fallback
| otherwise = l
nonempty [] fallback = fallback

View file

@ -58,7 +58,7 @@ handleDropsFrom locs rs reason fromhere key afile si preverified runner = do
getcopies fs = do
(untrusted, have) <- trustPartition UnTrusted locs
(numcopies, mincopies) <- getSafestNumMinCopies' afile key fs
return (length have, numcopies, mincopies, S.fromList untrusted)
return (numCopiesCount have, numcopies, mincopies, S.fromList untrusted)
{- Check that we have enough copies still to drop the content.
- When the remote being dropped from is untrusted, it was not

View file

@ -103,8 +103,8 @@ genDescription Nothing = do
Right username -> [username, at, hostname, ":", reldir]
Left _ -> [hostname, ":", reldir]
initialize :: Maybe String -> Maybe RepoVersion -> Annex ()
initialize mdescription mversion = checkInitializeAllowed $ \initallowed -> do
initialize :: Annex () -> Maybe String -> Maybe RepoVersion -> Annex ()
initialize startupannex mdescription mversion = checkInitializeAllowed $ \initallowed -> do
{- Has to come before any commits are made as the shared
- clone heuristic expects no local objects. -}
sharedclone <- checkSharedClone
@ -114,14 +114,14 @@ initialize mdescription mversion = checkInitializeAllowed $ \initallowed -> do
ensureCommit $ Annex.Branch.create
prepUUID
initialize' mversion initallowed
initialize' startupannex mversion initallowed
initSharedClone sharedclone
u <- getUUID
when (u == NoUUID) $
giveup "Failed to read annex.uuid from git config after setting it. This should never happen. Please file a bug report."
{- Avoid overwriting existing description with a default
- description. -}
whenM (pure (isJust mdescription) <||> not . M.member u <$> uuidDescMapRaw) $
@ -129,8 +129,8 @@ initialize mdescription mversion = checkInitializeAllowed $ \initallowed -> do
-- Everything except for uuid setup, shared clone setup, and initial
-- description.
initialize' :: Maybe RepoVersion -> InitializeAllowed -> Annex ()
initialize' mversion _initallowed = do
initialize' :: Annex () -> Maybe RepoVersion -> InitializeAllowed -> Annex ()
initialize' startupannex mversion _initallowed = do
checkLockSupport
checkFifoSupport
checkCrippledFileSystem
@ -162,6 +162,10 @@ initialize' mversion _initallowed = do
createInodeSentinalFile False
fixupUnusualReposAfterInit
-- This is usually run at Annex startup, but when git-annex was
-- not already initialized, it will not yet have run.
startupannex
uninitialize :: Annex ()
uninitialize = do
-- Remove hooks that are written when initializing.
@ -203,12 +207,12 @@ getInitializedVersion = do
-
- Checks repository version and handles upgrades too.
-}
ensureInitialized :: Annex [Remote] -> Annex ()
ensureInitialized remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade
ensureInitialized :: Annex () -> Annex [Remote] -> Annex ()
ensureInitialized startupannex remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade
where
needsinit = ifM autoInitializeAllowed
( do
tryNonAsync (initialize Nothing Nothing) >>= \case
tryNonAsync (initialize startupannex Nothing Nothing) >>= \case
Right () -> noop
Left e -> giveup $ show e ++ "\n" ++
"git-annex: automatic initialization failed due to above problems"
@ -256,15 +260,16 @@ guardSafeToUseRepo a = ifM (inRepo Git.Config.checkRepoConfigInaccessible)
-
- Checks repository version and handles upgrades too.
-}
autoInitialize :: Annex [Remote] -> Annex ()
autoInitialize :: Annex () -> Annex [Remote] -> Annex ()
autoInitialize = autoInitialize' autoInitializeAllowed
autoInitialize' :: Annex Bool -> Annex [Remote] -> Annex ()
autoInitialize' check remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade
autoInitialize' :: Annex Bool -> Annex () -> Annex [Remote] -> Annex ()
autoInitialize' check startupannex remotelist =
getInitializedVersion >>= maybe needsinit checkUpgrade
where
needsinit =
whenM (initializeAllowed <&&> check) $ do
initialize Nothing Nothing
initialize startupannex Nothing Nothing
autoEnableSpecialRemotes remotelist
{- Checks if a repository is initialized. Does not check version for upgrade. -}

View file

@ -1,6 +1,6 @@
{- git-annex numcopies configuration and checking
-
- Copyright 2014-2021 Joey Hess <id@joeyh.name>
- Copyright 2014-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -20,6 +20,8 @@ module Annex.NumCopies (
defaultNumCopies,
numCopiesCheck,
numCopiesCheck',
numCopiesCheck'',
numCopiesCount,
verifyEnoughCopiesToDrop,
verifiableCopies,
UnVerifiedCopy(..),
@ -30,6 +32,7 @@ import qualified Annex
import Types.NumCopies
import Logs.NumCopies
import Logs.Trust
import Logs.Cluster
import Annex.CheckAttr
import qualified Remote
import qualified Types.Remote as Remote
@ -39,8 +42,10 @@ import Annex.CatFile
import qualified Database.Keys
import Control.Exception
import qualified Control.Monad.Catch as M
import qualified Control.Monad.Catch as MC
import Data.Typeable
import qualified Data.Set as S
import qualified Data.Map as M
defaultNumCopies :: NumCopies
defaultNumCopies = configuredNumCopies 1
@ -197,12 +202,24 @@ numCopiesCheck file key vs = do
numCopiesCheck' :: RawFilePath -> (Int -> Int -> v) -> [UUID] -> Annex v
numCopiesCheck' file vs have = do
needed <- fromNumCopies . fst <$> getFileNumMinCopies file
let nhave = length have
needed <- fst <$> getFileNumMinCopies file
let nhave = numCopiesCount have
explain (ActionItemTreeFile file) $ Just $ UnquotedString $
"has " ++ show nhave ++ " " ++ pluralCopies nhave ++
", and the configured annex.numcopies is " ++ show needed
return $ nhave `vs` needed
return $ numCopiesCheck'' have vs needed
numCopiesCheck'' :: [UUID] -> (Int -> Int -> v) -> NumCopies -> v
numCopiesCheck'' have vs needed =
let nhave = numCopiesCount have
in nhave `vs` fromNumCopies needed
{- When a key is logged as present in a node of the cluster,
- the cluster's UUID will also be in the list, but is not a
- distinct copy.
-}
numCopiesCount :: [UUID] -> Int
numCopiesCount = length . filter (not . isClusterUUID)
data UnVerifiedCopy = UnVerifiedRemote Remote | UnVerifiedHere
deriving (Ord, Eq)
@ -214,6 +231,7 @@ data UnVerifiedCopy = UnVerifiedRemote Remote | UnVerifiedHere
verifyEnoughCopiesToDrop
:: String -- message to print when there are no known locations
-> Key
-> Maybe UUID -- repo dropping from
-> Maybe ContentRemovalLock
-> NumCopies
-> MinCopies
@ -223,14 +241,14 @@ verifyEnoughCopiesToDrop
-> (SafeDropProof -> Annex a) -- action to perform the drop
-> Annex a -- action to perform when unable to drop
-> Annex a
verifyEnoughCopiesToDrop nolocmsg key removallock neednum needmin skip preverified tocheck dropaction nodropaction =
verifyEnoughCopiesToDrop nolocmsg key dropfrom removallock neednum needmin skip preverified tocheck dropaction nodropaction =
helper [] [] preverified (nub tocheck) []
where
helper bad missing have [] lockunsupported =
liftIO (mkSafeDropProof neednum needmin have removallock) >>= \case
Right proof -> dropaction proof
Left stillhave -> do
notEnoughCopies key neednum needmin stillhave (skip++missing) bad nolocmsg lockunsupported
notEnoughCopies key dropfrom neednum needmin stillhave (skip++missing) bad nolocmsg lockunsupported
nodropaction
helper bad missing have (c:cs) lockunsupported
| isSafeDrop neednum needmin have removallock =
@ -239,12 +257,17 @@ verifyEnoughCopiesToDrop nolocmsg key removallock neednum needmin skip preverifi
Left stillhave -> helper bad missing stillhave (c:cs) lockunsupported
| otherwise = case c of
UnVerifiedHere -> lockContentShared key contverified
UnVerifiedRemote r -> checkremote r contverified $
let lockunsupported' = r : lockunsupported
in Remote.hasKey r key >>= \case
Right True -> helper bad missing (mkVerifiedCopy RecentlyVerifiedCopy r : have) cs lockunsupported'
Left _ -> helper (r:bad) missing have cs lockunsupported'
Right False -> helper bad (Remote.uuid r:missing) have cs lockunsupported'
UnVerifiedRemote r
-- Skip cluster uuids because locking is
-- not supported with them, instead will
-- lock individual nodes.
| isClusterUUID (Remote.uuid r) -> helper bad missing have cs lockunsupported
| otherwise -> checkremote r contverified $
let lockunsupported' = r : lockunsupported
in Remote.hasKey r key >>= \case
Right True -> helper bad missing (mkVerifiedCopy RecentlyVerifiedCopy r : have) cs lockunsupported'
Left _ -> helper (r:bad) missing have cs lockunsupported'
Right False -> helper bad (Remote.uuid r:missing) have cs lockunsupported'
where
contverified vc = helper bad missing (vc : have) cs lockunsupported
@ -264,11 +287,11 @@ verifyEnoughCopiesToDrop nolocmsg key removallock neednum needmin skip preverifi
-- of exceptions by using DropException.
let a = lockcontent key $ \v ->
cont v `catchNonAsync` (throw . DropException)
a `M.catches`
[ M.Handler (\ (e :: AsyncException) -> throwM e)
, M.Handler (\ (e :: SomeAsyncException) -> throwM e)
, M.Handler (\ (DropException e') -> throwM e')
, M.Handler (\ (_e :: SomeException) -> fallback)
a `MC.catches`
[ MC.Handler (\ (e :: AsyncException) -> throwM e)
, MC.Handler (\ (e :: SomeAsyncException) -> throwM e)
, MC.Handler (\ (DropException e') -> throwM e')
, MC.Handler (\ (_e :: SomeException) -> fallback)
]
Nothing -> fallback
@ -277,8 +300,8 @@ data DropException = DropException SomeException
instance Exception DropException
notEnoughCopies :: Key -> NumCopies -> MinCopies -> [VerifiedCopy] -> [UUID] -> [Remote] -> String -> [Remote] -> Annex ()
notEnoughCopies key neednum needmin have skip bad nolocmsg lockunsupported = do
notEnoughCopies :: Key -> Maybe UUID -> NumCopies -> MinCopies -> [VerifiedCopy] -> [UUID] -> [Remote] -> String -> [Remote] -> Annex ()
notEnoughCopies key dropfrom neednum needmin have skip bad nolocmsg lockunsupported = do
showNote "unsafe"
if length have < fromNumCopies neednum
then showLongNote $ UnquotedString $
@ -297,7 +320,29 @@ notEnoughCopies key neednum needmin have skip bad nolocmsg lockunsupported = do
++ Remote.listRemoteNames lockunsupported
Remote.showTriedRemotes bad
Remote.showLocations True key (map toUUID have++skip) nolocmsg
-- When dropping from a cluster, don't suggest making the nodes of
-- the cluster available
clusternodes <- case mkClusterUUID =<< dropfrom of
Nothing -> pure []
Just cu -> do
clusters <- getClusters
pure $ maybe [] (map fromClusterNodeUUID . S.toList) $
M.lookup cu (clusterUUIDs clusters)
let excludeset = S.fromList $ map toUUID have++skip++clusternodes
-- Don't suggest making a cluster available when dropping from its
-- node.
let exclude u
| u `S.member` excludeset = pure True
| otherwise = case (dropfrom, mkClusterUUID u) of
(Just dropfrom', Just cu) -> do
clusters <- getClusters
pure $ case M.lookup cu (clusterUUIDs clusters) of
Just nodes ->
ClusterNodeUUID dropfrom'
`S.member` nodes
Nothing -> False
_ -> pure False
Remote.showLocations True key exclude nolocmsg
pluralCopies :: Int -> String
pluralCopies 1 = "copy"
@ -312,17 +357,27 @@ pluralCopies _ = "copies"
- The return lists also exclude any repositories that are untrusted,
- since those should not be used for verification.
-
- When dropping from a cluster UUID, its nodes are excluded.
-
- Cluster UUIDs are also excluded since locking a key on a cluster
- is done by locking on individual nodes.
-
- The UnVerifiedCopy list is cost ordered.
- The VerifiedCopy list contains repositories that are trusted to
- contain the key.
-}
verifiableCopies :: Key -> [UUID] -> Annex ([UnVerifiedCopy], [VerifiedCopy])
verifiableCopies key exclude = do
locs <- Remote.keyLocations key
locs <- filter (not . isClusterUUID) <$> Remote.keyLocations key
(remotes, trusteduuids) <- Remote.remoteLocations (Remote.IncludeIgnored False) locs
=<< trustGet Trusted
clusternodes <- if any isClusterUUID exclude
then do
clusters <- getClusters
pure $ concatMap (getclusternodes clusters) exclude
else pure []
untrusteduuids <- trustGet UnTrusted
let exclude' = exclude ++ untrusteduuids
let exclude' = exclude ++ untrusteduuids ++ clusternodes
let remotes' = Remote.remotesWithoutUUID remotes (exclude' ++ trusteduuids)
let verified = map (mkVerifiedCopy TrustedCopy) $
filter (`notElem` exclude') trusteduuids
@ -331,3 +386,8 @@ verifiableCopies key exclude = do
then [UnVerifiedHere]
else []
return (herec ++ map UnVerifiedRemote remotes', verified)
where
getclusternodes clusters u = case mkClusterUUID u of
Just cu -> maybe [] (map fromClusterNodeUUID . S.toList) $
M.lookup cu (clusterUUIDs clusters)
Nothing -> []

26
Annex/Proxy.hs Normal file
View file

@ -0,0 +1,26 @@
{- proxying
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Proxy where
import Annex.Common
import P2P.Proxy
import P2P.Protocol
import P2P.IO
import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
-- FIXME: Support special remotes.
proxySshRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide
proxySshRemoteSide clientmaxversion bypass r = mkRemoteSide r $
openP2PShellConnection' r clientmaxversion bypass >>= \case
Just conn@(OpenConnection (remoterunst, remoteconn, _)) ->
return $ Just
( remoterunst
, remoteconn
, void $ liftIO $ closeP2PShellConnection conn
)
_ -> return Nothing

67
Annex/Startup.hs Normal file
View file

@ -0,0 +1,67 @@
{- git-annex startup
-
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Startup where
import Annex.Common
import qualified Annex
import Logs.Cluster
#ifndef mingw32_HOST_OS
import Control.Concurrent.STM
import System.Posix.Signals
#endif
{- Run when starting up the main git-annex program. -}
startup :: Annex ()
startup = do
startupSignals
gc <- Annex.getGitConfig
when (isinitialized gc)
startupAnnex
where
isinitialized gc = annexUUID gc /= NoUUID
&& isJust (annexVersion gc)
{- Run when starting up the main git-annex program when
- git-annex has already been initialized.
- Alternatively, run after initialization.
-}
startupAnnex :: Annex ()
startupAnnex = doQuietAction $
-- Logs.Location needs clusters to be loaded before it is used,
-- in order for a cluster to be treated as the location of keys
-- that are located in any of its nodes.
void loadClusters
startupSignals :: Annex ()
startupSignals = do
#ifndef mingw32_HOST_OS
av <- Annex.getRead Annex.signalactions
let propagate sig = liftIO $ installhandleronce sig av
propagate sigINT
propagate sigQUIT
propagate sigTERM
propagate sigTSTP
propagate sigCONT
propagate sigHUP
-- sigWINCH is not propagated; it should not be needed,
-- and the concurrent-output library installs its own signal
-- handler for it.
-- sigSTOP and sigKILL cannot be caught, so will not be propagated.
where
installhandleronce sig av = void $
installHandler sig (CatchOnce (gotsignal sig av)) Nothing
gotsignal sig av = do
mapM_ (\a -> a (fromIntegral sig)) =<< atomically (readTVar av)
raiseSignal sig
installhandleronce sig av
#else
return ()
#endif

View file

@ -6,7 +6,7 @@
- UUIDs of remotes are cached in git config, using keys named
- remote.<name>.annex-uuid
-
- Copyright 2010-2016 Joey Hess <id@joeyh.name>
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -15,6 +15,7 @@
module Annex.UUID (
configkeyUUID,
configRepoUUID,
getUUID,
getRepoUUID,
getUncachedUUID,
@ -47,6 +48,9 @@ import Data.String
configkeyUUID :: ConfigKey
configkeyUUID = annexConfig "uuid"
configRepoUUID :: Git.Repo -> ConfigKey
configRepoUUID r = remoteAnnexConfig r "uuid"
{- Generates a random UUID, that does not include the MAC address. -}
genUUID :: IO UUID
genUUID = toUUID <$> U4.nextRandom
@ -82,7 +86,7 @@ getRepoUUID r = do
updatecache u = do
g <- gitRepo
when (g /= r) $ storeUUIDIn cachekey u
cachekey = remoteAnnexConfig r "uuid"
cachekey = configRepoUUID r
removeRepoUUID :: Annex ()
removeRepoUUID = do

View file

@ -19,6 +19,7 @@ import qualified Annex
import Annex.UUID
import Annex.AdjustedBranch
import Annex.Action
import Annex.Startup
import Types.StandardGroups
import Logs.PreferredContent
import qualified Annex.Branch
@ -85,7 +86,7 @@ initRepo False _ dir desc mgroup = inDir dir $ do
initRepo' :: Maybe String -> Maybe StandardGroup -> Annex ()
initRepo' desc mgroup = unlessM isInitialized $ do
initialize desc Nothing
initialize startupAnnex desc Nothing
u <- getUUID
maybe noop (defaultStandardGroup u) mgroup
{- Ensure branch gets committed right away so it is

View file

@ -19,7 +19,6 @@ import Utility.Env.Set
import Types.Distribution
import Types.Transfer
import Logs.Web
import Logs.Presence
import Logs.Location
import Annex.Content
import Annex.UUID

View file

@ -1,9 +1,18 @@
git-annex (10.20240532) UNRELEASED; urgency=medium
* git-annex remotes can now act as proxies that provide access to
their remotes. Configure this with remote.name.annex-proxy
and the git-annex update proxy command.
* Clusters are now supported. These are collections of nodes that can
be accessed as a single entity, accessed by one or more gateway
repositories.
* Added git-annex initcluster, updatecluster, and extendcluster commands.
* Fix a bug where interrupting git-annex while it is updating the
git-annex branch for an export could later lead to git fsck
complaining about missing tree objects.
* Tab completion of options like --from now includes special remotes.
* Tab completion of options like --from now includes special remotes,
as well as proxied remotes and clusters.
* P2P protocol version 2.
* Fix Windows build with Win32 2.13.4+
Thanks, Oleg Tolmatcev
* When --debugfilter or annex.debugfilter is set, avoid propigating

View file

@ -23,6 +23,7 @@ import qualified Annex
import qualified Git
import qualified Git.AutoCorrect
import qualified Git.Config
import Annex.Startup
import Annex.Action
import Annex.Environment
import Command

View file

@ -149,7 +149,7 @@ commandAction start = do
showEndMessage startmsg False
return False
{- Waits for all worker threads to finish and merges their AnnexStates
{- Waits for all worker thrneads to finish and merges their AnnexStates
- back into the current Annex's state.
-}
finishCommandActions :: Annex ()

View file

@ -124,6 +124,10 @@ import qualified Command.Smudge
import qualified Command.FilterProcess
import qualified Command.Restage
import qualified Command.Undo
import qualified Command.InitCluster
import qualified Command.UpdateCluster
import qualified Command.ExtendCluster
import qualified Command.UpdateProxy
import qualified Command.Version
import qualified Command.RemoteDaemon
#ifdef WITH_ASSISTANT
@ -247,6 +251,10 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
, Command.FilterProcess.cmd
, Command.Restage.cmd
, Command.Undo.cmd
, Command.InitCluster.cmd
, Command.UpdateCluster.cmd
, Command.ExtendCluster.cmd
, Command.UpdateProxy.cmd
, Command.Version.cmd
, Command.RemoteDaemon.cmd
#ifdef WITH_ASSISTANT

View file

@ -1,6 +1,6 @@
{- git-annex-shell main program
-
- Copyright 2010-2023 Joey Hess <id@joeyh.name>
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -8,6 +8,7 @@
module CmdLine.GitAnnexShell where
import Annex.Common
import qualified Annex
import qualified Git.Construct
import qualified Git.Config
import CmdLine
@ -19,6 +20,11 @@ import CmdLine.GitAnnexShell.Fields
import Remote.GCrypt (getGCryptUUID)
import P2P.Protocol (ServerMode(..))
import Git.Types
import qualified Types.Remote as R
import Logs.Proxy
import Logs.Cluster
import Logs.UUID
import Remote
import qualified Command.ConfigList
import qualified Command.NotifyChanges
@ -30,6 +36,7 @@ import qualified Command.SendKey
import qualified Command.DropKey
import qualified Data.Map as M
import qualified Data.Set as S
cmdsMap :: M.Map ServerMode [Command]
cmdsMap = M.fromList $ map mk
@ -39,20 +46,22 @@ cmdsMap = M.fromList $ map mk
]
where
readonlycmds = map addAnnexOptions
[ Command.ConfigList.cmd
[ notProxyable Command.ConfigList.cmd
, gitAnnexShellCheck Command.NotifyChanges.cmd
-- p2pstdio checks the environment variables to
-- determine the security policy to use
-- determine the security policy to use, so is safe to
-- include in the readonly list even though it is not
-- always readonly
, gitAnnexShellCheck Command.P2PStdIO.cmd
, gitAnnexShellCheck Command.InAnnex.cmd
, gitAnnexShellCheck Command.SendKey.cmd
, notProxyable (gitAnnexShellCheck Command.InAnnex.cmd)
, notProxyable (gitAnnexShellCheck Command.SendKey.cmd)
]
appendcmds = readonlycmds ++ map addAnnexOptions
[ gitAnnexShellCheck Command.RecvKey.cmd
[ notProxyable (gitAnnexShellCheck Command.RecvKey.cmd)
]
allcmds = appendcmds ++ map addAnnexOptions
[ gitAnnexShellCheck Command.DropKey.cmd
, Command.GCryptSetup.cmd
[ notProxyable (gitAnnexShellCheck Command.DropKey.cmd)
, notProxyable Command.GCryptSetup.cmd
]
mk (s, l) = (s, map (adddirparam . noMessages) l)
@ -77,17 +86,23 @@ commonShellOptions =
where
checkUUID expected = getUUID >>= check
where
check u | u == toUUID expected = noop
check NoUUID = checkGCryptUUID expected
check u = unexpectedUUID expected u
check u
| u == toUUID expected = noop
| otherwise =
unlessM (checkProxy (toUUID expected) u) $
unexpectedUUID expected u
checkGCryptUUID expected = check =<< getGCryptUUID True =<< gitRepo
where
check (Just u) | u == toUUID expected = noop
check Nothing = unexpected expected "uninitialized repository"
check (Just u) = unexpectedUUID expected u
unexpectedUUID expected u = unexpected expected $ "UUID " ++ fromUUID u
unexpected expected s = giveup $
"expected repository UUID " ++ expected ++ " but found " ++ s
run :: [String] -> IO ()
run [] = failure
@ -104,6 +119,11 @@ run c@(cmd:_)
| cmd `elem` builtins = failure
| otherwise = external c
failure :: IO ()
failure = giveup $ "bad parameters\n\n" ++ usage h cmdsList
where
h = "git-annex-shell [-c] command [parameters ...] [option ...]"
builtins :: [String]
builtins = map cmdname cmdsList
@ -165,7 +185,60 @@ checkField (field, val)
| field == fieldName autoInit = fieldCheck autoInit val
| otherwise = False
failure :: IO ()
failure = giveup $ "bad parameters\n\n" ++ usage h cmdsList
{- Check if this repository can proxy for a specified remote uuid,
- and if so enable proxying for it. -}
checkProxy :: UUID -> UUID -> Annex Bool
checkProxy remoteuuid ouruuid = M.lookup ouruuid <$> getProxies >>= \case
Nothing -> return False
-- This repository has (or had) proxying enabled. So it's
-- ok to display error messages that talk about proxies.
Just proxies ->
case filter (\p -> proxyRemoteUUID p == remoteuuid) (S.toList proxies) of
[] -> notconfigured
ps -> case mkClusterUUID remoteuuid of
Just cu -> proxyforcluster cu
Nothing -> proxyfor ps
where
h = "git-annex-shell [-c] command [parameters ...] [option ...]"
-- This repository may have multiple remotes that access the same
-- repository. Proxy for the lowest cost one that is configured to
-- be used as a proxy.
proxyfor ps = do
rs <- concat . byCost <$> remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
let sameuuid r = uuid r == remoteuuid
let samename r p = name r == proxyRemoteName p
case headMaybe (filter (\r -> sameuuid r && proxyisconfigured rs myclusters r && any (samename r) ps) rs) of
Nothing -> notconfigured
Just r -> do
Annex.changeState $ \st ->
st { Annex.proxyremote = Just (Right r) }
return True
-- Only proxy for a remote when the git configuration
-- allows it. This is important to prevent changes to
-- the git-annex branch making git-annex-shell unexpectedly
-- proxy for remotes.
proxyisconfigured rs myclusters r
| remoteAnnexProxy (R.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ R.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (R.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . R.gitconfig) $
filter (\p -> R.uuid p == proxyuuid) rs
Nothing -> False
proxyforcluster cu = do
clusters <- getClusters
if M.member cu (clusterUUIDs clusters)
then do
Annex.changeState $ \st ->
st { Annex.proxyremote = Just (Left cu) }
return True
else notconfigured
notconfigured = M.lookup remoteuuid <$> uuidDescMap >>= \case
Just desc -> giveup $ "not configured to proxy for repository " ++ fromUUIDDesc desc
Nothing -> return False

View file

@ -1,6 +1,6 @@
{- git-annex-shell checks
-
- Copyright 2012 Joey Hess <id@joeyh.name>
- Copyright 2012-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -82,3 +82,12 @@ gitAnnexShellCheck = addCheck GitAnnexShellOk okforshell . dontCheck repoExists
where
okforshell = unlessM (isInitialized <||> isJust . gcryptId <$> Annex.getGitConfig) $
giveup "Not a git-annex or gcrypt repository."
{- Used for Commands that don't support proxying. -}
notProxyable :: Command -> Command
notProxyable c = addCheck GitAnnexShellNotProxyable checkok c
where
checkok = Annex.getState Annex.proxyremote >>= \case
Nothing -> return ()
Just _ -> giveup $ "Cannot proxy " ++ cmdname c ++ " command."

View file

@ -30,6 +30,7 @@ import qualified Logs.Remote
import qualified Remote.External
import Remote.Helper.Encryptable (parseEncryptionMethod)
import Annex.Transfer
import Annex.Startup
import Backend.GitRemoteAnnex
import Config
import Types.Key
@ -1173,7 +1174,7 @@ cleanupInitialization sab alternatejournaldir = void $ tryNonAsync $ do
inRepo $ Git.Branch.delete Annex.Branch.fullname
ifM (Annex.Branch.hasSibling <&&> nonbuggygitversion)
( do
autoInitialize' (pure True) remoteList
autoInitialize' (pure True) startupAnnex remoteList
differences <- allDifferences <$> recordedDifferences
when (differences /= mempty) $
deletebundleobjects

View file

@ -23,6 +23,7 @@ import CmdLine.Batch as ReExported
import Options.Applicative as ReExported hiding (command)
import qualified Git
import Annex.Init
import Annex.Startup
import Utility.Daemon
import Types.Transfer
import Types.ActionItem as ReExported
@ -125,7 +126,7 @@ commonChecks :: [CommandCheck]
commonChecks = [repoExists]
repoExists :: CommandCheck
repoExists = CommandCheck RepoExists (ensureInitialized remoteList)
repoExists = CommandCheck RepoExists (ensureInitialized startupAnnex remoteList)
notBareRepo :: Command -> Command
notBareRepo = addCheck CheckNotBareRepo checkNotBareRepo

View file

@ -17,6 +17,7 @@ import qualified BuildInfo
import Utility.HumanTime
import Assistant.Install
import Remote.List
import Annex.Startup
import Control.Concurrent.Async
@ -63,7 +64,7 @@ start o
stop
| otherwise = do
liftIO ensureInstalled
ensureInitialized remoteList
ensureInitialized startupAnnex remoteList
Command.Watch.start True (daemonOptions o) (startDelayOption o)
startNoRepo :: AssistantOptions -> IO ()

View file

@ -16,6 +16,7 @@ import Git.Types
import Remote.GCrypt (coreGCryptId)
import qualified CmdLine.GitAnnexShell.Fields as Fields
import CmdLine.GitAnnexShell.Checks
import Annex.Startup
cmd :: Command
cmd = noCommit $ dontCheck repoExists $
@ -47,7 +48,7 @@ findOrGenUUID = do
else ifM (Annex.Branch.hasSibling <||> (isJust <$> Fields.getField Fields.autoInit))
( do
liftIO checkNotReadOnly
initialize Nothing Nothing
initialize startupAnnex Nothing Nothing
getUUID
, return NoUUID
)

View file

@ -205,7 +205,7 @@ doDrop pcc dropfrom contentlock key afile numcopies mincopies skip preverified c
ifM (Annex.getRead Annex.force)
( dropaction Nothing
, ifM (checkRequiredContent pcc dropfrom key afile)
( verifyEnoughCopiesToDrop nolocmsg key
( verifyEnoughCopiesToDrop nolocmsg key (Just dropfrom)
contentlock numcopies mincopies
skip preverified check
(dropaction . Just)
@ -253,7 +253,7 @@ checkDropAuto automode mremote afile key a =
uuid <- getUUID
let remoteuuid = fromMaybe uuid $ Remote.uuid <$> mremote
locs' <- trustExclude UnTrusted $ filter (/= remoteuuid) locs
if length locs' >= fromNumCopies numcopies
if numCopiesCheck'' locs' (>=) numcopies
then a numcopies mincopies
else stop
| otherwise = a numcopies mincopies

58
Command/ExtendCluster.hs Normal file
View file

@ -0,0 +1,58 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Command.ExtendCluster where
import Command
import qualified Annex
import Types.Cluster
import Config
import Types.GitConfig
import qualified Remote
import qualified Command.UpdateCluster
import qualified Data.Map as M
cmd :: Command
cmd = command "extendcluster" SectionSetup "add an gateway to a cluster"
(paramPair paramRemote paramName) (withParams seek)
seek :: CmdParams -> CommandSeek
seek (remotename:clustername:[]) = Remote.byName (Just clusterremotename) >>= \case
Just clusterremote -> Remote.byName (Just remotename) >>= \case
Just gatewayremote ->
case mkClusterUUID (Remote.uuid clusterremote) of
Just cu -> do
commandAction $ start cu clustername gatewayremote
Command.UpdateCluster.seek []
Nothing -> giveup $ clusterremotename
++ " is not a cluster remote."
Nothing -> giveup $ "No remote named " ++ remotename ++ " exists."
Nothing -> giveup $ "Expected to find a cluster remote named "
++ clusterremotename
++ " that is accessed via " ++ remotename
++ ", but there is no such remote. Perhaps you need to"
++ "git fetch from " ++ remotename
++ ", or git-annex updatecluster needs to be run there?"
where
clusterremotename = remotename ++ "-" ++ clustername
seek _ = giveup "Expected two parameters, gateway and clustername."
start :: ClusterUUID -> String -> Remote -> CommandStart
start cu clustername gatewayremote = starting "extendcluster" ai si $ do
myclusters <- annexClusters <$> Annex.getGitConfig
let setcus f = setConfig f (fromUUID (fromClusterUUID cu))
unless (M.member clustername myclusters) $ do
setcus $ annexConfig ("cluster." <> encodeBS clustername)
setcus $ remoteAnnexConfig gatewayremote $
remoteGitConfigKey ClusterGatewayField
next $ return True
where
ai = ActionItemOther (Just (UnquotedString clustername))
si = SeekInput [clustername]

View file

@ -573,7 +573,7 @@ checkKeyNumCopies key afile numcopies = do
locs <- loggedLocations key
(untrustedlocations, otherlocations) <- trustPartition UnTrusted locs
(deadlocations, safelocations) <- trustPartition DeadTrusted otherlocations
let present = length safelocations
let present = numCopiesCount safelocations
if present < fromNumCopies numcopies
then ifM (checkDead key)
( do

View file

@ -108,7 +108,8 @@ getKey' key afile = dispatch
Remote.showTriedRemotes remotes
showlocs (map Remote.uuid remotes)
return False
showlocs exclude = Remote.showLocations False key exclude
showlocs exclude = Remote.showLocations False key
(\u -> pure (u `elem` exclude))
"No other repository is known to contain the file."
-- This check is to avoid an ugly message if a remote is a
-- drive that is not mounted.

View file

@ -319,7 +319,7 @@ verifyExisting key destfile (yes, no) = do
(needcopies, mincopies) <- getFileNumMinCopies destfile
(tocheck, preverified) <- verifiableCopies key []
verifyEnoughCopiesToDrop [] key Nothing needcopies mincopies [] preverified tocheck
verifyEnoughCopiesToDrop [] key Nothing Nothing needcopies mincopies [] preverified tocheck
(const yes) no
seekRemote :: Remote -> Branch -> Maybe TopFilePath -> Bool -> CheckGitIgnore -> [String] -> CommandSeek

View file

@ -12,6 +12,7 @@ module Command.Init where
import Command
import Annex.Init
import Annex.Version
import Annex.Startup
import Types.RepoVersion
import qualified Annex.SpecialRemote
@ -77,7 +78,7 @@ perform os = do
Just v | v /= wantversion ->
giveup $ "This repository is already a initialized with version " ++ show (fromRepoVersion v) ++ ", not changing to requested version."
_ -> noop
initialize
initialize startupAnnex
(if null (initDesc os) then Nothing else Just (initDesc os))
(initVersion os)
unless (noAutoEnable os)

50
Command/InitCluster.hs Normal file
View file

@ -0,0 +1,50 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Command.InitCluster where
import Command
import qualified Annex
import Types.Cluster
import Logs.UUID
import Config
import Annex.UUID
import Git.Types
import Git.Remote (isLegalName)
import qualified Data.Map as M
cmd :: Command
cmd = command "initcluster" SectionSetup "initialize a new cluster"
(paramPair paramName paramDesc) (withParams seek)
seek :: CmdParams -> CommandSeek
seek (clustername:desc:[]) = commandAction $
start clustername (toUUIDDesc desc)
seek (clustername:[]) = commandAction $
start clustername $ toUUIDDesc ("cluster " ++ clustername)
seek _ = giveup "Expected two parameters, name and description."
start :: RemoteName -> UUIDDesc -> CommandStart
start clustername desc = starting "initcluster" ai si $ do
unless (isLegalName clustername) $
giveup "That cluster name is not a valid git remote name."
myclusters <- annexClusters <$> Annex.getGitConfig
unless (M.member clustername myclusters) $ do
cu <- fromMaybe (giveup "unable to generate a cluster UUID")
<$> genClusterUUID <$> liftIO genUUID
setConfig (annexConfig ("cluster." <> encodeBS clustername))
(fromUUID (fromClusterUUID cu))
describeUUID (fromClusterUUID cu) desc
next $ return True
where
ai = ActionItemOther (Just (UnquotedString clustername))
si = SeekInput [clustername]

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2010-2023 Joey Hess <id@joeyh.name>
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -16,11 +16,11 @@ import Annex.Content
import qualified Remote
import Annex.UUID
import Annex.Transfer
import Logs.Presence
import Logs.Trust
import Logs.File
import Logs.Location
import Annex.NumCopies
import Types.Cluster
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
@ -194,7 +194,7 @@ toPerform' mcontentlock dest removewhen key afile fastcheck isthere = do
DropCheckNumCopies -> do
(numcopies, mincopies) <- getSafestNumMinCopies afile key
(tocheck, verified) <- verifiableCopies key [srcuuid]
verifyEnoughCopiesToDrop "" key (Just contentlock)
verifyEnoughCopiesToDrop "" key (Just srcuuid) (Just contentlock)
numcopies mincopies [srcuuid] verified
(UnVerifiedRemote dest : tocheck)
(drophere setpresentremote contentlock . showproof)
@ -300,7 +300,7 @@ fromDrop src destuuid deststartedwithcopy key afile adjusttocheck =
DropCheckNumCopies -> do
(numcopies, mincopies) <- getSafestNumMinCopies afile key
(tocheck, verified) <- verifiableCopies key [Remote.uuid src]
verifyEnoughCopiesToDrop "" key Nothing numcopies mincopies [Remote.uuid src] verified
verifyEnoughCopiesToDrop "" key (Just (Remote.uuid src)) Nothing numcopies mincopies [Remote.uuid src] verified
(adjusttocheck tocheck) (dropremote . showproof) faileddropremote
DropWorse -> faileddropremote
where
@ -503,7 +503,8 @@ fromToPerform src dest removewhen key afile = do
- On the other hand, when the destination repository did not start
- with a copy of a file, it can be dropped from the source without
- making numcopies worse, so the move is allowed even if numcopies
- is not met.
- is not met. (However, when the source is a cluster, dropping from it
- drops from all nodes, and so numcopies must be checked.)
-
- Similarly, a file can move from an untrusted repository to another
- untrusted repository, even if that is the only copy of the file.
@ -520,7 +521,7 @@ fromToPerform src dest removewhen key afile = do
willDropMakeItWorse :: UUID -> UUID -> DestStartedWithCopy -> Key -> AssociatedFile -> Annex DropCheck
willDropMakeItWorse srcuuid destuuid (DestStartedWithCopy deststartedwithcopy _) key afile =
ifM (Command.Drop.checkRequiredContent (Command.Drop.PreferredContentChecked False) srcuuid key afile)
( if deststartedwithcopy
( if deststartedwithcopy || isClusterUUID srcuuid
then unlessforced DropCheckNumCopies
else ifM checktrustlevel
( return DropAllowed

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2018 Joey Hess <id@joeyh.name>
- Copyright 2018-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -10,9 +10,16 @@ module Command.P2PStdIO where
import Command
import P2P.IO
import P2P.Annex
import P2P.Proxy
import qualified P2P.Protocol as P2P
import qualified Annex
import Annex.Proxy
import Annex.UUID
import qualified CmdLine.GitAnnexShell.Checks as Checks
import Logs.Location
import Logs.Cluster
import Annex.Cluster
import qualified Remote
import System.IO.Error
@ -34,16 +41,71 @@ start theiruuid = startingCustomOutput (ActionItemOther Nothing) $ do
(True, _) -> P2P.ServeReadOnly
(False, True) -> P2P.ServeAppendOnly
(False, False) -> P2P.ServeReadWrite
Annex.getState Annex.proxyremote >>= \case
Nothing ->
performLocal theiruuid servermode
Just (Right r) ->
performProxy theiruuid servermode r
Just (Left clusteruuid) ->
performProxyCluster theiruuid clusteruuid servermode
performLocal :: UUID -> P2P.ServerMode -> CommandPerform
performLocal theiruuid servermode = do
myuuid <- getUUID
let conn = stdioP2PConnection Nothing
let server = do
P2P.net $ P2P.sendMessage (P2P.AUTH_SUCCESS myuuid)
P2P.serveAuthed servermode myuuid
runst <- liftIO $ mkRunState $ Serving theiruuid Nothing
runFullProto runst conn server >>= \case
Right () -> done
-- Avoid displaying an error when the client hung up on us.
Left (ProtoFailureIOError e) | isEOFError e -> done
Left e -> giveup (describeProtoFailure e)
p2pErrHandler (const p2pDone) (runFullProto runst conn server)
performProxy :: UUID -> P2P.ServerMode -> Remote -> CommandPerform
performProxy clientuuid servermode r = do
clientside <- proxyClientSide clientuuid
getClientProtocolVersion (Remote.uuid r) clientside
(withclientversion clientside)
p2pErrHandler
where
done = next $ return True
withclientversion clientside (Just (clientmaxversion, othermsg)) = do
remoteside <- proxySshRemoteSide clientmaxversion mempty r
protocolversion <- either (const (min P2P.maxProtocolVersion clientmaxversion)) id
<$> runRemoteSide remoteside
(P2P.net P2P.getProtocolVersion)
let closer = do
closeRemoteSide remoteside
p2pDone
concurrencyconfig <- noConcurrencyConfig
let runproxy othermsg' = proxy closer proxymethods
servermode clientside
(Remote.uuid r)
(singleProxySelector remoteside)
concurrencyconfig
protocolversion othermsg' p2pErrHandler
sendClientProtocolVersion clientside othermsg protocolversion
runproxy p2pErrHandler
withclientversion _ Nothing = p2pDone
proxymethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing
, addedContent = \u k -> logChange k u InfoPresent
}
performProxyCluster :: UUID -> ClusterUUID -> P2P.ServerMode -> CommandPerform
performProxyCluster clientuuid clusteruuid servermode = do
clientside <- proxyClientSide clientuuid
proxyCluster clusteruuid p2pDone servermode clientside p2pErrHandler
proxyClientSide :: UUID -> Annex ClientSide
proxyClientSide clientuuid = do
clientrunst <- liftIO (mkRunState $ Serving clientuuid Nothing)
return $ ClientSide clientrunst (stdioP2PConnection Nothing)
p2pErrHandler :: (a -> CommandPerform) -> Annex (Either ProtoFailure a) -> CommandPerform
p2pErrHandler cont a = a >>= \case
-- Avoid displaying an error when the client hung up on us.
Left (ProtoFailureIOError e) | isEOFError e -> p2pDone
Left e -> giveup (describeProtoFailure e)
Right v -> cont v
p2pDone :: CommandPerform
p2pDone = next $ return True

View file

@ -10,6 +10,7 @@ module Command.Reinit where
import Command
import Annex.Init
import Annex.UUID
import Annex.Startup
import qualified Remote
import qualified Annex.SpecialRemote
@ -36,6 +37,6 @@ perform s = do
then return $ toUUID s
else Remote.nameToUUID s
storeUUID u
checkInitializeAllowed $ initialize' Nothing
checkInitializeAllowed $ initialize' startupAnnex Nothing
Annex.SpecialRemote.autoEnable
next $ return True

84
Command/UpdateCluster.hs Normal file
View file

@ -0,0 +1,84 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Command.UpdateCluster where
import Command
import qualified Annex
import Types.Cluster
import Logs.Cluster
import qualified Remote as R
import qualified Types.Remote as R
import qualified Command.UpdateProxy
import Utility.SafeOutput
import qualified Data.Map as M
import qualified Data.Set as S
cmd :: Command
cmd = noMessages $ command "updatecluster" SectionSetup
"update records of cluster nodes"
paramNothing (withParams seek)
seek :: CmdParams -> CommandSeek
seek = withNothing $ do
commandAction start
commandAction Command.UpdateProxy.start
start :: CommandStart
start = startingCustomOutput (ActionItemOther Nothing) $ do
rs <- R.remoteList
let getnode r = do
clusternames <- remoteAnnexClusterNode (R.gitconfig r)
return $ M.fromList $ zip clusternames (repeat (S.singleton r))
let myclusternodes = M.unionsWith S.union (mapMaybe getnode rs)
myclusters <- annexClusters <$> Annex.getGitConfig
recordedclusters <- getClusters
descs <- R.uuidDescriptions
-- Update the cluster log to list the currently configured nodes
-- of each configured cluster.
forM_ (M.toList myclusters) $ \(clustername, cu) -> do
let mynodesremotes = fromMaybe mempty $
M.lookup clustername myclusternodes
let mynodes = S.map (ClusterNodeUUID . R.uuid) mynodesremotes
let recordednodes = fromMaybe mempty $ M.lookup cu $
clusterUUIDs recordedclusters
proxiednodes <- findProxiedClusterNodes recordednodes
let allnodes = S.union mynodes proxiednodes
if recordednodes == allnodes
then liftIO $ putStrLn $ safeOutput $
"No cluster node changes for cluster: " ++ clustername
else do
describechanges descs clustername recordednodes allnodes mynodesremotes
recordCluster cu allnodes
next $ return True
where
describechanges descs clustername oldnodes allnodes mynodesremotes = do
forM_ (S.toList mynodesremotes) $ \r ->
unless (S.member (ClusterNodeUUID (R.uuid r)) oldnodes) $
liftIO $ putStrLn $ safeOutput $
"Added node " ++ R.name r ++ " to cluster: " ++ clustername
forM_ (S.toList oldnodes) $ \n ->
unless (S.member n allnodes) $ do
let desc = maybe (fromUUID (fromClusterNodeUUID n)) fromUUIDDesc $
M.lookup (fromClusterNodeUUID n) descs
liftIO $ putStrLn $ safeOutput $
"Removed node " ++ desc ++ " from cluster: " ++ clustername
-- Finds nodes that are proxied by other cluster gateways.
findProxiedClusterNodes :: S.Set ClusterNodeUUID -> Annex (S.Set ClusterNodeUUID)
findProxiedClusterNodes recordednodes =
(S.fromList . map asclusternode . filter isproxynode) <$> R.remoteList
where
isproxynode r =
asclusternode r `S.member` recordednodes
&& isJust (remoteAnnexProxiedBy (R.gitconfig r))
asclusternode = ClusterNodeUUID . R.uuid

96
Command/UpdateProxy.hs Normal file
View file

@ -0,0 +1,96 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Command.UpdateProxy where
import Command
import qualified Annex
import Logs.Proxy
import Logs.Cluster
import Annex.UUID
import qualified Remote as R
import qualified Types.Remote as R
import Utility.SafeOutput
import qualified Data.Map as M
import qualified Data.Set as S
cmd :: Command
cmd = noMessages $ command "updateproxy" SectionSetup
"update records with proxy configuration"
paramNothing (withParams seek)
seek :: CmdParams -> CommandSeek
seek = withNothing (commandAction start)
start :: CommandStart
start = startingCustomOutput (ActionItemOther Nothing) $ do
rs <- R.remoteList
let remoteproxies = S.fromList $ map mkproxy $
filter (isproxy . R.gitconfig) rs
clusterproxies <- getClusterProxies remoteproxies
let proxies = S.union remoteproxies clusterproxies
u <- getUUID
oldproxies <- fromMaybe mempty . M.lookup u <$> getProxies
if oldproxies == proxies
then liftIO $ putStrLn "No proxy changes to record."
else do
describechanges oldproxies proxies
recordProxies proxies
next $ return True
where
describechanges oldproxies proxies =
forM_ (S.toList $ S.union oldproxies proxies) $ \p ->
case (S.member p oldproxies, S.member p proxies) of
(False, True) -> liftIO $
putStrLn $ safeOutput $
"Started proxying for " ++ proxyRemoteName p
(True, False) -> liftIO $
putStrLn $ safeOutput $
"Stopped proxying for " ++ proxyRemoteName p
_ -> noop
isproxy c = remoteAnnexProxy c || not (null (remoteAnnexClusterNode c))
mkproxy r = Proxy (R.uuid r) (R.name r)
-- Automatically proxy nodes of any cluster this repository is configured
-- to serve as a gateway for. Also proxy other cluster nodes that are
-- themselves proxied via other remotes.
getClusterProxies :: S.Set Proxy -> Annex (S.Set Proxy)
getClusterProxies remoteproxies = do
myclusters <- (map mkclusterproxy . M.toList . annexClusters)
<$> Annex.getGitConfig
remoteproxiednodes <- findRemoteProxiedClusterNodes
let myproxieduuids = S.map proxyRemoteUUID remoteproxies
<> S.fromList (map proxyRemoteUUID myclusters)
-- filter out nodes we proxy for from the remote proxied nodes
-- to avoid cycles
let remoteproxiednodes' = filter
(\n -> proxyRemoteUUID n `S.notMember` myproxieduuids)
remoteproxiednodes
return (S.fromList (myclusters ++ remoteproxiednodes'))
where
mkclusterproxy (remotename, cu) =
Proxy (fromClusterUUID cu) remotename
findRemoteProxiedClusterNodes :: Annex [Proxy]
findRemoteProxiedClusterNodes = do
myclusters <- (S.fromList . M.elems . annexClusters)
<$> Annex.getGitConfig
clusternodes <- clusterNodeUUIDs <$> getClusters
let isproxiedclusternode r
| isJust (remoteAnnexProxiedBy (R.gitconfig r)) =
case M.lookup (ClusterNodeUUID (R.uuid r)) clusternodes of
Nothing -> False
Just s -> not $ S.null $
S.intersection s myclusters
| otherwise = False
(map asproxy . filter isproxiedclusternode)
<$> R.remoteList
where
asproxy r = Proxy (R.uuid r) (R.name r)

View file

@ -11,6 +11,7 @@ import Command
import Upgrade
import Annex.Version
import Annex.Init
import Annex.Startup
cmd :: Command
cmd = dontCheck
@ -46,6 +47,6 @@ start (UpgradeOptions { autoOnly = True }) =
start _ =
starting "upgrade" (ActionItemOther Nothing) (SeekInput []) $ do
whenM (isNothing <$> getVersion) $ do
initialize Nothing Nothing
initialize startupAnnex Nothing Nothing
r <- upgrade False latestVersion
next $ return r

View file

@ -15,6 +15,7 @@ import Logs.Trust
import Logs.Web
import Remote.Web (getWebUrls)
import Annex.UUID
import Annex.NumCopies
import qualified Utility.Format
import qualified Command.Find
@ -86,7 +87,7 @@ perform o remotemap key ai = do
(untrustedlocations, safelocations) <- trustPartition UnTrusted locations
case formatOption o of
Nothing -> do
let num = length safelocations
let num = numCopiesCount safelocations
showNote $ UnquotedString $ show num ++ " " ++ copiesplural num
pp <- ppwhereis "whereis" safelocations urls
unless (null safelocations) $

View file

@ -184,12 +184,6 @@ commit commitmode allowempty message branch parentrefs repo =
update' branch sha repo
return $ Just sha
Nothing -> return Nothing
where
cancommit tree
| allowempty = return True
| otherwise = case parentrefs of
[p] -> maybe False (tree /=) <$> Git.Ref.tree p repo
_ -> return True
{- Same as commit but without updating any branch. -}
commitSha :: CommitMode -> Bool -> String -> [Ref] -> Repo -> IO (Maybe Sha)

View file

@ -65,9 +65,13 @@ makeLegalName s = case filter legal $ replace "/" "_" s of
{- Only alphanumerics, and a few common bits of punctuation common
- in hostnames. -}
legal '_' = True
legal '-' = True
legal '.' = True
legal c = isAlphaNum c
isLegalName :: String -> Bool
isLegalName s = s == makeLegalName s
data RemoteLocation = RemoteUrl String | RemotePath FilePath
deriving (Eq, Show)

View file

@ -84,6 +84,9 @@ instance Default ConfigValue where
fromConfigKey :: ConfigKey -> String
fromConfigKey (ConfigKey s) = decodeBS s
fromConfigKey' :: ConfigKey -> S.ByteString
fromConfigKey' (ConfigKey s) = s
instance Show ConfigKey where
show = fromConfigKey

View file

@ -408,7 +408,7 @@ limitCopies want = case splitc ':' want of
go' n good notpresent key = do
us <- filter (`S.notMember` notpresent)
<$> (filterM good =<< Remote.keyLocations key)
return $ length us >= n
return $ numCopiesCount us >= n
checktrust checker u = checker <$> lookupTrust u
checkgroup g u = S.member g <$> lookupGroups u
parsetrustspec s
@ -442,7 +442,8 @@ limitLackingCopies desc approx want = case readish want of
MatchingUserInfo {} -> approxNumCopies
us <- filter (`S.notMember` notpresent)
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
return $ fromNumCopies numcopies - length us >= needed
let vs nhave numcopies' = numcopies' - nhave >= needed
return $ numCopiesCheck'' us vs numcopies
approxNumCopies = fromMaybe defaultNumCopies <$> getGlobalNumCopies
{- Match keys that are unused.

View file

@ -98,6 +98,8 @@ topLevelOldUUIDBasedLogs =
topLevelNewUUIDBasedLogs :: [RawFilePath]
topLevelNewUUIDBasedLogs =
[ exportLog
, proxyLog
, clusterLog
]
{- Other top-level logs. -}
@ -154,6 +156,12 @@ multicastLog = "multicast.log"
exportLog :: RawFilePath
exportLog = "export.log"
proxyLog :: RawFilePath
proxyLog = "proxy.log"
clusterLog :: RawFilePath
clusterLog = "cluster.log"
{- This is not a log file, it's where exported treeishes get grafted into
- the git-annex branch. -}
exportTreeGraftPoint :: RawFilePath

41
Logs/Cluster.hs Normal file
View file

@ -0,0 +1,41 @@
{- git-annex cluster log
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings, TupleSections #-}
module Logs.Cluster (
module Types.Cluster,
getClusters,
loadClusters,
recordCluster,
) where
import qualified Annex
import Annex.Common
import Types.Cluster
import Logs.Cluster.Basic
import Logs.Trust
import qualified Data.Map as M
import qualified Data.Set as S
getClusters :: Annex Clusters
getClusters = maybe loadClusters return =<< Annex.getState Annex.clusters
{- Loads the clusters and caches it for later.
-
- This takes care of removing dead nodes from clusters,
- to avoid inserting the cluster uuid into the location
- log when only dead nodes contain the content of a key.
-}
loadClusters :: Annex Clusters
loadClusters = do
dead <- (S.fromList . map ClusterNodeUUID)
<$> trustGet DeadTrusted
clusters <- getClustersWith (M.map (`S.difference` dead))
Annex.changeState $ \s -> s { Annex.clusters = Just clusters }
return clusters

91
Logs/Cluster/Basic.hs Normal file
View file

@ -0,0 +1,91 @@
{- git-annex cluster log, basics
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings, TupleSections #-}
module Logs.Cluster.Basic (
module Types.Cluster,
getClustersWith,
recordCluster,
) where
import qualified Annex
import Annex.Common
import qualified Annex.Branch
import Types.Cluster
import Logs
import Logs.UUIDBased
import Logs.MapLog
import qualified Data.Set as S
import qualified Data.Map as M
import Data.ByteString.Builder
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import qualified Data.ByteString.Lazy as L
{- Gets the clusters. Note that this includes any dead nodes,
- unless a function is provided to remove them.
-}
getClustersWith
:: (M.Map ClusterUUID (S.Set ClusterNodeUUID)
-> M.Map ClusterUUID (S.Set ClusterNodeUUID))
-> Annex Clusters
getClustersWith removedeadnodes = do
m <- removedeadnodes
. convclusteruuids
. M.map value
. fromMapLog
. parseClusterLog
<$> Annex.Branch.get clusterLog
return $ Clusters
{ clusterUUIDs = m
, clusterNodeUUIDs = M.foldlWithKey inverter mempty m
}
where
convclusteruuids :: M.Map UUID (S.Set ClusterNodeUUID) -> M.Map ClusterUUID (S.Set ClusterNodeUUID)
convclusteruuids = M.fromList
. mapMaybe (\(mk, v) -> (, v) <$> mk)
. M.toList . M.mapKeys mkClusterUUID
inverter m k v = M.unionWith (<>) m
(M.fromList (map (, S.singleton k) (S.toList v)))
recordCluster :: ClusterUUID -> S.Set ClusterNodeUUID -> Annex ()
recordCluster clusteruuid nodeuuids = do
-- If a private UUID has been configured as a cluster node,
-- avoid leaking it into the git-annex log.
privateuuids <- annexPrivateRepos <$> Annex.getGitConfig
let nodeuuids' = S.filter
(\(ClusterNodeUUID n) -> S.notMember n privateuuids)
nodeuuids
c <- currentVectorClock
Annex.Branch.change (Annex.Branch.RegardingUUID [fromClusterUUID clusteruuid]) clusterLog $
(buildLogNew buildClusterNodeList)
. changeLog c (fromClusterUUID clusteruuid) nodeuuids'
. parseClusterLog
buildClusterNodeList :: S.Set ClusterNodeUUID -> Builder
buildClusterNodeList = assemble
. map (buildUUID . fromClusterNodeUUID)
. S.toList
where
assemble [] = mempty
assemble (x:[]) = x
assemble (x:y:l) = x <> " " <> assemble (y:l)
parseClusterLog :: L.ByteString -> Log (S.Set ClusterNodeUUID)
parseClusterLog = parseLogNew parseClusterNodeList
parseClusterNodeList :: A.Parser (S.Set ClusterNodeUUID)
parseClusterNodeList = S.fromList <$> many parseword
where
parseword = parsenode
<* ((const () <$> A8.char ' ') <|> A.endOfInput)
parsenode = ClusterNodeUUID
<$> (toUUID <$> A8.takeWhile1 (/= ' '))

View file

@ -22,9 +22,6 @@ module Logs.Export (
getExportExcluded,
) where
import qualified Data.Map as M
import qualified Data.ByteString as B
import Annex.Common
import qualified Annex.Branch
import qualified Git
@ -38,6 +35,8 @@ import qualified Git.LsTree
import qualified Git.Tree
import Annex.UUID
import qualified Data.Map as M
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import Data.Either
import Data.Char

View file

@ -8,7 +8,7 @@
- Repositories record their UUID and the date when they --get or --drop
- a value.
-
- Copyright 2010-2023 Joey Hess <id@joeyh.name>
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -41,6 +41,7 @@ import Annex.Common
import qualified Annex.Branch
import Logs
import Logs.Presence
import Types.Cluster
import Annex.UUID
import Annex.CatFile
import Annex.VectorClock
@ -49,6 +50,8 @@ import qualified Annex
import Data.Time.Clock
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import qualified Data.Set as S
{- Log a change in the presence of a key's value in current repository. -}
logStatus :: Key -> LogStatus -> Annex ()
@ -66,15 +69,22 @@ logStatusAfter key a = ifM a
, return False
)
{- Log a change in the presence of a key's value in a repository. -}
{- Log a change in the presence of a key's value in a repository.
-
- Cluster UUIDs are not logged. Instead, when a node of a cluster is
- logged to contain a key, loading the log will include the cluster's
- UUID.
-}
logChange :: Key -> UUID -> LogStatus -> Annex ()
logChange key u@(UUID _) s = do
config <- Annex.getGitConfig
maybeAddLog
(Annex.Branch.RegardingUUID [u])
(locationLogFile config key)
s
(LogInfo (fromUUID u))
logChange key u@(UUID _) s
| isClusterUUID u = noop
| otherwise = do
config <- Annex.getGitConfig
maybeAddLog
(Annex.Branch.RegardingUUID [u])
(locationLogFile config key)
s
(LogInfo (fromUUID u))
logChange _ NoUUID _ = noop
{- Returns a list of repository UUIDs that, according to the log, have
@ -97,14 +107,29 @@ loggedLocationsRef :: Ref -> Annex [UUID]
loggedLocationsRef ref = map (toUUID . fromLogInfo) . getLog <$> catObject ref
{- Parses the content of a log file and gets the locations in it. -}
parseLoggedLocations :: L.ByteString -> [UUID]
parseLoggedLocations l = map (toUUID . fromLogInfo . info)
(filterPresent (parseLog l))
parseLoggedLocations :: Clusters -> L.ByteString -> [UUID]
parseLoggedLocations clusters l = addClusterUUIDs clusters $
map (toUUID . fromLogInfo . info)
(filterPresent (parseLog l))
getLoggedLocations :: (RawFilePath -> Annex [LogInfo]) -> Key -> Annex [UUID]
getLoggedLocations getter key = do
config <- Annex.getGitConfig
map (toUUID . fromLogInfo) <$> getter (locationLogFile config key)
locs <- map (toUUID . fromLogInfo) <$> getter (locationLogFile config key)
clusters <- getClusters
return $ addClusterUUIDs clusters locs
-- Add UUIDs of any clusters whose nodes are in the list.
addClusterUUIDs :: Clusters -> [UUID] -> [UUID]
addClusterUUIDs clusters locs
| M.null clustermap = locs
-- ^ optimisation for common case of no clusters
| otherwise = clusterlocs ++ locs
where
clustermap = clusterNodeUUIDs clusters
clusterlocs = map fromClusterUUID $ S.toList $
S.unions $ mapMaybe findclusters locs
findclusters u = M.lookup (ClusterNodeUUID u) clustermap
{- Is there a location log for the key? True even for keys with no
- remaining locations. -}
@ -204,6 +229,7 @@ overLocationLogs'
-> Annex v
overLocationLogs' iv discarder keyaction = do
config <- Annex.getGitConfig
clusters <- getClusters
let getk = locationLogFileKey config
let go v reader = reader >>= \case
@ -214,11 +240,16 @@ overLocationLogs' iv discarder keyaction = do
ifM (checkDead k)
( go v reader
, do
!v' <- keyaction k (maybe [] parseLoggedLocations content) v
!v' <- keyaction k (maybe [] (parseLoggedLocations clusters) content) v
go v' reader
)
Nothing -> return v
Annex.Branch.overBranchFileContents getk (go iv) >>= \case
Just r -> return r
Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on allu keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
-- Cannot import Logs.Cluster due to a cycle.
-- Annex.clusters gets populated when starting up git-annex.
getClusters :: Annex Clusters
getClusters = fromMaybe noClusters <$> Annex.getState Annex.clusters

88
Logs/Proxy.hs Normal file
View file

@ -0,0 +1,88 @@
{- git-annex proxy log
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Logs.Proxy (
Proxy(..),
getProxies,
recordProxies,
) where
import qualified Annex
import Annex.Common
import qualified Annex.Branch
import qualified Git.Remote
import Git.Types
import Logs
import Logs.UUIDBased
import Logs.MapLog
import Annex.UUID
import qualified Data.Set as S
import qualified Data.Map as M
import Data.ByteString.Builder
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import qualified Data.ByteString.Lazy as L
data Proxy = Proxy
{ proxyRemoteUUID :: UUID
, proxyRemoteName :: RemoteName
} deriving (Show, Eq, Ord)
getProxies :: Annex (M.Map UUID (S.Set Proxy))
getProxies = M.map (validateProxies . value) . fromMapLog . parseProxyLog
<$> Annex.Branch.get proxyLog
recordProxies :: S.Set Proxy -> Annex ()
recordProxies proxies = do
-- If a private UUID has been configured as a proxy, avoid leaking
-- it into the git-annex log.
privateuuids <- annexPrivateRepos <$> Annex.getGitConfig
let proxies' = S.filter
(\p -> S.notMember (proxyRemoteUUID p) privateuuids) proxies
c <- currentVectorClock
u <- getUUID
Annex.Branch.change (Annex.Branch.RegardingUUID [u]) proxyLog $
(buildLogNew buildProxyList)
. changeLog c u proxies'
. parseProxyLog
buildProxyList :: S.Set Proxy -> Builder
buildProxyList = assemble . map fmt . S.toList
where
fmt p = buildUUID (proxyRemoteUUID p)
<> colon
<> byteString (encodeBS (proxyRemoteName p))
colon = charUtf8 ':'
assemble [] = mempty
assemble (x:[]) = x
assemble (x:y:l) = x <> " " <> assemble (y:l)
parseProxyLog :: L.ByteString -> Log (S.Set Proxy)
parseProxyLog = parseLogNew parseProxyList
parseProxyList :: A.Parser (S.Set Proxy)
parseProxyList = S.fromList <$> many parseword
where
parseword = parseproxy
<* ((const () <$> A8.char ' ') <|> A.endOfInput)
parseproxy = Proxy
<$> (toUUID <$> A8.takeWhile1 (/= colon))
<* (const () <$> A8.char colon)
<*> (decodeBS <$> A8.takeWhile1 (/= ' '))
colon = ':'
-- Filter out any proxies that have a name that is not allowed as a git
-- remote name. This avoids any security problems with eg escape
-- characters in names, and ensures the name can be used anywhere a usual
-- git remote name can be used without causing issues.
validateProxies :: S.Set Proxy -> S.Set Proxy
validateProxies = S.filter $ Git.Remote.isLegalName . proxyRemoteName

View file

@ -2,13 +2,14 @@
-
- See doc/design/p2p_protocol.mdwn
-
- Copyright 2016-2021 Joey Hess <id@joeyh.name>
- Copyright 2016-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE DeriveFunctor, TemplateHaskell, FlexibleContexts #-}
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, RankNTypes #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module P2P.Protocol where
@ -37,6 +38,7 @@ import System.IO
import qualified System.FilePath.ByteString as P
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S
import Data.Char
import Control.Applicative
import Prelude
@ -54,7 +56,7 @@ defaultProtocolVersion :: ProtocolVersion
defaultProtocolVersion = ProtocolVersion 0
maxProtocolVersion :: ProtocolVersion
maxProtocolVersion = ProtocolVersion 1
maxProtocolVersion = ProtocolVersion 2
newtype ProtoAssociatedFile = ProtoAssociatedFile AssociatedFile
deriving (Show)
@ -65,6 +67,9 @@ data Service = UploadPack | ReceivePack
data Validity = Valid | Invalid
deriving (Show)
newtype Bypass = Bypass (S.Set UUID)
deriving (Show, Monoid, Semigroup)
-- | Messages in the protocol. The peer that makes the connection
-- always initiates requests, and the other peer makes responses to them.
@ -85,8 +90,12 @@ data Message
| PUT ProtoAssociatedFile Key
| PUT_FROM Offset
| ALREADY_HAVE
| ALREADY_HAVE_PLUS [UUID]
| SUCCESS
| SUCCESS_PLUS [UUID]
| FAILURE
| FAILURE_PLUS [UUID]
| BYPASS Bypass
| DATA Len -- followed by bytes of data
| VALIDITY Validity
| ERROR String
@ -109,8 +118,12 @@ instance Proto.Sendable Message where
formatMessage (PUT af key) = ["PUT", Proto.serialize af, Proto.serialize key]
formatMessage (PUT_FROM offset) = ["PUT-FROM", Proto.serialize offset]
formatMessage ALREADY_HAVE = ["ALREADY-HAVE"]
formatMessage (ALREADY_HAVE_PLUS uuids) = ("ALREADY-HAVE-PLUS":map Proto.serialize uuids)
formatMessage SUCCESS = ["SUCCESS"]
formatMessage (SUCCESS_PLUS uuids) = ("SUCCESS-PLUS":map Proto.serialize uuids)
formatMessage FAILURE = ["FAILURE"]
formatMessage (FAILURE_PLUS uuids) = ("FAILURE-PLUS":map Proto.serialize uuids)
formatMessage (BYPASS (Bypass uuids)) = ("BYPASS":map Proto.serialize (S.toList uuids))
formatMessage (VALIDITY Valid) = ["VALID"]
formatMessage (VALIDITY Invalid) = ["INVALID"]
formatMessage (DATA len) = ["DATA", Proto.serialize len]
@ -133,8 +146,12 @@ instance Proto.Receivable Message where
parseCommand "PUT" = Proto.parse2 PUT
parseCommand "PUT-FROM" = Proto.parse1 PUT_FROM
parseCommand "ALREADY-HAVE" = Proto.parse0 ALREADY_HAVE
parseCommand "ALREADY-HAVE-PLUS" = Proto.parseList ALREADY_HAVE_PLUS
parseCommand "SUCCESS" = Proto.parse0 SUCCESS
parseCommand "SUCCESS-PLUS" = Proto.parseList SUCCESS_PLUS
parseCommand "FAILURE" = Proto.parse0 FAILURE
parseCommand "FAILURE-PLUS" = Proto.parseList FAILURE_PLUS
parseCommand "BYPASS" = Proto.parseList (BYPASS . Bypass . S.fromList)
parseCommand "DATA" = Proto.parse1 DATA
parseCommand "ERROR" = Proto.parse1 ERROR
parseCommand "VALID" = Proto.parse0 (VALIDITY Valid)
@ -164,12 +181,15 @@ instance Proto.Serializable Service where
-- its serialization cannot contain any whitespace. This is handled
-- by replacing whitespace with '%' (and '%' with '%%')
--
-- When deserializing an AssociatedFile from a peer, it's sanitized,
-- to avoid any unusual characters that might cause problems when it's
-- displayed to the user.
-- When deserializing an AssociatedFile from a peer, that escaping is
-- reversed. Unfortunately, an input tab will be deescaped to a space
-- though. And it's sanitized, to avoid any control characters that might
-- cause problems when it's displayed to the user.
--
-- These mungings are ok, because a ProtoAssociatedFile is only ever displayed
-- to the user and does not need to match a file on disk.
-- These mungings are ok, because a ProtoAssociatedFile is normally
-- only displayed to the user and so does not need to match a file on disk.
-- It may also be used in checking preferred content, which is very
-- unlikely to care about spaces vs tabs or control characters.
instance Proto.Serializable ProtoAssociatedFile where
serialize (ProtoAssociatedFile (AssociatedFile Nothing)) = ""
serialize (ProtoAssociatedFile (AssociatedFile (Just af))) =
@ -244,7 +264,7 @@ data LocalF c
| ContentSize Key (Maybe Len -> c)
-- ^ Gets size of the content of a key, when the full content is
-- present.
| ReadContent Key AssociatedFile Offset (L.ByteString -> Proto Validity -> Proto Bool) (Bool -> c)
| ReadContent Key AssociatedFile Offset (L.ByteString -> Proto Validity -> Proto (Maybe [UUID])) (Maybe [UUID] -> c)
-- ^ Reads the content of a key and sends it to the callback.
-- Must run the callback, or terminate the protocol connection.
--
@ -324,6 +344,15 @@ negotiateProtocolVersion preferredversion = do
Just (ERROR _) -> return ()
_ -> net $ sendMessage (ERROR "expected VERSION")
sendBypass :: Bypass -> Proto ()
sendBypass bypass@(Bypass s)
| S.null s = return ()
| otherwise = do
ver <- net getProtocolVersion
if ver >= ProtocolVersion 2
then net $ sendMessage (BYPASS bypass)
else return ()
checkPresent :: Key -> Proto Bool
checkPresent key = do
net $ sendMessage (CHECKPRESENT key)
@ -349,10 +378,10 @@ lockContentWhile runproto key a = bracket setup cleanup a
cleanup True = runproto () $ net $ sendMessage UNLOCKCONTENT
cleanup False = return ()
remove :: Key -> Proto Bool
remove :: Key -> Proto (Bool, Maybe [UUID])
remove key = do
net $ sendMessage (REMOVE key)
checkSuccess
checkSuccessFailurePlus
get :: FilePath -> Key -> Maybe IncrementalVerifier -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification)
get dest key iv af m p =
@ -362,16 +391,17 @@ get dest key iv af m p =
sizer = fileSize dest
storer = storeContentTo dest iv
put :: Key -> AssociatedFile -> MeterUpdate -> Proto Bool
put :: Key -> AssociatedFile -> MeterUpdate -> Proto (Maybe [UUID])
put key af p = do
net $ sendMessage (PUT (ProtoAssociatedFile af) key)
r <- net receiveMessage
case r of
Just (PUT_FROM offset) -> sendContent key af offset p
Just ALREADY_HAVE -> return True
Just ALREADY_HAVE -> return (Just [])
Just (ALREADY_HAVE_PLUS uuids) -> return (Just uuids)
_ -> do
net $ sendMessage (ERROR "expected PUT_FROM or ALREADY_HAVE")
return False
return Nothing
data ServerHandler a
= ServerGot a
@ -440,8 +470,6 @@ data ServerMode
serveAuthed :: ServerMode -> UUID -> Proto ()
serveAuthed servermode myuuid = void $ serverLoop handler
where
readonlyerror = net $ sendMessage (ERROR "this repository is read-only; write access denied")
appendonlyerror = net $ sendMessage (ERROR "this repository is append-only; removal denied")
handler (VERSION theirversion) = do
let v = min theirversion maxProtocolVersion
net $ setProtocolVersion v
@ -459,45 +487,42 @@ serveAuthed servermode myuuid = void $ serverLoop handler
handler (CHECKPRESENT key) = do
sendSuccess =<< local (checkContentPresent key)
return ServerContinue
handler (REMOVE key) = case servermode of
ServeReadWrite -> do
sendSuccess =<< local (removeContent key)
return ServerContinue
ServeAppendOnly -> do
appendonlyerror
return ServerContinue
ServeReadOnly -> do
readonlyerror
return ServerContinue
handler (PUT (ProtoAssociatedFile af) key) = case servermode of
ServeReadWrite -> handleput af key
ServeAppendOnly -> handleput af key
ServeReadOnly -> do
readonlyerror
return ServerContinue
handler (REMOVE key) =
checkREMOVEServerMode servermode $ \case
Nothing -> do
sendSuccess =<< local (removeContent key)
return ServerContinue
Just notallowed -> do
notallowed
return ServerContinue
handler (PUT (ProtoAssociatedFile af) key) =
checkPUTServerMode servermode $ \case
Nothing -> handleput af key
Just notallowed -> do
notallowed
return ServerContinue
handler (GET offset (ProtoAssociatedFile af) key) = do
void $ sendContent key af offset nullMeterUpdate
-- setPresent not called because the peer may have
-- requested the data but not permanently stored it.
return ServerContinue
handler (CONNECT service) = do
let goahead = net $ relayService service
case (servermode, service) of
(ServeReadWrite, _) -> goahead
(ServeAppendOnly, UploadPack) -> goahead
-- git protocol could be used to overwrite
-- refs or something, so don't allow
(ServeAppendOnly, ReceivePack) -> readonlyerror
(ServeReadOnly, UploadPack) -> goahead
(ServeReadOnly, ReceivePack) -> readonlyerror
-- After connecting to git, there may be unconsumed data
-- from the git processes hanging around (even if they
-- exited successfully), so stop serving this connection.
return $ ServerGot ()
let endit = return $ ServerGot ()
checkCONNECTServerMode service servermode $ \case
Nothing -> do
net $ relayService service
endit
Just notallowed -> do
notallowed
endit
handler NOTIFYCHANGE = do
refs <- local waitRefChange
net $ sendMessage (CHANGED refs)
return ServerContinue
handler (BYPASS _) = return ServerContinue
handler _ = return ServerUnexpected
handleput af key = do
@ -512,7 +537,40 @@ serveAuthed servermode myuuid = void $ serverLoop handler
local $ setPresent key myuuid
return ServerContinue
sendContent :: Key -> AssociatedFile -> Offset -> MeterUpdate -> Proto Bool
sendReadOnlyError :: Proto ()
sendReadOnlyError = net $ sendMessage $
ERROR "this repository is read-only; write access denied"
sendAppendOnlyError :: Proto ()
sendAppendOnlyError = net $ sendMessage $
ERROR "this repository is append-only; removal denied"
checkPUTServerMode :: Monad m => ServerMode -> (Maybe (Proto ()) -> m a) -> m a
checkPUTServerMode servermode a =
case servermode of
ServeReadWrite -> a Nothing
ServeAppendOnly -> a Nothing
ServeReadOnly -> a (Just sendReadOnlyError)
checkREMOVEServerMode :: Monad m => ServerMode -> (Maybe (Proto ()) -> m a) -> m a
checkREMOVEServerMode servermode a =
case servermode of
ServeReadWrite -> a Nothing
ServeAppendOnly -> a (Just sendAppendOnlyError)
ServeReadOnly -> a (Just sendReadOnlyError)
checkCONNECTServerMode :: Monad m => Service -> ServerMode -> (Maybe (Proto ()) -> m a) -> m a
checkCONNECTServerMode service servermode a =
case (servermode, service) of
(ServeReadWrite, _) -> a Nothing
(ServeAppendOnly, UploadPack) -> a Nothing
-- git protocol could be used to overwrite
-- refs or something, so don't allow
(ServeAppendOnly, ReceivePack) -> a (Just sendReadOnlyError)
(ServeReadOnly, UploadPack) -> a Nothing
(ServeReadOnly, ReceivePack) -> a (Just sendReadOnlyError)
sendContent :: Key -> AssociatedFile -> Offset -> MeterUpdate -> Proto (Maybe [UUID])
sendContent key af offset@(Offset n) p = go =<< local (contentSize key)
where
go (Just (Len totallen)) = do
@ -531,7 +589,7 @@ sendContent key af offset@(Offset n) p = go =<< local (contentSize key)
ver <- net getProtocolVersion
when (ver >= ProtocolVersion 1) $
net . sendMessage . VALIDITY =<< validitycheck
checkSuccess
checkSuccessPlus
receiveContent
:: Observable t
@ -579,6 +637,32 @@ checkSuccess = do
net $ sendMessage (ERROR "expected SUCCESS or FAILURE")
return False
checkSuccessPlus :: Proto (Maybe [UUID])
checkSuccessPlus =
checkSuccessFailurePlus >>= return . \case
(True, v) -> v
(False, _) -> Nothing
checkSuccessFailurePlus :: Proto (Bool, Maybe [UUID])
checkSuccessFailurePlus = do
ver <- net getProtocolVersion
if ver >= ProtocolVersion 2
then do
ack <- net receiveMessage
case ack of
Just SUCCESS -> return (True, Just [])
Just (SUCCESS_PLUS l) -> return (True, Just l)
Just FAILURE -> return (False, Nothing)
Just (FAILURE_PLUS l) -> return (False, Just l)
_ -> do
net $ sendMessage (ERROR "expected SUCCESS or SUCCESS-PLUS or FAILURE or FAILURE-PLUS")
return (False, Nothing)
else do
ok <- checkSuccess
if ok
then return (True, Just [])
else return (False, Nothing)
sendSuccess :: Bool -> Proto ()
sendSuccess True = net $ sendMessage SUCCESS
sendSuccess False = net $ sendMessage FAILURE

576
P2P/Proxy.hs Normal file
View file

@ -0,0 +1,576 @@
{- P2P protocol proxying
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE RankNTypes, FlexibleContexts, ScopedTypeVariables #-}
{-# LANGUAGE BangPatterns #-}
module P2P.Proxy where
import Annex.Common
import qualified Annex
import P2P.Protocol
import P2P.IO
import Utility.Metered
import Git.FilePath
import Types.Concurrency
import Annex.Concurrent
import qualified Remote
import Data.Either
import Control.Concurrent.STM
import Control.Concurrent.Async
import qualified Control.Concurrent.MSem as MSem
import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S
import GHC.Conc
type ProtoCloser = Annex ()
data ClientSide = ClientSide RunState P2PConnection
data RemoteSide = RemoteSide
{ remote :: Remote
, remoteConnect :: Annex (Maybe (RunState, P2PConnection, ProtoCloser))
, remoteTMVar :: TMVar (RunState, P2PConnection, ProtoCloser)
}
mkRemoteSide :: Remote -> Annex (Maybe (RunState, P2PConnection, ProtoCloser)) -> Annex RemoteSide
mkRemoteSide r remoteconnect = RemoteSide
<$> pure r
<*> pure remoteconnect
<*> liftIO (atomically newEmptyTMVar)
runRemoteSide :: RemoteSide -> Proto a -> Annex (Either ProtoFailure a)
runRemoteSide remoteside a =
liftIO (atomically $ tryReadTMVar $ remoteTMVar remoteside) >>= \case
Just (runst, conn, _closer) -> liftIO $ runNetProto runst conn a
Nothing -> remoteConnect remoteside >>= \case
Just (runst, conn, closer) -> do
liftIO $ atomically $ putTMVar
(remoteTMVar remoteside)
(runst, conn, closer)
liftIO $ runNetProto runst conn a
Nothing -> giveup "Unable to connect to remote."
closeRemoteSide :: RemoteSide -> Annex ()
closeRemoteSide remoteside =
liftIO (atomically $ tryReadTMVar $ remoteTMVar remoteside) >>= \case
Just (_, _, closer) -> closer
Nothing -> return ()
{- Selects what remotes to proxy to for top-level P2P protocol
- actions.
- -}
data ProxySelector = ProxySelector
{ proxyCHECKPRESENT :: Key -> Annex (Maybe RemoteSide)
, proxyLOCKCONTENT :: Key -> Annex (Maybe RemoteSide)
, proxyUNLOCKCONTENT :: Annex (Maybe RemoteSide)
, proxyREMOVE :: Key -> Annex [RemoteSide]
-- ^ remove from all of these remotes
, proxyGET :: Key -> Annex (Maybe RemoteSide)
, proxyPUT :: AssociatedFile -> Key -> Annex [RemoteSide]
-- ^ put to some/all of these remotes
}
singleProxySelector :: RemoteSide -> ProxySelector
singleProxySelector r = ProxySelector
{ proxyCHECKPRESENT = const (pure (Just r))
, proxyLOCKCONTENT = const (pure (Just r))
, proxyUNLOCKCONTENT = pure (Just r)
, proxyREMOVE = const (pure [r])
, proxyGET = const (pure (Just r))
, proxyPUT = const (const (pure [r]))
}
{- To keep this module limited to P2P protocol actions,
- all other actions that a proxy needs to do are provided
- here. -}
data ProxyMethods = ProxyMethods
{ removedContent :: UUID -> Key -> Annex ()
-- ^ called when content is removed from a repository
, addedContent :: UUID -> Key -> Annex ()
-- ^ called when content is added to a repository
}
{- Type of function that takes a error handler, which is
- used to handle a ProtoFailure when receiving a message
- from the client or remote.
-}
type ProtoErrorHandled r =
(forall t. ((t -> Annex r) -> Annex (Either ProtoFailure t) -> Annex r)) -> Annex r
{- This is the first thing run when proxying with a client.
- The client has already authenticated. Most clients will send a
- VERSION message, although version 0 clients will not and will send
- some other message, which is returned to handle later.
-
- But before the client will send VERSION, it needs to see AUTH_SUCCESS.
- So send that, although the connection with the remote is not actually
- brought up yet.
-}
getClientProtocolVersion
:: UUID
-> ClientSide
-> (Maybe (ProtocolVersion, Maybe Message) -> Annex r)
-> ProtoErrorHandled r
getClientProtocolVersion remoteuuid (ClientSide clientrunst clientconn) cont protoerrhandler =
protoerrhandler cont $ client $ getClientProtocolVersion' remoteuuid
where
client = liftIO . runNetProto clientrunst clientconn
getClientProtocolVersion'
:: UUID
-> Proto (Maybe (ProtocolVersion, Maybe Message))
getClientProtocolVersion' remoteuuid = do
net $ sendMessage (AUTH_SUCCESS remoteuuid)
msg <- net receiveMessage
case msg of
Nothing -> return Nothing
Just (VERSION v) ->
-- If the client sends a newer version than we
-- understand, reduce it; we need to parse the
-- protocol too.
let v' = min v maxProtocolVersion
in return (Just (v', Nothing))
Just othermsg -> return
(Just (defaultProtocolVersion, Just othermsg))
{- Send negotiated protocol version to the client.
- With a version 0 client, preserves the other protocol message
- received in getClientProtocolVersion. -}
sendClientProtocolVersion
:: ClientSide
-> Maybe Message
-> ProtocolVersion
-> (Maybe Message -> Annex r)
-> ProtoErrorHandled r
sendClientProtocolVersion (ClientSide clientrunst clientconn) othermsg protocolversion cont protoerrhandler =
case othermsg of
Nothing -> protoerrhandler (\() -> cont Nothing) $
client $ net $ sendMessage $ VERSION protocolversion
Just _ -> cont othermsg
where
client = liftIO . runNetProto clientrunst clientconn
{- When speaking to a version 2 client, get the BYPASS message which may be
- sent immediately after VERSION. Returns any other message to be handled
- later. -}
getClientBypass
:: ClientSide
-> ProtocolVersion
-> Maybe Message
-> ((Bypass, Maybe Message) -> Annex r)
-> ProtoErrorHandled r
getClientBypass (ClientSide clientrunst clientconn) (ProtocolVersion protocolversion) Nothing cont protoerrhandler
| protocolversion < 2 = cont (Bypass S.empty, Nothing)
| otherwise = protoerrhandler cont $
client $ net receiveMessage >>= return . \case
Just (BYPASS bypass) -> (bypass, Nothing)
Just othermsg -> (Bypass S.empty, Just othermsg)
Nothing -> (Bypass S.empty, Nothing)
where
client = liftIO . runNetProto clientrunst clientconn
getClientBypass _ _ (Just othermsg) cont _ =
-- Pass along non-BYPASS message from version 0 client.
cont (Bypass S.empty, (Just othermsg))
{- Proxy between the client and the remote. This picks up after
- sendClientProtocolVersion.
-}
proxy
:: Annex r
-> ProxyMethods
-> ServerMode
-> ClientSide
-> UUID
-> ProxySelector
-> ConcurrencyConfig
-> ProtocolVersion
-- ^ Protocol version being spoken between the proxy and the
-- client. When there are multiple remotes, some may speak an
-- earlier version.
-> Maybe Message
-- ^ non-VERSION message that was received from the client when
-- negotiating protocol version, and has not been responded to yet
-> ProtoErrorHandled r
proxy proxydone proxymethods servermode (ClientSide clientrunst clientconn) remoteuuid proxyselector concurrencyconfig (ProtocolVersion protocolversion) othermsg protoerrhandler = do
case othermsg of
Nothing -> proxynextclientmessage ()
Just message -> proxyclientmessage (Just message)
where
client = liftIO . runNetProto clientrunst clientconn
proxynextclientmessage () = protoerrhandler proxyclientmessage $
client (net receiveMessage)
servermodechecker c a = c servermode $ \case
Nothing -> a
Just notallowed ->
protoerrhandler proxynextclientmessage $
client notallowed
proxyclientmessage Nothing = proxydone
proxyclientmessage (Just message) = case message of
CHECKPRESENT k -> proxyCHECKPRESENT proxyselector k >>= \case
Just remoteside ->
proxyresponse remoteside message
(const proxynextclientmessage)
Nothing ->
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage FAILURE
LOCKCONTENT k -> proxyLOCKCONTENT proxyselector k >>= \case
Just remoteside ->
proxyresponse remoteside message
(const proxynextclientmessage)
Nothing ->
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage FAILURE
UNLOCKCONTENT -> proxyUNLOCKCONTENT proxyselector >>= \case
Just remoteside ->
proxynoresponse remoteside message
proxynextclientmessage
Nothing -> proxynextclientmessage ()
REMOVE k -> do
remotesides <- proxyREMOVE proxyselector k
servermodechecker checkREMOVEServerMode $
handleREMOVE remotesides k message
GET _ _ k -> proxyGET proxyselector k >>= \case
Just remoteside -> handleGET remoteside message
Nothing ->
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage $
ERROR "content not present"
PUT paf k -> do
af <- getassociatedfile paf
remotesides <- proxyPUT proxyselector af k
servermodechecker checkPUTServerMode $
handlePUT remotesides k message
BYPASS _ -> proxynextclientmessage ()
-- These messages involve the git repository, not the
-- annex. So they affect the git repository of the proxy,
-- not the remote.
CONNECT service ->
servermodechecker (checkCONNECTServerMode service) $
-- P2P protocol does not continue after
-- relaying from git.
protoerrhandler (\() -> proxydone) $
client $ net $ relayService service
NOTIFYCHANGE -> protoerr
-- Messages that the client should only send after one of
-- the messages above.
SUCCESS -> protoerr
SUCCESS_PLUS _ -> protoerr
FAILURE -> protoerr
FAILURE_PLUS _ -> protoerr
DATA _ -> protoerr
VALIDITY _ -> protoerr
-- If the client errors out, give up.
ERROR msg -> giveup $ "client error: " ++ msg
-- Messages that only the server should send.
CONNECTDONE _ -> protoerr
CHANGED _ -> protoerr
AUTH_SUCCESS _ -> protoerr
AUTH_FAILURE -> protoerr
PUT_FROM _ -> protoerr
ALREADY_HAVE -> protoerr
ALREADY_HAVE_PLUS _ -> protoerr
-- Early messages that the client should not send now.
AUTH _ _ -> protoerr
VERSION _ -> protoerr
-- Send a message to the remote, send its response back to the
-- client, and pass it to the continuation.
proxyresponse remoteside message a =
getresponse (runRemoteSide remoteside) message $ \resp ->
protoerrhandler (a resp) $
client $ net $ sendMessage resp
-- Send a message to the remote, that it will not respond to.
proxynoresponse remoteside message a =
protoerrhandler a $
runRemoteSide remoteside $ net $ sendMessage message
-- Send a message to the endpoint and get back its response.
getresponse endpoint message handleresp =
protoerrhandler (withresp handleresp) $
endpoint $ net $ do
sendMessage message
receiveMessage
withresp a (Just resp) = a resp
-- Whichever of the remote or client the message was read from
-- hung up.
withresp _ Nothing = proxydone
-- Read a message from one party, send it to the other,
-- and then pass the message to the continuation.
relayonemessage from to cont =
flip protoerrhandler (from $ net $ receiveMessage) $
withresp $ \message ->
protoerrhandler (cont message) $
to $ net $ sendMessage message
protoerr = do
_ <- client $ net $ sendMessage (ERROR "protocol error")
giveup "protocol error"
handleREMOVE [] _ _ =
-- When no places are provided to remove from,
-- don't report a successful remote.
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage FAILURE
handleREMOVE remotesides k message = do
v <- forMC concurrencyconfig remotesides $ \r ->
runRemoteSideOrSkipFailed r $ do
net $ sendMessage message
net receiveMessage >>= return . \case
Just SUCCESS ->
Just (True, [Remote.uuid (remote r)])
Just (SUCCESS_PLUS us) ->
Just (True, Remote.uuid (remote r):us)
Just FAILURE ->
Just (False, [])
Just (FAILURE_PLUS us) ->
Just (False, us)
_ -> Nothing
let v' = map join v
let us = concatMap snd $ catMaybes v'
mapM_ (\u -> removedContent proxymethods u k) us
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage $
let nonplussed = all (== remoteuuid) us
|| protocolversion < 2
in if all (maybe False fst) v'
then if nonplussed
then SUCCESS
else SUCCESS_PLUS us
else if nonplussed
then FAILURE
else FAILURE_PLUS us
handleGET remoteside message = getresponse (runRemoteSide remoteside) message $
withDATA (relayGET remoteside)
handlePUT (remoteside:[]) k message
| Remote.uuid (remote remoteside) == remoteuuid =
getresponse (runRemoteSide remoteside) message $ \resp -> case resp of
ALREADY_HAVE -> protoerrhandler proxynextclientmessage $
client $ net $ sendMessage resp
ALREADY_HAVE_PLUS _ -> protoerrhandler proxynextclientmessage $
client $ net $ sendMessage resp
PUT_FROM _ ->
getresponse client resp $
withDATA (relayPUT remoteside k)
_ -> protoerr
handlePUT [] _ _ =
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage ALREADY_HAVE
handlePUT remotesides k message =
handlePutMulti remotesides k message
withDATA a message@(DATA len) = a len message
withDATA _ _ = protoerr
relayGET remoteside len = relayDATAStart client $
relayDATACore len (runRemoteSide remoteside) client $
relayDATAFinish (runRemoteSide remoteside) client $
relayonemessage client (runRemoteSide remoteside) $
const proxynextclientmessage
relayPUT remoteside k len = relayDATAStart (runRemoteSide remoteside) $
relayDATACore len client (runRemoteSide remoteside) $
relayDATAFinish client (runRemoteSide remoteside) $
relayonemessage (runRemoteSide remoteside) client finished
where
finished resp () = do
void $ relayPUTRecord k remoteside resp
proxynextclientmessage ()
relayPUTRecord k remoteside SUCCESS = do
addedContent proxymethods (Remote.uuid (remote remoteside)) k
return $ Just [Remote.uuid (remote remoteside)]
relayPUTRecord k remoteside (SUCCESS_PLUS us) = do
let us' = (Remote.uuid (remote remoteside)) : us
forM_ us' $ \u ->
addedContent proxymethods u k
return $ Just us'
relayPUTRecord _ _ _ =
return Nothing
handlePutMulti remotesides k message = do
let initiate remoteside = do
resp <- runRemoteSideOrSkipFailed remoteside $ net $ do
sendMessage message
receiveMessage
case resp of
Just (Just (PUT_FROM (Offset offset))) ->
return $ Right $
Right (remoteside, offset)
Just (Just ALREADY_HAVE) ->
return $ Right $ Left remoteside
Just (Just _) -> protoerr
Just Nothing -> return (Left ())
Nothing -> return (Left ())
let alreadyhave = \case
Right (Left _) -> True
_ -> False
l <- forMC concurrencyconfig remotesides initiate
if all alreadyhave l
then if protocolversion < 2
then protoerrhandler proxynextclientmessage $
client $ net $ sendMessage ALREADY_HAVE
else protoerrhandler proxynextclientmessage $
client $ net $ sendMessage $ ALREADY_HAVE_PLUS $
filter (/= remoteuuid) $
map (Remote.uuid . remote) (lefts (rights l))
else if null (rights l)
-- no response from any remote
then proxydone
else do
let l' = rights (rights l)
let minoffset = minimum (map snd l')
getresponse client (PUT_FROM (Offset minoffset)) $
withDATA (relayPUTMulti minoffset l' k)
relayPUTMulti minoffset remotes k (Len datalen) _ = do
let totallen = datalen + minoffset
-- Tell each remote how much data to expect, depending
-- on the remote's offset.
rs <- forMC concurrencyconfig remotes $ \r@(remoteside, remoteoffset) ->
runRemoteSideOrSkipFailed remoteside $ do
net $ sendMessage $ DATA $ Len $
totallen - remoteoffset
return r
protoerrhandler (send (catMaybes rs) minoffset) $
client $ net $ receiveBytes (Len datalen) nullMeterUpdate
where
chunksize = fromIntegral defaultChunkSize
-- Stream the lazy bytestring out to the remotes in chunks.
-- Only start sending to a remote once past its desired
-- offset.
send rs n b = do
let (chunk, b') = L.splitAt chunksize b
let chunklen = fromIntegral (L.length chunk)
let !n' = n + chunklen
rs' <- forMC concurrencyconfig rs $ \r@(remoteside, remoteoffset) ->
if n >= remoteoffset
then runRemoteSideOrSkipFailed remoteside $ do
net $ sendBytes (Len chunklen) chunk nullMeterUpdate
return r
else if (n' > remoteoffset)
then do
let chunkoffset = remoteoffset - n
let subchunklen = chunklen - chunkoffset
let subchunk = L.drop (fromIntegral chunkoffset) chunk
runRemoteSideOrSkipFailed remoteside $ do
net $ sendBytes (Len subchunklen) subchunk nullMeterUpdate
return r
else return (Just r)
if L.null b'
then sent (catMaybes rs')
else send (catMaybes rs') n' b'
sent [] = proxydone
sent rs = relayDATAFinishMulti k (map fst rs)
runRemoteSideOrSkipFailed remoteside a =
runRemoteSide remoteside a >>= \case
Right v -> return (Just v)
Left _ -> do
-- This connection to the remote is
-- unrecoverable at this point, so close it.
closeRemoteSide remoteside
return Nothing
relayDATAStart x receive message =
protoerrhandler (\() -> receive) $
x $ net $ sendMessage message
relayDATACore len x y a = protoerrhandler send $
x $ net $ receiveBytes len nullMeterUpdate
where
send b = protoerrhandler a $
y $ net $ sendBytes len b nullMeterUpdate
relayDATAFinish x y sendsuccessfailure ()
| protocolversion == 0 = sendsuccessfailure
-- Protocol version 1 has a VALID or
-- INVALID message after the data.
| otherwise = relayonemessage x y (\_ () -> sendsuccessfailure)
relayDATAFinishMulti k rs
| protocolversion == 0 =
finish $ net receiveMessage
| otherwise =
flip protoerrhandler (client $ net $ receiveMessage) $
withresp $ \message ->
finish $ do
-- Relay VALID or INVALID message
-- only to remotes that support
-- protocol version 1.
net getProtocolVersion >>= \case
ProtocolVersion 0 -> return ()
_ -> net $ sendMessage message
net receiveMessage
where
finish a = do
storeduuids <- forMC concurrencyconfig rs $ \r ->
runRemoteSideOrSkipFailed r a >>= \case
Just (Just resp) ->
relayPUTRecord k r resp
_ -> return Nothing
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage $
case concat (catMaybes storeduuids) of
[] -> FAILURE
us
| protocolversion < 2 -> SUCCESS
| otherwise -> SUCCESS_PLUS us
-- The associated file received from the P2P protocol
-- is relative to the top of the git repository. But this process
-- may be running with a different cwd.
getassociatedfile (ProtoAssociatedFile (AssociatedFile (Just f))) =
AssociatedFile . Just
<$> fromRepo (fromTopFilePath (asTopFilePath f))
getassociatedfile (ProtoAssociatedFile (AssociatedFile Nothing)) =
return $ AssociatedFile Nothing
data ConcurrencyConfig = ConcurrencyConfig Int (MSem.MSem Int)
noConcurrencyConfig :: Annex ConcurrencyConfig
noConcurrencyConfig = liftIO $ ConcurrencyConfig 1 <$> MSem.new 1
getConcurrencyConfig :: Annex ConcurrencyConfig
getConcurrencyConfig = (annexJobs <$> Annex.getGitConfig) >>= \case
NonConcurrent -> noConcurrencyConfig
Concurrent n -> go n
ConcurrentPerCpu -> go =<< liftIO getNumProcessors
where
go n = do
c <- liftIO getNumCapabilities
when (n > c) $
liftIO $ setNumCapabilities n
setConcurrency (ConcurrencyGitConfig (Concurrent n))
msem <- liftIO $ MSem.new n
return (ConcurrencyConfig n msem)
forMC :: ConcurrencyConfig -> [a] -> (a -> Annex b) -> Annex [b]
forMC _ (x:[]) a = do
r <- a x
return [r]
forMC (ConcurrencyConfig n msem) xs a
| n < 2 = forM xs a
| otherwise = do
runners <- forM xs $ \x ->
forkState $ bracketIO
(MSem.wait msem)
(const $ MSem.signal msem)
(const $ a x)
mapM id =<< liftIO (forConcurrently runners id)

View file

@ -342,11 +342,12 @@ remoteLocations (IncludeIgnored ii) locations trusted = do
{- Displays known locations of a key and helps the user take action
- to make them accessible. -}
showLocations :: Bool -> Key -> [UUID] -> String -> Annex ()
showLocations separateuntrusted key exclude nolocmsg = do
showLocations :: Bool -> Key -> (UUID -> Annex Bool) -> String -> Annex ()
showLocations separateuntrusted key checkexclude nolocmsg = do
u <- getUUID
remotes <- remoteList
uuids <- keyLocations key
exclude <- filterM checkexclude uuids
untrusteduuids <- if separateuntrusted
then trustGet UnTrusted
else pure []
@ -447,11 +448,14 @@ claimingUrl' remotefilter url = do
where
checkclaim = maybe (pure False) (`id` url) . claimUrl
{- Is this a remote of a type we can sync with, or a special remote
- with an annex:: url configured? -}
{- Is this a remote of a type that git pull and push work with?
- That includes special remotes with an annex:: url configured.
- It does not include proxied remotes. -}
gitSyncableRemote :: Remote -> Bool
gitSyncableRemote r
| gitSyncableRemoteType (remotetype r) = True
| gitSyncableRemoteType (remotetype r)
&& isJust (remoteUrl (gitconfig r)) =
not (isJust (remoteAnnexProxiedBy (gitconfig r)))
| otherwise = case remoteUrl (gitconfig r) of
Just u | "annex::" `isPrefixOf` u -> True
_ -> False

View file

@ -1,6 +1,6 @@
{- Standard git remotes.
-
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -25,7 +25,6 @@ import qualified Git.Command
import qualified Git.GCrypt
import qualified Git.Types as Git
import qualified Annex
import Logs.Presence
import Annex.Transfer
import Annex.CopyFile
import Annex.Verify
@ -45,6 +44,8 @@ import Annex.Init
import Types.CleanupActions
import qualified CmdLine.GitAnnexShell.Fields as Fields
import Logs.Location
import Logs.Proxy
import Logs.Cluster.Basic
import Utility.Metered
import Utility.Env
import Utility.Batch
@ -66,7 +67,8 @@ import Messages.Progress
import Control.Concurrent
import qualified Data.Map as M
import qualified Data.ByteString as S
import qualified Data.Set as S
import qualified Data.ByteString as B
import qualified Utility.RawFilePath as R
import Network.URI
@ -92,7 +94,13 @@ list :: Bool -> Annex [Git.Repo]
list autoinit = do
c <- fromRepo Git.config
rs <- mapM (tweakurl c) =<< Annex.getGitRemotes
mapM (configRead autoinit) (filter (not . isGitRemoteAnnex) rs)
rs' <- mapM (configRead autoinit) (filter (not . isGitRemoteAnnex) rs)
proxies <- doQuietAction getProxies
if proxies == mempty
then return rs'
else do
proxied <- listProxied proxies rs'
return (proxied++rs')
where
annexurl r = remoteConfig r "annexurl"
tweakurl c r = do
@ -168,6 +176,7 @@ configRead autoinit r = do
Just r' -> return r'
_ -> return r
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r u rc gc rs
-- Remote.GitLFS may be used with a repo that is also encrypted
@ -178,10 +187,9 @@ gen r u rc gc rs
Nothing -> do
st <- mkState r u gc
c <- parsedRemoteConfig remote rc
go st c <$> remoteCost gc c defcst
go st c <$> remoteCost gc c (defaultRepoCost r)
Just addr -> Remote.P2P.chainGen addr r u rc gc rs
where
defcst = if repoCheap r then cheapRemoteCost else expensiveRemoteCost
go st c cst = Just new
where
new = Remote
@ -221,6 +229,11 @@ gen r u rc gc rs
, remoteStateHandle = rs
}
defaultRepoCost :: Git.Repo -> Cost
defaultRepoCost r
| repoCheap r = cheapRemoteCost
| otherwise = expensiveRemoteCost
unavailable :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
unavailable r = gen r'
where
@ -265,7 +278,7 @@ tryGitConfigRead autoinit r hasuuid
v <- liftIO $ Git.Config.fromPipe r cmd params st
case v of
Right (r', val, _err) -> do
unless (isUUIDConfigured r' || S.null val || not mustincludeuuuid) $ do
unless (isUUIDConfigured r' || val == mempty || not mustincludeuuuid) $ do
warning $ UnquotedString $ "Failed to get annex.uuid configuration of repository " ++ Git.repoDescribe r
warning $ UnquotedString $ "Instead, got: " ++ show val
warning "This is unexpected; please check the network transport!"
@ -338,7 +351,7 @@ tryGitConfigRead autoinit r hasuuid
readlocalannexconfig = do
let check = do
Annex.BranchState.disableUpdate
catchNonAsync (autoInitialize (pure [])) $ \e ->
catchNonAsync (autoInitialize noop (pure [])) $ \e ->
warning $ UnquotedString $ "Remote " ++ Git.repoDescribe r ++
": " ++ show e
Annex.getState Annex.repo
@ -442,7 +455,8 @@ dropKey' repo r st@(State connpool duc _ _ _) key
, giveup "remote does not have expected annex.uuid value"
)
| Git.repoIsHttp repo = giveup "dropping from http remote not supported"
| otherwise = P2PHelper.remove (Ssh.runProto r connpool (return False)) key
| otherwise = P2PHelper.remove (uuid r)
(Ssh.runProto r connpool (return (False, Nothing))) key
lockKey :: Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r
lockKey r st key callback = do
@ -464,7 +478,7 @@ lockKey' repo r st@(State connpool duc _ _ _) key callback
)
| Git.repoIsSsh repo = do
showLocking r
let withconn = Ssh.withP2PSshConnection r connpool failedlock
let withconn = Ssh.withP2PShellConnection r connpool failedlock
P2PHelper.lock withconn Ssh.runProtoConn (uuid r) key callback
| otherwise = failedlock
where
@ -542,8 +556,8 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate
, giveup "remote does not have expected annex.uuid value"
)
| Git.repoIsSsh repo =
P2PHelper.store (gitconfig r)
(Ssh.runProto r connpool (return False))
P2PHelper.store (uuid r) (gitconfig r)
(Ssh.runProto r connpool (return Nothing))
key file meterupdate
| otherwise = giveup "copying to non-ssh repo not supported"
@ -594,7 +608,7 @@ repairRemote r a = return $ do
s <- Annex.new r
Annex.eval s $ do
Annex.BranchState.disableUpdate
ensureInitialized (pure [])
ensureInitialized noop (pure [])
a `finally` quiesce True
data LocalRemoteAnnex = LocalRemoteAnnex Git.Repo (MVar [(Annex.AnnexState, Annex.AnnexRead)])
@ -638,7 +652,7 @@ onLocal' (LocalRemoteAnnex repo mv) a = liftIO (takeMVar mv) >>= \case
[] -> do
liftIO $ putMVar mv []
v <- newLocal repo
go (v, ensureInitialized (pure []) >> a)
go (v, ensureInitialized noop (pure []) >> a)
(v:rest) -> do
liftIO $ putMVar mv rest
go (v, a)
@ -725,7 +739,7 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
- This returns False when the repository UUID is not as expected. -}
type DeferredUUIDCheck = Annex Bool
data State = State Ssh.P2PSshConnectionPool DeferredUUIDCheck CopyCoWTried (Annex (Git.Repo, GitConfig)) LocalRemoteAnnex
data State = State Ssh.P2PShellConnectionPool DeferredUUIDCheck CopyCoWTried (Annex (Git.Repo, GitConfig)) LocalRemoteAnnex
getRepoFromState :: State -> Annex Git.Repo
getRepoFromState (State _ _ _ a _) = fst <$> a
@ -738,7 +752,7 @@ getGitConfigFromState (State _ _ _ a _) = snd <$> a
mkState :: Git.Repo -> UUID -> RemoteGitConfig -> Annex State
mkState r u gc = do
pool <- Ssh.mkP2PSshConnectionPool
pool <- Ssh.mkP2PShellConnectionPool
copycowtried <- liftIO newCopyCoWTried
lra <- mkLocalRemoteAnnex r
(duc, getrepo) <- go
@ -772,3 +786,122 @@ mkState r u gc = do
)
return (duc, getrepo)
listProxied :: M.Map UUID (S.Set Proxy) -> [Git.Repo] -> Annex [Git.Repo]
listProxied proxies rs = concat <$> mapM go rs
where
go r = do
g <- Annex.gitRepo
u <- getRepoUUID r
gc <- Annex.getRemoteGitConfig r
let cu = fromMaybe u $ remoteAnnexConfigUUID gc
if not (canproxy gc r) || cu == NoUUID
then pure []
else case M.lookup cu proxies of
Nothing -> pure []
Just proxied -> catMaybes
<$> mapM (mkproxied g r gc proxied)
(S.toList proxied)
proxiedremotename r p = do
n <- Git.remoteName r
pure $ n ++ "-" ++ proxyRemoteName p
mkproxied g r gc proxied p = case proxiedremotename r p of
Nothing -> pure Nothing
Just proxyname -> mkproxied' g r gc proxied p proxyname
-- The proxied remote is constructed by renaming the proxy remote,
-- changing its uuid, and setting the proxied remote's inherited
-- configs and uuid in Annex state.
mkproxied' g r gc proxied p proxyname
| any isconfig (M.keys (Git.config g)) = pure Nothing
| otherwise = do
clusters <- getClustersWith id
-- Not using addGitConfigOverride for inherited
-- configs, because child git processes do not
-- need them to be provided with -c.
Annex.adjustGitRepo (pure . annexconfigadjuster clusters)
return $ Just $ renamedr
where
renamedr =
let c = adduuid configkeyUUID $
Git.fullconfig r
in r
{ Git.remoteName = Just proxyname
, Git.config = M.map Prelude.head c
, Git.fullconfig = c
}
annexconfigadjuster clusters r' =
let c = adduuid (configRepoUUID renamedr) $
addurl $
addproxiedby $
adjustclusternode clusters $
inheritconfigs $ Git.fullconfig r'
in r'
{ Git.config = M.map Prelude.head c
, Git.fullconfig = c
}
adduuid ck = M.insert ck
[Git.ConfigValue $ fromUUID $ proxyRemoteUUID p]
addurl = M.insert (remoteConfig renamedr (remoteGitConfigKey UrlField))
[Git.ConfigValue $ encodeBS $ Git.repoLocation r]
addproxiedby = case remoteAnnexUUID gc of
Just u -> addremoteannexfield ProxiedByField
[Git.ConfigValue $ fromUUID u]
Nothing -> id
-- A node of a cluster that is being proxied along with
-- that cluster does not need to be synced with
-- by default, because syncing with the cluster will
-- effectively sync with all of its nodes.
--
-- Also, give it a slightly higher cost than the
-- cluster by default, to encourage using the cluster.
adjustclusternode clusters =
case M.lookup (ClusterNodeUUID (proxyRemoteUUID p)) (clusterNodeUUIDs clusters) of
Just cs
| any (\c -> S.member (fromClusterUUID c) proxieduuids) (S.toList cs) ->
addremoteannexfield SyncField
[Git.ConfigValue $ Git.Config.boolConfig' False]
. addremoteannexfield CostField
[Git.ConfigValue $ encodeBS $ show $ defaultRepoCost r + 0.1]
_ -> id
proxieduuids = S.map proxyRemoteUUID proxied
addremoteannexfield f = M.insert
(remoteAnnexConfig renamedr (remoteGitConfigKey f))
inheritconfigs c = foldl' inheritconfig c proxyInheritedFields
inheritconfig c k = case (M.lookup dest c, M.lookup src c) of
(Nothing, Just v) -> M.insert dest v c
_ -> c
where
src = remoteAnnexConfig r k
dest = remoteAnnexConfig renamedr k
-- When the git config has anything set for a remote,
-- avoid making a proxied remote with the same name.
-- It is possible to set git configs of proxies, but it
-- needs both the url and uuid config to be manually set.
isconfig (Git.ConfigKey configkey) =
proxyconfigprefix `B.isPrefixOf` configkey
where
Git.ConfigKey proxyconfigprefix = remoteConfig proxyname mempty
-- Git remotes that are gcrypt or git-lfs special remotes cannot
-- proxy. Local git remotes cannot proxy either because
-- git-annex-shell is not used to access a local git url.
-- Proxing is also yet supported for remotes using P2P
-- addresses.
canproxy gc r
| remoteAnnexGitLFS gc = False
| Git.GCrypt.isEncrypted r = False
| Git.repoIsLocal r || Git.repoIsLocalUnknown r = False
| otherwise = isNothing (repoP2PAddress r)

View file

@ -64,5 +64,7 @@ gitRepoInfo r = do
repo <- Remote.getRepo r
return
[ ("repository location", Git.repoLocation repo)
, ("proxied", Git.Config.boolConfig
(isJust (remoteAnnexProxiedBy (Remote.gitconfig r))))
, ("last synced", lastsynctime)
]

View file

@ -19,6 +19,7 @@ import Utility.Metered
import Utility.Tuple
import Types.NumCopies
import Annex.Verify
import Logs.Location
import Control.Concurrent
@ -32,14 +33,20 @@ type ProtoConnRunner c = forall a. P2P.Proto a -> ClosableConnection c -> Annex
-- the pool when done.
type WithConn a c = (ClosableConnection c -> Annex (ClosableConnection c, a)) -> Annex a
store :: RemoteGitConfig -> ProtoRunner Bool -> Key -> AssociatedFile -> MeterUpdate -> Annex ()
store gc runner k af p = do
store :: UUID -> RemoteGitConfig -> ProtoRunner (Maybe [UUID]) -> Key -> AssociatedFile -> MeterUpdate -> Annex ()
store remoteuuid gc runner k af p = do
let sizer = KeySizer k (fmap (toRawFilePath . fst3) <$> prepSendAnnex k)
let bwlimit = remoteAnnexBwLimitUpload gc <|> remoteAnnexBwLimit gc
metered (Just p) sizer bwlimit $ \_ p' ->
runner (P2P.put k af p') >>= \case
Just True -> return ()
Just False -> giveup "Transfer failed"
Just (Just fanoutuuids) -> do
-- Storing on the remote can cause it
-- to be stored on additional UUIDs,
-- so record those.
forM_ fanoutuuids $ \u ->
when (u /= remoteuuid) $
logChange k u InfoPresent
Just Nothing -> giveup "Transfer failed"
Nothing -> remoteUnavail
retrieve :: RemoteGitConfig -> (ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
@ -52,11 +59,20 @@ retrieve gc runner k af dest p verifyconfig = do
Just (False, _) -> giveup "Transfer failed"
Nothing -> remoteUnavail
remove :: ProtoRunner Bool -> Key -> Annex ()
remove runner k = runner (P2P.remove k) >>= \case
Just True -> return ()
Just False -> giveup "removing content from remote failed"
remove :: UUID -> ProtoRunner (Bool, Maybe [UUID]) -> Key -> Annex ()
remove remoteuuid runner k = runner (P2P.remove k) >>= \case
Just (True, alsoremoveduuids) -> note alsoremoveduuids
Just (False, alsoremoveduuids) -> do
note alsoremoveduuids
giveup "removing content from remote failed"
Nothing -> remoteUnavail
where
-- The remote reports removal from other UUIDs than its own,
-- so record those.
note alsoremoveduuids =
forM_ (fromMaybe [] alsoremoveduuids) $ \u ->
when (u /= remoteuuid) $
logChange k u InfoMissing
checkpresent :: ProtoRunner Bool -> Key -> Annex Bool
checkpresent runner k = maybe remoteUnavail return =<< runner (P2P.checkPresent k)

View file

@ -180,67 +180,78 @@ rsyncParams r direction = do
| otherwise = remoteAnnexRsyncUploadOptions gc
gc = gitconfig r
-- A connection over ssh to git-annex shell speaking the P2P protocol.
type P2PSshConnection = P2P.ClosableConnection
-- A connection over ssh or locally to git-annex shell,
-- speaking the P2P protocol.
type P2PShellConnection = P2P.ClosableConnection
(P2P.RunState, P2P.P2PConnection, ProcessHandle)
closeP2PSshConnection :: P2PSshConnection -> IO (P2PSshConnection, Maybe ExitCode)
closeP2PSshConnection P2P.ClosedConnection = return (P2P.ClosedConnection, Nothing)
closeP2PSshConnection (P2P.OpenConnection (_st, conn, pid)) =
closeP2PShellConnection :: P2PShellConnection -> IO (P2PShellConnection, Maybe ExitCode)
closeP2PShellConnection P2P.ClosedConnection = return (P2P.ClosedConnection, Nothing)
closeP2PShellConnection (P2P.OpenConnection (_st, conn, pid)) =
-- mask async exceptions, avoid cleanup being interrupted
uninterruptibleMask_ $ do
P2P.closeConnection conn
exitcode <- waitForProcess pid
return (P2P.ClosedConnection, Just exitcode)
-- Pool of connections over ssh to git-annex-shell p2pstdio.
type P2PSshConnectionPool = TVar (Maybe P2PSshConnectionPoolState)
-- Pool of connections to git-annex-shell p2pstdio.
type P2PShellConnectionPool = TVar (Maybe P2PShellConnectionPoolState)
data P2PSshConnectionPoolState
= P2PSshConnections [P2PSshConnection]
data P2PShellConnectionPoolState
= P2PShellConnections [P2PShellConnection]
-- Remotes using an old version of git-annex-shell don't support P2P
| P2PSshUnsupported
| P2PShellUnsupported
mkP2PSshConnectionPool :: Annex P2PSshConnectionPool
mkP2PSshConnectionPool = liftIO $ newTVarIO Nothing
mkP2PShellConnectionPool :: Annex P2PShellConnectionPool
mkP2PShellConnectionPool = liftIO $ newTVarIO Nothing
-- Takes a connection from the pool, if any are available, otherwise
-- tries to open a new one.
getP2PSshConnection :: Remote -> P2PSshConnectionPool -> Annex (Maybe P2PSshConnection)
getP2PSshConnection r connpool = getexistingconn >>= \case
getP2PShellConnection :: Remote -> P2PShellConnectionPool -> Annex (Maybe P2PShellConnection)
getP2PShellConnection r connpool = getexistingconn >>= \case
Nothing -> return Nothing
Just Nothing -> openP2PSshConnection r connpool
Just Nothing -> openP2PShellConnection r connpool
Just (Just c) -> return (Just c)
where
getexistingconn = liftIO $ atomically $ readTVar connpool >>= \case
Just P2PSshUnsupported -> return Nothing
Just (P2PSshConnections (c:cs)) -> do
writeTVar connpool (Just (P2PSshConnections cs))
Just P2PShellUnsupported -> return Nothing
Just (P2PShellConnections (c:cs)) -> do
writeTVar connpool (Just (P2PShellConnections cs))
return (Just (Just c))
Just (P2PSshConnections []) -> return (Just Nothing)
Just (P2PShellConnections []) -> return (Just Nothing)
Nothing -> return (Just Nothing)
-- Add a connection to the pool, unless it's closed.
storeP2PSshConnection :: P2PSshConnectionPool -> P2PSshConnection -> IO ()
storeP2PSshConnection _ P2P.ClosedConnection = return ()
storeP2PSshConnection connpool conn = atomically $ modifyTVar' connpool $ \case
Just (P2PSshConnections cs) -> Just (P2PSshConnections (conn:cs))
_ -> Just (P2PSshConnections [conn])
storeP2PShellConnection :: P2PShellConnectionPool -> P2PShellConnection -> IO ()
storeP2PShellConnection _ P2P.ClosedConnection = return ()
storeP2PShellConnection connpool conn = atomically $ modifyTVar' connpool $ \case
Just (P2PShellConnections cs) -> Just (P2PShellConnections (conn:cs))
_ -> Just (P2PShellConnections [conn])
-- Try to open a P2PSshConnection.
-- Try to open a P2PShellConnection.
-- The new connection is not added to the pool, so it's available
-- for the caller to use.
-- If the remote does not support the P2P protocol, that's remembered in
-- the connection pool.
openP2PSshConnection :: Remote -> P2PSshConnectionPool -> Annex (Maybe P2PSshConnection)
openP2PSshConnection r connpool = do
openP2PShellConnection :: Remote -> P2PShellConnectionPool -> Annex (Maybe P2PShellConnection)
openP2PShellConnection r connpool =
openP2PShellConnection' r P2P.maxProtocolVersion mempty >>= \case
Just conn -> return (Just conn)
Nothing -> do
liftIO $ rememberunsupported
return Nothing
where
rememberunsupported = atomically $
modifyTVar' connpool $
maybe (Just P2PShellUnsupported) Just
openP2PShellConnection' :: Remote -> P2P.ProtocolVersion -> P2P.Bypass -> Annex (Maybe P2PShellConnection)
openP2PShellConnection' r maxprotoversion bypass = do
u <- getUUID
let ps = [Param (fromUUID u)]
repo <- getRepo r
git_annex_shell ConsumeStdin repo "p2pstdio" ps [] >>= \case
Nothing -> do
liftIO $ rememberunsupported
return Nothing
Nothing -> return Nothing
Just (cmd, params) -> start cmd params
where
start cmd params = liftIO $ do
@ -256,45 +267,41 @@ openP2PSshConnection r connpool = do
, P2P.connIhdl = to
, P2P.connOhdl = from
, P2P.connIdent = P2P.ConnIdent $
Just $ "ssh connection " ++ show pidnum
Just $ "git-annex-shell connection " ++ show pidnum
}
runst <- P2P.mkRunState P2P.Client
let c = P2P.OpenConnection (runst, conn, pid)
-- When the connection is successful, the remote
-- will send an AUTH_SUCCESS with its uuid.
let proto = P2P.postAuth $
P2P.negotiateProtocolVersion P2P.maxProtocolVersion
let proto = P2P.postAuth $ do
P2P.negotiateProtocolVersion maxprotoversion
P2P.sendBypass bypass
tryNonAsync (P2P.runNetProto runst conn proto) >>= \case
Right (Right (Just theiruuid)) | theiruuid == uuid r ->
return $ Just c
_ -> do
(cclosed, exitcode) <- closeP2PSshConnection c
(cclosed, exitcode) <- closeP2PShellConnection c
-- ssh exits 255 when unable to connect to
-- server.
if exitcode == Just (ExitFailure 255)
then return (Just cclosed)
else do
rememberunsupported
return Nothing
rememberunsupported = atomically $
modifyTVar' connpool $
maybe (Just P2PSshUnsupported) Just
else return Nothing
-- Runs a P2P Proto action on a remote when it supports that,
-- otherwise the fallback action.
runProto :: Remote -> P2PSshConnectionPool -> Annex a -> P2P.Proto a -> Annex (Maybe a)
runProto :: Remote -> P2PShellConnectionPool -> Annex a -> P2P.Proto a -> Annex (Maybe a)
runProto r connpool onerr proto = Just <$>
(getP2PSshConnection r connpool >>= maybe onerr go)
(getP2PShellConnection r connpool >>= maybe onerr go)
where
go c = do
(c', v) <- runProtoConn proto c
case v of
Just res -> do
liftIO $ storeP2PSshConnection connpool c'
liftIO $ storeP2PShellConnection connpool c'
return res
Nothing -> onerr
runProtoConn :: P2P.Proto a -> P2PSshConnection -> Annex (P2PSshConnection, Maybe a)
runProtoConn :: P2P.Proto a -> P2PShellConnection -> Annex (P2PShellConnection, Maybe a)
runProtoConn _ P2P.ClosedConnection = return (P2P.ClosedConnection, Nothing)
runProtoConn a conn@(P2P.OpenConnection (runst, c, _)) = do
P2P.runFullProto runst c a >>= \case
@ -303,24 +310,24 @@ runProtoConn a conn@(P2P.OpenConnection (runst, c, _)) = do
-- usable, so close it.
Left e -> do
warning $ UnquotedString $ "Lost connection (" ++ P2P.describeProtoFailure e ++ ")"
conn' <- fst <$> liftIO (closeP2PSshConnection conn)
conn' <- fst <$> liftIO (closeP2PShellConnection conn)
return (conn', Nothing)
-- Allocates a P2P ssh connection from the pool, and runs the action with it,
-- returning the connection to the pool once the action is done.
-- Allocates a P2P shell connection from the pool, and runs the action with
-- it, returning the connection to the pool once the action is done.
--
-- If the remote does not support the P2P protocol, runs the fallback
-- action instead.
withP2PSshConnection
withP2PShellConnection
:: Remote
-> P2PSshConnectionPool
-> P2PShellConnectionPool
-> Annex a
-> (P2PSshConnection -> Annex (P2PSshConnection, a))
-> (P2PShellConnection -> Annex (P2PShellConnection, a))
-> Annex a
withP2PSshConnection r connpool fallback a = bracketOnError get cache go
withP2PShellConnection r connpool fallback a = bracketOnError get cache go
where
get = getP2PSshConnection r connpool
cache (Just conn) = liftIO $ storeP2PSshConnection connpool conn
get = getP2PShellConnection r connpool
cache (Just conn) = liftIO $ storeP2PShellConnection connpool conn
cache Nothing = return ()
go (Just conn) = do
(conn', res) <- a conn

View file

@ -65,7 +65,7 @@ remoteTypes = map adjustExportImportRemoteType
, Remote.External.remote
]
{- Builds a list of all available Remotes.
{- Builds a list of all Remotes.
- Since doing so can be expensive, the list is cached. -}
remoteList :: Annex [Remote]
remoteList = do

View file

@ -57,11 +57,11 @@ chainGen addr r u rc gc rs = do
{ uuid = u
, cost = cst
, name = Git.repoDescribe r
, storeKey = store gc protorunner
, storeKey = store u gc protorunner
, retrieveKeyFile = retrieve gc protorunner
, retrieveKeyFileCheap = Nothing
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = remove protorunner
, removeKey = remove u protorunner
, lockContent = Just $ lock withconn runProtoConn u
, checkPresent = checkpresent protorunner
, checkPresentCheap = False

84
Types/Cluster.hs Normal file
View file

@ -0,0 +1,84 @@
{- git-annex cluster types
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP, OverloadedStrings #-}
module Types.Cluster (
ClusterUUID,
mkClusterUUID,
genClusterUUID,
fromClusterUUID,
isClusterUUID,
ClusterNodeUUID(..),
Clusters(..),
noClusters,
) where
import Types.UUID
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.ByteString as B
import Data.Char
-- The UUID of a cluster as a whole.
--
-- Cluster UUIDs are specially constructed so that regular repository UUIDs
-- can never be used as a cluster UUID. This is ncessary for security.
-- They are a version 8 UUID with the first octet set to 'a' and the second
-- to 'c'.
newtype ClusterUUID = ClusterUUID UUID
deriving (Show, Eq, Ord)
-- Smart constructor for a ClusterUUID. Only allows valid cluster UUIDs.
mkClusterUUID :: UUID -> Maybe ClusterUUID
mkClusterUUID u
| isClusterUUID u = Just (ClusterUUID u)
| otherwise = Nothing
-- Check if it is a valid cluster UUID.
isClusterUUID :: UUID -> Bool
isClusterUUID (UUID b)
| B.take 2 b == "ac" =
#if MIN_VERSION_bytestring(0,11,0)
B.indexMaybe b 14 == Just eight
#else
B.length b > 14 && B.head (B.drop 14 b) == eight
#endif
where
eight = fromIntegral (ord '8')
isClusterUUID _ = False
{-# INLINE isClusterUUID #-}
-- Generates a ClusterUUID from any regular UUID (eg V4).
-- It is converted to a valid cluster UUID.
genClusterUUID :: UUID -> Maybe ClusterUUID
genClusterUUID (UUID b)
| B.length b > 14 = Just $ ClusterUUID $ UUID $
"ac" <> B.drop 2 (B.take 14 b) <> "8" <> B.drop 15 b
| otherwise = Nothing
genClusterUUID NoUUID = Nothing
fromClusterUUID :: ClusterUUID -> UUID
fromClusterUUID (ClusterUUID u) = u
-- The UUID of a node in a cluster. The UUID can be either the UUID of a
-- repository, or of another cluster.
newtype ClusterNodeUUID = ClusterNodeUUID { fromClusterNodeUUID :: UUID }
deriving (Show, Eq, Ord)
-- The same information is indexed two ways to allow fast lookups in either
-- direction.
data Clusters = Clusters
{ clusterUUIDs :: M.Map ClusterUUID (S.Set ClusterNodeUUID)
, clusterNodeUUIDs :: M.Map ClusterNodeUUID (S.Set ClusterUUID)
}
deriving (Show)
noClusters :: Clusters
noClusters = Clusters mempty mempty

View file

@ -142,4 +142,5 @@ data CommandCheckId
| RepoExists
| NoDaemonRunning
| GitAnnexShellOk
| GitAnnexShellNotProxyable
deriving (Show, Ord, Eq)

View file

@ -22,6 +22,9 @@ module Types.GitConfig (
RemoteNameable(..),
remoteAnnexConfig,
remoteConfig,
RemoteGitConfigField(..),
remoteGitConfigKey,
proxyInheritedFields,
) where
import Common
@ -30,7 +33,7 @@ import qualified Git.Config
import qualified Git.Construct
import Git.Types
import Git.ConfigTypes
import Git.Remote (isRemoteKey, remoteKeyToRemoteName)
import Git.Remote (isRemoteKey, isLegalName, remoteKeyToRemoteName)
import Git.Branch (CommitMode(..))
import Git.Quote (QuotePath(..))
import Utility.DataUnits
@ -44,6 +47,7 @@ import Types.RefSpec
import Types.RepoVersion
import Types.StallDetection
import Types.View
import Types.Cluster
import Config.DynamicConfig
import Utility.HumanTime
import Utility.Gpg (GpgCmd, mkGpgCmd)
@ -154,6 +158,7 @@ data GitConfig = GitConfig
, annexPrivateRepos :: S.Set UUID
, annexAdviceNoSshCaching :: Bool
, annexViewUnsetDirectory :: ViewUnset
, annexClusters :: M.Map RemoteName ClusterUUID
}
extractGitConfig :: ConfigSource -> Git.Repo -> GitConfig
@ -282,6 +287,10 @@ extractGitConfig configsource r = GitConfig
, annexAdviceNoSshCaching = getbool (annexConfig "advicenosshcaching") True
, annexViewUnsetDirectory = ViewUnset $ fromMaybe "_" $
getmaybe (annexConfig "viewunsetdirectory")
, annexClusters =
M.mapMaybe (mkClusterUUID . toUUID) $
M.mapKeys removeclusterprefix $
M.filterWithKey isclusternamekey (config r)
}
where
getbool k d = fromMaybe d $ getmaybebool k
@ -306,6 +315,11 @@ extractGitConfig configsource r = GitConfig
hereuuid = maybe NoUUID toUUID $ getmaybe (annexConfig "uuid")
clusterprefix = annexConfigPrefix <> "cluster."
isclusternamekey k _ = clusterprefix `B.isPrefixOf` (fromConfigKey' k)
&& isLegalName (removeclusterprefix k)
removeclusterprefix k = drop (B.length clusterprefix) (fromConfigKey k)
{- Merge a GitConfig that comes from git-config with one containing
- repository-global defaults. -}
mergeGitConfig :: GitConfig -> GitConfig -> GitConfig
@ -372,9 +386,14 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexBwLimitUpload :: Maybe BwRate
, remoteAnnexBwLimitDownload :: Maybe BwRate
, remoteAnnexAllowUnverifiedDownloads :: Bool
, remoteAnnexUUID :: Maybe UUID
, remoteAnnexConfigUUID :: Maybe UUID
, remoteAnnexMaxGitBundles :: Int
, remoteAnnexAllowEncryptedGitRepo :: Bool
, remoteAnnexProxy :: Bool
, remoteAnnexProxiedBy :: Maybe UUID
, remoteAnnexClusterNode :: Maybe [RemoteName]
, remoteAnnexClusterGateway :: [ClusterUUID]
, remoteUrl :: Maybe String
{- These settings are specific to particular types of remotes
@ -409,99 +428,254 @@ data RemoteGitConfig = RemoteGitConfig
extractRemoteGitConfig :: Git.Repo -> RemoteName -> STM RemoteGitConfig
extractRemoteGitConfig r remotename = do
annexcost <- mkDynamicConfig readCommandRunner
(notempty $ getmaybe "cost-command")
(getmayberead "cost")
(notempty $ getmaybe CostCommandField)
(getmayberead CostField)
annexignore <- mkDynamicConfig unsuccessfullCommandRunner
(notempty $ getmaybe "ignore-command")
(getbool "ignore" False)
(notempty $ getmaybe IgnoreCommandField)
(getbool IgnoreField False)
annexsync <- mkDynamicConfig successfullCommandRunner
(notempty $ getmaybe "sync-command")
(getbool "sync" True)
(notempty $ getmaybe SyncCommandField)
(getbool SyncField True)
return $ RemoteGitConfig
{ remoteAnnexCost = annexcost
, remoteAnnexIgnore = annexignore
, remoteAnnexSync = annexsync
, remoteAnnexPull = getbool "pull" True
, remoteAnnexPush = getbool "push" True
, remoteAnnexReadOnly = getbool "readonly" False
, remoteAnnexCheckUUID = getbool "checkuuid" True
, remoteAnnexVerify = getbool "verify" True
, remoteAnnexPull = getbool PullField True
, remoteAnnexPush = getbool PushField True
, remoteAnnexReadOnly = getbool ReadOnlyField False
, remoteAnnexCheckUUID = getbool CheckUUIDField True
, remoteAnnexVerify = getbool VerifyField True
, remoteAnnexTrackingBranch = Git.Ref . encodeBS <$>
( notempty (getmaybe "tracking-branch")
<|> notempty (getmaybe "export-tracking") -- old name
( notempty (getmaybe TrackingBranchField)
<|> notempty (getmaybe ExportTrackingField) -- old name
)
, remoteAnnexTrustLevel = notempty $ getmaybe "trustlevel"
, remoteAnnexStartCommand = notempty $ getmaybe "start-command"
, remoteAnnexStopCommand = notempty $ getmaybe "stop-command"
, remoteAnnexTrustLevel = notempty $ getmaybe TrustLevelField
, remoteAnnexStartCommand = notempty $ getmaybe StartCommandField
, remoteAnnexStopCommand = notempty $ getmaybe StopCommandField
, remoteAnnexSpeculatePresent =
getbool "speculate-present" False
, remoteAnnexBare = getmaybebool "bare"
, remoteAnnexRetry = getmayberead "retry"
, remoteAnnexForwardRetry = getmayberead "forward-retry"
getbool SpeculatePresentField False
, remoteAnnexBare = getmaybebool BareField
, remoteAnnexRetry = getmayberead RetryField
, remoteAnnexForwardRetry = getmayberead ForwardRetryField
, remoteAnnexRetryDelay = Seconds
<$> getmayberead "retrydelay"
<$> getmayberead RetryDelayField
, remoteAnnexStallDetection =
readStallDetection =<< getmaybe "stalldetection"
readStallDetection =<< getmaybe StallDetectionField
, remoteAnnexStallDetectionUpload =
readStallDetection =<< getmaybe "stalldetection-upload"
readStallDetection =<< getmaybe StallDetectionUploadField
, remoteAnnexStallDetectionDownload =
readStallDetection =<< getmaybe "stalldetection-download"
readStallDetection =<< getmaybe StallDetectionDownloadField
, remoteAnnexBwLimit =
readBwRatePerSecond =<< getmaybe "bwlimit"
readBwRatePerSecond =<< getmaybe BWLimitField
, remoteAnnexBwLimitUpload =
readBwRatePerSecond =<< getmaybe "bwlimit-upload"
readBwRatePerSecond =<< getmaybe BWLimitUploadField
, remoteAnnexBwLimitDownload =
readBwRatePerSecond =<< getmaybe "bwlimit-download"
readBwRatePerSecond =<< getmaybe BWLimitDownloadField
, remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
getmaybe ("security-allow-unverified-downloads")
getmaybe SecurityAllowUnverifiedDownloadsField
, remoteAnnexUUID = toUUID <$> getmaybe UUIDField
, remoteAnnexConfigUUID = toUUID <$> getmaybe ConfigUUIDField
, remoteAnnexMaxGitBundles =
fromMaybe 100 (getmayberead "max-git-bundles")
, remoteAnnexConfigUUID = toUUID <$> getmaybe "config-uuid"
, remoteAnnexShell = getmaybe "shell"
, remoteAnnexSshOptions = getoptions "ssh-options"
, remoteAnnexRsyncOptions = getoptions "rsync-options"
, remoteAnnexRsyncDownloadOptions = getoptions "rsync-download-options"
, remoteAnnexRsyncUploadOptions = getoptions "rsync-upload-options"
, remoteAnnexRsyncTransport = getoptions "rsync-transport"
, remoteAnnexGnupgOptions = getoptions "gnupg-options"
, remoteAnnexGnupgDecryptOptions = getoptions "gnupg-decrypt-options"
, remoteAnnexSharedSOPCommand = SOPCmd <$>
notempty (getmaybe "shared-sop-command")
, remoteAnnexSharedSOPProfile = SOPProfile <$>
notempty (getmaybe "shared-sop-profile")
, remoteAnnexRsyncUrl = notempty $ getmaybe "rsyncurl"
, remoteAnnexBupRepo = getmaybe "buprepo"
, remoteAnnexBorgRepo = getmaybe "borgrepo"
, remoteAnnexTahoe = getmaybe "tahoe"
, remoteAnnexBupSplitOptions = getoptions "bup-split-options"
, remoteAnnexDirectory = notempty $ getmaybe "directory"
, remoteAnnexAndroidDirectory = notempty $ getmaybe "androiddirectory"
, remoteAnnexAndroidSerial = notempty $ getmaybe "androidserial"
, remoteAnnexGCrypt = notempty $ getmaybe "gcrypt"
, remoteAnnexGitLFS = getbool "git-lfs" False
, remoteAnnexDdarRepo = getmaybe "ddarrepo"
, remoteAnnexHookType = notempty $ getmaybe "hooktype"
, remoteAnnexExternalType = notempty $ getmaybe "externaltype"
fromMaybe 100 (getmayberead MaxGitBundlesField)
, remoteAnnexAllowEncryptedGitRepo =
getbool "allow-encrypted-gitrepo" False
getbool AllowEncryptedGitRepoField False
, remoteAnnexProxy = getbool ProxyField False
, remoteAnnexProxiedBy = toUUID <$> getmaybe ProxiedByField
, remoteAnnexClusterNode =
(filter isLegalName . words)
<$> getmaybe ClusterNodeField
, remoteAnnexClusterGateway = fromMaybe [] $
(mapMaybe (mkClusterUUID . toUUID) . words)
<$> getmaybe ClusterGatewayField
, remoteUrl =
case Git.Config.getMaybe (remoteConfig remotename "url") r of
case Git.Config.getMaybe (remoteConfig remotename (remoteGitConfigKey UrlField)) r of
Just (ConfigValue b)
| B.null b -> Nothing
| otherwise -> Just (decodeBS b)
_ -> Nothing
, remoteAnnexShell = getmaybe ShellField
, remoteAnnexSshOptions = getoptions SshOptionsField
, remoteAnnexRsyncOptions = getoptions RsyncOptionsField
, remoteAnnexRsyncDownloadOptions = getoptions RsyncDownloadOptionsField
, remoteAnnexRsyncUploadOptions = getoptions RsyncUploadOptionsField
, remoteAnnexRsyncTransport = getoptions RsyncTransportField
, remoteAnnexGnupgOptions = getoptions GnupgOptionsField
, remoteAnnexGnupgDecryptOptions = getoptions GnupgDecryptOptionsField
, remoteAnnexSharedSOPCommand = SOPCmd <$>
notempty (getmaybe SharedSOPCommandField)
, remoteAnnexSharedSOPProfile = SOPProfile <$>
notempty (getmaybe SharedSOPProfileField)
, remoteAnnexRsyncUrl = notempty $ getmaybe RsyncUrlField
, remoteAnnexBupRepo = getmaybe BupRepoField
, remoteAnnexBorgRepo = getmaybe BorgRepoField
, remoteAnnexTahoe = getmaybe TahoeField
, remoteAnnexBupSplitOptions = getoptions BupSplitOptionsField
, remoteAnnexDirectory = notempty $ getmaybe DirectoryField
, remoteAnnexAndroidDirectory = notempty $ getmaybe AndroidDirectoryField
, remoteAnnexAndroidSerial = notempty $ getmaybe AndroidSerialField
, remoteAnnexGCrypt = notempty $ getmaybe GCryptField
, remoteAnnexGitLFS = getbool GitLFSField False
, remoteAnnexDdarRepo = getmaybe DdarRepoField
, remoteAnnexHookType = notempty $ getmaybe HookTypeField
, remoteAnnexExternalType = notempty $ getmaybe ExternalTypeField
}
where
getbool k d = fromMaybe d $ getmaybebool k
getmaybebool k = Git.Config.isTrueFalse' =<< getmaybe' k
getmayberead k = readish =<< getmaybe k
getmaybe = fmap fromConfigValue . getmaybe'
getmaybe' k =
Git.Config.getMaybe (remoteAnnexConfig remotename k) r
<|>
Git.Config.getMaybe (annexConfig k) r
getmaybe' :: RemoteGitConfigField -> Maybe ConfigValue
getmaybe' f =
let k = remoteGitConfigKey f
in Git.Config.getMaybe (remoteAnnexConfig remotename k) r
<|> Git.Config.getMaybe (annexConfig k) r
getoptions k = fromMaybe [] $ words <$> getmaybe k
data RemoteGitConfigField
= CostField
| CostCommandField
| IgnoreField
| IgnoreCommandField
| SyncField
| SyncCommandField
| PullField
| PushField
| ReadOnlyField
| CheckUUIDField
| VerifyField
| TrackingBranchField
| ExportTrackingField
| TrustLevelField
| StartCommandField
| StopCommandField
| SpeculatePresentField
| BareField
| RetryField
| ForwardRetryField
| RetryDelayField
| StallDetectionField
| StallDetectionUploadField
| StallDetectionDownloadField
| BWLimitField
| BWLimitUploadField
| BWLimitDownloadField
| UUIDField
| ConfigUUIDField
| SecurityAllowUnverifiedDownloadsField
| MaxGitBundlesField
| AllowEncryptedGitRepoField
| ProxyField
| ProxiedByField
| ClusterNodeField
| ClusterGatewayField
| UrlField
| ShellField
| SshOptionsField
| RsyncOptionsField
| RsyncDownloadOptionsField
| RsyncUploadOptionsField
| RsyncTransportField
| GnupgOptionsField
| GnupgDecryptOptionsField
| SharedSOPCommandField
| SharedSOPProfileField
| RsyncUrlField
| BupRepoField
| BorgRepoField
| TahoeField
| BupSplitOptionsField
| DirectoryField
| AndroidDirectoryField
| AndroidSerialField
| GCryptField
| GitLFSField
| DdarRepoField
| HookTypeField
| ExternalTypeField
deriving (Enum, Bounded)
remoteGitConfigField :: RemoteGitConfigField -> (UnqualifiedConfigKey, ProxyInherited)
remoteGitConfigField = \case
-- Hard to know the true cost of accessing eg a slow special
-- remote via the proxy. The cost of the proxy is the best guess
-- so do inherit it.
CostField -> inherited "cost"
CostCommandField -> inherited "cost-command"
IgnoreField -> inherited "ignore"
IgnoreCommandField -> inherited "ignore-command"
SyncField -> inherited "sync"
SyncCommandField -> inherited "sync-command"
PullField -> inherited "pull"
PushField -> inherited "push"
ReadOnlyField -> inherited "readonly"
CheckUUIDField -> uninherited "checkuuid"
VerifyField -> inherited "verify"
TrackingBranchField -> uninherited "tracking-branch"
ExportTrackingField -> uninherited "export-tracking"
TrustLevelField -> uninherited "trustlevel"
StartCommandField -> uninherited "start-command"
StopCommandField -> uninherited "stop-command"
SpeculatePresentField -> inherited "speculate-present"
BareField -> inherited "bare"
RetryField -> inherited "retry"
ForwardRetryField -> inherited "forward-retry"
RetryDelayField -> inherited "retrydelay"
StallDetectionField -> inherited "stalldetection"
StallDetectionUploadField -> inherited "stalldetection-upload"
StallDetectionDownloadField -> inherited "stalldetection-download"
BWLimitField -> inherited "bwlimit"
BWLimitUploadField -> inherited "bwlimit-upload"
BWLimitDownloadField -> inherited "bwlimit-upload"
UUIDField -> uninherited "uuid"
ConfigUUIDField -> uninherited "config-uuid"
SecurityAllowUnverifiedDownloadsField -> inherited "security-allow-unverified-downloads"
MaxGitBundlesField -> inherited "max-git-bundles"
AllowEncryptedGitRepoField -> inherited "allow-encrypted-gitrepo"
-- Allow proxy chains.
ProxyField -> inherited "proxy"
ProxiedByField -> uninherited "proxied-by"
ClusterNodeField -> uninherited "cluster-node"
ClusterGatewayField -> uninherited "cluster-gateway"
UrlField -> uninherited "url"
ShellField -> inherited "shell"
SshOptionsField -> inherited "ssh-options"
RsyncOptionsField -> inherited "rsync-options"
RsyncDownloadOptionsField -> inherited "rsync-download-options"
RsyncUploadOptionsField -> inherited "rsync-upload-options"
RsyncTransportField -> inherited "rsync-transport"
GnupgOptionsField -> inherited "gnupg-options"
GnupgDecryptOptionsField -> inherited "gnupg-decrypt-options"
SharedSOPCommandField -> inherited "shared-sop-command"
SharedSOPProfileField -> inherited "shared-sop-profile"
RsyncUrlField -> uninherited "rsyncurl"
BupRepoField -> uninherited "buprepo"
BorgRepoField -> uninherited "borgrepo"
TahoeField -> uninherited "tahoe"
BupSplitOptionsField -> uninherited "bup-split-options"
DirectoryField -> uninherited "directory"
AndroidDirectoryField -> uninherited "androiddirectory"
AndroidSerialField -> uninherited "androidserial"
GCryptField -> uninherited "gcrypt"
GitLFSField -> uninherited "git-lfs"
DdarRepoField -> uninherited "ddarrepo"
HookTypeField -> uninherited "hooktype"
ExternalTypeField -> uninherited "externaltype"
where
inherited f = (f, ProxyInherited True)
uninherited f = (f, ProxyInherited False)
newtype ProxyInherited = ProxyInherited Bool
-- All remote config fields that are inherited from a proxy.
proxyInheritedFields :: [UnqualifiedConfigKey]
proxyInheritedFields =
map fst $
filter (\(_, ProxyInherited p) -> p) $
map remoteGitConfigField [minBound..maxBound]
remoteGitConfigKey :: RemoteGitConfigField -> UnqualifiedConfigKey
remoteGitConfigKey = fst . remoteGitConfigField
notempty :: Maybe String -> Maybe String
notempty Nothing = Nothing
notempty (Just "") = Nothing
@ -513,9 +687,12 @@ dummyRemoteGitConfig = atomically $
type UnqualifiedConfigKey = B.ByteString
annexConfigPrefix :: B.ByteString
annexConfigPrefix = "annex."
{- A global annex setting in git config. -}
annexConfig :: UnqualifiedConfigKey -> ConfigKey
annexConfig key = ConfigKey ("annex." <> key)
annexConfig key = ConfigKey (annexConfigPrefix <> key)
class RemoteNameable r where
getRemoteName :: r -> RemoteName

View file

@ -89,6 +89,12 @@ instance Observable (Maybe a) where
observeBool Nothing = False
observeFailure = Nothing
instance Observable (Either e (Maybe a)) where
observeBool (Left _) = False
observeBool (Right Nothing) = False
observeBool (Right (Just _)) = True
observeFailure = Right Nothing
class Transferrable t where
descTransfrerrable :: t -> Maybe String

View file

@ -1,6 +1,6 @@
{- Simple line-based protocols.
-
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
- Copyright 2013-2024 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
@ -21,6 +21,7 @@ module Utility.SimpleProtocol (
parse3,
parse4,
parse5,
parseList,
dupIoHandles,
getProtocolLine,
) where
@ -111,6 +112,10 @@ parse5 mk s = mk <$> deserialize p1 <*> deserialize p2 <*> deserialize p3 <*> de
splitWord :: String -> (String, String)
splitWord = separate isSpace
{- Only safe to use when the serialization does not include whitespace. -}
parseList :: Serializable p => ([p] -> a) -> Parser a
parseList mk v = mk <$> mapM deserialize (words v)
{- When a program speaks a simple protocol over stdio, any other output
- to stdout (or anything that attempts to read from stdin)
- will mess up the protocol. To avoid that, close stdin,

View file

@ -124,9 +124,16 @@ See [[todo/proving_preferred_content_behavior]].
## rebalancing
In both the 3 of 5 use case and a split brain situation, it's possible for
content to end up not optimally balanced between repositories. git-annex
can be made to operate in a mode where it does additional work to rebalance
repositories.
content to end up not optimally balanced between repositories.
(There are also situations where a cluster node ends up without a copy
of a file that is preferred content, or where adding a copy to a node
would satisfy numcopies. This can happen eg, when a client sends a file
to a single node rather than to the cluster. Rebalancing also will deal
with those.)
git-annex can be made to operate in a mode where it does additional work
to rebalance repositories.
This can be an option like --rebalance, that changes how the preferred content
expression is evaluated. The user can choose where and when to run that.

View file

@ -40,8 +40,8 @@ The server responds with either its own UUID when authentication
is successful. Or, it can fail the authentication, and close the
connection.
AUTH_SUCCESS UUID
AUTH_FAILURE
AUTH-SUCCESS UUID
AUTH-FAILURE
Note that authentication does not guarantee that the client is talking to
who they expect to be talking to. This, and encryption of the connection,
@ -64,6 +64,19 @@ that is less than or equal to the version the client sent:
Now both client and server should use version 1.
## Cluster cycle prevention
In protocol version 2, immediately after VERSION, the
client can send an additional message that is used to
prevent cycles when accessing clusters.
BYPASS UUID1 UUID2 ...
The UUIDs are cluster gateways to avoid connecting to when
serving a cluster.
The server makes no response to this message.
## Binary data
The protocol allows raw binary data to be sent. This is done
@ -117,6 +130,10 @@ To remove a key's content from the server, the client sends:
The server responds with either SUCCESS or FAILURE.
In protocol version 2, the server can optionally reply with SUCCESS-PLUS
or FAILURE-PLUS. Each has a subsequent list of UUIDs of repositories
that the content was removed from.
## Storing content on the server
To store content on the server, the client sends:
@ -132,7 +149,14 @@ spaces, since it's not the last token in the line. Use '%' to indicate
whitespace.)
The server may respond with ALREADY-HAVE if it already
had the conent of that key. Otherwise, it responds with:
had the conent of that key.
In protocol version 2, the server can optionally reply with
ALREADY-HAVE-PLUS. The subsequent list of UUIDs are additional
UUIDs where the content is stored, in addition to the UUID where
the client was going to send it.
Otherwise, it responds with:
PUT-FROM Offset
@ -152,6 +176,9 @@ was being sent.
If the server successfully receives the data and stores the content,
it replies with SUCCESS. Otherwise, FAILURE.
In protocol version 2, the server can optionally reply with SUCCESS-PLUS
and a list of UUIDs where the content was stored.
## Getting content from the server
To get content from the server, the client sends:
@ -192,6 +219,8 @@ its exit code.
CONNECTDONE ExitCode
After that, the server closes the connection.
## Change notification
The client can request to be notified when a ref in

View file

@ -35,7 +35,7 @@ For example (eliding the full HTTP responses, only showing the data):
> Content-Length: ...
>
> AUTH 79a5a1f4-07e8-11ef-873d-97f93ca91925
< AUTH_SUCCESS ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6
< AUTH-SUCCESS ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6
> POST /git-annex HTTP/1.0
> Content-Type: x-git-annex-p2p
@ -80,7 +80,7 @@ correspond to each action in the P2P protocol.
Something like this:
> GET /git-annex/v1/AUTH?clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.0
< AUTH_SUCCESS ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6
< AUTH-SUCCESS ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6
> GET /git-annex/v1/CHECKPRESENT?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.0
> SUCCESS

View file

@ -219,11 +219,6 @@ And, if the proxy repository itself contains the requested key, it can send
it directly. This allows the proxy repository to be primed with frequently
accessed files when it has the space.
(Should uploads check preferred content of the proxy repository and also
store a copy there when allowed? I think this would be ok, so long as when
preferred content is not set, it does not default to storing content
there.)
When a drop is requested from the cluster's UUID, git-annex-shell drops
from all nodes, as well as from the proxy itself. Only indicating success
if it is able to delete all copies from the cluster. This needs
@ -238,6 +233,14 @@ always fail. Also, when constructing a drop proof for a cluster's UUID,
the nodes of that cluster should be omitted, otherwise a drop from the
cluster can lock content on individual nodes, causing the drop to fail.
Moving from a cluster is a special case because it may reduce the number
of copies. So move's `willDropMakeItWorse` check needs to special case
clusters. Since dropping from the cluster may remove content from any of
its nodes, which may include copies on nodes that the local location log does
not know about yet, the special case probably needs to always assume
that dropping from a cluster in a move risks reducing numcopies,
and so only allow it when a drop proof can be constructed.
Some commands like `git-annex whereis` will list content as being stored in
the cluster, as well as on whichever of its nodes, and whereis currently
says "n copies", but since the cluster doesn't count as a copy, that
@ -279,9 +282,9 @@ configuration of the cluster. But the cluster is configured via the
git-annex branch, particularly preferred content, and the proxy log, and
the cluster log.
A user could, for example, make the cluster's frontend want all
content, and so fill up its small disk. They could make a particular node
not want any content. They could remove nodes from the cluster.
A user could, for example, make a small cluster node want all content, and
so fill up its small disk. They could make a particular node not want any
content. They could remove nodes from the cluster.
One way to deal with this is for the cluster to reject git-annex branch
pushes that make such changes. Or only allow them if they are signed with a
@ -296,24 +299,43 @@ A remote will only be treated as a node of a cluster when the git
configuration remote.name.annex-cluster-node is set, which will prevent
creating clusters in places where they are not intended to be.
## distributed clusters
A cluster's nodes may be geographically distributed amoung several
locations, which are effectivly subclusters. To support this, an upload
or removal sent to one frontend proxy of the cluster will be repeated to
other frontend proxies that are remotes of that one and have the cluster's
UUID.
This is better than supporting a cluster that is a node of another cluster,
because rather than a hierarchical structure, this allows for organic
structures of any shape. For example, there could be two frontends to a
cluster, in different locations. An upload to either frontend fans out to
its local nodes as well as over to the other frontend, and to its local
nodes.
This does mean that cycles need to be prevented. See section below.
## speed
A passthrough proxy should be as fast as possible so as not to add overhead
A proxy should be as fast as possible so as not to add overhead
to a file retrieve, store, or checkpresent. This probably means that
it keeps TCP connections open to each host in the cluster. It might use a
it keeps TCP connections open to each host. It might use a
protocol with less overhead than ssh.
In the case of checkpresent, it would be possible for the proxy to not
communicate with the cluster to check that the data is still present on it.
As long as all access is intermediated via the proxy, its git-annex branch
could be relied on to always be correct, in theory. Proving that theory,
making sure to account for all possible race conditions and other scenarios,
would be necessary for such an optimisation.
In the case of checkpresent, it would be possible for the gateway to not
communicate with cluster nodes to check that the data is still present
in the cluster. As long as all access is intermediated via a single gateway,
its git-annex branch could be relied on to always be correct, in theory.
Proving that theory, making sure to account for all possible race conditions
and other scenarios, would be necessary for such an optimisation. This
would not work for multi-gateway clusters unless the gateways were kept in
sync about locations, which they currently are not.
Another way the proxy could speed things up is to cache some subset of
content. Eg, analize what files are typically requested, and store another
copy of those on the proxy. Perhaps prioritize storing smaller files, where
latency tends to swamp transfer speed.
Another way the cluster gateway could speed things up is to cache some
subset of content. Eg, analize what files are typically requested, and
store another copy of those on the proxy. Perhaps prioritize storing
smaller files, where latency tends to swamp transfer speed.
## proxying to special remotes
@ -446,7 +468,7 @@ So overall, it seems better to do proxy-side encryption. But it may be
worth adding a special remote that does its own client-side encryption
in front of the proxy.
## cycles
## cycles of proxies
A repo can advertise that it proxies for a repo which has the same uuid as
itself. Or there can be a larger cycle involving a proxy that proxies to a
@ -454,36 +476,43 @@ proxy, etc.
Since the proxied repo uuid is communicated to git-annex-shell via
--uuid, a repo that advertises proxying for itself will be connected to
with its own uuid. No proxying is done in this case. Same happens with a
larger cycle.
Instantiating remotes needs to identity cycles and break them. Otherwise
it would construct an infinite number of proxied remotes with names
like "foo-foo-foo-foo-..." or "foo-bar-foo-bar-..."
Once `git-annex copy --to proxy` is implemented, and the proxy decides
where to send content that is being sent directly to it, cycles will
become an issue with that as well.
with its own uuid. No proxying is done in that case.
What if repo A is a proxy and has repo B as a remote. Meanwhile, repo B is
a proxy and has repo A as a remote?
a proxy and has repo A as a remote? git-annex-shell on repo A will get
A's uuid, and so will operate on it directly without proxying. So larger
cycles are also not a problem on the proxy side.
An upload to repo A will start by checking if repo B wants the content and if so,
start an upload to repo B. Then the same happens on repo B, leading it to
start an upload to repo A.
On the client side, instantiating remotes needs to identity cycles and
break them. Otherwise it would construct an infinite number of proxied
remotes with names like "foo-foo-foo-foo-..." or "foo-bar-foo-bar-..."
At this point, it might be possible for git-annex to detect the cycle,
if the proxy uses a transfer lock file. If repo B or repo A had some other
remote that is not part of a cycle, they could deposit the upload there and
the upload still succeed. Otherwise the upload would fail, which is
probably the best that can be done with such a broken configuration.
## cycles of cluster proxies
So, it seems like proxies would need to take transfer locks for uploads,
even though the content is being proxied to elsewhere.
If an PUT or REMOVE message is sent to a proxy for a cluster, and that
repository has a remote that is also a proxy for the same cluster,
the message gets repeated on to it. This can lead to cycles, which have to
be broken.
Dropping could have similar cycles with content presence locking, which
needs to be thought through as well. A cycle of the actual dropContent
operation might also be possible.
To break the cycle, extend the P2P protocol with an additional message,
like:
VIA uuid1 uuid2
This indicates to a proxy that the message has been received via the other
listed proxies. It can then avoid repeating the message out via any of
those proxies. When repeating a message out to another proxy, just add
the UUID of the local repository to the list.
This will be an extension to the protocol, but so long as it's added in
the same git-annex version that adds support for proxies, every cluster
proxy will support it.
This avoids cycles, but it does not avoid situations where there are
multiple paths through a proxy network that reach the same node. In such a
situation, a REMOVE might happen twice (no problem) or a PUT be received
twice from different paths (one of them would fail due to the other one
taking the transfer lock).
## exporttree=yes

View file

@ -0,0 +1,44 @@
# NAME
git-annex extendcluster - add an additional gateway to a cluster
# SYNOPSIS
git-annex extendcluster gateway clustername
# DESCRIPTION
This command is used to configure a repository to serve as an additional
gateway to a cluster. It is run in that repository.
The repository this command is run in should have a remote that is a
gateway to the cluster. The `gateway` parameter is the name of that remote.
The `clustername` parameter is the name of the cluster.
The next step after running this command is to configure
any additional cluster nodes that this gateway serves to the cluster,
then run [[git-annex-updatecluster]]. See the documentation of that
command for details about configuring nodes.
After running this command in the new gateway repository, it typically
also needs to be run in the other gateway repositories as well,
after adding the new gateway repository as a remote.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
# SEE ALSO
[[git-annex]](1)
[[git-annex-initcluster]](1)
[[git-annex-updatecluster]](1)
[[git-annex-updateproxy]](1)
<https://git-annex.branchable.com/tips/clusters/>
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -0,0 +1,39 @@
# NAME
git-annex initcluster - initialize a new cluster
# SYNOPSIS
git-annex initcluster name [description]
# DESCRIPTION
This command initializes a new cluster with the specified name. If no
description is provided, one will be set automatically.
This command should be run in the repository that will serve as the gateway
to the cluster.
The next step after running this command is to configure
the cluster nodes, then run [[git-annex-updatecluster]]. See the
documentation of that command for details about configuring nodes.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
# SEE ALSO
[[git-annex]](1)
[[git-annex-updatecluster]](1)
[[git-annex-extendcluster]](1)
[[git-annex-preferred-content]](1)
[[git-annex-updateproxy]](1)
<https://git-annex.branchable.com/tips/clusters/>
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -8,7 +8,7 @@ Each repository has a preferred content setting, which specifies content
that the repository wants to have present. These settings can be configured
using `git annex vicfg` or `git annex wanted`.
They are used by the `--auto` option, by `git annex sync --content`,
and by the git-annex assistant.
by clusters, and by the git-annex assistant.
While preferred content expresses a preference, it can be overridden
by simply using `git annex drop`. On the other hand, required content

View file

@ -9,7 +9,7 @@ git annex required `repository [expression]`
# DESCRIPTION
When run with an expression, configures the content that is required
to be held in the archive.
to be held in the repository.
For example:

View file

@ -86,7 +86,9 @@ first "/~/" or "/~user/" is expanded to the specified home directory.
* --uuid=UUID
git-annex uses this to specify the UUID of the repository it was expecting
git-annex-shell to access, as a sanity check.
git-annex-shell to access. This is both a sanity check, and allows
git-annex shell to proxy access to remotes, when configured
by [[git-annex-update-proxy]].
* Also the [[git-annex-common-options]](1) can be used.

View file

@ -0,0 +1,43 @@
# NAME
git-annex updatecluster - update records of cluster nodes
# SYNOPSIS
git-annex updatecluster
# DESCRIPTION
This command is used to record the nodes of a cluster in the git-annex
branch, and set up proxying to the nodes. It should be run in the
repository that will serve as a gateway to the cluster.
It looks at the git config `remote.name.annex-cluster-node` of
each remote. When that is set to the name of a cluster that has been
initialized with `git-annex initcluster`, the node will be recorded in the
git-annex branch.
To remove a node from a cluster, unset `remote.name.annex-cluster-node`
and run this command.
To add additional gateways to a cluster, after running this command,
use [[git-annex-extendcluster]].
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
# SEE ALSO
[[git-annex]](1)
[[git-annex-initcluster]](1)
[[git-annex-extendcluster]](1)
[[git-annex-updateproxy]](1)
<https://git-annex.branchable.com/tips/clusters/>
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -0,0 +1,44 @@
# NAME
git-annex updateproxy - update records with proxy configuration
# SYNOPSIS
git annex updateproxy
# DESCRIPTION
A git-annex repository can act as a proxy for its remotes. That allows
annexed content to be stored and removed from the proxy's remotes, by
repositories that do not have a direct connection to the remotes.
By default, no proxying is done. To configure the local repository to act
as a proxy for its remote named "foo", run `git config remote.foo.annex-proxy`
true`.
After setting or unsetting `remote.<name>.annex-proxy` git configurations,
run `git-annex updateproxy` to record the proxy configuration in the
git-annex branch. That tells other repositories about the proxy
configuration.
Suppose, for example, that remote "work" has had this command run in
it. Then after pulling from "work", git-annex will know about an
additional remote, "work-foo". That remote will be accessed using "work" as
a proxy.
Proxies can only be accessed via ssh.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
# SEE ALSO
[[git-annex]](1)
[[git-annex-updatecluster]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -9,7 +9,7 @@ git annex wanted `repository [expression]`
# DESCRIPTION
When run with an expression, configures the content that is preferred
to be held in the archive. See [[git-annex-preferred-content]](1)
to be held in the repository. See [[git-annex-preferred-content]](1)
For example:

View file

@ -252,7 +252,6 @@ content from the key-value store.
See [[git-annex-configremote]](1) for details.
* `renameremote`
Renames a special remote.
@ -327,6 +326,31 @@ content from the key-value store.
See [[git-annex-required]](1) for details.
* `initcluster`
Initializes a new cluster.
See [[git-annex-initcluster](1) for details.
* `updatecluster`
Update records of cluster nodes.
See [[git-annex-updatecluster](1) for details.
* `extendcluster`
Adds an additional gateway to a cluster.
See [[git-annex-extendcluster](1) for details.
* `updateproxy`
Update records with proxy configuration.
See [[git-annex-updateproxy](1) for details.
* `schedule repository [expression]`
Get or set scheduled jobs.
@ -1372,6 +1396,15 @@ repository, using [[git-annex-config]]. See its man page for a list.)
set in global git configuration.
For details, see <https://git-annex.branchable.com/tuning/>.
* `annex.cluster.<name>`
This is set to make the repository be a gateway to a cluster.
The value is the cluster UUID. Note that cluster UUIDs are not
the same as repository UUIDs, and a repository UUID cannot be used here.
Usually this is set up by running [[git-annex-initcluster]] or
[[git-annex-extendcluster]].
# CONFIGURATION OF REMOTES
Remotes are configured using these settings in `.git/config`.
@ -1640,6 +1673,38 @@ Remotes are configured using these settings in `.git/config`.
content of any file, even though its normal location tracking does not
indicate that it does. This will cause git-annex to try to get all file
contents from the remote. Can be useful in setting up a caching remote.
* `remote.<name>.annex-proxy`
Set to "true" to make the local repository able to act as a proxy to this
remote.
After configuring this, run [[git-annex-updateproxy](1) to store
the new configuration in the git-annex branch.
* `remote.<name>.annex-proxied-by`
Usually this is used internally, when git-annex sets up proxied remotes,
and will not need to be configured. The value is the UUID of the
git-annex repository that proxies access to this remote.
* `remote.<name>.annex-cluster-node`
Set to the name of a cluster to make this remote be part of
the cluster. Names of multiple clusters can be separated by
whitespace to make a remote be part of more than one cluster.
After configuring this, run [[git-annex-updatecluster](1) to store
the new configuration in the git-annex branch.
* `remote.<name>.annex-cluster-gateway`
Set to the UUID of a cluster that this remote serves as a gateway for.
Multiple UUIDs can be listed, separated by whitespace. When the local
repository is also a gateway for that cluster, it will proxy for the
nodes of the remote gateway.
Usually this is set up by running [[git-annex-extendcluster]].
* `remote.<name>.annex-private`

View file

@ -288,7 +288,7 @@ For example:
These log files store per-remote content identifiers for keys.
A given key may have any number of content identifiers.
The format is a timestamp, followed by the uuid of the remote,
The format is a timestamp, followed by the UUID of the remote,
followed by the content identifiers which are separated by colons.
If a content identifier contains a colon or \r or \n, it will be base64
encoded. Base64 encoded values are indicated by prefixing them with "!".
@ -308,6 +308,33 @@ For example, this logs that a remote has an object stored using both
(When those chunks are removed from the remote, the 9 is changed to 0.)
## `proxy.log`
Used to record what repositories are accessible via a proxy.
Each line starts with a timestamp, then the UUID of the repository
that can serve as a proxy, and then a list of the remotes that it can
proxy to, separated by spaces.
Each remote in the list consists of a repository's UUID,
followed by a colon (`:`) and then a remote name.
For example:
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 26339d22-446b-11e0-9101-002170d25c55:foo c076460c-2290-11ef-be53-b7f0d194c863:bar
## `cluster.log`
Used to record the UUIDs of clusters, and the UUIDs of the nodes
comprising each cluster.
Each line starts with a timestamp, then the UUID the cluster,
followed by a list of the UUIDs of its nodes, separated by spaces.
For example:
1317929100.012345s 5b070cc8-29b8-11ef-80e1-0fd524be241b 5c0c97d2-29b8-11ef-b1d2-5f3d1c80940d 5c40375e-29b8-11ef-814d-872959d2c013
## `schedule.log`
Used to record scheduled events, such as periodic fscks.

View file

@ -4,4 +4,10 @@
* [[how_it_works]]
* [[special_remotes]]
* [[workflows|workflow]]
* [[preferred_content]]
* [[sync]]
### new features
* [[tips/clusters]]
* [[git-remote-annex|tips/storing_a_git_repository_on_any_special_remote]]

217
doc/tips/clusters.mdwn Normal file
View file

@ -0,0 +1,217 @@
A cluster is a collection of git-annex repositories which are combined to
form a single logical repository.
A cluster is accessed via a gateway repository. The gateway is not itself
a node of the cluster.
[[!toc ]]
## using a cluster
To use a cluster, your repository needs to have its gateway configured as a
remote. Clusters can currently only be accessed via ssh. This gateway
remote is added the same as any other remote:
git remote add bigserver me@bigserver:annex
The gateway publishes information about the cluster to the git-annex
branch. So you may need to fetch from it to learn about the cluster:
git fetch bigserver
That will make available an additional remote for the cluster, eg
"bigserver-mycluster", as well as some remotes for each node eg
"bigserver-node1", "bigserver-node2", etc.
You can get files from the cluster without caring which node it comes
from:
$ git-annex get foo --from bigserver-mycluster
copy foo (from bigserver-mycluster...) ok
And you can send files to the cluster, without caring what nodes
they are stored to:
$ git-annex move bar --to bigserver-mycluster
move bar (to bigserver-mycluster...) ok
In fact, a single upload like that can be sent to every node of the cluster
at once, very efficiently.
$ git-annex whereis bar
whereis bar (3 copies)
acae2ff6-6c1e-8bec-b8b9-397a3755f397 -- [bigserver-mycluster]
9f514001-6dc0-4d83-9af3-c64c96626892 -- node 1 [bigserver-node1]
d81e0b28-612e-4d73-a4e6-6dabbb03aba1 -- node 2 [bigserver-node2]
5657baca-2f11-11ef-ae1a-5b68c6321dd9 -- node 3 [bigserver-node3]
Notice that the file is shown as present in the cluster, as well as on
individual nodes. But the cluster itself does not count as a copy of the file,
so the 3 copies are the copies on individual nodes.
Most other git-annex commands that operate on repositories can also operate on
clusters.
A cluster is not a git repository, and so `git pull bigserver-mycluster`
will not work.
## preferred content of clusters
The preferred content of the cluster can be configured. This tells
users what files the cluster as a whole should contain.
To configure the preferred content of a cluster, as well as other related
things like [[groups|git-annex-group]] and [[required_content]], it's easiest
to do the configuration in a repository that has the cluster as a remote.
For example:
$ git-annex wanted bigserver-mycluster standard
$ git-annex group bigserver-mycluster archive
By default, when a file is uploaded to a cluster, it is stored on every node of
the cluster. To control which nodes to store to, the [[preferred_content]] of
each node can be configured.
It's also a good idea to configure the preferred content of the cluster's
gateway. To avoid files redundantly being stored on the gateway
(which remember, is not a node of the cluster), you might make it not want
any files:
$ git-annex wanted bigserver nothing
## setting up a cluster
A new cluster first needs to be initialized. Run [[git-annex-initcluster]] in
the repository that will serve as the cluster's gateway. In the example above,
this was the "bigserver" repository.
$ git-annex initcluster mycluster
Once a cluster is initialized, the next step is to add nodes to it.
To make a remote be a node of the cluster, configure
`git config remote.name.annex-cluster-node`, setting it to the
name of the cluster.
In the example above, the three cluster nodes were configured like this:
$ git remote add node1 /media/disk1/repo
$ git remote add node2 /media/disk2/repo
$ git remote add node3 /media/disk3/repo
$ git config remote.node1.annex-cluster-node mycluster
$ git config remote.node2.annex-cluster-node mycluster
$ git config remote.node3.annex-cluster-node mycluster
Finally, run `git-annex updatecluster` to record the cluster configuration
in the git-annex branch. That tells other repositories about the cluster.
$ git-annex updatecluster
Added node node1 to cluster: mycluster
Added node node2 to cluster: mycluster
Added node node3 to cluster: mycluster
Started proxying for node1
Started proxying for node2
Started proxying for node3
Operations that affect multiple nodes of a cluster can often be sped up by
configuring annex.jobs in the repository that will serve the cluster to
clients. In the example above, the nodes are all disk bound, so operating
on more than one at a time will likely be faster.
$ git config annex.jobs cpus
## adding additional gateways to a cluster
A cluster can have more than one gateway. One way to use this is to
make a cluster that is distributed across several locations.
Suppose you have a datacenter in AMS, and one in NYC. There
will be a gateway in each datacenter which provides access to the nodes
there. And the gateways will relay data between each other as well.
Start by setting up the cluster in Amsterdam. The process is the same
as in the previous section.
AMS$ git-annex initcluster mycluster
AMS$ git remote add node1 /media/disk1/repo
AMS$ git remote add node2 /media/disk2/repo
AMS$ git config remote.node1.annex-cluster-node mycluster
AMS$ git config remote.node2.annex-cluster-node mycluster
AMS$ git-annex updatecluster
AMS$ git config annex.jobs cpus
Now in a clone of the same repository in NYC, add AMS as a git remote
accessed with ssh:
NYC$ git remote add AMS me@amsterdam.example.com:annex
NYC$ git fetch AMS
Setting up the cluster in NYC is different, rather than using
`git-annex initcluster` again (which would make a new, different
cluster), we ask git-annex to extend the cluster from AMS:
NYC$ git-annex extendcluster AMS mycluster
The rest of the setup process for NYC is the same, of course different
nodes are added.
NYC$ git remote add node3 /media/disk3/repo
NYC$ git remote add node4 /media/disk4/repo
NYC$ git config remote.node3.annex-cluster-node mycluster
NYC$ git config remote.node4.annex-cluster-node mycluster
NYC$ git-annex updatecluster
NYC$ git config annex.jobs cpus
Finally, the AMS side of the cluster has to be updated, adding a git remote
for NYC, and extending the cluster to there as well:
AMS$ git remote add NYC me@nyc.example.com:annex
AMS$ git-annex sync NYC
NYC$ git-annex extendcluster NYC mycluster
A user can now add either AMS or NYC as a remote, and will have access
to the entire cluster as either `AMS-mycluster` or `NYC-mycluster`.
user$ git-annex move foo --to AMS-mycluster
move foo (to AMS-mycluster...) ok
Looking at where files end up, all the nodes are visible, not only those
served by the current gateway.
user$ git-annex whereis foo
whereis foo (4 copies)
acfc1cb2-b8d5-8393-b8dc-4a419ea38183 -- cluster mycluster [AMS-mycluster]
11ab09a9-7448-45bd-ab81-3997780d00b3 -- node4 [AMS-NYC-node4]
36197d0e-6d49-4213-8440-71cbb121e670 -- node2 [AMS-node2]
43652651-1efa-442a-8333-eb346db31553 -- node3 [AMS-NYC-node3]
7fb5a77b-77a3-4032-b3e5-536698e308b3 -- node1 [AMS-node1]
ok
Notice that remotes for cluster nodes have names indicating the path through
the cluster used to access them. For example, "AMS-NYC-node3" is accessed via
the AMS gateway, which then relays to NYC where node3 is located.
## considerations for multi-gateway clusters
When a cluster has multiple gateways, nothing keeps the git repositories on
the gateways in sync. A branch pushed to one gateway will not be able to
be pulled from another one. And gateways only learn about the locations of
keys that are uploaded to the cluster via them. So in the example above,
after an upload to AMS-mycluster, NYC-mycluster will only know that the
key is stored in its nodes, but won't know that it's stored in nodes
behind AMS. So, it's best to have a single git repository that is synced
with, or perhaps run [[git-annex-remotedaemon]] on each gateway to keep
its git repository in sync with the other gateways.
Clusters can be constructed with any number of gateways, and any internal
topology of connections between gateways. But there must always be a path
from any gateway to all nodes of the cluster, otherwise a key won't
be able to be stored from, or retrieved from some nodes.
It's best to avoid there being multiple paths to a node that go via
different gateways, since all paths will be tried in parallel when eg,
uploading a key to the cluster.
A breakdown in communication between gateways will temporarily split the
cluster. When communication resumes, some keys may need to be copied to
additional nodes.

View file

@ -11,7 +11,7 @@ repositories.
Joey has received funding to work on this.
Planned schedule of work:
* June: git-annex proxy
* June: git-annex proxies and clusters
* July, part 1: git-annex proxy support for exporttree
* July, part 2: p2p protocol over http
* August: balanced preferred content
@ -24,7 +24,49 @@ Planned schedule of work:
In development on the `proxy` branch.
For June's work on [[design/passthrough_proxy]], implementation plan:
For June's work on [[design/passthrough_proxy]], remaining todos:
* Since proxying to special remotes is not supported yet, and won't be for
the first release, make it fail in a reasonable way.
- or -
* Proxying for special remotes.
Including encryption and chunking. See design for issues.
# items deferred until later for [[design/passthrough_proxy]]
* Indirect uploads when proxying for special remote
(to be considered). See design.
* Getting a key from a cluster currently picks from amoung
the lowest cost remotes at random. This could be smarter,
eg prefer to avoid using remotes that are doing other transfers at the
same time.
* The cost of a proxied node that is accessed via an intermediate gateway
is currently the same as a node accessed via the cluster gateway.
To fix this, there needs to be some way to tell how many hops through
gateways it takes to get to a node. Currently the only way is to
guess based on number of dashes in the node name, which is not satisfying.
Even counting hops is not very satisfying, one cluster gateway could
be much more expensive to traverse than another one.
If seriously tackling this, it might be worth making enough information
available to use spanning tree protocol for routing inside clusters.
* Optimise proxy speed. See design for ideas.
* Use `sendfile()` to avoid data copying overhead when
`receiveBytes` is being fed right into `sendBytes`.
Library to use:
<https://hackage.haskell.org/package/hsyscall-0.4/docs/System-Syscall.html>
* Support using a proxy when its url is a P2P address.
(Eg tor-annex remotes.)
# completed items for June's work on [[design/passthrough_proxy]]:
* UUID discovery via git-annex branch. Add a log file listing UUIDs
accessible via proxy UUIDs. It also will contain the names
@ -40,7 +82,7 @@ For June's work on [[design/passthrough_proxy]], implementation plan:
* Proxy should update location tracking information for proxied remotes,
so it is available to other users who sync with it. (done)
* Implement `git-annex updatecluster` command (done)
* Implement `git-annex initcluster` and `git-annex updatecluster` commands (done)
* Implement cluster UUID insertation on location log load, and removal
on location log store. (done)
@ -48,66 +90,39 @@ For June's work on [[design/passthrough_proxy]], implementation plan:
* Omit cluster UUIDs when constructing drop proofs, since lockcontent will
always fail on a cluster. (done)
* Don't count cluster UUID as a copy. (done)
* Don't count cluster UUID as a copy in numcopies checking etc. (done)
* Tab complete proxied remotes and clusters in eg --from option. (done)
* Getting a key from a cluster should proxy from one of the nodes that has
it. (done)
* Getting a key from a cluster currently always selects the lowest cost
remote, and always the same remote if cost is the same. Should
round-robin amoung remotes, and prefer to avoid using remotes that
other git-annex processes are currently using.
* Implement upload with fanout and reporting back additional UUIDs over P2P
protocol. (done, but need to check for fencepost errors on resume of
incomplete upload with remotes at different points)
* On upload to cluster, send to nodes where it's preferred content, and not
to other nodes.
* Implement upload with fanout to multiple cluster nodes and reporting back
additional UUIDs over P2P protocol. (done)
* Implement cluster drops, trying to remove from all nodes, and returning
which UUIDs it was dropped from.
which UUIDs it was dropped from. (done)
Problem: May lock content on cluster
nodes to satisfy numcopies (rather than locking elsewhere) and so not be
able to drop from nodes. Avoid using cluster nodes when constructing drop
proof for cluster.
* `git-annex testremote` works against proxied remote and cluster. (done)
Problem: When nodes are special remotes, may
treat nodes as copies while dropping from cluster, and so violate
numcopies. (But not mincopies.)
* Avoid `git-annex sync --content` etc from operating on cluster nodes by
default since syncing with a cluster implicitly syncs with its nodes. (done)
Problem: `move --from cluster` in "does this make it worse"
check may fail to realize that dropping from multiple nodes does in fact
make it worse.
* On upload to cluster, send to nodes where its preferred content, and not
to other nodes. (done)
* On upload to a cluster, as well as fanout to nodes, if the key is
preferred content of the proxy repository, store it there.
(But not when preferred content is not configured.)
And on download from a cluster, if the proxy repository has the content,
get it from there to avoid the overhead of proxying to a node.
* Support annex.jobs for clusters. (done)
* Basic proxying to special remote support (non-streaming).
* Add `git-annex extendcluster` command and extend `git-annex updatecluster`
to support clusters with multiple gateways. (done)
* Support proxies-of-proxies better, eg foo-bar-baz.
Currently, it does work, but have to run `git-annex updateproxy`
on foo in order for it to notice the bar-baz proxied remote exists,
and record it as foo-bar-baz. Make it skip recording proxies of
proxies like that, and instead automatically generate those from the log.
(With cycle prevention there of course.)
* Support proxying for a remote that is proxied by another gateway of
a cluster. (done)
* Cycle prevention including cluster-in-cluster cycles. See design.
* Support distributed clusters: Make a proxy for a cluster repeat
protocol messages on to any remotes that have the same UUID as
the cluster. Needs extension to P2P protocol to avoid cycles.
(done)
* Optimise proxy speed. See design for ideas.
* Use `sendfile()` to avoid data copying overhead when
`receiveBytes` is being fed right into `sendBytes`.
* Encryption and chunking. See design for issues.
* Indirect uploads (to be considered). See design.
* Support using a proxy when its url is a P2P address.
(Eg tor-annex remotes.)
* Proxied cluster nodes should have slightly higher cost than the cluster
gateway. (done)

View file

@ -6,7 +6,7 @@ remotes.
So this todo remains open, but is now only concerned with
streaming an object that is being received from one remote out to another
remote without first needing to buffer the whole object on disk.
repository without first needing to buffer the whole object on disk.
git-annex's remote interface does not currently support that.
`retrieveKeyFile` stores the object into a file. And `storeKey`
@ -27,3 +27,7 @@ Recieving to a file, and sending from the same file as it grows is one
possibility, since that would handle buffering, and it might avoid needing
to change interfaces as much. It would still need a new interface since the
current one does not guarantee the file is written in-order.
A fifo is a possibility, but would certianly not work with remotes
that don't write to the file in-order. Also resuming a download would not
work with a fifo, the sending remote wouldn't know where to resume from.

View file

@ -508,6 +508,7 @@ Executable git-annex
Annex.ChangedRefs
Annex.CheckAttr
Annex.CheckIgnore
Annex.Cluster
Annex.Common
Annex.Concurrent
Annex.Concurrent.Utility
@ -549,6 +550,7 @@ Executable git-annex
Annex.Path
Annex.Perms
Annex.PidLock
Annex.Proxy
Annex.Queue
Annex.ReplaceFile
Annex.RemoteTrackingBranch
@ -556,6 +558,7 @@ Executable git-annex
Annex.SpecialRemote.Config
Annex.Ssh
Annex.StallDetection
Annex.Startup
Annex.TaggedPush
Annex.Tmp
Annex.Transfer
@ -635,6 +638,7 @@ Executable git-annex
Command.EnableRemote
Command.EnableTor
Command.ExamineKey
Command.ExtendCluster
Command.Expire
Command.Export
Command.FilterBranch
@ -658,6 +662,7 @@ Executable git-annex
Command.Indirect
Command.Info
Command.Init
Command.InitCluster
Command.InitRemote
Command.Inprogress
Command.List
@ -720,6 +725,8 @@ Executable git-annex
Command.UnregisterUrl
Command.Untrust
Command.Unused
Command.UpdateCluster
Command.UpdateProxy
Command.Upgrade
Command.VAdd
Command.VCycle
@ -814,6 +821,8 @@ Executable git-annex
Logs.AdjustedBranchUpdate
Logs.Chunk
Logs.Chunk.Pure
Logs.Cluster
Logs.Cluster.Basic
Logs.Config
Logs.ContentIdentifier
Logs.ContentIdentifier.Pure
@ -838,6 +847,7 @@ Executable git-annex
Logs.PreferredContent.Raw
Logs.Presence
Logs.Presence.Pure
Logs.Proxy
Logs.Remote
Logs.Remote.Pure
Logs.RemoteState
@ -868,6 +878,7 @@ Executable git-annex
P2P.Auth
P2P.IO
P2P.Protocol
P2P.Proxy
Remote
Remote.Adb
Remote.BitTorrent
@ -930,6 +941,7 @@ Executable git-annex
Types.BranchState
Types.CatFileHandles
Types.CleanupActions
Types.Cluster
Types.Command
Types.Concurrency
Types.Creds