Merge branch 'proxy'

This commit is contained in:
Joey Hess 2024-06-27 15:43:45 -04:00
commit c3f88923c0
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
78 changed files with 3145 additions and 448 deletions

View file

@ -74,6 +74,7 @@ import Types.CatFileHandles
import Types.RemoteConfig import Types.RemoteConfig
import Types.TransferrerPool import Types.TransferrerPool
import Types.VectorClock import Types.VectorClock
import Types.Cluster
import Annex.VectorClock.Utility import Annex.VectorClock.Utility
import Annex.Debug.Utility import Annex.Debug.Utility
import qualified Database.Keys.Handle as Keys import qualified Database.Keys.Handle as Keys
@ -194,6 +195,7 @@ data AnnexState = AnnexState
, preferredcontentmap :: Maybe (FileMatcherMap Annex) , preferredcontentmap :: Maybe (FileMatcherMap Annex)
, requiredcontentmap :: Maybe (FileMatcherMap Annex) , requiredcontentmap :: Maybe (FileMatcherMap Annex)
, remoteconfigmap :: Maybe (M.Map UUID RemoteConfig) , remoteconfigmap :: Maybe (M.Map UUID RemoteConfig)
, clusters :: Maybe Clusters
, forcetrust :: TrustMap , forcetrust :: TrustMap
, trustmap :: Maybe TrustMap , trustmap :: Maybe TrustMap
, groupmap :: Maybe GroupMap , groupmap :: Maybe GroupMap
@ -213,6 +215,7 @@ data AnnexState = AnnexState
, urloptions :: Maybe UrlOptions , urloptions :: Maybe UrlOptions
, insmudgecleanfilter :: Bool , insmudgecleanfilter :: Bool
, getvectorclock :: IO CandidateVectorClock , getvectorclock :: IO CandidateVectorClock
, proxyremote :: Maybe (Either ClusterUUID (Types.Remote.RemoteA Annex))
} }
newAnnexState :: GitConfig -> Git.Repo -> IO AnnexState newAnnexState :: GitConfig -> Git.Repo -> IO AnnexState
@ -247,6 +250,7 @@ newAnnexState c r = do
, preferredcontentmap = Nothing , preferredcontentmap = Nothing
, requiredcontentmap = Nothing , requiredcontentmap = Nothing
, remoteconfigmap = Nothing , remoteconfigmap = Nothing
, clusters = Nothing
, forcetrust = M.empty , forcetrust = M.empty
, trustmap = Nothing , trustmap = Nothing
, groupmap = Nothing , groupmap = Nothing
@ -266,6 +270,7 @@ newAnnexState c r = do
, urloptions = Nothing , urloptions = Nothing
, insmudgecleanfilter = False , insmudgecleanfilter = False
, getvectorclock = vc , getvectorclock = vc
, proxyremote = Nothing
} }
{- Makes an Annex state object for the specified git repo. {- Makes an Annex state object for the specified git repo.
@ -423,6 +428,7 @@ changeGitRepo r = do
{ repo = r' { repo = r'
, gitconfig = gitconfigadjuster $ , gitconfig = gitconfigadjuster $
extractGitConfig FromGitConfig r' extractGitConfig FromGitConfig r'
, gitremotes = Nothing
} }
{- Gets the RemoteGitConfig from a remote, given the Git.Repo for that {- Gets the RemoteGitConfig from a remote, given the Git.Repo for that

View file

@ -5,12 +5,9 @@
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
{-# LANGUAGE CPP #-}
module Annex.Action ( module Annex.Action (
action, action,
verifiedAction, verifiedAction,
startup,
quiesce, quiesce,
stopCoProcesses, stopCoProcesses,
) where ) where
@ -27,11 +24,6 @@ import Annex.CheckIgnore
import Annex.TransferrerPool import Annex.TransferrerPool
import qualified Database.Keys import qualified Database.Keys
#ifndef mingw32_HOST_OS
import Control.Concurrent.STM
import System.Posix.Signals
#endif
{- Runs an action that may throw exceptions, catching and displaying them. -} {- Runs an action that may throw exceptions, catching and displaying them. -}
action :: Annex () -> Annex Bool action :: Annex () -> Annex Bool
action a = tryNonAsync a >>= \case action a = tryNonAsync a >>= \case
@ -47,34 +39,6 @@ verifiedAction a = tryNonAsync a >>= \case
warning (UnquotedString (show e)) warning (UnquotedString (show e))
return (False, UnVerified) return (False, UnVerified)
{- Actions to perform each time ran. -}
startup :: Annex ()
startup = do
#ifndef mingw32_HOST_OS
av <- Annex.getRead Annex.signalactions
let propagate sig = liftIO $ installhandleronce sig av
propagate sigINT
propagate sigQUIT
propagate sigTERM
propagate sigTSTP
propagate sigCONT
propagate sigHUP
-- sigWINCH is not propagated; it should not be needed,
-- and the concurrent-output library installs its own signal
-- handler for it.
-- sigSTOP and sigKILL cannot be caught, so will not be propagated.
where
installhandleronce sig av = void $
installHandler sig (CatchOnce (gotsignal sig av)) Nothing
gotsignal sig av = do
mapM_ (\a -> a (fromIntegral sig)) =<< atomically (readTVar av)
raiseSignal sig
installhandleronce sig av
#else
return ()
#endif
{- Rn all cleanup actions, save all state, stop all long-running child {- Rn all cleanup actions, save all state, stop all long-running child
- processes. - processes.
- -

167
Annex/Cluster.hs Normal file
View file

@ -0,0 +1,167 @@
{- clusters
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE RankNTypes, OverloadedStrings #-}
module Annex.Cluster where
import Annex.Common
import qualified Annex
import Types.Cluster
import Logs.Cluster
import P2P.Proxy
import P2P.Protocol
import P2P.IO
import Annex.Proxy
import Annex.UUID
import Logs.Location
import Logs.PreferredContent
import Types.Command
import Remote.List
import qualified Remote
import qualified Types.Remote as Remote
import qualified Data.Map as M
import qualified Data.Set as S
import System.Random
{- Proxy to a cluster. -}
proxyCluster
:: ClusterUUID
-> CommandPerform
-> ServerMode
-> ClientSide
-> (forall a. ((a -> CommandPerform) -> Annex (Either ProtoFailure a) -> CommandPerform))
-> CommandPerform
proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do
getClientProtocolVersion (fromClusterUUID clusteruuid) clientside
withclientversion protoerrhandler
where
proxymethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing
, addedContent = \u k -> logChange k u InfoPresent
}
withclientversion (Just (clientmaxversion, othermsg)) = do
-- The protocol versions supported by the nodes are not
-- known at this point, and would be too expensive to
-- determine. Instead, pick the newest protocol version
-- that we and the client both speak. The proxy code
-- checks protocol versions when operating on multiple
-- nodes, and allows nodes to have different protocol
-- versions.
let protocolversion = min maxProtocolVersion clientmaxversion
sendClientProtocolVersion clientside othermsg protocolversion
(getclientbypass protocolversion) protoerrhandler
withclientversion Nothing = proxydone
getclientbypass protocolversion othermsg =
getClientBypass clientside protocolversion othermsg
(withclientbypass protocolversion) protoerrhandler
withclientbypass protocolversion (bypassuuids, othermsg) = do
selectnode <- clusterProxySelector clusteruuid protocolversion bypassuuids
concurrencyconfig <- getConcurrencyConfig
proxy proxydone proxymethods servermode clientside
(fromClusterUUID clusteruuid)
selectnode concurrencyconfig protocolversion
othermsg protoerrhandler
clusterProxySelector :: ClusterUUID -> ProtocolVersion -> Bypass -> Annex ProxySelector
clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do
nodeuuids <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs)
<$> getClusters
myclusters <- annexClusters <$> Annex.getGitConfig
allremotes <- concat . Remote.byCost <$> remoteList
hereu <- getUUID
let bypass' = S.insert hereu bypass
let clusterremotes = filter (isnode bypass' allremotes nodeuuids myclusters) allremotes
fastDebug "Annex.Cluster" $ unwords
[ "cluster gateway at", fromUUID hereu
, "connecting to", show (map Remote.name clusterremotes)
, "bypass", show (S.toList bypass)
]
nodes <- mapM (proxySshRemoteSide protocolversion (Bypass bypass')) clusterremotes
return $ ProxySelector
{ proxyCHECKPRESENT = nodecontaining nodes
, proxyGET = nodecontaining nodes
-- The key is sent to multiple nodes at the same time,
-- skipping nodes where it's known/expected to already be
-- present to avoid needing to connect to those, and
-- skipping nodes where it's not preferred content.
, proxyPUT = \af k -> do
locs <- S.fromList <$> loggedLocations k
let l = filter (flip S.notMember locs . Remote.uuid . remote) nodes
l' <- filterM (\n -> isPreferredContent (Just (Remote.uuid (remote n))) mempty (Just k) af True) l
-- PUT to no nodes doesn't work, so fall
-- back to all nodes.
return $ nonempty [l', l] nodes
-- Remove the key from every node that contains it.
-- But, since it's possible the location log for some nodes
-- could be out of date, actually try to remove from every
-- node.
, proxyREMOVE = const (pure nodes)
-- Content is not locked on the cluster as a whole,
-- instead it can be locked on individual nodes that are
-- proxied to the client.
, proxyLOCKCONTENT = const (pure Nothing)
, proxyUNLOCKCONTENT = pure Nothing
}
where
-- Nodes of the cluster have remote.name.annex-cluster-node
-- containing its name.
--
-- Or, a node can be the cluster proxied by another gateway.
isnode bypass' rs nodeuuids myclusters r =
case remoteAnnexClusterNode (Remote.gitconfig r) of
Just names
| any (isclustername myclusters) names ->
flip S.member nodeuuids $
ClusterNodeUUID $ Remote.uuid r
| otherwise -> False
Nothing -> isclusterviagateway bypass' rs r
-- Is this remote the same cluster, proxied via another gateway?
--
-- Must avoid bypassed gateways to prevent cycles.
isclusterviagateway bypass' rs r =
case mkClusterUUID (Remote.uuid r) of
Just cu | cu == clusteruuid ->
case remoteAnnexProxiedBy (Remote.gitconfig r) of
Just proxyuuid | proxyuuid `S.notMember` bypass' ->
not $ null $
filter isclustergateway $
filter (\p -> Remote.uuid p == proxyuuid) rs
_ -> False
_ -> False
isclustergateway r = any (== clusteruuid) $
remoteAnnexClusterGateway $ Remote.gitconfig r
isclustername myclusters name =
M.lookup name myclusters == Just clusteruuid
nodecontaining nodes k = do
locs <- S.fromList <$> loggedLocations k
case filter (flip S.member locs . Remote.uuid . remote) nodes of
[] -> return Nothing
(node:[]) -> return (Just node)
(node:rest) ->
-- The list of nodes is ordered by cost.
-- Use any of the ones with equally low
-- cost.
let lowestcost = Remote.cost (remote node)
samecost = node : takeWhile (\n -> Remote.cost (remote n) == lowestcost) rest
in do
n <- getStdRandom $
randomR (0, length samecost - 1)
return (Just (samecost !! n))
nonempty (l:ls) fallback
| null l = nonempty ls fallback
| otherwise = l
nonempty [] fallback = fallback

View file

@ -58,7 +58,7 @@ handleDropsFrom locs rs reason fromhere key afile si preverified runner = do
getcopies fs = do getcopies fs = do
(untrusted, have) <- trustPartition UnTrusted locs (untrusted, have) <- trustPartition UnTrusted locs
(numcopies, mincopies) <- getSafestNumMinCopies' afile key fs (numcopies, mincopies) <- getSafestNumMinCopies' afile key fs
return (length have, numcopies, mincopies, S.fromList untrusted) return (numCopiesCount have, numcopies, mincopies, S.fromList untrusted)
{- Check that we have enough copies still to drop the content. {- Check that we have enough copies still to drop the content.
- When the remote being dropped from is untrusted, it was not - When the remote being dropped from is untrusted, it was not

View file

@ -103,8 +103,8 @@ genDescription Nothing = do
Right username -> [username, at, hostname, ":", reldir] Right username -> [username, at, hostname, ":", reldir]
Left _ -> [hostname, ":", reldir] Left _ -> [hostname, ":", reldir]
initialize :: Maybe String -> Maybe RepoVersion -> Annex () initialize :: Annex () -> Maybe String -> Maybe RepoVersion -> Annex ()
initialize mdescription mversion = checkInitializeAllowed $ \initallowed -> do initialize startupannex mdescription mversion = checkInitializeAllowed $ \initallowed -> do
{- Has to come before any commits are made as the shared {- Has to come before any commits are made as the shared
- clone heuristic expects no local objects. -} - clone heuristic expects no local objects. -}
sharedclone <- checkSharedClone sharedclone <- checkSharedClone
@ -114,14 +114,14 @@ initialize mdescription mversion = checkInitializeAllowed $ \initallowed -> do
ensureCommit $ Annex.Branch.create ensureCommit $ Annex.Branch.create
prepUUID prepUUID
initialize' mversion initallowed initialize' startupannex mversion initallowed
initSharedClone sharedclone initSharedClone sharedclone
u <- getUUID u <- getUUID
when (u == NoUUID) $ when (u == NoUUID) $
giveup "Failed to read annex.uuid from git config after setting it. This should never happen. Please file a bug report." giveup "Failed to read annex.uuid from git config after setting it. This should never happen. Please file a bug report."
{- Avoid overwriting existing description with a default {- Avoid overwriting existing description with a default
- description. -} - description. -}
whenM (pure (isJust mdescription) <||> not . M.member u <$> uuidDescMapRaw) $ whenM (pure (isJust mdescription) <||> not . M.member u <$> uuidDescMapRaw) $
@ -129,8 +129,8 @@ initialize mdescription mversion = checkInitializeAllowed $ \initallowed -> do
-- Everything except for uuid setup, shared clone setup, and initial -- Everything except for uuid setup, shared clone setup, and initial
-- description. -- description.
initialize' :: Maybe RepoVersion -> InitializeAllowed -> Annex () initialize' :: Annex () -> Maybe RepoVersion -> InitializeAllowed -> Annex ()
initialize' mversion _initallowed = do initialize' startupannex mversion _initallowed = do
checkLockSupport checkLockSupport
checkFifoSupport checkFifoSupport
checkCrippledFileSystem checkCrippledFileSystem
@ -162,6 +162,10 @@ initialize' mversion _initallowed = do
createInodeSentinalFile False createInodeSentinalFile False
fixupUnusualReposAfterInit fixupUnusualReposAfterInit
-- This is usually run at Annex startup, but when git-annex was
-- not already initialized, it will not yet have run.
startupannex
uninitialize :: Annex () uninitialize :: Annex ()
uninitialize = do uninitialize = do
-- Remove hooks that are written when initializing. -- Remove hooks that are written when initializing.
@ -203,12 +207,12 @@ getInitializedVersion = do
- -
- Checks repository version and handles upgrades too. - Checks repository version and handles upgrades too.
-} -}
ensureInitialized :: Annex [Remote] -> Annex () ensureInitialized :: Annex () -> Annex [Remote] -> Annex ()
ensureInitialized remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade ensureInitialized startupannex remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade
where where
needsinit = ifM autoInitializeAllowed needsinit = ifM autoInitializeAllowed
( do ( do
tryNonAsync (initialize Nothing Nothing) >>= \case tryNonAsync (initialize startupannex Nothing Nothing) >>= \case
Right () -> noop Right () -> noop
Left e -> giveup $ show e ++ "\n" ++ Left e -> giveup $ show e ++ "\n" ++
"git-annex: automatic initialization failed due to above problems" "git-annex: automatic initialization failed due to above problems"
@ -256,15 +260,16 @@ guardSafeToUseRepo a = ifM (inRepo Git.Config.checkRepoConfigInaccessible)
- -
- Checks repository version and handles upgrades too. - Checks repository version and handles upgrades too.
-} -}
autoInitialize :: Annex [Remote] -> Annex () autoInitialize :: Annex () -> Annex [Remote] -> Annex ()
autoInitialize = autoInitialize' autoInitializeAllowed autoInitialize = autoInitialize' autoInitializeAllowed
autoInitialize' :: Annex Bool -> Annex [Remote] -> Annex () autoInitialize' :: Annex Bool -> Annex () -> Annex [Remote] -> Annex ()
autoInitialize' check remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade autoInitialize' check startupannex remotelist =
getInitializedVersion >>= maybe needsinit checkUpgrade
where where
needsinit = needsinit =
whenM (initializeAllowed <&&> check) $ do whenM (initializeAllowed <&&> check) $ do
initialize Nothing Nothing initialize startupannex Nothing Nothing
autoEnableSpecialRemotes remotelist autoEnableSpecialRemotes remotelist
{- Checks if a repository is initialized. Does not check version for upgrade. -} {- Checks if a repository is initialized. Does not check version for upgrade. -}

View file

@ -1,6 +1,6 @@
{- git-annex numcopies configuration and checking {- git-annex numcopies configuration and checking
- -
- Copyright 2014-2021 Joey Hess <id@joeyh.name> - Copyright 2014-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -20,6 +20,8 @@ module Annex.NumCopies (
defaultNumCopies, defaultNumCopies,
numCopiesCheck, numCopiesCheck,
numCopiesCheck', numCopiesCheck',
numCopiesCheck'',
numCopiesCount,
verifyEnoughCopiesToDrop, verifyEnoughCopiesToDrop,
verifiableCopies, verifiableCopies,
UnVerifiedCopy(..), UnVerifiedCopy(..),
@ -30,6 +32,7 @@ import qualified Annex
import Types.NumCopies import Types.NumCopies
import Logs.NumCopies import Logs.NumCopies
import Logs.Trust import Logs.Trust
import Logs.Cluster
import Annex.CheckAttr import Annex.CheckAttr
import qualified Remote import qualified Remote
import qualified Types.Remote as Remote import qualified Types.Remote as Remote
@ -39,8 +42,10 @@ import Annex.CatFile
import qualified Database.Keys import qualified Database.Keys
import Control.Exception import Control.Exception
import qualified Control.Monad.Catch as M import qualified Control.Monad.Catch as MC
import Data.Typeable import Data.Typeable
import qualified Data.Set as S
import qualified Data.Map as M
defaultNumCopies :: NumCopies defaultNumCopies :: NumCopies
defaultNumCopies = configuredNumCopies 1 defaultNumCopies = configuredNumCopies 1
@ -197,12 +202,24 @@ numCopiesCheck file key vs = do
numCopiesCheck' :: RawFilePath -> (Int -> Int -> v) -> [UUID] -> Annex v numCopiesCheck' :: RawFilePath -> (Int -> Int -> v) -> [UUID] -> Annex v
numCopiesCheck' file vs have = do numCopiesCheck' file vs have = do
needed <- fromNumCopies . fst <$> getFileNumMinCopies file needed <- fst <$> getFileNumMinCopies file
let nhave = length have let nhave = numCopiesCount have
explain (ActionItemTreeFile file) $ Just $ UnquotedString $ explain (ActionItemTreeFile file) $ Just $ UnquotedString $
"has " ++ show nhave ++ " " ++ pluralCopies nhave ++ "has " ++ show nhave ++ " " ++ pluralCopies nhave ++
", and the configured annex.numcopies is " ++ show needed ", and the configured annex.numcopies is " ++ show needed
return $ nhave `vs` needed return $ numCopiesCheck'' have vs needed
numCopiesCheck'' :: [UUID] -> (Int -> Int -> v) -> NumCopies -> v
numCopiesCheck'' have vs needed =
let nhave = numCopiesCount have
in nhave `vs` fromNumCopies needed
{- When a key is logged as present in a node of the cluster,
- the cluster's UUID will also be in the list, but is not a
- distinct copy.
-}
numCopiesCount :: [UUID] -> Int
numCopiesCount = length . filter (not . isClusterUUID)
data UnVerifiedCopy = UnVerifiedRemote Remote | UnVerifiedHere data UnVerifiedCopy = UnVerifiedRemote Remote | UnVerifiedHere
deriving (Ord, Eq) deriving (Ord, Eq)
@ -214,6 +231,7 @@ data UnVerifiedCopy = UnVerifiedRemote Remote | UnVerifiedHere
verifyEnoughCopiesToDrop verifyEnoughCopiesToDrop
:: String -- message to print when there are no known locations :: String -- message to print when there are no known locations
-> Key -> Key
-> Maybe UUID -- repo dropping from
-> Maybe ContentRemovalLock -> Maybe ContentRemovalLock
-> NumCopies -> NumCopies
-> MinCopies -> MinCopies
@ -223,14 +241,14 @@ verifyEnoughCopiesToDrop
-> (SafeDropProof -> Annex a) -- action to perform the drop -> (SafeDropProof -> Annex a) -- action to perform the drop
-> Annex a -- action to perform when unable to drop -> Annex a -- action to perform when unable to drop
-> Annex a -> Annex a
verifyEnoughCopiesToDrop nolocmsg key removallock neednum needmin skip preverified tocheck dropaction nodropaction = verifyEnoughCopiesToDrop nolocmsg key dropfrom removallock neednum needmin skip preverified tocheck dropaction nodropaction =
helper [] [] preverified (nub tocheck) [] helper [] [] preverified (nub tocheck) []
where where
helper bad missing have [] lockunsupported = helper bad missing have [] lockunsupported =
liftIO (mkSafeDropProof neednum needmin have removallock) >>= \case liftIO (mkSafeDropProof neednum needmin have removallock) >>= \case
Right proof -> dropaction proof Right proof -> dropaction proof
Left stillhave -> do Left stillhave -> do
notEnoughCopies key neednum needmin stillhave (skip++missing) bad nolocmsg lockunsupported notEnoughCopies key dropfrom neednum needmin stillhave (skip++missing) bad nolocmsg lockunsupported
nodropaction nodropaction
helper bad missing have (c:cs) lockunsupported helper bad missing have (c:cs) lockunsupported
| isSafeDrop neednum needmin have removallock = | isSafeDrop neednum needmin have removallock =
@ -239,12 +257,17 @@ verifyEnoughCopiesToDrop nolocmsg key removallock neednum needmin skip preverifi
Left stillhave -> helper bad missing stillhave (c:cs) lockunsupported Left stillhave -> helper bad missing stillhave (c:cs) lockunsupported
| otherwise = case c of | otherwise = case c of
UnVerifiedHere -> lockContentShared key contverified UnVerifiedHere -> lockContentShared key contverified
UnVerifiedRemote r -> checkremote r contverified $ UnVerifiedRemote r
let lockunsupported' = r : lockunsupported -- Skip cluster uuids because locking is
in Remote.hasKey r key >>= \case -- not supported with them, instead will
Right True -> helper bad missing (mkVerifiedCopy RecentlyVerifiedCopy r : have) cs lockunsupported' -- lock individual nodes.
Left _ -> helper (r:bad) missing have cs lockunsupported' | isClusterUUID (Remote.uuid r) -> helper bad missing have cs lockunsupported
Right False -> helper bad (Remote.uuid r:missing) have cs lockunsupported' | otherwise -> checkremote r contverified $
let lockunsupported' = r : lockunsupported
in Remote.hasKey r key >>= \case
Right True -> helper bad missing (mkVerifiedCopy RecentlyVerifiedCopy r : have) cs lockunsupported'
Left _ -> helper (r:bad) missing have cs lockunsupported'
Right False -> helper bad (Remote.uuid r:missing) have cs lockunsupported'
where where
contverified vc = helper bad missing (vc : have) cs lockunsupported contverified vc = helper bad missing (vc : have) cs lockunsupported
@ -264,11 +287,11 @@ verifyEnoughCopiesToDrop nolocmsg key removallock neednum needmin skip preverifi
-- of exceptions by using DropException. -- of exceptions by using DropException.
let a = lockcontent key $ \v -> let a = lockcontent key $ \v ->
cont v `catchNonAsync` (throw . DropException) cont v `catchNonAsync` (throw . DropException)
a `M.catches` a `MC.catches`
[ M.Handler (\ (e :: AsyncException) -> throwM e) [ MC.Handler (\ (e :: AsyncException) -> throwM e)
, M.Handler (\ (e :: SomeAsyncException) -> throwM e) , MC.Handler (\ (e :: SomeAsyncException) -> throwM e)
, M.Handler (\ (DropException e') -> throwM e') , MC.Handler (\ (DropException e') -> throwM e')
, M.Handler (\ (_e :: SomeException) -> fallback) , MC.Handler (\ (_e :: SomeException) -> fallback)
] ]
Nothing -> fallback Nothing -> fallback
@ -277,8 +300,8 @@ data DropException = DropException SomeException
instance Exception DropException instance Exception DropException
notEnoughCopies :: Key -> NumCopies -> MinCopies -> [VerifiedCopy] -> [UUID] -> [Remote] -> String -> [Remote] -> Annex () notEnoughCopies :: Key -> Maybe UUID -> NumCopies -> MinCopies -> [VerifiedCopy] -> [UUID] -> [Remote] -> String -> [Remote] -> Annex ()
notEnoughCopies key neednum needmin have skip bad nolocmsg lockunsupported = do notEnoughCopies key dropfrom neednum needmin have skip bad nolocmsg lockunsupported = do
showNote "unsafe" showNote "unsafe"
if length have < fromNumCopies neednum if length have < fromNumCopies neednum
then showLongNote $ UnquotedString $ then showLongNote $ UnquotedString $
@ -297,7 +320,29 @@ notEnoughCopies key neednum needmin have skip bad nolocmsg lockunsupported = do
++ Remote.listRemoteNames lockunsupported ++ Remote.listRemoteNames lockunsupported
Remote.showTriedRemotes bad Remote.showTriedRemotes bad
Remote.showLocations True key (map toUUID have++skip) nolocmsg -- When dropping from a cluster, don't suggest making the nodes of
-- the cluster available
clusternodes <- case mkClusterUUID =<< dropfrom of
Nothing -> pure []
Just cu -> do
clusters <- getClusters
pure $ maybe [] (map fromClusterNodeUUID . S.toList) $
M.lookup cu (clusterUUIDs clusters)
let excludeset = S.fromList $ map toUUID have++skip++clusternodes
-- Don't suggest making a cluster available when dropping from its
-- node.
let exclude u
| u `S.member` excludeset = pure True
| otherwise = case (dropfrom, mkClusterUUID u) of
(Just dropfrom', Just cu) -> do
clusters <- getClusters
pure $ case M.lookup cu (clusterUUIDs clusters) of
Just nodes ->
ClusterNodeUUID dropfrom'
`S.member` nodes
Nothing -> False
_ -> pure False
Remote.showLocations True key exclude nolocmsg
pluralCopies :: Int -> String pluralCopies :: Int -> String
pluralCopies 1 = "copy" pluralCopies 1 = "copy"
@ -312,17 +357,27 @@ pluralCopies _ = "copies"
- The return lists also exclude any repositories that are untrusted, - The return lists also exclude any repositories that are untrusted,
- since those should not be used for verification. - since those should not be used for verification.
- -
- When dropping from a cluster UUID, its nodes are excluded.
-
- Cluster UUIDs are also excluded since locking a key on a cluster
- is done by locking on individual nodes.
-
- The UnVerifiedCopy list is cost ordered. - The UnVerifiedCopy list is cost ordered.
- The VerifiedCopy list contains repositories that are trusted to - The VerifiedCopy list contains repositories that are trusted to
- contain the key. - contain the key.
-} -}
verifiableCopies :: Key -> [UUID] -> Annex ([UnVerifiedCopy], [VerifiedCopy]) verifiableCopies :: Key -> [UUID] -> Annex ([UnVerifiedCopy], [VerifiedCopy])
verifiableCopies key exclude = do verifiableCopies key exclude = do
locs <- Remote.keyLocations key locs <- filter (not . isClusterUUID) <$> Remote.keyLocations key
(remotes, trusteduuids) <- Remote.remoteLocations (Remote.IncludeIgnored False) locs (remotes, trusteduuids) <- Remote.remoteLocations (Remote.IncludeIgnored False) locs
=<< trustGet Trusted =<< trustGet Trusted
clusternodes <- if any isClusterUUID exclude
then do
clusters <- getClusters
pure $ concatMap (getclusternodes clusters) exclude
else pure []
untrusteduuids <- trustGet UnTrusted untrusteduuids <- trustGet UnTrusted
let exclude' = exclude ++ untrusteduuids let exclude' = exclude ++ untrusteduuids ++ clusternodes
let remotes' = Remote.remotesWithoutUUID remotes (exclude' ++ trusteduuids) let remotes' = Remote.remotesWithoutUUID remotes (exclude' ++ trusteduuids)
let verified = map (mkVerifiedCopy TrustedCopy) $ let verified = map (mkVerifiedCopy TrustedCopy) $
filter (`notElem` exclude') trusteduuids filter (`notElem` exclude') trusteduuids
@ -331,3 +386,8 @@ verifiableCopies key exclude = do
then [UnVerifiedHere] then [UnVerifiedHere]
else [] else []
return (herec ++ map UnVerifiedRemote remotes', verified) return (herec ++ map UnVerifiedRemote remotes', verified)
where
getclusternodes clusters u = case mkClusterUUID u of
Just cu -> maybe [] (map fromClusterNodeUUID . S.toList) $
M.lookup cu (clusterUUIDs clusters)
Nothing -> []

26
Annex/Proxy.hs Normal file
View file

@ -0,0 +1,26 @@
{- proxying
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Proxy where
import Annex.Common
import P2P.Proxy
import P2P.Protocol
import P2P.IO
import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
-- FIXME: Support special remotes.
proxySshRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide
proxySshRemoteSide clientmaxversion bypass r = mkRemoteSide r $
openP2PShellConnection' r clientmaxversion bypass >>= \case
Just conn@(OpenConnection (remoterunst, remoteconn, _)) ->
return $ Just
( remoterunst
, remoteconn
, void $ liftIO $ closeP2PShellConnection conn
)
_ -> return Nothing

67
Annex/Startup.hs Normal file
View file

@ -0,0 +1,67 @@
{- git-annex startup
-
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Startup where
import Annex.Common
import qualified Annex
import Logs.Cluster
#ifndef mingw32_HOST_OS
import Control.Concurrent.STM
import System.Posix.Signals
#endif
{- Run when starting up the main git-annex program. -}
startup :: Annex ()
startup = do
startupSignals
gc <- Annex.getGitConfig
when (isinitialized gc)
startupAnnex
where
isinitialized gc = annexUUID gc /= NoUUID
&& isJust (annexVersion gc)
{- Run when starting up the main git-annex program when
- git-annex has already been initialized.
- Alternatively, run after initialization.
-}
startupAnnex :: Annex ()
startupAnnex = doQuietAction $
-- Logs.Location needs clusters to be loaded before it is used,
-- in order for a cluster to be treated as the location of keys
-- that are located in any of its nodes.
void loadClusters
startupSignals :: Annex ()
startupSignals = do
#ifndef mingw32_HOST_OS
av <- Annex.getRead Annex.signalactions
let propagate sig = liftIO $ installhandleronce sig av
propagate sigINT
propagate sigQUIT
propagate sigTERM
propagate sigTSTP
propagate sigCONT
propagate sigHUP
-- sigWINCH is not propagated; it should not be needed,
-- and the concurrent-output library installs its own signal
-- handler for it.
-- sigSTOP and sigKILL cannot be caught, so will not be propagated.
where
installhandleronce sig av = void $
installHandler sig (CatchOnce (gotsignal sig av)) Nothing
gotsignal sig av = do
mapM_ (\a -> a (fromIntegral sig)) =<< atomically (readTVar av)
raiseSignal sig
installhandleronce sig av
#else
return ()
#endif

View file

@ -6,7 +6,7 @@
- UUIDs of remotes are cached in git config, using keys named - UUIDs of remotes are cached in git config, using keys named
- remote.<name>.annex-uuid - remote.<name>.annex-uuid
- -
- Copyright 2010-2016 Joey Hess <id@joeyh.name> - Copyright 2010-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -15,6 +15,7 @@
module Annex.UUID ( module Annex.UUID (
configkeyUUID, configkeyUUID,
configRepoUUID,
getUUID, getUUID,
getRepoUUID, getRepoUUID,
getUncachedUUID, getUncachedUUID,
@ -47,6 +48,9 @@ import Data.String
configkeyUUID :: ConfigKey configkeyUUID :: ConfigKey
configkeyUUID = annexConfig "uuid" configkeyUUID = annexConfig "uuid"
configRepoUUID :: Git.Repo -> ConfigKey
configRepoUUID r = remoteAnnexConfig r "uuid"
{- Generates a random UUID, that does not include the MAC address. -} {- Generates a random UUID, that does not include the MAC address. -}
genUUID :: IO UUID genUUID :: IO UUID
genUUID = toUUID <$> U4.nextRandom genUUID = toUUID <$> U4.nextRandom
@ -82,7 +86,7 @@ getRepoUUID r = do
updatecache u = do updatecache u = do
g <- gitRepo g <- gitRepo
when (g /= r) $ storeUUIDIn cachekey u when (g /= r) $ storeUUIDIn cachekey u
cachekey = remoteAnnexConfig r "uuid" cachekey = configRepoUUID r
removeRepoUUID :: Annex () removeRepoUUID :: Annex ()
removeRepoUUID = do removeRepoUUID = do

View file

@ -19,6 +19,7 @@ import qualified Annex
import Annex.UUID import Annex.UUID
import Annex.AdjustedBranch import Annex.AdjustedBranch
import Annex.Action import Annex.Action
import Annex.Startup
import Types.StandardGroups import Types.StandardGroups
import Logs.PreferredContent import Logs.PreferredContent
import qualified Annex.Branch import qualified Annex.Branch
@ -85,7 +86,7 @@ initRepo False _ dir desc mgroup = inDir dir $ do
initRepo' :: Maybe String -> Maybe StandardGroup -> Annex () initRepo' :: Maybe String -> Maybe StandardGroup -> Annex ()
initRepo' desc mgroup = unlessM isInitialized $ do initRepo' desc mgroup = unlessM isInitialized $ do
initialize desc Nothing initialize startupAnnex desc Nothing
u <- getUUID u <- getUUID
maybe noop (defaultStandardGroup u) mgroup maybe noop (defaultStandardGroup u) mgroup
{- Ensure branch gets committed right away so it is {- Ensure branch gets committed right away so it is

View file

@ -19,7 +19,6 @@ import Utility.Env.Set
import Types.Distribution import Types.Distribution
import Types.Transfer import Types.Transfer
import Logs.Web import Logs.Web
import Logs.Presence
import Logs.Location import Logs.Location
import Annex.Content import Annex.Content
import Annex.UUID import Annex.UUID

View file

@ -1,9 +1,18 @@
git-annex (10.20240532) UNRELEASED; urgency=medium git-annex (10.20240532) UNRELEASED; urgency=medium
* git-annex remotes can now act as proxies that provide access to
their remotes. Configure this with remote.name.annex-proxy
and the git-annex update proxy command.
* Clusters are now supported. These are collections of nodes that can
be accessed as a single entity, accessed by one or more gateway
repositories.
* Added git-annex initcluster, updatecluster, and extendcluster commands.
* Fix a bug where interrupting git-annex while it is updating the * Fix a bug where interrupting git-annex while it is updating the
git-annex branch for an export could later lead to git fsck git-annex branch for an export could later lead to git fsck
complaining about missing tree objects. complaining about missing tree objects.
* Tab completion of options like --from now includes special remotes. * Tab completion of options like --from now includes special remotes,
as well as proxied remotes and clusters.
* P2P protocol version 2.
* Fix Windows build with Win32 2.13.4+ * Fix Windows build with Win32 2.13.4+
Thanks, Oleg Tolmatcev Thanks, Oleg Tolmatcev
* When --debugfilter or annex.debugfilter is set, avoid propigating * When --debugfilter or annex.debugfilter is set, avoid propigating

View file

@ -23,6 +23,7 @@ import qualified Annex
import qualified Git import qualified Git
import qualified Git.AutoCorrect import qualified Git.AutoCorrect
import qualified Git.Config import qualified Git.Config
import Annex.Startup
import Annex.Action import Annex.Action
import Annex.Environment import Annex.Environment
import Command import Command

View file

@ -149,7 +149,7 @@ commandAction start = do
showEndMessage startmsg False showEndMessage startmsg False
return False return False
{- Waits for all worker threads to finish and merges their AnnexStates {- Waits for all worker thrneads to finish and merges their AnnexStates
- back into the current Annex's state. - back into the current Annex's state.
-} -}
finishCommandActions :: Annex () finishCommandActions :: Annex ()

View file

@ -124,6 +124,10 @@ import qualified Command.Smudge
import qualified Command.FilterProcess import qualified Command.FilterProcess
import qualified Command.Restage import qualified Command.Restage
import qualified Command.Undo import qualified Command.Undo
import qualified Command.InitCluster
import qualified Command.UpdateCluster
import qualified Command.ExtendCluster
import qualified Command.UpdateProxy
import qualified Command.Version import qualified Command.Version
import qualified Command.RemoteDaemon import qualified Command.RemoteDaemon
#ifdef WITH_ASSISTANT #ifdef WITH_ASSISTANT
@ -247,6 +251,10 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
, Command.FilterProcess.cmd , Command.FilterProcess.cmd
, Command.Restage.cmd , Command.Restage.cmd
, Command.Undo.cmd , Command.Undo.cmd
, Command.InitCluster.cmd
, Command.UpdateCluster.cmd
, Command.ExtendCluster.cmd
, Command.UpdateProxy.cmd
, Command.Version.cmd , Command.Version.cmd
, Command.RemoteDaemon.cmd , Command.RemoteDaemon.cmd
#ifdef WITH_ASSISTANT #ifdef WITH_ASSISTANT

View file

@ -1,6 +1,6 @@
{- git-annex-shell main program {- git-annex-shell main program
- -
- Copyright 2010-2023 Joey Hess <id@joeyh.name> - Copyright 2010-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -8,6 +8,7 @@
module CmdLine.GitAnnexShell where module CmdLine.GitAnnexShell where
import Annex.Common import Annex.Common
import qualified Annex
import qualified Git.Construct import qualified Git.Construct
import qualified Git.Config import qualified Git.Config
import CmdLine import CmdLine
@ -19,6 +20,11 @@ import CmdLine.GitAnnexShell.Fields
import Remote.GCrypt (getGCryptUUID) import Remote.GCrypt (getGCryptUUID)
import P2P.Protocol (ServerMode(..)) import P2P.Protocol (ServerMode(..))
import Git.Types import Git.Types
import qualified Types.Remote as R
import Logs.Proxy
import Logs.Cluster
import Logs.UUID
import Remote
import qualified Command.ConfigList import qualified Command.ConfigList
import qualified Command.NotifyChanges import qualified Command.NotifyChanges
@ -30,6 +36,7 @@ import qualified Command.SendKey
import qualified Command.DropKey import qualified Command.DropKey
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S
cmdsMap :: M.Map ServerMode [Command] cmdsMap :: M.Map ServerMode [Command]
cmdsMap = M.fromList $ map mk cmdsMap = M.fromList $ map mk
@ -39,20 +46,22 @@ cmdsMap = M.fromList $ map mk
] ]
where where
readonlycmds = map addAnnexOptions readonlycmds = map addAnnexOptions
[ Command.ConfigList.cmd [ notProxyable Command.ConfigList.cmd
, gitAnnexShellCheck Command.NotifyChanges.cmd , gitAnnexShellCheck Command.NotifyChanges.cmd
-- p2pstdio checks the environment variables to -- p2pstdio checks the environment variables to
-- determine the security policy to use -- determine the security policy to use, so is safe to
-- include in the readonly list even though it is not
-- always readonly
, gitAnnexShellCheck Command.P2PStdIO.cmd , gitAnnexShellCheck Command.P2PStdIO.cmd
, gitAnnexShellCheck Command.InAnnex.cmd , notProxyable (gitAnnexShellCheck Command.InAnnex.cmd)
, gitAnnexShellCheck Command.SendKey.cmd , notProxyable (gitAnnexShellCheck Command.SendKey.cmd)
] ]
appendcmds = readonlycmds ++ map addAnnexOptions appendcmds = readonlycmds ++ map addAnnexOptions
[ gitAnnexShellCheck Command.RecvKey.cmd [ notProxyable (gitAnnexShellCheck Command.RecvKey.cmd)
] ]
allcmds = appendcmds ++ map addAnnexOptions allcmds = appendcmds ++ map addAnnexOptions
[ gitAnnexShellCheck Command.DropKey.cmd [ notProxyable (gitAnnexShellCheck Command.DropKey.cmd)
, Command.GCryptSetup.cmd , notProxyable Command.GCryptSetup.cmd
] ]
mk (s, l) = (s, map (adddirparam . noMessages) l) mk (s, l) = (s, map (adddirparam . noMessages) l)
@ -77,17 +86,23 @@ commonShellOptions =
where where
checkUUID expected = getUUID >>= check checkUUID expected = getUUID >>= check
where where
check u | u == toUUID expected = noop
check NoUUID = checkGCryptUUID expected check NoUUID = checkGCryptUUID expected
check u = unexpectedUUID expected u check u
| u == toUUID expected = noop
| otherwise =
unlessM (checkProxy (toUUID expected) u) $
unexpectedUUID expected u
checkGCryptUUID expected = check =<< getGCryptUUID True =<< gitRepo checkGCryptUUID expected = check =<< getGCryptUUID True =<< gitRepo
where where
check (Just u) | u == toUUID expected = noop check (Just u) | u == toUUID expected = noop
check Nothing = unexpected expected "uninitialized repository" check Nothing = unexpected expected "uninitialized repository"
check (Just u) = unexpectedUUID expected u check (Just u) = unexpectedUUID expected u
unexpectedUUID expected u = unexpected expected $ "UUID " ++ fromUUID u unexpectedUUID expected u = unexpected expected $ "UUID " ++ fromUUID u
unexpected expected s = giveup $ unexpected expected s = giveup $
"expected repository UUID " ++ expected ++ " but found " ++ s "expected repository UUID " ++ expected ++ " but found " ++ s
run :: [String] -> IO () run :: [String] -> IO ()
run [] = failure run [] = failure
@ -104,6 +119,11 @@ run c@(cmd:_)
| cmd `elem` builtins = failure | cmd `elem` builtins = failure
| otherwise = external c | otherwise = external c
failure :: IO ()
failure = giveup $ "bad parameters\n\n" ++ usage h cmdsList
where
h = "git-annex-shell [-c] command [parameters ...] [option ...]"
builtins :: [String] builtins :: [String]
builtins = map cmdname cmdsList builtins = map cmdname cmdsList
@ -165,7 +185,60 @@ checkField (field, val)
| field == fieldName autoInit = fieldCheck autoInit val | field == fieldName autoInit = fieldCheck autoInit val
| otherwise = False | otherwise = False
failure :: IO () {- Check if this repository can proxy for a specified remote uuid,
failure = giveup $ "bad parameters\n\n" ++ usage h cmdsList - and if so enable proxying for it. -}
checkProxy :: UUID -> UUID -> Annex Bool
checkProxy remoteuuid ouruuid = M.lookup ouruuid <$> getProxies >>= \case
Nothing -> return False
-- This repository has (or had) proxying enabled. So it's
-- ok to display error messages that talk about proxies.
Just proxies ->
case filter (\p -> proxyRemoteUUID p == remoteuuid) (S.toList proxies) of
[] -> notconfigured
ps -> case mkClusterUUID remoteuuid of
Just cu -> proxyforcluster cu
Nothing -> proxyfor ps
where where
h = "git-annex-shell [-c] command [parameters ...] [option ...]" -- This repository may have multiple remotes that access the same
-- repository. Proxy for the lowest cost one that is configured to
-- be used as a proxy.
proxyfor ps = do
rs <- concat . byCost <$> remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
let sameuuid r = uuid r == remoteuuid
let samename r p = name r == proxyRemoteName p
case headMaybe (filter (\r -> sameuuid r && proxyisconfigured rs myclusters r && any (samename r) ps) rs) of
Nothing -> notconfigured
Just r -> do
Annex.changeState $ \st ->
st { Annex.proxyremote = Just (Right r) }
return True
-- Only proxy for a remote when the git configuration
-- allows it. This is important to prevent changes to
-- the git-annex branch making git-annex-shell unexpectedly
-- proxy for remotes.
proxyisconfigured rs myclusters r
| remoteAnnexProxy (R.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ R.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (R.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . R.gitconfig) $
filter (\p -> R.uuid p == proxyuuid) rs
Nothing -> False
proxyforcluster cu = do
clusters <- getClusters
if M.member cu (clusterUUIDs clusters)
then do
Annex.changeState $ \st ->
st { Annex.proxyremote = Just (Left cu) }
return True
else notconfigured
notconfigured = M.lookup remoteuuid <$> uuidDescMap >>= \case
Just desc -> giveup $ "not configured to proxy for repository " ++ fromUUIDDesc desc
Nothing -> return False

View file

@ -1,6 +1,6 @@
{- git-annex-shell checks {- git-annex-shell checks
- -
- Copyright 2012 Joey Hess <id@joeyh.name> - Copyright 2012-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -82,3 +82,12 @@ gitAnnexShellCheck = addCheck GitAnnexShellOk okforshell . dontCheck repoExists
where where
okforshell = unlessM (isInitialized <||> isJust . gcryptId <$> Annex.getGitConfig) $ okforshell = unlessM (isInitialized <||> isJust . gcryptId <$> Annex.getGitConfig) $
giveup "Not a git-annex or gcrypt repository." giveup "Not a git-annex or gcrypt repository."
{- Used for Commands that don't support proxying. -}
notProxyable :: Command -> Command
notProxyable c = addCheck GitAnnexShellNotProxyable checkok c
where
checkok = Annex.getState Annex.proxyremote >>= \case
Nothing -> return ()
Just _ -> giveup $ "Cannot proxy " ++ cmdname c ++ " command."

View file

@ -30,6 +30,7 @@ import qualified Logs.Remote
import qualified Remote.External import qualified Remote.External
import Remote.Helper.Encryptable (parseEncryptionMethod) import Remote.Helper.Encryptable (parseEncryptionMethod)
import Annex.Transfer import Annex.Transfer
import Annex.Startup
import Backend.GitRemoteAnnex import Backend.GitRemoteAnnex
import Config import Config
import Types.Key import Types.Key
@ -1173,7 +1174,7 @@ cleanupInitialization sab alternatejournaldir = void $ tryNonAsync $ do
inRepo $ Git.Branch.delete Annex.Branch.fullname inRepo $ Git.Branch.delete Annex.Branch.fullname
ifM (Annex.Branch.hasSibling <&&> nonbuggygitversion) ifM (Annex.Branch.hasSibling <&&> nonbuggygitversion)
( do ( do
autoInitialize' (pure True) remoteList autoInitialize' (pure True) startupAnnex remoteList
differences <- allDifferences <$> recordedDifferences differences <- allDifferences <$> recordedDifferences
when (differences /= mempty) $ when (differences /= mempty) $
deletebundleobjects deletebundleobjects

View file

@ -23,6 +23,7 @@ import CmdLine.Batch as ReExported
import Options.Applicative as ReExported hiding (command) import Options.Applicative as ReExported hiding (command)
import qualified Git import qualified Git
import Annex.Init import Annex.Init
import Annex.Startup
import Utility.Daemon import Utility.Daemon
import Types.Transfer import Types.Transfer
import Types.ActionItem as ReExported import Types.ActionItem as ReExported
@ -125,7 +126,7 @@ commonChecks :: [CommandCheck]
commonChecks = [repoExists] commonChecks = [repoExists]
repoExists :: CommandCheck repoExists :: CommandCheck
repoExists = CommandCheck RepoExists (ensureInitialized remoteList) repoExists = CommandCheck RepoExists (ensureInitialized startupAnnex remoteList)
notBareRepo :: Command -> Command notBareRepo :: Command -> Command
notBareRepo = addCheck CheckNotBareRepo checkNotBareRepo notBareRepo = addCheck CheckNotBareRepo checkNotBareRepo

View file

@ -17,6 +17,7 @@ import qualified BuildInfo
import Utility.HumanTime import Utility.HumanTime
import Assistant.Install import Assistant.Install
import Remote.List import Remote.List
import Annex.Startup
import Control.Concurrent.Async import Control.Concurrent.Async
@ -63,7 +64,7 @@ start o
stop stop
| otherwise = do | otherwise = do
liftIO ensureInstalled liftIO ensureInstalled
ensureInitialized remoteList ensureInitialized startupAnnex remoteList
Command.Watch.start True (daemonOptions o) (startDelayOption o) Command.Watch.start True (daemonOptions o) (startDelayOption o)
startNoRepo :: AssistantOptions -> IO () startNoRepo :: AssistantOptions -> IO ()

View file

@ -16,6 +16,7 @@ import Git.Types
import Remote.GCrypt (coreGCryptId) import Remote.GCrypt (coreGCryptId)
import qualified CmdLine.GitAnnexShell.Fields as Fields import qualified CmdLine.GitAnnexShell.Fields as Fields
import CmdLine.GitAnnexShell.Checks import CmdLine.GitAnnexShell.Checks
import Annex.Startup
cmd :: Command cmd :: Command
cmd = noCommit $ dontCheck repoExists $ cmd = noCommit $ dontCheck repoExists $
@ -47,7 +48,7 @@ findOrGenUUID = do
else ifM (Annex.Branch.hasSibling <||> (isJust <$> Fields.getField Fields.autoInit)) else ifM (Annex.Branch.hasSibling <||> (isJust <$> Fields.getField Fields.autoInit))
( do ( do
liftIO checkNotReadOnly liftIO checkNotReadOnly
initialize Nothing Nothing initialize startupAnnex Nothing Nothing
getUUID getUUID
, return NoUUID , return NoUUID
) )

View file

@ -205,7 +205,7 @@ doDrop pcc dropfrom contentlock key afile numcopies mincopies skip preverified c
ifM (Annex.getRead Annex.force) ifM (Annex.getRead Annex.force)
( dropaction Nothing ( dropaction Nothing
, ifM (checkRequiredContent pcc dropfrom key afile) , ifM (checkRequiredContent pcc dropfrom key afile)
( verifyEnoughCopiesToDrop nolocmsg key ( verifyEnoughCopiesToDrop nolocmsg key (Just dropfrom)
contentlock numcopies mincopies contentlock numcopies mincopies
skip preverified check skip preverified check
(dropaction . Just) (dropaction . Just)
@ -253,7 +253,7 @@ checkDropAuto automode mremote afile key a =
uuid <- getUUID uuid <- getUUID
let remoteuuid = fromMaybe uuid $ Remote.uuid <$> mremote let remoteuuid = fromMaybe uuid $ Remote.uuid <$> mremote
locs' <- trustExclude UnTrusted $ filter (/= remoteuuid) locs locs' <- trustExclude UnTrusted $ filter (/= remoteuuid) locs
if length locs' >= fromNumCopies numcopies if numCopiesCheck'' locs' (>=) numcopies
then a numcopies mincopies then a numcopies mincopies
else stop else stop
| otherwise = a numcopies mincopies | otherwise = a numcopies mincopies

58
Command/ExtendCluster.hs Normal file
View file

@ -0,0 +1,58 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Command.ExtendCluster where
import Command
import qualified Annex
import Types.Cluster
import Config
import Types.GitConfig
import qualified Remote
import qualified Command.UpdateCluster
import qualified Data.Map as M
cmd :: Command
cmd = command "extendcluster" SectionSetup "add an gateway to a cluster"
(paramPair paramRemote paramName) (withParams seek)
seek :: CmdParams -> CommandSeek
seek (remotename:clustername:[]) = Remote.byName (Just clusterremotename) >>= \case
Just clusterremote -> Remote.byName (Just remotename) >>= \case
Just gatewayremote ->
case mkClusterUUID (Remote.uuid clusterremote) of
Just cu -> do
commandAction $ start cu clustername gatewayremote
Command.UpdateCluster.seek []
Nothing -> giveup $ clusterremotename
++ " is not a cluster remote."
Nothing -> giveup $ "No remote named " ++ remotename ++ " exists."
Nothing -> giveup $ "Expected to find a cluster remote named "
++ clusterremotename
++ " that is accessed via " ++ remotename
++ ", but there is no such remote. Perhaps you need to"
++ "git fetch from " ++ remotename
++ ", or git-annex updatecluster needs to be run there?"
where
clusterremotename = remotename ++ "-" ++ clustername
seek _ = giveup "Expected two parameters, gateway and clustername."
start :: ClusterUUID -> String -> Remote -> CommandStart
start cu clustername gatewayremote = starting "extendcluster" ai si $ do
myclusters <- annexClusters <$> Annex.getGitConfig
let setcus f = setConfig f (fromUUID (fromClusterUUID cu))
unless (M.member clustername myclusters) $ do
setcus $ annexConfig ("cluster." <> encodeBS clustername)
setcus $ remoteAnnexConfig gatewayremote $
remoteGitConfigKey ClusterGatewayField
next $ return True
where
ai = ActionItemOther (Just (UnquotedString clustername))
si = SeekInput [clustername]

View file

@ -573,7 +573,7 @@ checkKeyNumCopies key afile numcopies = do
locs <- loggedLocations key locs <- loggedLocations key
(untrustedlocations, otherlocations) <- trustPartition UnTrusted locs (untrustedlocations, otherlocations) <- trustPartition UnTrusted locs
(deadlocations, safelocations) <- trustPartition DeadTrusted otherlocations (deadlocations, safelocations) <- trustPartition DeadTrusted otherlocations
let present = length safelocations let present = numCopiesCount safelocations
if present < fromNumCopies numcopies if present < fromNumCopies numcopies
then ifM (checkDead key) then ifM (checkDead key)
( do ( do

View file

@ -108,7 +108,8 @@ getKey' key afile = dispatch
Remote.showTriedRemotes remotes Remote.showTriedRemotes remotes
showlocs (map Remote.uuid remotes) showlocs (map Remote.uuid remotes)
return False return False
showlocs exclude = Remote.showLocations False key exclude showlocs exclude = Remote.showLocations False key
(\u -> pure (u `elem` exclude))
"No other repository is known to contain the file." "No other repository is known to contain the file."
-- This check is to avoid an ugly message if a remote is a -- This check is to avoid an ugly message if a remote is a
-- drive that is not mounted. -- drive that is not mounted.

View file

@ -319,7 +319,7 @@ verifyExisting key destfile (yes, no) = do
(needcopies, mincopies) <- getFileNumMinCopies destfile (needcopies, mincopies) <- getFileNumMinCopies destfile
(tocheck, preverified) <- verifiableCopies key [] (tocheck, preverified) <- verifiableCopies key []
verifyEnoughCopiesToDrop [] key Nothing needcopies mincopies [] preverified tocheck verifyEnoughCopiesToDrop [] key Nothing Nothing needcopies mincopies [] preverified tocheck
(const yes) no (const yes) no
seekRemote :: Remote -> Branch -> Maybe TopFilePath -> Bool -> CheckGitIgnore -> [String] -> CommandSeek seekRemote :: Remote -> Branch -> Maybe TopFilePath -> Bool -> CheckGitIgnore -> [String] -> CommandSeek

View file

@ -12,6 +12,7 @@ module Command.Init where
import Command import Command
import Annex.Init import Annex.Init
import Annex.Version import Annex.Version
import Annex.Startup
import Types.RepoVersion import Types.RepoVersion
import qualified Annex.SpecialRemote import qualified Annex.SpecialRemote
@ -77,7 +78,7 @@ perform os = do
Just v | v /= wantversion -> Just v | v /= wantversion ->
giveup $ "This repository is already a initialized with version " ++ show (fromRepoVersion v) ++ ", not changing to requested version." giveup $ "This repository is already a initialized with version " ++ show (fromRepoVersion v) ++ ", not changing to requested version."
_ -> noop _ -> noop
initialize initialize startupAnnex
(if null (initDesc os) then Nothing else Just (initDesc os)) (if null (initDesc os) then Nothing else Just (initDesc os))
(initVersion os) (initVersion os)
unless (noAutoEnable os) unless (noAutoEnable os)

50
Command/InitCluster.hs Normal file
View file

@ -0,0 +1,50 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Command.InitCluster where
import Command
import qualified Annex
import Types.Cluster
import Logs.UUID
import Config
import Annex.UUID
import Git.Types
import Git.Remote (isLegalName)
import qualified Data.Map as M
cmd :: Command
cmd = command "initcluster" SectionSetup "initialize a new cluster"
(paramPair paramName paramDesc) (withParams seek)
seek :: CmdParams -> CommandSeek
seek (clustername:desc:[]) = commandAction $
start clustername (toUUIDDesc desc)
seek (clustername:[]) = commandAction $
start clustername $ toUUIDDesc ("cluster " ++ clustername)
seek _ = giveup "Expected two parameters, name and description."
start :: RemoteName -> UUIDDesc -> CommandStart
start clustername desc = starting "initcluster" ai si $ do
unless (isLegalName clustername) $
giveup "That cluster name is not a valid git remote name."
myclusters <- annexClusters <$> Annex.getGitConfig
unless (M.member clustername myclusters) $ do
cu <- fromMaybe (giveup "unable to generate a cluster UUID")
<$> genClusterUUID <$> liftIO genUUID
setConfig (annexConfig ("cluster." <> encodeBS clustername))
(fromUUID (fromClusterUUID cu))
describeUUID (fromClusterUUID cu) desc
next $ return True
where
ai = ActionItemOther (Just (UnquotedString clustername))
si = SeekInput [clustername]

View file

@ -1,6 +1,6 @@
{- git-annex command {- git-annex command
- -
- Copyright 2010-2023 Joey Hess <id@joeyh.name> - Copyright 2010-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -16,11 +16,11 @@ import Annex.Content
import qualified Remote import qualified Remote
import Annex.UUID import Annex.UUID
import Annex.Transfer import Annex.Transfer
import Logs.Presence
import Logs.Trust import Logs.Trust
import Logs.File import Logs.File
import Logs.Location import Logs.Location
import Annex.NumCopies import Annex.NumCopies
import Types.Cluster
import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
@ -194,7 +194,7 @@ toPerform' mcontentlock dest removewhen key afile fastcheck isthere = do
DropCheckNumCopies -> do DropCheckNumCopies -> do
(numcopies, mincopies) <- getSafestNumMinCopies afile key (numcopies, mincopies) <- getSafestNumMinCopies afile key
(tocheck, verified) <- verifiableCopies key [srcuuid] (tocheck, verified) <- verifiableCopies key [srcuuid]
verifyEnoughCopiesToDrop "" key (Just contentlock) verifyEnoughCopiesToDrop "" key (Just srcuuid) (Just contentlock)
numcopies mincopies [srcuuid] verified numcopies mincopies [srcuuid] verified
(UnVerifiedRemote dest : tocheck) (UnVerifiedRemote dest : tocheck)
(drophere setpresentremote contentlock . showproof) (drophere setpresentremote contentlock . showproof)
@ -300,7 +300,7 @@ fromDrop src destuuid deststartedwithcopy key afile adjusttocheck =
DropCheckNumCopies -> do DropCheckNumCopies -> do
(numcopies, mincopies) <- getSafestNumMinCopies afile key (numcopies, mincopies) <- getSafestNumMinCopies afile key
(tocheck, verified) <- verifiableCopies key [Remote.uuid src] (tocheck, verified) <- verifiableCopies key [Remote.uuid src]
verifyEnoughCopiesToDrop "" key Nothing numcopies mincopies [Remote.uuid src] verified verifyEnoughCopiesToDrop "" key (Just (Remote.uuid src)) Nothing numcopies mincopies [Remote.uuid src] verified
(adjusttocheck tocheck) (dropremote . showproof) faileddropremote (adjusttocheck tocheck) (dropremote . showproof) faileddropremote
DropWorse -> faileddropremote DropWorse -> faileddropremote
where where
@ -503,7 +503,8 @@ fromToPerform src dest removewhen key afile = do
- On the other hand, when the destination repository did not start - On the other hand, when the destination repository did not start
- with a copy of a file, it can be dropped from the source without - with a copy of a file, it can be dropped from the source without
- making numcopies worse, so the move is allowed even if numcopies - making numcopies worse, so the move is allowed even if numcopies
- is not met. - is not met. (However, when the source is a cluster, dropping from it
- drops from all nodes, and so numcopies must be checked.)
- -
- Similarly, a file can move from an untrusted repository to another - Similarly, a file can move from an untrusted repository to another
- untrusted repository, even if that is the only copy of the file. - untrusted repository, even if that is the only copy of the file.
@ -520,7 +521,7 @@ fromToPerform src dest removewhen key afile = do
willDropMakeItWorse :: UUID -> UUID -> DestStartedWithCopy -> Key -> AssociatedFile -> Annex DropCheck willDropMakeItWorse :: UUID -> UUID -> DestStartedWithCopy -> Key -> AssociatedFile -> Annex DropCheck
willDropMakeItWorse srcuuid destuuid (DestStartedWithCopy deststartedwithcopy _) key afile = willDropMakeItWorse srcuuid destuuid (DestStartedWithCopy deststartedwithcopy _) key afile =
ifM (Command.Drop.checkRequiredContent (Command.Drop.PreferredContentChecked False) srcuuid key afile) ifM (Command.Drop.checkRequiredContent (Command.Drop.PreferredContentChecked False) srcuuid key afile)
( if deststartedwithcopy ( if deststartedwithcopy || isClusterUUID srcuuid
then unlessforced DropCheckNumCopies then unlessforced DropCheckNumCopies
else ifM checktrustlevel else ifM checktrustlevel
( return DropAllowed ( return DropAllowed

View file

@ -1,6 +1,6 @@
{- git-annex command {- git-annex command
- -
- Copyright 2018 Joey Hess <id@joeyh.name> - Copyright 2018-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -10,9 +10,16 @@ module Command.P2PStdIO where
import Command import Command
import P2P.IO import P2P.IO
import P2P.Annex import P2P.Annex
import P2P.Proxy
import qualified P2P.Protocol as P2P import qualified P2P.Protocol as P2P
import qualified Annex
import Annex.Proxy
import Annex.UUID import Annex.UUID
import qualified CmdLine.GitAnnexShell.Checks as Checks import qualified CmdLine.GitAnnexShell.Checks as Checks
import Logs.Location
import Logs.Cluster
import Annex.Cluster
import qualified Remote
import System.IO.Error import System.IO.Error
@ -34,16 +41,71 @@ start theiruuid = startingCustomOutput (ActionItemOther Nothing) $ do
(True, _) -> P2P.ServeReadOnly (True, _) -> P2P.ServeReadOnly
(False, True) -> P2P.ServeAppendOnly (False, True) -> P2P.ServeAppendOnly
(False, False) -> P2P.ServeReadWrite (False, False) -> P2P.ServeReadWrite
Annex.getState Annex.proxyremote >>= \case
Nothing ->
performLocal theiruuid servermode
Just (Right r) ->
performProxy theiruuid servermode r
Just (Left clusteruuid) ->
performProxyCluster theiruuid clusteruuid servermode
performLocal :: UUID -> P2P.ServerMode -> CommandPerform
performLocal theiruuid servermode = do
myuuid <- getUUID myuuid <- getUUID
let conn = stdioP2PConnection Nothing let conn = stdioP2PConnection Nothing
let server = do let server = do
P2P.net $ P2P.sendMessage (P2P.AUTH_SUCCESS myuuid) P2P.net $ P2P.sendMessage (P2P.AUTH_SUCCESS myuuid)
P2P.serveAuthed servermode myuuid P2P.serveAuthed servermode myuuid
runst <- liftIO $ mkRunState $ Serving theiruuid Nothing runst <- liftIO $ mkRunState $ Serving theiruuid Nothing
runFullProto runst conn server >>= \case p2pErrHandler (const p2pDone) (runFullProto runst conn server)
Right () -> done
-- Avoid displaying an error when the client hung up on us. performProxy :: UUID -> P2P.ServerMode -> Remote -> CommandPerform
Left (ProtoFailureIOError e) | isEOFError e -> done performProxy clientuuid servermode r = do
Left e -> giveup (describeProtoFailure e) clientside <- proxyClientSide clientuuid
getClientProtocolVersion (Remote.uuid r) clientside
(withclientversion clientside)
p2pErrHandler
where where
done = next $ return True withclientversion clientside (Just (clientmaxversion, othermsg)) = do
remoteside <- proxySshRemoteSide clientmaxversion mempty r
protocolversion <- either (const (min P2P.maxProtocolVersion clientmaxversion)) id
<$> runRemoteSide remoteside
(P2P.net P2P.getProtocolVersion)
let closer = do
closeRemoteSide remoteside
p2pDone
concurrencyconfig <- noConcurrencyConfig
let runproxy othermsg' = proxy closer proxymethods
servermode clientside
(Remote.uuid r)
(singleProxySelector remoteside)
concurrencyconfig
protocolversion othermsg' p2pErrHandler
sendClientProtocolVersion clientside othermsg protocolversion
runproxy p2pErrHandler
withclientversion _ Nothing = p2pDone
proxymethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing
, addedContent = \u k -> logChange k u InfoPresent
}
performProxyCluster :: UUID -> ClusterUUID -> P2P.ServerMode -> CommandPerform
performProxyCluster clientuuid clusteruuid servermode = do
clientside <- proxyClientSide clientuuid
proxyCluster clusteruuid p2pDone servermode clientside p2pErrHandler
proxyClientSide :: UUID -> Annex ClientSide
proxyClientSide clientuuid = do
clientrunst <- liftIO (mkRunState $ Serving clientuuid Nothing)
return $ ClientSide clientrunst (stdioP2PConnection Nothing)
p2pErrHandler :: (a -> CommandPerform) -> Annex (Either ProtoFailure a) -> CommandPerform
p2pErrHandler cont a = a >>= \case
-- Avoid displaying an error when the client hung up on us.
Left (ProtoFailureIOError e) | isEOFError e -> p2pDone
Left e -> giveup (describeProtoFailure e)
Right v -> cont v
p2pDone :: CommandPerform
p2pDone = next $ return True

View file

@ -10,6 +10,7 @@ module Command.Reinit where
import Command import Command
import Annex.Init import Annex.Init
import Annex.UUID import Annex.UUID
import Annex.Startup
import qualified Remote import qualified Remote
import qualified Annex.SpecialRemote import qualified Annex.SpecialRemote
@ -36,6 +37,6 @@ perform s = do
then return $ toUUID s then return $ toUUID s
else Remote.nameToUUID s else Remote.nameToUUID s
storeUUID u storeUUID u
checkInitializeAllowed $ initialize' Nothing checkInitializeAllowed $ initialize' startupAnnex Nothing
Annex.SpecialRemote.autoEnable Annex.SpecialRemote.autoEnable
next $ return True next $ return True

84
Command/UpdateCluster.hs Normal file
View file

@ -0,0 +1,84 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Command.UpdateCluster where
import Command
import qualified Annex
import Types.Cluster
import Logs.Cluster
import qualified Remote as R
import qualified Types.Remote as R
import qualified Command.UpdateProxy
import Utility.SafeOutput
import qualified Data.Map as M
import qualified Data.Set as S
cmd :: Command
cmd = noMessages $ command "updatecluster" SectionSetup
"update records of cluster nodes"
paramNothing (withParams seek)
seek :: CmdParams -> CommandSeek
seek = withNothing $ do
commandAction start
commandAction Command.UpdateProxy.start
start :: CommandStart
start = startingCustomOutput (ActionItemOther Nothing) $ do
rs <- R.remoteList
let getnode r = do
clusternames <- remoteAnnexClusterNode (R.gitconfig r)
return $ M.fromList $ zip clusternames (repeat (S.singleton r))
let myclusternodes = M.unionsWith S.union (mapMaybe getnode rs)
myclusters <- annexClusters <$> Annex.getGitConfig
recordedclusters <- getClusters
descs <- R.uuidDescriptions
-- Update the cluster log to list the currently configured nodes
-- of each configured cluster.
forM_ (M.toList myclusters) $ \(clustername, cu) -> do
let mynodesremotes = fromMaybe mempty $
M.lookup clustername myclusternodes
let mynodes = S.map (ClusterNodeUUID . R.uuid) mynodesremotes
let recordednodes = fromMaybe mempty $ M.lookup cu $
clusterUUIDs recordedclusters
proxiednodes <- findProxiedClusterNodes recordednodes
let allnodes = S.union mynodes proxiednodes
if recordednodes == allnodes
then liftIO $ putStrLn $ safeOutput $
"No cluster node changes for cluster: " ++ clustername
else do
describechanges descs clustername recordednodes allnodes mynodesremotes
recordCluster cu allnodes
next $ return True
where
describechanges descs clustername oldnodes allnodes mynodesremotes = do
forM_ (S.toList mynodesremotes) $ \r ->
unless (S.member (ClusterNodeUUID (R.uuid r)) oldnodes) $
liftIO $ putStrLn $ safeOutput $
"Added node " ++ R.name r ++ " to cluster: " ++ clustername
forM_ (S.toList oldnodes) $ \n ->
unless (S.member n allnodes) $ do
let desc = maybe (fromUUID (fromClusterNodeUUID n)) fromUUIDDesc $
M.lookup (fromClusterNodeUUID n) descs
liftIO $ putStrLn $ safeOutput $
"Removed node " ++ desc ++ " from cluster: " ++ clustername
-- Finds nodes that are proxied by other cluster gateways.
findProxiedClusterNodes :: S.Set ClusterNodeUUID -> Annex (S.Set ClusterNodeUUID)
findProxiedClusterNodes recordednodes =
(S.fromList . map asclusternode . filter isproxynode) <$> R.remoteList
where
isproxynode r =
asclusternode r `S.member` recordednodes
&& isJust (remoteAnnexProxiedBy (R.gitconfig r))
asclusternode = ClusterNodeUUID . R.uuid

96
Command/UpdateProxy.hs Normal file
View file

@ -0,0 +1,96 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Command.UpdateProxy where
import Command
import qualified Annex
import Logs.Proxy
import Logs.Cluster
import Annex.UUID
import qualified Remote as R
import qualified Types.Remote as R
import Utility.SafeOutput
import qualified Data.Map as M
import qualified Data.Set as S
cmd :: Command
cmd = noMessages $ command "updateproxy" SectionSetup
"update records with proxy configuration"
paramNothing (withParams seek)
seek :: CmdParams -> CommandSeek
seek = withNothing (commandAction start)
start :: CommandStart
start = startingCustomOutput (ActionItemOther Nothing) $ do
rs <- R.remoteList
let remoteproxies = S.fromList $ map mkproxy $
filter (isproxy . R.gitconfig) rs
clusterproxies <- getClusterProxies remoteproxies
let proxies = S.union remoteproxies clusterproxies
u <- getUUID
oldproxies <- fromMaybe mempty . M.lookup u <$> getProxies
if oldproxies == proxies
then liftIO $ putStrLn "No proxy changes to record."
else do
describechanges oldproxies proxies
recordProxies proxies
next $ return True
where
describechanges oldproxies proxies =
forM_ (S.toList $ S.union oldproxies proxies) $ \p ->
case (S.member p oldproxies, S.member p proxies) of
(False, True) -> liftIO $
putStrLn $ safeOutput $
"Started proxying for " ++ proxyRemoteName p
(True, False) -> liftIO $
putStrLn $ safeOutput $
"Stopped proxying for " ++ proxyRemoteName p
_ -> noop
isproxy c = remoteAnnexProxy c || not (null (remoteAnnexClusterNode c))
mkproxy r = Proxy (R.uuid r) (R.name r)
-- Automatically proxy nodes of any cluster this repository is configured
-- to serve as a gateway for. Also proxy other cluster nodes that are
-- themselves proxied via other remotes.
getClusterProxies :: S.Set Proxy -> Annex (S.Set Proxy)
getClusterProxies remoteproxies = do
myclusters <- (map mkclusterproxy . M.toList . annexClusters)
<$> Annex.getGitConfig
remoteproxiednodes <- findRemoteProxiedClusterNodes
let myproxieduuids = S.map proxyRemoteUUID remoteproxies
<> S.fromList (map proxyRemoteUUID myclusters)
-- filter out nodes we proxy for from the remote proxied nodes
-- to avoid cycles
let remoteproxiednodes' = filter
(\n -> proxyRemoteUUID n `S.notMember` myproxieduuids)
remoteproxiednodes
return (S.fromList (myclusters ++ remoteproxiednodes'))
where
mkclusterproxy (remotename, cu) =
Proxy (fromClusterUUID cu) remotename
findRemoteProxiedClusterNodes :: Annex [Proxy]
findRemoteProxiedClusterNodes = do
myclusters <- (S.fromList . M.elems . annexClusters)
<$> Annex.getGitConfig
clusternodes <- clusterNodeUUIDs <$> getClusters
let isproxiedclusternode r
| isJust (remoteAnnexProxiedBy (R.gitconfig r)) =
case M.lookup (ClusterNodeUUID (R.uuid r)) clusternodes of
Nothing -> False
Just s -> not $ S.null $
S.intersection s myclusters
| otherwise = False
(map asproxy . filter isproxiedclusternode)
<$> R.remoteList
where
asproxy r = Proxy (R.uuid r) (R.name r)

View file

@ -11,6 +11,7 @@ import Command
import Upgrade import Upgrade
import Annex.Version import Annex.Version
import Annex.Init import Annex.Init
import Annex.Startup
cmd :: Command cmd :: Command
cmd = dontCheck cmd = dontCheck
@ -46,6 +47,6 @@ start (UpgradeOptions { autoOnly = True }) =
start _ = start _ =
starting "upgrade" (ActionItemOther Nothing) (SeekInput []) $ do starting "upgrade" (ActionItemOther Nothing) (SeekInput []) $ do
whenM (isNothing <$> getVersion) $ do whenM (isNothing <$> getVersion) $ do
initialize Nothing Nothing initialize startupAnnex Nothing Nothing
r <- upgrade False latestVersion r <- upgrade False latestVersion
next $ return r next $ return r

View file

@ -15,6 +15,7 @@ import Logs.Trust
import Logs.Web import Logs.Web
import Remote.Web (getWebUrls) import Remote.Web (getWebUrls)
import Annex.UUID import Annex.UUID
import Annex.NumCopies
import qualified Utility.Format import qualified Utility.Format
import qualified Command.Find import qualified Command.Find
@ -86,7 +87,7 @@ perform o remotemap key ai = do
(untrustedlocations, safelocations) <- trustPartition UnTrusted locations (untrustedlocations, safelocations) <- trustPartition UnTrusted locations
case formatOption o of case formatOption o of
Nothing -> do Nothing -> do
let num = length safelocations let num = numCopiesCount safelocations
showNote $ UnquotedString $ show num ++ " " ++ copiesplural num showNote $ UnquotedString $ show num ++ " " ++ copiesplural num
pp <- ppwhereis "whereis" safelocations urls pp <- ppwhereis "whereis" safelocations urls
unless (null safelocations) $ unless (null safelocations) $

View file

@ -184,12 +184,6 @@ commit commitmode allowempty message branch parentrefs repo =
update' branch sha repo update' branch sha repo
return $ Just sha return $ Just sha
Nothing -> return Nothing Nothing -> return Nothing
where
cancommit tree
| allowempty = return True
| otherwise = case parentrefs of
[p] -> maybe False (tree /=) <$> Git.Ref.tree p repo
_ -> return True
{- Same as commit but without updating any branch. -} {- Same as commit but without updating any branch. -}
commitSha :: CommitMode -> Bool -> String -> [Ref] -> Repo -> IO (Maybe Sha) commitSha :: CommitMode -> Bool -> String -> [Ref] -> Repo -> IO (Maybe Sha)

View file

@ -65,9 +65,13 @@ makeLegalName s = case filter legal $ replace "/" "_" s of
{- Only alphanumerics, and a few common bits of punctuation common {- Only alphanumerics, and a few common bits of punctuation common
- in hostnames. -} - in hostnames. -}
legal '_' = True legal '_' = True
legal '-' = True
legal '.' = True legal '.' = True
legal c = isAlphaNum c legal c = isAlphaNum c
isLegalName :: String -> Bool
isLegalName s = s == makeLegalName s
data RemoteLocation = RemoteUrl String | RemotePath FilePath data RemoteLocation = RemoteUrl String | RemotePath FilePath
deriving (Eq, Show) deriving (Eq, Show)

View file

@ -84,6 +84,9 @@ instance Default ConfigValue where
fromConfigKey :: ConfigKey -> String fromConfigKey :: ConfigKey -> String
fromConfigKey (ConfigKey s) = decodeBS s fromConfigKey (ConfigKey s) = decodeBS s
fromConfigKey' :: ConfigKey -> S.ByteString
fromConfigKey' (ConfigKey s) = s
instance Show ConfigKey where instance Show ConfigKey where
show = fromConfigKey show = fromConfigKey

View file

@ -408,7 +408,7 @@ limitCopies want = case splitc ':' want of
go' n good notpresent key = do go' n good notpresent key = do
us <- filter (`S.notMember` notpresent) us <- filter (`S.notMember` notpresent)
<$> (filterM good =<< Remote.keyLocations key) <$> (filterM good =<< Remote.keyLocations key)
return $ length us >= n return $ numCopiesCount us >= n
checktrust checker u = checker <$> lookupTrust u checktrust checker u = checker <$> lookupTrust u
checkgroup g u = S.member g <$> lookupGroups u checkgroup g u = S.member g <$> lookupGroups u
parsetrustspec s parsetrustspec s
@ -442,7 +442,8 @@ limitLackingCopies desc approx want = case readish want of
MatchingUserInfo {} -> approxNumCopies MatchingUserInfo {} -> approxNumCopies
us <- filter (`S.notMember` notpresent) us <- filter (`S.notMember` notpresent)
<$> (trustExclude UnTrusted =<< Remote.keyLocations key) <$> (trustExclude UnTrusted =<< Remote.keyLocations key)
return $ fromNumCopies numcopies - length us >= needed let vs nhave numcopies' = numcopies' - nhave >= needed
return $ numCopiesCheck'' us vs numcopies
approxNumCopies = fromMaybe defaultNumCopies <$> getGlobalNumCopies approxNumCopies = fromMaybe defaultNumCopies <$> getGlobalNumCopies
{- Match keys that are unused. {- Match keys that are unused.

View file

@ -98,6 +98,8 @@ topLevelOldUUIDBasedLogs =
topLevelNewUUIDBasedLogs :: [RawFilePath] topLevelNewUUIDBasedLogs :: [RawFilePath]
topLevelNewUUIDBasedLogs = topLevelNewUUIDBasedLogs =
[ exportLog [ exportLog
, proxyLog
, clusterLog
] ]
{- Other top-level logs. -} {- Other top-level logs. -}
@ -154,6 +156,12 @@ multicastLog = "multicast.log"
exportLog :: RawFilePath exportLog :: RawFilePath
exportLog = "export.log" exportLog = "export.log"
proxyLog :: RawFilePath
proxyLog = "proxy.log"
clusterLog :: RawFilePath
clusterLog = "cluster.log"
{- This is not a log file, it's where exported treeishes get grafted into {- This is not a log file, it's where exported treeishes get grafted into
- the git-annex branch. -} - the git-annex branch. -}
exportTreeGraftPoint :: RawFilePath exportTreeGraftPoint :: RawFilePath

41
Logs/Cluster.hs Normal file
View file

@ -0,0 +1,41 @@
{- git-annex cluster log
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings, TupleSections #-}
module Logs.Cluster (
module Types.Cluster,
getClusters,
loadClusters,
recordCluster,
) where
import qualified Annex
import Annex.Common
import Types.Cluster
import Logs.Cluster.Basic
import Logs.Trust
import qualified Data.Map as M
import qualified Data.Set as S
getClusters :: Annex Clusters
getClusters = maybe loadClusters return =<< Annex.getState Annex.clusters
{- Loads the clusters and caches it for later.
-
- This takes care of removing dead nodes from clusters,
- to avoid inserting the cluster uuid into the location
- log when only dead nodes contain the content of a key.
-}
loadClusters :: Annex Clusters
loadClusters = do
dead <- (S.fromList . map ClusterNodeUUID)
<$> trustGet DeadTrusted
clusters <- getClustersWith (M.map (`S.difference` dead))
Annex.changeState $ \s -> s { Annex.clusters = Just clusters }
return clusters

91
Logs/Cluster/Basic.hs Normal file
View file

@ -0,0 +1,91 @@
{- git-annex cluster log, basics
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings, TupleSections #-}
module Logs.Cluster.Basic (
module Types.Cluster,
getClustersWith,
recordCluster,
) where
import qualified Annex
import Annex.Common
import qualified Annex.Branch
import Types.Cluster
import Logs
import Logs.UUIDBased
import Logs.MapLog
import qualified Data.Set as S
import qualified Data.Map as M
import Data.ByteString.Builder
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import qualified Data.ByteString.Lazy as L
{- Gets the clusters. Note that this includes any dead nodes,
- unless a function is provided to remove them.
-}
getClustersWith
:: (M.Map ClusterUUID (S.Set ClusterNodeUUID)
-> M.Map ClusterUUID (S.Set ClusterNodeUUID))
-> Annex Clusters
getClustersWith removedeadnodes = do
m <- removedeadnodes
. convclusteruuids
. M.map value
. fromMapLog
. parseClusterLog
<$> Annex.Branch.get clusterLog
return $ Clusters
{ clusterUUIDs = m
, clusterNodeUUIDs = M.foldlWithKey inverter mempty m
}
where
convclusteruuids :: M.Map UUID (S.Set ClusterNodeUUID) -> M.Map ClusterUUID (S.Set ClusterNodeUUID)
convclusteruuids = M.fromList
. mapMaybe (\(mk, v) -> (, v) <$> mk)
. M.toList . M.mapKeys mkClusterUUID
inverter m k v = M.unionWith (<>) m
(M.fromList (map (, S.singleton k) (S.toList v)))
recordCluster :: ClusterUUID -> S.Set ClusterNodeUUID -> Annex ()
recordCluster clusteruuid nodeuuids = do
-- If a private UUID has been configured as a cluster node,
-- avoid leaking it into the git-annex log.
privateuuids <- annexPrivateRepos <$> Annex.getGitConfig
let nodeuuids' = S.filter
(\(ClusterNodeUUID n) -> S.notMember n privateuuids)
nodeuuids
c <- currentVectorClock
Annex.Branch.change (Annex.Branch.RegardingUUID [fromClusterUUID clusteruuid]) clusterLog $
(buildLogNew buildClusterNodeList)
. changeLog c (fromClusterUUID clusteruuid) nodeuuids'
. parseClusterLog
buildClusterNodeList :: S.Set ClusterNodeUUID -> Builder
buildClusterNodeList = assemble
. map (buildUUID . fromClusterNodeUUID)
. S.toList
where
assemble [] = mempty
assemble (x:[]) = x
assemble (x:y:l) = x <> " " <> assemble (y:l)
parseClusterLog :: L.ByteString -> Log (S.Set ClusterNodeUUID)
parseClusterLog = parseLogNew parseClusterNodeList
parseClusterNodeList :: A.Parser (S.Set ClusterNodeUUID)
parseClusterNodeList = S.fromList <$> many parseword
where
parseword = parsenode
<* ((const () <$> A8.char ' ') <|> A.endOfInput)
parsenode = ClusterNodeUUID
<$> (toUUID <$> A8.takeWhile1 (/= ' '))

View file

@ -22,9 +22,6 @@ module Logs.Export (
getExportExcluded, getExportExcluded,
) where ) where
import qualified Data.Map as M
import qualified Data.ByteString as B
import Annex.Common import Annex.Common
import qualified Annex.Branch import qualified Annex.Branch
import qualified Git import qualified Git
@ -38,6 +35,8 @@ import qualified Git.LsTree
import qualified Git.Tree import qualified Git.Tree
import Annex.UUID import Annex.UUID
import qualified Data.Map as M
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import Data.Either import Data.Either
import Data.Char import Data.Char

View file

@ -8,7 +8,7 @@
- Repositories record their UUID and the date when they --get or --drop - Repositories record their UUID and the date when they --get or --drop
- a value. - a value.
- -
- Copyright 2010-2023 Joey Hess <id@joeyh.name> - Copyright 2010-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -41,6 +41,7 @@ import Annex.Common
import qualified Annex.Branch import qualified Annex.Branch
import Logs import Logs
import Logs.Presence import Logs.Presence
import Types.Cluster
import Annex.UUID import Annex.UUID
import Annex.CatFile import Annex.CatFile
import Annex.VectorClock import Annex.VectorClock
@ -49,6 +50,8 @@ import qualified Annex
import Data.Time.Clock import Data.Time.Clock
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import qualified Data.Set as S
{- Log a change in the presence of a key's value in current repository. -} {- Log a change in the presence of a key's value in current repository. -}
logStatus :: Key -> LogStatus -> Annex () logStatus :: Key -> LogStatus -> Annex ()
@ -66,15 +69,22 @@ logStatusAfter key a = ifM a
, return False , return False
) )
{- Log a change in the presence of a key's value in a repository. -} {- Log a change in the presence of a key's value in a repository.
-
- Cluster UUIDs are not logged. Instead, when a node of a cluster is
- logged to contain a key, loading the log will include the cluster's
- UUID.
-}
logChange :: Key -> UUID -> LogStatus -> Annex () logChange :: Key -> UUID -> LogStatus -> Annex ()
logChange key u@(UUID _) s = do logChange key u@(UUID _) s
config <- Annex.getGitConfig | isClusterUUID u = noop
maybeAddLog | otherwise = do
(Annex.Branch.RegardingUUID [u]) config <- Annex.getGitConfig
(locationLogFile config key) maybeAddLog
s (Annex.Branch.RegardingUUID [u])
(LogInfo (fromUUID u)) (locationLogFile config key)
s
(LogInfo (fromUUID u))
logChange _ NoUUID _ = noop logChange _ NoUUID _ = noop
{- Returns a list of repository UUIDs that, according to the log, have {- Returns a list of repository UUIDs that, according to the log, have
@ -97,14 +107,29 @@ loggedLocationsRef :: Ref -> Annex [UUID]
loggedLocationsRef ref = map (toUUID . fromLogInfo) . getLog <$> catObject ref loggedLocationsRef ref = map (toUUID . fromLogInfo) . getLog <$> catObject ref
{- Parses the content of a log file and gets the locations in it. -} {- Parses the content of a log file and gets the locations in it. -}
parseLoggedLocations :: L.ByteString -> [UUID] parseLoggedLocations :: Clusters -> L.ByteString -> [UUID]
parseLoggedLocations l = map (toUUID . fromLogInfo . info) parseLoggedLocations clusters l = addClusterUUIDs clusters $
(filterPresent (parseLog l)) map (toUUID . fromLogInfo . info)
(filterPresent (parseLog l))
getLoggedLocations :: (RawFilePath -> Annex [LogInfo]) -> Key -> Annex [UUID] getLoggedLocations :: (RawFilePath -> Annex [LogInfo]) -> Key -> Annex [UUID]
getLoggedLocations getter key = do getLoggedLocations getter key = do
config <- Annex.getGitConfig config <- Annex.getGitConfig
map (toUUID . fromLogInfo) <$> getter (locationLogFile config key) locs <- map (toUUID . fromLogInfo) <$> getter (locationLogFile config key)
clusters <- getClusters
return $ addClusterUUIDs clusters locs
-- Add UUIDs of any clusters whose nodes are in the list.
addClusterUUIDs :: Clusters -> [UUID] -> [UUID]
addClusterUUIDs clusters locs
| M.null clustermap = locs
-- ^ optimisation for common case of no clusters
| otherwise = clusterlocs ++ locs
where
clustermap = clusterNodeUUIDs clusters
clusterlocs = map fromClusterUUID $ S.toList $
S.unions $ mapMaybe findclusters locs
findclusters u = M.lookup (ClusterNodeUUID u) clustermap
{- Is there a location log for the key? True even for keys with no {- Is there a location log for the key? True even for keys with no
- remaining locations. -} - remaining locations. -}
@ -204,6 +229,7 @@ overLocationLogs'
-> Annex v -> Annex v
overLocationLogs' iv discarder keyaction = do overLocationLogs' iv discarder keyaction = do
config <- Annex.getGitConfig config <- Annex.getGitConfig
clusters <- getClusters
let getk = locationLogFileKey config let getk = locationLogFileKey config
let go v reader = reader >>= \case let go v reader = reader >>= \case
@ -214,11 +240,16 @@ overLocationLogs' iv discarder keyaction = do
ifM (checkDead k) ifM (checkDead k)
( go v reader ( go v reader
, do , do
!v' <- keyaction k (maybe [] parseLoggedLocations content) v !v' <- keyaction k (maybe [] (parseLoggedLocations clusters) content) v
go v' reader go v' reader
) )
Nothing -> return v Nothing -> return v
Annex.Branch.overBranchFileContents getk (go iv) >>= \case Annex.Branch.overBranchFileContents getk (go iv) >>= \case
Just r -> return r Just r -> return r
Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)" Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on allu keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
-- Cannot import Logs.Cluster due to a cycle.
-- Annex.clusters gets populated when starting up git-annex.
getClusters :: Annex Clusters
getClusters = fromMaybe noClusters <$> Annex.getState Annex.clusters

88
Logs/Proxy.hs Normal file
View file

@ -0,0 +1,88 @@
{- git-annex proxy log
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Logs.Proxy (
Proxy(..),
getProxies,
recordProxies,
) where
import qualified Annex
import Annex.Common
import qualified Annex.Branch
import qualified Git.Remote
import Git.Types
import Logs
import Logs.UUIDBased
import Logs.MapLog
import Annex.UUID
import qualified Data.Set as S
import qualified Data.Map as M
import Data.ByteString.Builder
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import qualified Data.ByteString.Lazy as L
data Proxy = Proxy
{ proxyRemoteUUID :: UUID
, proxyRemoteName :: RemoteName
} deriving (Show, Eq, Ord)
getProxies :: Annex (M.Map UUID (S.Set Proxy))
getProxies = M.map (validateProxies . value) . fromMapLog . parseProxyLog
<$> Annex.Branch.get proxyLog
recordProxies :: S.Set Proxy -> Annex ()
recordProxies proxies = do
-- If a private UUID has been configured as a proxy, avoid leaking
-- it into the git-annex log.
privateuuids <- annexPrivateRepos <$> Annex.getGitConfig
let proxies' = S.filter
(\p -> S.notMember (proxyRemoteUUID p) privateuuids) proxies
c <- currentVectorClock
u <- getUUID
Annex.Branch.change (Annex.Branch.RegardingUUID [u]) proxyLog $
(buildLogNew buildProxyList)
. changeLog c u proxies'
. parseProxyLog
buildProxyList :: S.Set Proxy -> Builder
buildProxyList = assemble . map fmt . S.toList
where
fmt p = buildUUID (proxyRemoteUUID p)
<> colon
<> byteString (encodeBS (proxyRemoteName p))
colon = charUtf8 ':'
assemble [] = mempty
assemble (x:[]) = x
assemble (x:y:l) = x <> " " <> assemble (y:l)
parseProxyLog :: L.ByteString -> Log (S.Set Proxy)
parseProxyLog = parseLogNew parseProxyList
parseProxyList :: A.Parser (S.Set Proxy)
parseProxyList = S.fromList <$> many parseword
where
parseword = parseproxy
<* ((const () <$> A8.char ' ') <|> A.endOfInput)
parseproxy = Proxy
<$> (toUUID <$> A8.takeWhile1 (/= colon))
<* (const () <$> A8.char colon)
<*> (decodeBS <$> A8.takeWhile1 (/= ' '))
colon = ':'
-- Filter out any proxies that have a name that is not allowed as a git
-- remote name. This avoids any security problems with eg escape
-- characters in names, and ensures the name can be used anywhere a usual
-- git remote name can be used without causing issues.
validateProxies :: S.Set Proxy -> S.Set Proxy
validateProxies = S.filter $ Git.Remote.isLegalName . proxyRemoteName

View file

@ -2,13 +2,14 @@
- -
- See doc/design/p2p_protocol.mdwn - See doc/design/p2p_protocol.mdwn
- -
- Copyright 2016-2021 Joey Hess <id@joeyh.name> - Copyright 2016-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
{-# LANGUAGE DeriveFunctor, TemplateHaskell, FlexibleContexts #-} {-# LANGUAGE DeriveFunctor, TemplateHaskell, FlexibleContexts #-}
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, RankNTypes #-} {-# LANGUAGE TypeSynonymInstances, FlexibleInstances, RankNTypes #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
module P2P.Protocol where module P2P.Protocol where
@ -37,6 +38,7 @@ import System.IO
import qualified System.FilePath.ByteString as P import qualified System.FilePath.ByteString as P
import qualified Data.ByteString as B import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S
import Data.Char import Data.Char
import Control.Applicative import Control.Applicative
import Prelude import Prelude
@ -54,7 +56,7 @@ defaultProtocolVersion :: ProtocolVersion
defaultProtocolVersion = ProtocolVersion 0 defaultProtocolVersion = ProtocolVersion 0
maxProtocolVersion :: ProtocolVersion maxProtocolVersion :: ProtocolVersion
maxProtocolVersion = ProtocolVersion 1 maxProtocolVersion = ProtocolVersion 2
newtype ProtoAssociatedFile = ProtoAssociatedFile AssociatedFile newtype ProtoAssociatedFile = ProtoAssociatedFile AssociatedFile
deriving (Show) deriving (Show)
@ -65,6 +67,9 @@ data Service = UploadPack | ReceivePack
data Validity = Valid | Invalid data Validity = Valid | Invalid
deriving (Show) deriving (Show)
newtype Bypass = Bypass (S.Set UUID)
deriving (Show, Monoid, Semigroup)
-- | Messages in the protocol. The peer that makes the connection -- | Messages in the protocol. The peer that makes the connection
-- always initiates requests, and the other peer makes responses to them. -- always initiates requests, and the other peer makes responses to them.
@ -85,8 +90,12 @@ data Message
| PUT ProtoAssociatedFile Key | PUT ProtoAssociatedFile Key
| PUT_FROM Offset | PUT_FROM Offset
| ALREADY_HAVE | ALREADY_HAVE
| ALREADY_HAVE_PLUS [UUID]
| SUCCESS | SUCCESS
| SUCCESS_PLUS [UUID]
| FAILURE | FAILURE
| FAILURE_PLUS [UUID]
| BYPASS Bypass
| DATA Len -- followed by bytes of data | DATA Len -- followed by bytes of data
| VALIDITY Validity | VALIDITY Validity
| ERROR String | ERROR String
@ -109,8 +118,12 @@ instance Proto.Sendable Message where
formatMessage (PUT af key) = ["PUT", Proto.serialize af, Proto.serialize key] formatMessage (PUT af key) = ["PUT", Proto.serialize af, Proto.serialize key]
formatMessage (PUT_FROM offset) = ["PUT-FROM", Proto.serialize offset] formatMessage (PUT_FROM offset) = ["PUT-FROM", Proto.serialize offset]
formatMessage ALREADY_HAVE = ["ALREADY-HAVE"] formatMessage ALREADY_HAVE = ["ALREADY-HAVE"]
formatMessage (ALREADY_HAVE_PLUS uuids) = ("ALREADY-HAVE-PLUS":map Proto.serialize uuids)
formatMessage SUCCESS = ["SUCCESS"] formatMessage SUCCESS = ["SUCCESS"]
formatMessage (SUCCESS_PLUS uuids) = ("SUCCESS-PLUS":map Proto.serialize uuids)
formatMessage FAILURE = ["FAILURE"] formatMessage FAILURE = ["FAILURE"]
formatMessage (FAILURE_PLUS uuids) = ("FAILURE-PLUS":map Proto.serialize uuids)
formatMessage (BYPASS (Bypass uuids)) = ("BYPASS":map Proto.serialize (S.toList uuids))
formatMessage (VALIDITY Valid) = ["VALID"] formatMessage (VALIDITY Valid) = ["VALID"]
formatMessage (VALIDITY Invalid) = ["INVALID"] formatMessage (VALIDITY Invalid) = ["INVALID"]
formatMessage (DATA len) = ["DATA", Proto.serialize len] formatMessage (DATA len) = ["DATA", Proto.serialize len]
@ -133,8 +146,12 @@ instance Proto.Receivable Message where
parseCommand "PUT" = Proto.parse2 PUT parseCommand "PUT" = Proto.parse2 PUT
parseCommand "PUT-FROM" = Proto.parse1 PUT_FROM parseCommand "PUT-FROM" = Proto.parse1 PUT_FROM
parseCommand "ALREADY-HAVE" = Proto.parse0 ALREADY_HAVE parseCommand "ALREADY-HAVE" = Proto.parse0 ALREADY_HAVE
parseCommand "ALREADY-HAVE-PLUS" = Proto.parseList ALREADY_HAVE_PLUS
parseCommand "SUCCESS" = Proto.parse0 SUCCESS parseCommand "SUCCESS" = Proto.parse0 SUCCESS
parseCommand "SUCCESS-PLUS" = Proto.parseList SUCCESS_PLUS
parseCommand "FAILURE" = Proto.parse0 FAILURE parseCommand "FAILURE" = Proto.parse0 FAILURE
parseCommand "FAILURE-PLUS" = Proto.parseList FAILURE_PLUS
parseCommand "BYPASS" = Proto.parseList (BYPASS . Bypass . S.fromList)
parseCommand "DATA" = Proto.parse1 DATA parseCommand "DATA" = Proto.parse1 DATA
parseCommand "ERROR" = Proto.parse1 ERROR parseCommand "ERROR" = Proto.parse1 ERROR
parseCommand "VALID" = Proto.parse0 (VALIDITY Valid) parseCommand "VALID" = Proto.parse0 (VALIDITY Valid)
@ -164,12 +181,15 @@ instance Proto.Serializable Service where
-- its serialization cannot contain any whitespace. This is handled -- its serialization cannot contain any whitespace. This is handled
-- by replacing whitespace with '%' (and '%' with '%%') -- by replacing whitespace with '%' (and '%' with '%%')
-- --
-- When deserializing an AssociatedFile from a peer, it's sanitized, -- When deserializing an AssociatedFile from a peer, that escaping is
-- to avoid any unusual characters that might cause problems when it's -- reversed. Unfortunately, an input tab will be deescaped to a space
-- displayed to the user. -- though. And it's sanitized, to avoid any control characters that might
-- cause problems when it's displayed to the user.
-- --
-- These mungings are ok, because a ProtoAssociatedFile is only ever displayed -- These mungings are ok, because a ProtoAssociatedFile is normally
-- to the user and does not need to match a file on disk. -- only displayed to the user and so does not need to match a file on disk.
-- It may also be used in checking preferred content, which is very
-- unlikely to care about spaces vs tabs or control characters.
instance Proto.Serializable ProtoAssociatedFile where instance Proto.Serializable ProtoAssociatedFile where
serialize (ProtoAssociatedFile (AssociatedFile Nothing)) = "" serialize (ProtoAssociatedFile (AssociatedFile Nothing)) = ""
serialize (ProtoAssociatedFile (AssociatedFile (Just af))) = serialize (ProtoAssociatedFile (AssociatedFile (Just af))) =
@ -244,7 +264,7 @@ data LocalF c
| ContentSize Key (Maybe Len -> c) | ContentSize Key (Maybe Len -> c)
-- ^ Gets size of the content of a key, when the full content is -- ^ Gets size of the content of a key, when the full content is
-- present. -- present.
| ReadContent Key AssociatedFile Offset (L.ByteString -> Proto Validity -> Proto Bool) (Bool -> c) | ReadContent Key AssociatedFile Offset (L.ByteString -> Proto Validity -> Proto (Maybe [UUID])) (Maybe [UUID] -> c)
-- ^ Reads the content of a key and sends it to the callback. -- ^ Reads the content of a key and sends it to the callback.
-- Must run the callback, or terminate the protocol connection. -- Must run the callback, or terminate the protocol connection.
-- --
@ -324,6 +344,15 @@ negotiateProtocolVersion preferredversion = do
Just (ERROR _) -> return () Just (ERROR _) -> return ()
_ -> net $ sendMessage (ERROR "expected VERSION") _ -> net $ sendMessage (ERROR "expected VERSION")
sendBypass :: Bypass -> Proto ()
sendBypass bypass@(Bypass s)
| S.null s = return ()
| otherwise = do
ver <- net getProtocolVersion
if ver >= ProtocolVersion 2
then net $ sendMessage (BYPASS bypass)
else return ()
checkPresent :: Key -> Proto Bool checkPresent :: Key -> Proto Bool
checkPresent key = do checkPresent key = do
net $ sendMessage (CHECKPRESENT key) net $ sendMessage (CHECKPRESENT key)
@ -349,10 +378,10 @@ lockContentWhile runproto key a = bracket setup cleanup a
cleanup True = runproto () $ net $ sendMessage UNLOCKCONTENT cleanup True = runproto () $ net $ sendMessage UNLOCKCONTENT
cleanup False = return () cleanup False = return ()
remove :: Key -> Proto Bool remove :: Key -> Proto (Bool, Maybe [UUID])
remove key = do remove key = do
net $ sendMessage (REMOVE key) net $ sendMessage (REMOVE key)
checkSuccess checkSuccessFailurePlus
get :: FilePath -> Key -> Maybe IncrementalVerifier -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification) get :: FilePath -> Key -> Maybe IncrementalVerifier -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification)
get dest key iv af m p = get dest key iv af m p =
@ -362,16 +391,17 @@ get dest key iv af m p =
sizer = fileSize dest sizer = fileSize dest
storer = storeContentTo dest iv storer = storeContentTo dest iv
put :: Key -> AssociatedFile -> MeterUpdate -> Proto Bool put :: Key -> AssociatedFile -> MeterUpdate -> Proto (Maybe [UUID])
put key af p = do put key af p = do
net $ sendMessage (PUT (ProtoAssociatedFile af) key) net $ sendMessage (PUT (ProtoAssociatedFile af) key)
r <- net receiveMessage r <- net receiveMessage
case r of case r of
Just (PUT_FROM offset) -> sendContent key af offset p Just (PUT_FROM offset) -> sendContent key af offset p
Just ALREADY_HAVE -> return True Just ALREADY_HAVE -> return (Just [])
Just (ALREADY_HAVE_PLUS uuids) -> return (Just uuids)
_ -> do _ -> do
net $ sendMessage (ERROR "expected PUT_FROM or ALREADY_HAVE") net $ sendMessage (ERROR "expected PUT_FROM or ALREADY_HAVE")
return False return Nothing
data ServerHandler a data ServerHandler a
= ServerGot a = ServerGot a
@ -440,8 +470,6 @@ data ServerMode
serveAuthed :: ServerMode -> UUID -> Proto () serveAuthed :: ServerMode -> UUID -> Proto ()
serveAuthed servermode myuuid = void $ serverLoop handler serveAuthed servermode myuuid = void $ serverLoop handler
where where
readonlyerror = net $ sendMessage (ERROR "this repository is read-only; write access denied")
appendonlyerror = net $ sendMessage (ERROR "this repository is append-only; removal denied")
handler (VERSION theirversion) = do handler (VERSION theirversion) = do
let v = min theirversion maxProtocolVersion let v = min theirversion maxProtocolVersion
net $ setProtocolVersion v net $ setProtocolVersion v
@ -459,45 +487,42 @@ serveAuthed servermode myuuid = void $ serverLoop handler
handler (CHECKPRESENT key) = do handler (CHECKPRESENT key) = do
sendSuccess =<< local (checkContentPresent key) sendSuccess =<< local (checkContentPresent key)
return ServerContinue return ServerContinue
handler (REMOVE key) = case servermode of handler (REMOVE key) =
ServeReadWrite -> do checkREMOVEServerMode servermode $ \case
sendSuccess =<< local (removeContent key) Nothing -> do
return ServerContinue sendSuccess =<< local (removeContent key)
ServeAppendOnly -> do return ServerContinue
appendonlyerror Just notallowed -> do
return ServerContinue notallowed
ServeReadOnly -> do return ServerContinue
readonlyerror handler (PUT (ProtoAssociatedFile af) key) =
return ServerContinue checkPUTServerMode servermode $ \case
handler (PUT (ProtoAssociatedFile af) key) = case servermode of Nothing -> handleput af key
ServeReadWrite -> handleput af key Just notallowed -> do
ServeAppendOnly -> handleput af key notallowed
ServeReadOnly -> do return ServerContinue
readonlyerror
return ServerContinue
handler (GET offset (ProtoAssociatedFile af) key) = do handler (GET offset (ProtoAssociatedFile af) key) = do
void $ sendContent key af offset nullMeterUpdate void $ sendContent key af offset nullMeterUpdate
-- setPresent not called because the peer may have -- setPresent not called because the peer may have
-- requested the data but not permanently stored it. -- requested the data but not permanently stored it.
return ServerContinue return ServerContinue
handler (CONNECT service) = do handler (CONNECT service) = do
let goahead = net $ relayService service
case (servermode, service) of
(ServeReadWrite, _) -> goahead
(ServeAppendOnly, UploadPack) -> goahead
-- git protocol could be used to overwrite
-- refs or something, so don't allow
(ServeAppendOnly, ReceivePack) -> readonlyerror
(ServeReadOnly, UploadPack) -> goahead
(ServeReadOnly, ReceivePack) -> readonlyerror
-- After connecting to git, there may be unconsumed data -- After connecting to git, there may be unconsumed data
-- from the git processes hanging around (even if they -- from the git processes hanging around (even if they
-- exited successfully), so stop serving this connection. -- exited successfully), so stop serving this connection.
return $ ServerGot () let endit = return $ ServerGot ()
checkCONNECTServerMode service servermode $ \case
Nothing -> do
net $ relayService service
endit
Just notallowed -> do
notallowed
endit
handler NOTIFYCHANGE = do handler NOTIFYCHANGE = do
refs <- local waitRefChange refs <- local waitRefChange
net $ sendMessage (CHANGED refs) net $ sendMessage (CHANGED refs)
return ServerContinue return ServerContinue
handler (BYPASS _) = return ServerContinue
handler _ = return ServerUnexpected handler _ = return ServerUnexpected
handleput af key = do handleput af key = do
@ -512,7 +537,40 @@ serveAuthed servermode myuuid = void $ serverLoop handler
local $ setPresent key myuuid local $ setPresent key myuuid
return ServerContinue return ServerContinue
sendContent :: Key -> AssociatedFile -> Offset -> MeterUpdate -> Proto Bool sendReadOnlyError :: Proto ()
sendReadOnlyError = net $ sendMessage $
ERROR "this repository is read-only; write access denied"
sendAppendOnlyError :: Proto ()
sendAppendOnlyError = net $ sendMessage $
ERROR "this repository is append-only; removal denied"
checkPUTServerMode :: Monad m => ServerMode -> (Maybe (Proto ()) -> m a) -> m a
checkPUTServerMode servermode a =
case servermode of
ServeReadWrite -> a Nothing
ServeAppendOnly -> a Nothing
ServeReadOnly -> a (Just sendReadOnlyError)
checkREMOVEServerMode :: Monad m => ServerMode -> (Maybe (Proto ()) -> m a) -> m a
checkREMOVEServerMode servermode a =
case servermode of
ServeReadWrite -> a Nothing
ServeAppendOnly -> a (Just sendAppendOnlyError)
ServeReadOnly -> a (Just sendReadOnlyError)
checkCONNECTServerMode :: Monad m => Service -> ServerMode -> (Maybe (Proto ()) -> m a) -> m a
checkCONNECTServerMode service servermode a =
case (servermode, service) of
(ServeReadWrite, _) -> a Nothing
(ServeAppendOnly, UploadPack) -> a Nothing
-- git protocol could be used to overwrite
-- refs or something, so don't allow
(ServeAppendOnly, ReceivePack) -> a (Just sendReadOnlyError)
(ServeReadOnly, UploadPack) -> a Nothing
(ServeReadOnly, ReceivePack) -> a (Just sendReadOnlyError)
sendContent :: Key -> AssociatedFile -> Offset -> MeterUpdate -> Proto (Maybe [UUID])
sendContent key af offset@(Offset n) p = go =<< local (contentSize key) sendContent key af offset@(Offset n) p = go =<< local (contentSize key)
where where
go (Just (Len totallen)) = do go (Just (Len totallen)) = do
@ -531,7 +589,7 @@ sendContent key af offset@(Offset n) p = go =<< local (contentSize key)
ver <- net getProtocolVersion ver <- net getProtocolVersion
when (ver >= ProtocolVersion 1) $ when (ver >= ProtocolVersion 1) $
net . sendMessage . VALIDITY =<< validitycheck net . sendMessage . VALIDITY =<< validitycheck
checkSuccess checkSuccessPlus
receiveContent receiveContent
:: Observable t :: Observable t
@ -579,6 +637,32 @@ checkSuccess = do
net $ sendMessage (ERROR "expected SUCCESS or FAILURE") net $ sendMessage (ERROR "expected SUCCESS or FAILURE")
return False return False
checkSuccessPlus :: Proto (Maybe [UUID])
checkSuccessPlus =
checkSuccessFailurePlus >>= return . \case
(True, v) -> v
(False, _) -> Nothing
checkSuccessFailurePlus :: Proto (Bool, Maybe [UUID])
checkSuccessFailurePlus = do
ver <- net getProtocolVersion
if ver >= ProtocolVersion 2
then do
ack <- net receiveMessage
case ack of
Just SUCCESS -> return (True, Just [])
Just (SUCCESS_PLUS l) -> return (True, Just l)
Just FAILURE -> return (False, Nothing)
Just (FAILURE_PLUS l) -> return (False, Just l)
_ -> do
net $ sendMessage (ERROR "expected SUCCESS or SUCCESS-PLUS or FAILURE or FAILURE-PLUS")
return (False, Nothing)
else do
ok <- checkSuccess
if ok
then return (True, Just [])
else return (False, Nothing)
sendSuccess :: Bool -> Proto () sendSuccess :: Bool -> Proto ()
sendSuccess True = net $ sendMessage SUCCESS sendSuccess True = net $ sendMessage SUCCESS
sendSuccess False = net $ sendMessage FAILURE sendSuccess False = net $ sendMessage FAILURE

576
P2P/Proxy.hs Normal file
View file

@ -0,0 +1,576 @@
{- P2P protocol proxying
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE RankNTypes, FlexibleContexts, ScopedTypeVariables #-}
{-# LANGUAGE BangPatterns #-}
module P2P.Proxy where
import Annex.Common
import qualified Annex
import P2P.Protocol
import P2P.IO
import Utility.Metered
import Git.FilePath
import Types.Concurrency
import Annex.Concurrent
import qualified Remote
import Data.Either
import Control.Concurrent.STM
import Control.Concurrent.Async
import qualified Control.Concurrent.MSem as MSem
import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S
import GHC.Conc
type ProtoCloser = Annex ()
data ClientSide = ClientSide RunState P2PConnection
data RemoteSide = RemoteSide
{ remote :: Remote
, remoteConnect :: Annex (Maybe (RunState, P2PConnection, ProtoCloser))
, remoteTMVar :: TMVar (RunState, P2PConnection, ProtoCloser)
}
mkRemoteSide :: Remote -> Annex (Maybe (RunState, P2PConnection, ProtoCloser)) -> Annex RemoteSide
mkRemoteSide r remoteconnect = RemoteSide
<$> pure r
<*> pure remoteconnect
<*> liftIO (atomically newEmptyTMVar)
runRemoteSide :: RemoteSide -> Proto a -> Annex (Either ProtoFailure a)
runRemoteSide remoteside a =
liftIO (atomically $ tryReadTMVar $ remoteTMVar remoteside) >>= \case
Just (runst, conn, _closer) -> liftIO $ runNetProto runst conn a
Nothing -> remoteConnect remoteside >>= \case
Just (runst, conn, closer) -> do
liftIO $ atomically $ putTMVar
(remoteTMVar remoteside)
(runst, conn, closer)
liftIO $ runNetProto runst conn a
Nothing -> giveup "Unable to connect to remote."
closeRemoteSide :: RemoteSide -> Annex ()
closeRemoteSide remoteside =
liftIO (atomically $ tryReadTMVar $ remoteTMVar remoteside) >>= \case
Just (_, _, closer) -> closer
Nothing -> return ()
{- Selects what remotes to proxy to for top-level P2P protocol
- actions.
- -}
data ProxySelector = ProxySelector
{ proxyCHECKPRESENT :: Key -> Annex (Maybe RemoteSide)
, proxyLOCKCONTENT :: Key -> Annex (Maybe RemoteSide)
, proxyUNLOCKCONTENT :: Annex (Maybe RemoteSide)
, proxyREMOVE :: Key -> Annex [RemoteSide]
-- ^ remove from all of these remotes
, proxyGET :: Key -> Annex (Maybe RemoteSide)
, proxyPUT :: AssociatedFile -> Key -> Annex [RemoteSide]
-- ^ put to some/all of these remotes
}
singleProxySelector :: RemoteSide -> ProxySelector
singleProxySelector r = ProxySelector
{ proxyCHECKPRESENT = const (pure (Just r))
, proxyLOCKCONTENT = const (pure (Just r))
, proxyUNLOCKCONTENT = pure (Just r)
, proxyREMOVE = const (pure [r])
, proxyGET = const (pure (Just r))
, proxyPUT = const (const (pure [r]))
}
{- To keep this module limited to P2P protocol actions,
- all other actions that a proxy needs to do are provided
- here. -}
data ProxyMethods = ProxyMethods
{ removedContent :: UUID -> Key -> Annex ()
-- ^ called when content is removed from a repository
, addedContent :: UUID -> Key -> Annex ()
-- ^ called when content is added to a repository
}
{- Type of function that takes a error handler, which is
- used to handle a ProtoFailure when receiving a message
- from the client or remote.
-}
type ProtoErrorHandled r =
(forall t. ((t -> Annex r) -> Annex (Either ProtoFailure t) -> Annex r)) -> Annex r
{- This is the first thing run when proxying with a client.
- The client has already authenticated. Most clients will send a
- VERSION message, although version 0 clients will not and will send
- some other message, which is returned to handle later.
-
- But before the client will send VERSION, it needs to see AUTH_SUCCESS.
- So send that, although the connection with the remote is not actually
- brought up yet.
-}
getClientProtocolVersion
:: UUID
-> ClientSide
-> (Maybe (ProtocolVersion, Maybe Message) -> Annex r)
-> ProtoErrorHandled r
getClientProtocolVersion remoteuuid (ClientSide clientrunst clientconn) cont protoerrhandler =
protoerrhandler cont $ client $ getClientProtocolVersion' remoteuuid
where
client = liftIO . runNetProto clientrunst clientconn
getClientProtocolVersion'
:: UUID
-> Proto (Maybe (ProtocolVersion, Maybe Message))
getClientProtocolVersion' remoteuuid = do
net $ sendMessage (AUTH_SUCCESS remoteuuid)
msg <- net receiveMessage
case msg of
Nothing -> return Nothing
Just (VERSION v) ->
-- If the client sends a newer version than we
-- understand, reduce it; we need to parse the
-- protocol too.
let v' = min v maxProtocolVersion
in return (Just (v', Nothing))
Just othermsg -> return
(Just (defaultProtocolVersion, Just othermsg))
{- Send negotiated protocol version to the client.
- With a version 0 client, preserves the other protocol message
- received in getClientProtocolVersion. -}
sendClientProtocolVersion
:: ClientSide
-> Maybe Message
-> ProtocolVersion
-> (Maybe Message -> Annex r)
-> ProtoErrorHandled r
sendClientProtocolVersion (ClientSide clientrunst clientconn) othermsg protocolversion cont protoerrhandler =
case othermsg of
Nothing -> protoerrhandler (\() -> cont Nothing) $
client $ net $ sendMessage $ VERSION protocolversion
Just _ -> cont othermsg
where
client = liftIO . runNetProto clientrunst clientconn
{- When speaking to a version 2 client, get the BYPASS message which may be
- sent immediately after VERSION. Returns any other message to be handled
- later. -}
getClientBypass
:: ClientSide
-> ProtocolVersion
-> Maybe Message
-> ((Bypass, Maybe Message) -> Annex r)
-> ProtoErrorHandled r
getClientBypass (ClientSide clientrunst clientconn) (ProtocolVersion protocolversion) Nothing cont protoerrhandler
| protocolversion < 2 = cont (Bypass S.empty, Nothing)
| otherwise = protoerrhandler cont $
client $ net receiveMessage >>= return . \case
Just (BYPASS bypass) -> (bypass, Nothing)
Just othermsg -> (Bypass S.empty, Just othermsg)
Nothing -> (Bypass S.empty, Nothing)
where
client = liftIO . runNetProto clientrunst clientconn
getClientBypass _ _ (Just othermsg) cont _ =
-- Pass along non-BYPASS message from version 0 client.
cont (Bypass S.empty, (Just othermsg))
{- Proxy between the client and the remote. This picks up after
- sendClientProtocolVersion.
-}
proxy
:: Annex r
-> ProxyMethods
-> ServerMode
-> ClientSide
-> UUID
-> ProxySelector
-> ConcurrencyConfig
-> ProtocolVersion
-- ^ Protocol version being spoken between the proxy and the
-- client. When there are multiple remotes, some may speak an
-- earlier version.
-> Maybe Message
-- ^ non-VERSION message that was received from the client when
-- negotiating protocol version, and has not been responded to yet
-> ProtoErrorHandled r
proxy proxydone proxymethods servermode (ClientSide clientrunst clientconn) remoteuuid proxyselector concurrencyconfig (ProtocolVersion protocolversion) othermsg protoerrhandler = do
case othermsg of
Nothing -> proxynextclientmessage ()
Just message -> proxyclientmessage (Just message)
where
client = liftIO . runNetProto clientrunst clientconn
proxynextclientmessage () = protoerrhandler proxyclientmessage $
client (net receiveMessage)
servermodechecker c a = c servermode $ \case
Nothing -> a
Just notallowed ->
protoerrhandler proxynextclientmessage $
client notallowed
proxyclientmessage Nothing = proxydone
proxyclientmessage (Just message) = case message of
CHECKPRESENT k -> proxyCHECKPRESENT proxyselector k >>= \case
Just remoteside ->
proxyresponse remoteside message
(const proxynextclientmessage)
Nothing ->
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage FAILURE
LOCKCONTENT k -> proxyLOCKCONTENT proxyselector k >>= \case
Just remoteside ->
proxyresponse remoteside message
(const proxynextclientmessage)
Nothing ->
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage FAILURE
UNLOCKCONTENT -> proxyUNLOCKCONTENT proxyselector >>= \case
Just remoteside ->
proxynoresponse remoteside message
proxynextclientmessage
Nothing -> proxynextclientmessage ()
REMOVE k -> do
remotesides <- proxyREMOVE proxyselector k
servermodechecker checkREMOVEServerMode $
handleREMOVE remotesides k message
GET _ _ k -> proxyGET proxyselector k >>= \case
Just remoteside -> handleGET remoteside message
Nothing ->
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage $
ERROR "content not present"
PUT paf k -> do
af <- getassociatedfile paf
remotesides <- proxyPUT proxyselector af k
servermodechecker checkPUTServerMode $
handlePUT remotesides k message
BYPASS _ -> proxynextclientmessage ()
-- These messages involve the git repository, not the
-- annex. So they affect the git repository of the proxy,
-- not the remote.
CONNECT service ->
servermodechecker (checkCONNECTServerMode service) $
-- P2P protocol does not continue after
-- relaying from git.
protoerrhandler (\() -> proxydone) $
client $ net $ relayService service
NOTIFYCHANGE -> protoerr
-- Messages that the client should only send after one of
-- the messages above.
SUCCESS -> protoerr
SUCCESS_PLUS _ -> protoerr
FAILURE -> protoerr
FAILURE_PLUS _ -> protoerr
DATA _ -> protoerr
VALIDITY _ -> protoerr
-- If the client errors out, give up.
ERROR msg -> giveup $ "client error: " ++ msg
-- Messages that only the server should send.
CONNECTDONE _ -> protoerr
CHANGED _ -> protoerr
AUTH_SUCCESS _ -> protoerr
AUTH_FAILURE -> protoerr
PUT_FROM _ -> protoerr
ALREADY_HAVE -> protoerr
ALREADY_HAVE_PLUS _ -> protoerr
-- Early messages that the client should not send now.
AUTH _ _ -> protoerr
VERSION _ -> protoerr
-- Send a message to the remote, send its response back to the
-- client, and pass it to the continuation.
proxyresponse remoteside message a =
getresponse (runRemoteSide remoteside) message $ \resp ->
protoerrhandler (a resp) $
client $ net $ sendMessage resp
-- Send a message to the remote, that it will not respond to.
proxynoresponse remoteside message a =
protoerrhandler a $
runRemoteSide remoteside $ net $ sendMessage message
-- Send a message to the endpoint and get back its response.
getresponse endpoint message handleresp =
protoerrhandler (withresp handleresp) $
endpoint $ net $ do
sendMessage message
receiveMessage
withresp a (Just resp) = a resp
-- Whichever of the remote or client the message was read from
-- hung up.
withresp _ Nothing = proxydone
-- Read a message from one party, send it to the other,
-- and then pass the message to the continuation.
relayonemessage from to cont =
flip protoerrhandler (from $ net $ receiveMessage) $
withresp $ \message ->
protoerrhandler (cont message) $
to $ net $ sendMessage message
protoerr = do
_ <- client $ net $ sendMessage (ERROR "protocol error")
giveup "protocol error"
handleREMOVE [] _ _ =
-- When no places are provided to remove from,
-- don't report a successful remote.
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage FAILURE
handleREMOVE remotesides k message = do
v <- forMC concurrencyconfig remotesides $ \r ->
runRemoteSideOrSkipFailed r $ do
net $ sendMessage message
net receiveMessage >>= return . \case
Just SUCCESS ->
Just (True, [Remote.uuid (remote r)])
Just (SUCCESS_PLUS us) ->
Just (True, Remote.uuid (remote r):us)
Just FAILURE ->
Just (False, [])
Just (FAILURE_PLUS us) ->
Just (False, us)
_ -> Nothing
let v' = map join v
let us = concatMap snd $ catMaybes v'
mapM_ (\u -> removedContent proxymethods u k) us
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage $
let nonplussed = all (== remoteuuid) us
|| protocolversion < 2
in if all (maybe False fst) v'
then if nonplussed
then SUCCESS
else SUCCESS_PLUS us
else if nonplussed
then FAILURE
else FAILURE_PLUS us
handleGET remoteside message = getresponse (runRemoteSide remoteside) message $
withDATA (relayGET remoteside)
handlePUT (remoteside:[]) k message
| Remote.uuid (remote remoteside) == remoteuuid =
getresponse (runRemoteSide remoteside) message $ \resp -> case resp of
ALREADY_HAVE -> protoerrhandler proxynextclientmessage $
client $ net $ sendMessage resp
ALREADY_HAVE_PLUS _ -> protoerrhandler proxynextclientmessage $
client $ net $ sendMessage resp
PUT_FROM _ ->
getresponse client resp $
withDATA (relayPUT remoteside k)
_ -> protoerr
handlePUT [] _ _ =
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage ALREADY_HAVE
handlePUT remotesides k message =
handlePutMulti remotesides k message
withDATA a message@(DATA len) = a len message
withDATA _ _ = protoerr
relayGET remoteside len = relayDATAStart client $
relayDATACore len (runRemoteSide remoteside) client $
relayDATAFinish (runRemoteSide remoteside) client $
relayonemessage client (runRemoteSide remoteside) $
const proxynextclientmessage
relayPUT remoteside k len = relayDATAStart (runRemoteSide remoteside) $
relayDATACore len client (runRemoteSide remoteside) $
relayDATAFinish client (runRemoteSide remoteside) $
relayonemessage (runRemoteSide remoteside) client finished
where
finished resp () = do
void $ relayPUTRecord k remoteside resp
proxynextclientmessage ()
relayPUTRecord k remoteside SUCCESS = do
addedContent proxymethods (Remote.uuid (remote remoteside)) k
return $ Just [Remote.uuid (remote remoteside)]
relayPUTRecord k remoteside (SUCCESS_PLUS us) = do
let us' = (Remote.uuid (remote remoteside)) : us
forM_ us' $ \u ->
addedContent proxymethods u k
return $ Just us'
relayPUTRecord _ _ _ =
return Nothing
handlePutMulti remotesides k message = do
let initiate remoteside = do
resp <- runRemoteSideOrSkipFailed remoteside $ net $ do
sendMessage message
receiveMessage
case resp of
Just (Just (PUT_FROM (Offset offset))) ->
return $ Right $
Right (remoteside, offset)
Just (Just ALREADY_HAVE) ->
return $ Right $ Left remoteside
Just (Just _) -> protoerr
Just Nothing -> return (Left ())
Nothing -> return (Left ())
let alreadyhave = \case
Right (Left _) -> True
_ -> False
l <- forMC concurrencyconfig remotesides initiate
if all alreadyhave l
then if protocolversion < 2
then protoerrhandler proxynextclientmessage $
client $ net $ sendMessage ALREADY_HAVE
else protoerrhandler proxynextclientmessage $
client $ net $ sendMessage $ ALREADY_HAVE_PLUS $
filter (/= remoteuuid) $
map (Remote.uuid . remote) (lefts (rights l))
else if null (rights l)
-- no response from any remote
then proxydone
else do
let l' = rights (rights l)
let minoffset = minimum (map snd l')
getresponse client (PUT_FROM (Offset minoffset)) $
withDATA (relayPUTMulti minoffset l' k)
relayPUTMulti minoffset remotes k (Len datalen) _ = do
let totallen = datalen + minoffset
-- Tell each remote how much data to expect, depending
-- on the remote's offset.
rs <- forMC concurrencyconfig remotes $ \r@(remoteside, remoteoffset) ->
runRemoteSideOrSkipFailed remoteside $ do
net $ sendMessage $ DATA $ Len $
totallen - remoteoffset
return r
protoerrhandler (send (catMaybes rs) minoffset) $
client $ net $ receiveBytes (Len datalen) nullMeterUpdate
where
chunksize = fromIntegral defaultChunkSize
-- Stream the lazy bytestring out to the remotes in chunks.
-- Only start sending to a remote once past its desired
-- offset.
send rs n b = do
let (chunk, b') = L.splitAt chunksize b
let chunklen = fromIntegral (L.length chunk)
let !n' = n + chunklen
rs' <- forMC concurrencyconfig rs $ \r@(remoteside, remoteoffset) ->
if n >= remoteoffset
then runRemoteSideOrSkipFailed remoteside $ do
net $ sendBytes (Len chunklen) chunk nullMeterUpdate
return r
else if (n' > remoteoffset)
then do
let chunkoffset = remoteoffset - n
let subchunklen = chunklen - chunkoffset
let subchunk = L.drop (fromIntegral chunkoffset) chunk
runRemoteSideOrSkipFailed remoteside $ do
net $ sendBytes (Len subchunklen) subchunk nullMeterUpdate
return r
else return (Just r)
if L.null b'
then sent (catMaybes rs')
else send (catMaybes rs') n' b'
sent [] = proxydone
sent rs = relayDATAFinishMulti k (map fst rs)
runRemoteSideOrSkipFailed remoteside a =
runRemoteSide remoteside a >>= \case
Right v -> return (Just v)
Left _ -> do
-- This connection to the remote is
-- unrecoverable at this point, so close it.
closeRemoteSide remoteside
return Nothing
relayDATAStart x receive message =
protoerrhandler (\() -> receive) $
x $ net $ sendMessage message
relayDATACore len x y a = protoerrhandler send $
x $ net $ receiveBytes len nullMeterUpdate
where
send b = protoerrhandler a $
y $ net $ sendBytes len b nullMeterUpdate
relayDATAFinish x y sendsuccessfailure ()
| protocolversion == 0 = sendsuccessfailure
-- Protocol version 1 has a VALID or
-- INVALID message after the data.
| otherwise = relayonemessage x y (\_ () -> sendsuccessfailure)
relayDATAFinishMulti k rs
| protocolversion == 0 =
finish $ net receiveMessage
| otherwise =
flip protoerrhandler (client $ net $ receiveMessage) $
withresp $ \message ->
finish $ do
-- Relay VALID or INVALID message
-- only to remotes that support
-- protocol version 1.
net getProtocolVersion >>= \case
ProtocolVersion 0 -> return ()
_ -> net $ sendMessage message
net receiveMessage
where
finish a = do
storeduuids <- forMC concurrencyconfig rs $ \r ->
runRemoteSideOrSkipFailed r a >>= \case
Just (Just resp) ->
relayPUTRecord k r resp
_ -> return Nothing
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage $
case concat (catMaybes storeduuids) of
[] -> FAILURE
us
| protocolversion < 2 -> SUCCESS
| otherwise -> SUCCESS_PLUS us
-- The associated file received from the P2P protocol
-- is relative to the top of the git repository. But this process
-- may be running with a different cwd.
getassociatedfile (ProtoAssociatedFile (AssociatedFile (Just f))) =
AssociatedFile . Just
<$> fromRepo (fromTopFilePath (asTopFilePath f))
getassociatedfile (ProtoAssociatedFile (AssociatedFile Nothing)) =
return $ AssociatedFile Nothing
data ConcurrencyConfig = ConcurrencyConfig Int (MSem.MSem Int)
noConcurrencyConfig :: Annex ConcurrencyConfig
noConcurrencyConfig = liftIO $ ConcurrencyConfig 1 <$> MSem.new 1
getConcurrencyConfig :: Annex ConcurrencyConfig
getConcurrencyConfig = (annexJobs <$> Annex.getGitConfig) >>= \case
NonConcurrent -> noConcurrencyConfig
Concurrent n -> go n
ConcurrentPerCpu -> go =<< liftIO getNumProcessors
where
go n = do
c <- liftIO getNumCapabilities
when (n > c) $
liftIO $ setNumCapabilities n
setConcurrency (ConcurrencyGitConfig (Concurrent n))
msem <- liftIO $ MSem.new n
return (ConcurrencyConfig n msem)
forMC :: ConcurrencyConfig -> [a] -> (a -> Annex b) -> Annex [b]
forMC _ (x:[]) a = do
r <- a x
return [r]
forMC (ConcurrencyConfig n msem) xs a
| n < 2 = forM xs a
| otherwise = do
runners <- forM xs $ \x ->
forkState $ bracketIO
(MSem.wait msem)
(const $ MSem.signal msem)
(const $ a x)
mapM id =<< liftIO (forConcurrently runners id)

View file

@ -342,11 +342,12 @@ remoteLocations (IncludeIgnored ii) locations trusted = do
{- Displays known locations of a key and helps the user take action {- Displays known locations of a key and helps the user take action
- to make them accessible. -} - to make them accessible. -}
showLocations :: Bool -> Key -> [UUID] -> String -> Annex () showLocations :: Bool -> Key -> (UUID -> Annex Bool) -> String -> Annex ()
showLocations separateuntrusted key exclude nolocmsg = do showLocations separateuntrusted key checkexclude nolocmsg = do
u <- getUUID u <- getUUID
remotes <- remoteList remotes <- remoteList
uuids <- keyLocations key uuids <- keyLocations key
exclude <- filterM checkexclude uuids
untrusteduuids <- if separateuntrusted untrusteduuids <- if separateuntrusted
then trustGet UnTrusted then trustGet UnTrusted
else pure [] else pure []
@ -447,11 +448,14 @@ claimingUrl' remotefilter url = do
where where
checkclaim = maybe (pure False) (`id` url) . claimUrl checkclaim = maybe (pure False) (`id` url) . claimUrl
{- Is this a remote of a type we can sync with, or a special remote {- Is this a remote of a type that git pull and push work with?
- with an annex:: url configured? -} - That includes special remotes with an annex:: url configured.
- It does not include proxied remotes. -}
gitSyncableRemote :: Remote -> Bool gitSyncableRemote :: Remote -> Bool
gitSyncableRemote r gitSyncableRemote r
| gitSyncableRemoteType (remotetype r) = True | gitSyncableRemoteType (remotetype r)
&& isJust (remoteUrl (gitconfig r)) =
not (isJust (remoteAnnexProxiedBy (gitconfig r)))
| otherwise = case remoteUrl (gitconfig r) of | otherwise = case remoteUrl (gitconfig r) of
Just u | "annex::" `isPrefixOf` u -> True Just u | "annex::" `isPrefixOf` u -> True
_ -> False _ -> False

View file

@ -1,6 +1,6 @@
{- Standard git remotes. {- Standard git remotes.
- -
- Copyright 2011-2023 Joey Hess <id@joeyh.name> - Copyright 2011-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -25,7 +25,6 @@ import qualified Git.Command
import qualified Git.GCrypt import qualified Git.GCrypt
import qualified Git.Types as Git import qualified Git.Types as Git
import qualified Annex import qualified Annex
import Logs.Presence
import Annex.Transfer import Annex.Transfer
import Annex.CopyFile import Annex.CopyFile
import Annex.Verify import Annex.Verify
@ -45,6 +44,8 @@ import Annex.Init
import Types.CleanupActions import Types.CleanupActions
import qualified CmdLine.GitAnnexShell.Fields as Fields import qualified CmdLine.GitAnnexShell.Fields as Fields
import Logs.Location import Logs.Location
import Logs.Proxy
import Logs.Cluster.Basic
import Utility.Metered import Utility.Metered
import Utility.Env import Utility.Env
import Utility.Batch import Utility.Batch
@ -66,7 +67,8 @@ import Messages.Progress
import Control.Concurrent import Control.Concurrent
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.ByteString as S import qualified Data.Set as S
import qualified Data.ByteString as B
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
import Network.URI import Network.URI
@ -92,7 +94,13 @@ list :: Bool -> Annex [Git.Repo]
list autoinit = do list autoinit = do
c <- fromRepo Git.config c <- fromRepo Git.config
rs <- mapM (tweakurl c) =<< Annex.getGitRemotes rs <- mapM (tweakurl c) =<< Annex.getGitRemotes
mapM (configRead autoinit) (filter (not . isGitRemoteAnnex) rs) rs' <- mapM (configRead autoinit) (filter (not . isGitRemoteAnnex) rs)
proxies <- doQuietAction getProxies
if proxies == mempty
then return rs'
else do
proxied <- listProxied proxies rs'
return (proxied++rs')
where where
annexurl r = remoteConfig r "annexurl" annexurl r = remoteConfig r "annexurl"
tweakurl c r = do tweakurl c r = do
@ -168,6 +176,7 @@ configRead autoinit r = do
Just r' -> return r' Just r' -> return r'
_ -> return r _ -> return r
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r u rc gc rs gen r u rc gc rs
-- Remote.GitLFS may be used with a repo that is also encrypted -- Remote.GitLFS may be used with a repo that is also encrypted
@ -178,10 +187,9 @@ gen r u rc gc rs
Nothing -> do Nothing -> do
st <- mkState r u gc st <- mkState r u gc
c <- parsedRemoteConfig remote rc c <- parsedRemoteConfig remote rc
go st c <$> remoteCost gc c defcst go st c <$> remoteCost gc c (defaultRepoCost r)
Just addr -> Remote.P2P.chainGen addr r u rc gc rs Just addr -> Remote.P2P.chainGen addr r u rc gc rs
where where
defcst = if repoCheap r then cheapRemoteCost else expensiveRemoteCost
go st c cst = Just new go st c cst = Just new
where where
new = Remote new = Remote
@ -221,6 +229,11 @@ gen r u rc gc rs
, remoteStateHandle = rs , remoteStateHandle = rs
} }
defaultRepoCost :: Git.Repo -> Cost
defaultRepoCost r
| repoCheap r = cheapRemoteCost
| otherwise = expensiveRemoteCost
unavailable :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) unavailable :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
unavailable r = gen r' unavailable r = gen r'
where where
@ -265,7 +278,7 @@ tryGitConfigRead autoinit r hasuuid
v <- liftIO $ Git.Config.fromPipe r cmd params st v <- liftIO $ Git.Config.fromPipe r cmd params st
case v of case v of
Right (r', val, _err) -> do Right (r', val, _err) -> do
unless (isUUIDConfigured r' || S.null val || not mustincludeuuuid) $ do unless (isUUIDConfigured r' || val == mempty || not mustincludeuuuid) $ do
warning $ UnquotedString $ "Failed to get annex.uuid configuration of repository " ++ Git.repoDescribe r warning $ UnquotedString $ "Failed to get annex.uuid configuration of repository " ++ Git.repoDescribe r
warning $ UnquotedString $ "Instead, got: " ++ show val warning $ UnquotedString $ "Instead, got: " ++ show val
warning "This is unexpected; please check the network transport!" warning "This is unexpected; please check the network transport!"
@ -338,7 +351,7 @@ tryGitConfigRead autoinit r hasuuid
readlocalannexconfig = do readlocalannexconfig = do
let check = do let check = do
Annex.BranchState.disableUpdate Annex.BranchState.disableUpdate
catchNonAsync (autoInitialize (pure [])) $ \e -> catchNonAsync (autoInitialize noop (pure [])) $ \e ->
warning $ UnquotedString $ "Remote " ++ Git.repoDescribe r ++ warning $ UnquotedString $ "Remote " ++ Git.repoDescribe r ++
": " ++ show e ": " ++ show e
Annex.getState Annex.repo Annex.getState Annex.repo
@ -442,7 +455,8 @@ dropKey' repo r st@(State connpool duc _ _ _) key
, giveup "remote does not have expected annex.uuid value" , giveup "remote does not have expected annex.uuid value"
) )
| Git.repoIsHttp repo = giveup "dropping from http remote not supported" | Git.repoIsHttp repo = giveup "dropping from http remote not supported"
| otherwise = P2PHelper.remove (Ssh.runProto r connpool (return False)) key | otherwise = P2PHelper.remove (uuid r)
(Ssh.runProto r connpool (return (False, Nothing))) key
lockKey :: Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r lockKey :: Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r
lockKey r st key callback = do lockKey r st key callback = do
@ -464,7 +478,7 @@ lockKey' repo r st@(State connpool duc _ _ _) key callback
) )
| Git.repoIsSsh repo = do | Git.repoIsSsh repo = do
showLocking r showLocking r
let withconn = Ssh.withP2PSshConnection r connpool failedlock let withconn = Ssh.withP2PShellConnection r connpool failedlock
P2PHelper.lock withconn Ssh.runProtoConn (uuid r) key callback P2PHelper.lock withconn Ssh.runProtoConn (uuid r) key callback
| otherwise = failedlock | otherwise = failedlock
where where
@ -542,8 +556,8 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate
, giveup "remote does not have expected annex.uuid value" , giveup "remote does not have expected annex.uuid value"
) )
| Git.repoIsSsh repo = | Git.repoIsSsh repo =
P2PHelper.store (gitconfig r) P2PHelper.store (uuid r) (gitconfig r)
(Ssh.runProto r connpool (return False)) (Ssh.runProto r connpool (return Nothing))
key file meterupdate key file meterupdate
| otherwise = giveup "copying to non-ssh repo not supported" | otherwise = giveup "copying to non-ssh repo not supported"
@ -594,7 +608,7 @@ repairRemote r a = return $ do
s <- Annex.new r s <- Annex.new r
Annex.eval s $ do Annex.eval s $ do
Annex.BranchState.disableUpdate Annex.BranchState.disableUpdate
ensureInitialized (pure []) ensureInitialized noop (pure [])
a `finally` quiesce True a `finally` quiesce True
data LocalRemoteAnnex = LocalRemoteAnnex Git.Repo (MVar [(Annex.AnnexState, Annex.AnnexRead)]) data LocalRemoteAnnex = LocalRemoteAnnex Git.Repo (MVar [(Annex.AnnexState, Annex.AnnexRead)])
@ -638,7 +652,7 @@ onLocal' (LocalRemoteAnnex repo mv) a = liftIO (takeMVar mv) >>= \case
[] -> do [] -> do
liftIO $ putMVar mv [] liftIO $ putMVar mv []
v <- newLocal repo v <- newLocal repo
go (v, ensureInitialized (pure []) >> a) go (v, ensureInitialized noop (pure []) >> a)
(v:rest) -> do (v:rest) -> do
liftIO $ putMVar mv rest liftIO $ putMVar mv rest
go (v, a) go (v, a)
@ -725,7 +739,7 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
- This returns False when the repository UUID is not as expected. -} - This returns False when the repository UUID is not as expected. -}
type DeferredUUIDCheck = Annex Bool type DeferredUUIDCheck = Annex Bool
data State = State Ssh.P2PSshConnectionPool DeferredUUIDCheck CopyCoWTried (Annex (Git.Repo, GitConfig)) LocalRemoteAnnex data State = State Ssh.P2PShellConnectionPool DeferredUUIDCheck CopyCoWTried (Annex (Git.Repo, GitConfig)) LocalRemoteAnnex
getRepoFromState :: State -> Annex Git.Repo getRepoFromState :: State -> Annex Git.Repo
getRepoFromState (State _ _ _ a _) = fst <$> a getRepoFromState (State _ _ _ a _) = fst <$> a
@ -738,7 +752,7 @@ getGitConfigFromState (State _ _ _ a _) = snd <$> a
mkState :: Git.Repo -> UUID -> RemoteGitConfig -> Annex State mkState :: Git.Repo -> UUID -> RemoteGitConfig -> Annex State
mkState r u gc = do mkState r u gc = do
pool <- Ssh.mkP2PSshConnectionPool pool <- Ssh.mkP2PShellConnectionPool
copycowtried <- liftIO newCopyCoWTried copycowtried <- liftIO newCopyCoWTried
lra <- mkLocalRemoteAnnex r lra <- mkLocalRemoteAnnex r
(duc, getrepo) <- go (duc, getrepo) <- go
@ -772,3 +786,122 @@ mkState r u gc = do
) )
return (duc, getrepo) return (duc, getrepo)
listProxied :: M.Map UUID (S.Set Proxy) -> [Git.Repo] -> Annex [Git.Repo]
listProxied proxies rs = concat <$> mapM go rs
where
go r = do
g <- Annex.gitRepo
u <- getRepoUUID r
gc <- Annex.getRemoteGitConfig r
let cu = fromMaybe u $ remoteAnnexConfigUUID gc
if not (canproxy gc r) || cu == NoUUID
then pure []
else case M.lookup cu proxies of
Nothing -> pure []
Just proxied -> catMaybes
<$> mapM (mkproxied g r gc proxied)
(S.toList proxied)
proxiedremotename r p = do
n <- Git.remoteName r
pure $ n ++ "-" ++ proxyRemoteName p
mkproxied g r gc proxied p = case proxiedremotename r p of
Nothing -> pure Nothing
Just proxyname -> mkproxied' g r gc proxied p proxyname
-- The proxied remote is constructed by renaming the proxy remote,
-- changing its uuid, and setting the proxied remote's inherited
-- configs and uuid in Annex state.
mkproxied' g r gc proxied p proxyname
| any isconfig (M.keys (Git.config g)) = pure Nothing
| otherwise = do
clusters <- getClustersWith id
-- Not using addGitConfigOverride for inherited
-- configs, because child git processes do not
-- need them to be provided with -c.
Annex.adjustGitRepo (pure . annexconfigadjuster clusters)
return $ Just $ renamedr
where
renamedr =
let c = adduuid configkeyUUID $
Git.fullconfig r
in r
{ Git.remoteName = Just proxyname
, Git.config = M.map Prelude.head c
, Git.fullconfig = c
}
annexconfigadjuster clusters r' =
let c = adduuid (configRepoUUID renamedr) $
addurl $
addproxiedby $
adjustclusternode clusters $
inheritconfigs $ Git.fullconfig r'
in r'
{ Git.config = M.map Prelude.head c
, Git.fullconfig = c
}
adduuid ck = M.insert ck
[Git.ConfigValue $ fromUUID $ proxyRemoteUUID p]
addurl = M.insert (remoteConfig renamedr (remoteGitConfigKey UrlField))
[Git.ConfigValue $ encodeBS $ Git.repoLocation r]
addproxiedby = case remoteAnnexUUID gc of
Just u -> addremoteannexfield ProxiedByField
[Git.ConfigValue $ fromUUID u]
Nothing -> id
-- A node of a cluster that is being proxied along with
-- that cluster does not need to be synced with
-- by default, because syncing with the cluster will
-- effectively sync with all of its nodes.
--
-- Also, give it a slightly higher cost than the
-- cluster by default, to encourage using the cluster.
adjustclusternode clusters =
case M.lookup (ClusterNodeUUID (proxyRemoteUUID p)) (clusterNodeUUIDs clusters) of
Just cs
| any (\c -> S.member (fromClusterUUID c) proxieduuids) (S.toList cs) ->
addremoteannexfield SyncField
[Git.ConfigValue $ Git.Config.boolConfig' False]
. addremoteannexfield CostField
[Git.ConfigValue $ encodeBS $ show $ defaultRepoCost r + 0.1]
_ -> id
proxieduuids = S.map proxyRemoteUUID proxied
addremoteannexfield f = M.insert
(remoteAnnexConfig renamedr (remoteGitConfigKey f))
inheritconfigs c = foldl' inheritconfig c proxyInheritedFields
inheritconfig c k = case (M.lookup dest c, M.lookup src c) of
(Nothing, Just v) -> M.insert dest v c
_ -> c
where
src = remoteAnnexConfig r k
dest = remoteAnnexConfig renamedr k
-- When the git config has anything set for a remote,
-- avoid making a proxied remote with the same name.
-- It is possible to set git configs of proxies, but it
-- needs both the url and uuid config to be manually set.
isconfig (Git.ConfigKey configkey) =
proxyconfigprefix `B.isPrefixOf` configkey
where
Git.ConfigKey proxyconfigprefix = remoteConfig proxyname mempty
-- Git remotes that are gcrypt or git-lfs special remotes cannot
-- proxy. Local git remotes cannot proxy either because
-- git-annex-shell is not used to access a local git url.
-- Proxing is also yet supported for remotes using P2P
-- addresses.
canproxy gc r
| remoteAnnexGitLFS gc = False
| Git.GCrypt.isEncrypted r = False
| Git.repoIsLocal r || Git.repoIsLocalUnknown r = False
| otherwise = isNothing (repoP2PAddress r)

View file

@ -64,5 +64,7 @@ gitRepoInfo r = do
repo <- Remote.getRepo r repo <- Remote.getRepo r
return return
[ ("repository location", Git.repoLocation repo) [ ("repository location", Git.repoLocation repo)
, ("proxied", Git.Config.boolConfig
(isJust (remoteAnnexProxiedBy (Remote.gitconfig r))))
, ("last synced", lastsynctime) , ("last synced", lastsynctime)
] ]

View file

@ -19,6 +19,7 @@ import Utility.Metered
import Utility.Tuple import Utility.Tuple
import Types.NumCopies import Types.NumCopies
import Annex.Verify import Annex.Verify
import Logs.Location
import Control.Concurrent import Control.Concurrent
@ -32,14 +33,20 @@ type ProtoConnRunner c = forall a. P2P.Proto a -> ClosableConnection c -> Annex
-- the pool when done. -- the pool when done.
type WithConn a c = (ClosableConnection c -> Annex (ClosableConnection c, a)) -> Annex a type WithConn a c = (ClosableConnection c -> Annex (ClosableConnection c, a)) -> Annex a
store :: RemoteGitConfig -> ProtoRunner Bool -> Key -> AssociatedFile -> MeterUpdate -> Annex () store :: UUID -> RemoteGitConfig -> ProtoRunner (Maybe [UUID]) -> Key -> AssociatedFile -> MeterUpdate -> Annex ()
store gc runner k af p = do store remoteuuid gc runner k af p = do
let sizer = KeySizer k (fmap (toRawFilePath . fst3) <$> prepSendAnnex k) let sizer = KeySizer k (fmap (toRawFilePath . fst3) <$> prepSendAnnex k)
let bwlimit = remoteAnnexBwLimitUpload gc <|> remoteAnnexBwLimit gc let bwlimit = remoteAnnexBwLimitUpload gc <|> remoteAnnexBwLimit gc
metered (Just p) sizer bwlimit $ \_ p' -> metered (Just p) sizer bwlimit $ \_ p' ->
runner (P2P.put k af p') >>= \case runner (P2P.put k af p') >>= \case
Just True -> return () Just (Just fanoutuuids) -> do
Just False -> giveup "Transfer failed" -- Storing on the remote can cause it
-- to be stored on additional UUIDs,
-- so record those.
forM_ fanoutuuids $ \u ->
when (u /= remoteuuid) $
logChange k u InfoPresent
Just Nothing -> giveup "Transfer failed"
Nothing -> remoteUnavail Nothing -> remoteUnavail
retrieve :: RemoteGitConfig -> (ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification retrieve :: RemoteGitConfig -> (ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
@ -52,11 +59,20 @@ retrieve gc runner k af dest p verifyconfig = do
Just (False, _) -> giveup "Transfer failed" Just (False, _) -> giveup "Transfer failed"
Nothing -> remoteUnavail Nothing -> remoteUnavail
remove :: ProtoRunner Bool -> Key -> Annex () remove :: UUID -> ProtoRunner (Bool, Maybe [UUID]) -> Key -> Annex ()
remove runner k = runner (P2P.remove k) >>= \case remove remoteuuid runner k = runner (P2P.remove k) >>= \case
Just True -> return () Just (True, alsoremoveduuids) -> note alsoremoveduuids
Just False -> giveup "removing content from remote failed" Just (False, alsoremoveduuids) -> do
note alsoremoveduuids
giveup "removing content from remote failed"
Nothing -> remoteUnavail Nothing -> remoteUnavail
where
-- The remote reports removal from other UUIDs than its own,
-- so record those.
note alsoremoveduuids =
forM_ (fromMaybe [] alsoremoveduuids) $ \u ->
when (u /= remoteuuid) $
logChange k u InfoMissing
checkpresent :: ProtoRunner Bool -> Key -> Annex Bool checkpresent :: ProtoRunner Bool -> Key -> Annex Bool
checkpresent runner k = maybe remoteUnavail return =<< runner (P2P.checkPresent k) checkpresent runner k = maybe remoteUnavail return =<< runner (P2P.checkPresent k)

View file

@ -180,67 +180,78 @@ rsyncParams r direction = do
| otherwise = remoteAnnexRsyncUploadOptions gc | otherwise = remoteAnnexRsyncUploadOptions gc
gc = gitconfig r gc = gitconfig r
-- A connection over ssh to git-annex shell speaking the P2P protocol. -- A connection over ssh or locally to git-annex shell,
type P2PSshConnection = P2P.ClosableConnection -- speaking the P2P protocol.
type P2PShellConnection = P2P.ClosableConnection
(P2P.RunState, P2P.P2PConnection, ProcessHandle) (P2P.RunState, P2P.P2PConnection, ProcessHandle)
closeP2PSshConnection :: P2PSshConnection -> IO (P2PSshConnection, Maybe ExitCode) closeP2PShellConnection :: P2PShellConnection -> IO (P2PShellConnection, Maybe ExitCode)
closeP2PSshConnection P2P.ClosedConnection = return (P2P.ClosedConnection, Nothing) closeP2PShellConnection P2P.ClosedConnection = return (P2P.ClosedConnection, Nothing)
closeP2PSshConnection (P2P.OpenConnection (_st, conn, pid)) = closeP2PShellConnection (P2P.OpenConnection (_st, conn, pid)) =
-- mask async exceptions, avoid cleanup being interrupted -- mask async exceptions, avoid cleanup being interrupted
uninterruptibleMask_ $ do uninterruptibleMask_ $ do
P2P.closeConnection conn P2P.closeConnection conn
exitcode <- waitForProcess pid exitcode <- waitForProcess pid
return (P2P.ClosedConnection, Just exitcode) return (P2P.ClosedConnection, Just exitcode)
-- Pool of connections over ssh to git-annex-shell p2pstdio. -- Pool of connections to git-annex-shell p2pstdio.
type P2PSshConnectionPool = TVar (Maybe P2PSshConnectionPoolState) type P2PShellConnectionPool = TVar (Maybe P2PShellConnectionPoolState)
data P2PSshConnectionPoolState data P2PShellConnectionPoolState
= P2PSshConnections [P2PSshConnection] = P2PShellConnections [P2PShellConnection]
-- Remotes using an old version of git-annex-shell don't support P2P -- Remotes using an old version of git-annex-shell don't support P2P
| P2PSshUnsupported | P2PShellUnsupported
mkP2PSshConnectionPool :: Annex P2PSshConnectionPool mkP2PShellConnectionPool :: Annex P2PShellConnectionPool
mkP2PSshConnectionPool = liftIO $ newTVarIO Nothing mkP2PShellConnectionPool = liftIO $ newTVarIO Nothing
-- Takes a connection from the pool, if any are available, otherwise -- Takes a connection from the pool, if any are available, otherwise
-- tries to open a new one. -- tries to open a new one.
getP2PSshConnection :: Remote -> P2PSshConnectionPool -> Annex (Maybe P2PSshConnection) getP2PShellConnection :: Remote -> P2PShellConnectionPool -> Annex (Maybe P2PShellConnection)
getP2PSshConnection r connpool = getexistingconn >>= \case getP2PShellConnection r connpool = getexistingconn >>= \case
Nothing -> return Nothing Nothing -> return Nothing
Just Nothing -> openP2PSshConnection r connpool Just Nothing -> openP2PShellConnection r connpool
Just (Just c) -> return (Just c) Just (Just c) -> return (Just c)
where where
getexistingconn = liftIO $ atomically $ readTVar connpool >>= \case getexistingconn = liftIO $ atomically $ readTVar connpool >>= \case
Just P2PSshUnsupported -> return Nothing Just P2PShellUnsupported -> return Nothing
Just (P2PSshConnections (c:cs)) -> do Just (P2PShellConnections (c:cs)) -> do
writeTVar connpool (Just (P2PSshConnections cs)) writeTVar connpool (Just (P2PShellConnections cs))
return (Just (Just c)) return (Just (Just c))
Just (P2PSshConnections []) -> return (Just Nothing) Just (P2PShellConnections []) -> return (Just Nothing)
Nothing -> return (Just Nothing) Nothing -> return (Just Nothing)
-- Add a connection to the pool, unless it's closed. -- Add a connection to the pool, unless it's closed.
storeP2PSshConnection :: P2PSshConnectionPool -> P2PSshConnection -> IO () storeP2PShellConnection :: P2PShellConnectionPool -> P2PShellConnection -> IO ()
storeP2PSshConnection _ P2P.ClosedConnection = return () storeP2PShellConnection _ P2P.ClosedConnection = return ()
storeP2PSshConnection connpool conn = atomically $ modifyTVar' connpool $ \case storeP2PShellConnection connpool conn = atomically $ modifyTVar' connpool $ \case
Just (P2PSshConnections cs) -> Just (P2PSshConnections (conn:cs)) Just (P2PShellConnections cs) -> Just (P2PShellConnections (conn:cs))
_ -> Just (P2PSshConnections [conn]) _ -> Just (P2PShellConnections [conn])
-- Try to open a P2PSshConnection. -- Try to open a P2PShellConnection.
-- The new connection is not added to the pool, so it's available -- The new connection is not added to the pool, so it's available
-- for the caller to use. -- for the caller to use.
-- If the remote does not support the P2P protocol, that's remembered in -- If the remote does not support the P2P protocol, that's remembered in
-- the connection pool. -- the connection pool.
openP2PSshConnection :: Remote -> P2PSshConnectionPool -> Annex (Maybe P2PSshConnection) openP2PShellConnection :: Remote -> P2PShellConnectionPool -> Annex (Maybe P2PShellConnection)
openP2PSshConnection r connpool = do openP2PShellConnection r connpool =
openP2PShellConnection' r P2P.maxProtocolVersion mempty >>= \case
Just conn -> return (Just conn)
Nothing -> do
liftIO $ rememberunsupported
return Nothing
where
rememberunsupported = atomically $
modifyTVar' connpool $
maybe (Just P2PShellUnsupported) Just
openP2PShellConnection' :: Remote -> P2P.ProtocolVersion -> P2P.Bypass -> Annex (Maybe P2PShellConnection)
openP2PShellConnection' r maxprotoversion bypass = do
u <- getUUID u <- getUUID
let ps = [Param (fromUUID u)] let ps = [Param (fromUUID u)]
repo <- getRepo r repo <- getRepo r
git_annex_shell ConsumeStdin repo "p2pstdio" ps [] >>= \case git_annex_shell ConsumeStdin repo "p2pstdio" ps [] >>= \case
Nothing -> do Nothing -> return Nothing
liftIO $ rememberunsupported
return Nothing
Just (cmd, params) -> start cmd params Just (cmd, params) -> start cmd params
where where
start cmd params = liftIO $ do start cmd params = liftIO $ do
@ -256,45 +267,41 @@ openP2PSshConnection r connpool = do
, P2P.connIhdl = to , P2P.connIhdl = to
, P2P.connOhdl = from , P2P.connOhdl = from
, P2P.connIdent = P2P.ConnIdent $ , P2P.connIdent = P2P.ConnIdent $
Just $ "ssh connection " ++ show pidnum Just $ "git-annex-shell connection " ++ show pidnum
} }
runst <- P2P.mkRunState P2P.Client runst <- P2P.mkRunState P2P.Client
let c = P2P.OpenConnection (runst, conn, pid) let c = P2P.OpenConnection (runst, conn, pid)
-- When the connection is successful, the remote -- When the connection is successful, the remote
-- will send an AUTH_SUCCESS with its uuid. -- will send an AUTH_SUCCESS with its uuid.
let proto = P2P.postAuth $ let proto = P2P.postAuth $ do
P2P.negotiateProtocolVersion P2P.maxProtocolVersion P2P.negotiateProtocolVersion maxprotoversion
P2P.sendBypass bypass
tryNonAsync (P2P.runNetProto runst conn proto) >>= \case tryNonAsync (P2P.runNetProto runst conn proto) >>= \case
Right (Right (Just theiruuid)) | theiruuid == uuid r -> Right (Right (Just theiruuid)) | theiruuid == uuid r ->
return $ Just c return $ Just c
_ -> do _ -> do
(cclosed, exitcode) <- closeP2PSshConnection c (cclosed, exitcode) <- closeP2PShellConnection c
-- ssh exits 255 when unable to connect to -- ssh exits 255 when unable to connect to
-- server. -- server.
if exitcode == Just (ExitFailure 255) if exitcode == Just (ExitFailure 255)
then return (Just cclosed) then return (Just cclosed)
else do else return Nothing
rememberunsupported
return Nothing
rememberunsupported = atomically $
modifyTVar' connpool $
maybe (Just P2PSshUnsupported) Just
-- Runs a P2P Proto action on a remote when it supports that, -- Runs a P2P Proto action on a remote when it supports that,
-- otherwise the fallback action. -- otherwise the fallback action.
runProto :: Remote -> P2PSshConnectionPool -> Annex a -> P2P.Proto a -> Annex (Maybe a) runProto :: Remote -> P2PShellConnectionPool -> Annex a -> P2P.Proto a -> Annex (Maybe a)
runProto r connpool onerr proto = Just <$> runProto r connpool onerr proto = Just <$>
(getP2PSshConnection r connpool >>= maybe onerr go) (getP2PShellConnection r connpool >>= maybe onerr go)
where where
go c = do go c = do
(c', v) <- runProtoConn proto c (c', v) <- runProtoConn proto c
case v of case v of
Just res -> do Just res -> do
liftIO $ storeP2PSshConnection connpool c' liftIO $ storeP2PShellConnection connpool c'
return res return res
Nothing -> onerr Nothing -> onerr
runProtoConn :: P2P.Proto a -> P2PSshConnection -> Annex (P2PSshConnection, Maybe a) runProtoConn :: P2P.Proto a -> P2PShellConnection -> Annex (P2PShellConnection, Maybe a)
runProtoConn _ P2P.ClosedConnection = return (P2P.ClosedConnection, Nothing) runProtoConn _ P2P.ClosedConnection = return (P2P.ClosedConnection, Nothing)
runProtoConn a conn@(P2P.OpenConnection (runst, c, _)) = do runProtoConn a conn@(P2P.OpenConnection (runst, c, _)) = do
P2P.runFullProto runst c a >>= \case P2P.runFullProto runst c a >>= \case
@ -303,24 +310,24 @@ runProtoConn a conn@(P2P.OpenConnection (runst, c, _)) = do
-- usable, so close it. -- usable, so close it.
Left e -> do Left e -> do
warning $ UnquotedString $ "Lost connection (" ++ P2P.describeProtoFailure e ++ ")" warning $ UnquotedString $ "Lost connection (" ++ P2P.describeProtoFailure e ++ ")"
conn' <- fst <$> liftIO (closeP2PSshConnection conn) conn' <- fst <$> liftIO (closeP2PShellConnection conn)
return (conn', Nothing) return (conn', Nothing)
-- Allocates a P2P ssh connection from the pool, and runs the action with it, -- Allocates a P2P shell connection from the pool, and runs the action with
-- returning the connection to the pool once the action is done. -- it, returning the connection to the pool once the action is done.
-- --
-- If the remote does not support the P2P protocol, runs the fallback -- If the remote does not support the P2P protocol, runs the fallback
-- action instead. -- action instead.
withP2PSshConnection withP2PShellConnection
:: Remote :: Remote
-> P2PSshConnectionPool -> P2PShellConnectionPool
-> Annex a -> Annex a
-> (P2PSshConnection -> Annex (P2PSshConnection, a)) -> (P2PShellConnection -> Annex (P2PShellConnection, a))
-> Annex a -> Annex a
withP2PSshConnection r connpool fallback a = bracketOnError get cache go withP2PShellConnection r connpool fallback a = bracketOnError get cache go
where where
get = getP2PSshConnection r connpool get = getP2PShellConnection r connpool
cache (Just conn) = liftIO $ storeP2PSshConnection connpool conn cache (Just conn) = liftIO $ storeP2PShellConnection connpool conn
cache Nothing = return () cache Nothing = return ()
go (Just conn) = do go (Just conn) = do
(conn', res) <- a conn (conn', res) <- a conn

View file

@ -65,7 +65,7 @@ remoteTypes = map adjustExportImportRemoteType
, Remote.External.remote , Remote.External.remote
] ]
{- Builds a list of all available Remotes. {- Builds a list of all Remotes.
- Since doing so can be expensive, the list is cached. -} - Since doing so can be expensive, the list is cached. -}
remoteList :: Annex [Remote] remoteList :: Annex [Remote]
remoteList = do remoteList = do

View file

@ -57,11 +57,11 @@ chainGen addr r u rc gc rs = do
{ uuid = u { uuid = u
, cost = cst , cost = cst
, name = Git.repoDescribe r , name = Git.repoDescribe r
, storeKey = store gc protorunner , storeKey = store u gc protorunner
, retrieveKeyFile = retrieve gc protorunner , retrieveKeyFile = retrieve gc protorunner
, retrieveKeyFileCheap = Nothing , retrieveKeyFileCheap = Nothing
, retrievalSecurityPolicy = RetrievalAllKeysSecure , retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = remove protorunner , removeKey = remove u protorunner
, lockContent = Just $ lock withconn runProtoConn u , lockContent = Just $ lock withconn runProtoConn u
, checkPresent = checkpresent protorunner , checkPresent = checkpresent protorunner
, checkPresentCheap = False , checkPresentCheap = False

84
Types/Cluster.hs Normal file
View file

@ -0,0 +1,84 @@
{- git-annex cluster types
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP, OverloadedStrings #-}
module Types.Cluster (
ClusterUUID,
mkClusterUUID,
genClusterUUID,
fromClusterUUID,
isClusterUUID,
ClusterNodeUUID(..),
Clusters(..),
noClusters,
) where
import Types.UUID
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.ByteString as B
import Data.Char
-- The UUID of a cluster as a whole.
--
-- Cluster UUIDs are specially constructed so that regular repository UUIDs
-- can never be used as a cluster UUID. This is ncessary for security.
-- They are a version 8 UUID with the first octet set to 'a' and the second
-- to 'c'.
newtype ClusterUUID = ClusterUUID UUID
deriving (Show, Eq, Ord)
-- Smart constructor for a ClusterUUID. Only allows valid cluster UUIDs.
mkClusterUUID :: UUID -> Maybe ClusterUUID
mkClusterUUID u
| isClusterUUID u = Just (ClusterUUID u)
| otherwise = Nothing
-- Check if it is a valid cluster UUID.
isClusterUUID :: UUID -> Bool
isClusterUUID (UUID b)
| B.take 2 b == "ac" =
#if MIN_VERSION_bytestring(0,11,0)
B.indexMaybe b 14 == Just eight
#else
B.length b > 14 && B.head (B.drop 14 b) == eight
#endif
where
eight = fromIntegral (ord '8')
isClusterUUID _ = False
{-# INLINE isClusterUUID #-}
-- Generates a ClusterUUID from any regular UUID (eg V4).
-- It is converted to a valid cluster UUID.
genClusterUUID :: UUID -> Maybe ClusterUUID
genClusterUUID (UUID b)
| B.length b > 14 = Just $ ClusterUUID $ UUID $
"ac" <> B.drop 2 (B.take 14 b) <> "8" <> B.drop 15 b
| otherwise = Nothing
genClusterUUID NoUUID = Nothing
fromClusterUUID :: ClusterUUID -> UUID
fromClusterUUID (ClusterUUID u) = u
-- The UUID of a node in a cluster. The UUID can be either the UUID of a
-- repository, or of another cluster.
newtype ClusterNodeUUID = ClusterNodeUUID { fromClusterNodeUUID :: UUID }
deriving (Show, Eq, Ord)
-- The same information is indexed two ways to allow fast lookups in either
-- direction.
data Clusters = Clusters
{ clusterUUIDs :: M.Map ClusterUUID (S.Set ClusterNodeUUID)
, clusterNodeUUIDs :: M.Map ClusterNodeUUID (S.Set ClusterUUID)
}
deriving (Show)
noClusters :: Clusters
noClusters = Clusters mempty mempty

View file

@ -142,4 +142,5 @@ data CommandCheckId
| RepoExists | RepoExists
| NoDaemonRunning | NoDaemonRunning
| GitAnnexShellOk | GitAnnexShellOk
| GitAnnexShellNotProxyable
deriving (Show, Ord, Eq) deriving (Show, Ord, Eq)

View file

@ -22,6 +22,9 @@ module Types.GitConfig (
RemoteNameable(..), RemoteNameable(..),
remoteAnnexConfig, remoteAnnexConfig,
remoteConfig, remoteConfig,
RemoteGitConfigField(..),
remoteGitConfigKey,
proxyInheritedFields,
) where ) where
import Common import Common
@ -30,7 +33,7 @@ import qualified Git.Config
import qualified Git.Construct import qualified Git.Construct
import Git.Types import Git.Types
import Git.ConfigTypes import Git.ConfigTypes
import Git.Remote (isRemoteKey, remoteKeyToRemoteName) import Git.Remote (isRemoteKey, isLegalName, remoteKeyToRemoteName)
import Git.Branch (CommitMode(..)) import Git.Branch (CommitMode(..))
import Git.Quote (QuotePath(..)) import Git.Quote (QuotePath(..))
import Utility.DataUnits import Utility.DataUnits
@ -44,6 +47,7 @@ import Types.RefSpec
import Types.RepoVersion import Types.RepoVersion
import Types.StallDetection import Types.StallDetection
import Types.View import Types.View
import Types.Cluster
import Config.DynamicConfig import Config.DynamicConfig
import Utility.HumanTime import Utility.HumanTime
import Utility.Gpg (GpgCmd, mkGpgCmd) import Utility.Gpg (GpgCmd, mkGpgCmd)
@ -154,6 +158,7 @@ data GitConfig = GitConfig
, annexPrivateRepos :: S.Set UUID , annexPrivateRepos :: S.Set UUID
, annexAdviceNoSshCaching :: Bool , annexAdviceNoSshCaching :: Bool
, annexViewUnsetDirectory :: ViewUnset , annexViewUnsetDirectory :: ViewUnset
, annexClusters :: M.Map RemoteName ClusterUUID
} }
extractGitConfig :: ConfigSource -> Git.Repo -> GitConfig extractGitConfig :: ConfigSource -> Git.Repo -> GitConfig
@ -282,6 +287,10 @@ extractGitConfig configsource r = GitConfig
, annexAdviceNoSshCaching = getbool (annexConfig "advicenosshcaching") True , annexAdviceNoSshCaching = getbool (annexConfig "advicenosshcaching") True
, annexViewUnsetDirectory = ViewUnset $ fromMaybe "_" $ , annexViewUnsetDirectory = ViewUnset $ fromMaybe "_" $
getmaybe (annexConfig "viewunsetdirectory") getmaybe (annexConfig "viewunsetdirectory")
, annexClusters =
M.mapMaybe (mkClusterUUID . toUUID) $
M.mapKeys removeclusterprefix $
M.filterWithKey isclusternamekey (config r)
} }
where where
getbool k d = fromMaybe d $ getmaybebool k getbool k d = fromMaybe d $ getmaybebool k
@ -306,6 +315,11 @@ extractGitConfig configsource r = GitConfig
hereuuid = maybe NoUUID toUUID $ getmaybe (annexConfig "uuid") hereuuid = maybe NoUUID toUUID $ getmaybe (annexConfig "uuid")
clusterprefix = annexConfigPrefix <> "cluster."
isclusternamekey k _ = clusterprefix `B.isPrefixOf` (fromConfigKey' k)
&& isLegalName (removeclusterprefix k)
removeclusterprefix k = drop (B.length clusterprefix) (fromConfigKey k)
{- Merge a GitConfig that comes from git-config with one containing {- Merge a GitConfig that comes from git-config with one containing
- repository-global defaults. -} - repository-global defaults. -}
mergeGitConfig :: GitConfig -> GitConfig -> GitConfig mergeGitConfig :: GitConfig -> GitConfig -> GitConfig
@ -372,9 +386,14 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexBwLimitUpload :: Maybe BwRate , remoteAnnexBwLimitUpload :: Maybe BwRate
, remoteAnnexBwLimitDownload :: Maybe BwRate , remoteAnnexBwLimitDownload :: Maybe BwRate
, remoteAnnexAllowUnverifiedDownloads :: Bool , remoteAnnexAllowUnverifiedDownloads :: Bool
, remoteAnnexUUID :: Maybe UUID
, remoteAnnexConfigUUID :: Maybe UUID , remoteAnnexConfigUUID :: Maybe UUID
, remoteAnnexMaxGitBundles :: Int , remoteAnnexMaxGitBundles :: Int
, remoteAnnexAllowEncryptedGitRepo :: Bool , remoteAnnexAllowEncryptedGitRepo :: Bool
, remoteAnnexProxy :: Bool
, remoteAnnexProxiedBy :: Maybe UUID
, remoteAnnexClusterNode :: Maybe [RemoteName]
, remoteAnnexClusterGateway :: [ClusterUUID]
, remoteUrl :: Maybe String , remoteUrl :: Maybe String
{- These settings are specific to particular types of remotes {- These settings are specific to particular types of remotes
@ -409,99 +428,254 @@ data RemoteGitConfig = RemoteGitConfig
extractRemoteGitConfig :: Git.Repo -> RemoteName -> STM RemoteGitConfig extractRemoteGitConfig :: Git.Repo -> RemoteName -> STM RemoteGitConfig
extractRemoteGitConfig r remotename = do extractRemoteGitConfig r remotename = do
annexcost <- mkDynamicConfig readCommandRunner annexcost <- mkDynamicConfig readCommandRunner
(notempty $ getmaybe "cost-command") (notempty $ getmaybe CostCommandField)
(getmayberead "cost") (getmayberead CostField)
annexignore <- mkDynamicConfig unsuccessfullCommandRunner annexignore <- mkDynamicConfig unsuccessfullCommandRunner
(notempty $ getmaybe "ignore-command") (notempty $ getmaybe IgnoreCommandField)
(getbool "ignore" False) (getbool IgnoreField False)
annexsync <- mkDynamicConfig successfullCommandRunner annexsync <- mkDynamicConfig successfullCommandRunner
(notempty $ getmaybe "sync-command") (notempty $ getmaybe SyncCommandField)
(getbool "sync" True) (getbool SyncField True)
return $ RemoteGitConfig return $ RemoteGitConfig
{ remoteAnnexCost = annexcost { remoteAnnexCost = annexcost
, remoteAnnexIgnore = annexignore , remoteAnnexIgnore = annexignore
, remoteAnnexSync = annexsync , remoteAnnexSync = annexsync
, remoteAnnexPull = getbool "pull" True , remoteAnnexPull = getbool PullField True
, remoteAnnexPush = getbool "push" True , remoteAnnexPush = getbool PushField True
, remoteAnnexReadOnly = getbool "readonly" False , remoteAnnexReadOnly = getbool ReadOnlyField False
, remoteAnnexCheckUUID = getbool "checkuuid" True , remoteAnnexCheckUUID = getbool CheckUUIDField True
, remoteAnnexVerify = getbool "verify" True , remoteAnnexVerify = getbool VerifyField True
, remoteAnnexTrackingBranch = Git.Ref . encodeBS <$> , remoteAnnexTrackingBranch = Git.Ref . encodeBS <$>
( notempty (getmaybe "tracking-branch") ( notempty (getmaybe TrackingBranchField)
<|> notempty (getmaybe "export-tracking") -- old name <|> notempty (getmaybe ExportTrackingField) -- old name
) )
, remoteAnnexTrustLevel = notempty $ getmaybe "trustlevel" , remoteAnnexTrustLevel = notempty $ getmaybe TrustLevelField
, remoteAnnexStartCommand = notempty $ getmaybe "start-command" , remoteAnnexStartCommand = notempty $ getmaybe StartCommandField
, remoteAnnexStopCommand = notempty $ getmaybe "stop-command" , remoteAnnexStopCommand = notempty $ getmaybe StopCommandField
, remoteAnnexSpeculatePresent = , remoteAnnexSpeculatePresent =
getbool "speculate-present" False getbool SpeculatePresentField False
, remoteAnnexBare = getmaybebool "bare" , remoteAnnexBare = getmaybebool BareField
, remoteAnnexRetry = getmayberead "retry" , remoteAnnexRetry = getmayberead RetryField
, remoteAnnexForwardRetry = getmayberead "forward-retry" , remoteAnnexForwardRetry = getmayberead ForwardRetryField
, remoteAnnexRetryDelay = Seconds , remoteAnnexRetryDelay = Seconds
<$> getmayberead "retrydelay" <$> getmayberead RetryDelayField
, remoteAnnexStallDetection = , remoteAnnexStallDetection =
readStallDetection =<< getmaybe "stalldetection" readStallDetection =<< getmaybe StallDetectionField
, remoteAnnexStallDetectionUpload = , remoteAnnexStallDetectionUpload =
readStallDetection =<< getmaybe "stalldetection-upload" readStallDetection =<< getmaybe StallDetectionUploadField
, remoteAnnexStallDetectionDownload = , remoteAnnexStallDetectionDownload =
readStallDetection =<< getmaybe "stalldetection-download" readStallDetection =<< getmaybe StallDetectionDownloadField
, remoteAnnexBwLimit = , remoteAnnexBwLimit =
readBwRatePerSecond =<< getmaybe "bwlimit" readBwRatePerSecond =<< getmaybe BWLimitField
, remoteAnnexBwLimitUpload = , remoteAnnexBwLimitUpload =
readBwRatePerSecond =<< getmaybe "bwlimit-upload" readBwRatePerSecond =<< getmaybe BWLimitUploadField
, remoteAnnexBwLimitDownload = , remoteAnnexBwLimitDownload =
readBwRatePerSecond =<< getmaybe "bwlimit-download" readBwRatePerSecond =<< getmaybe BWLimitDownloadField
, remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $ , remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
getmaybe ("security-allow-unverified-downloads") getmaybe SecurityAllowUnverifiedDownloadsField
, remoteAnnexUUID = toUUID <$> getmaybe UUIDField
, remoteAnnexConfigUUID = toUUID <$> getmaybe ConfigUUIDField
, remoteAnnexMaxGitBundles = , remoteAnnexMaxGitBundles =
fromMaybe 100 (getmayberead "max-git-bundles") fromMaybe 100 (getmayberead MaxGitBundlesField)
, remoteAnnexConfigUUID = toUUID <$> getmaybe "config-uuid"
, remoteAnnexShell = getmaybe "shell"
, remoteAnnexSshOptions = getoptions "ssh-options"
, remoteAnnexRsyncOptions = getoptions "rsync-options"
, remoteAnnexRsyncDownloadOptions = getoptions "rsync-download-options"
, remoteAnnexRsyncUploadOptions = getoptions "rsync-upload-options"
, remoteAnnexRsyncTransport = getoptions "rsync-transport"
, remoteAnnexGnupgOptions = getoptions "gnupg-options"
, remoteAnnexGnupgDecryptOptions = getoptions "gnupg-decrypt-options"
, remoteAnnexSharedSOPCommand = SOPCmd <$>
notempty (getmaybe "shared-sop-command")
, remoteAnnexSharedSOPProfile = SOPProfile <$>
notempty (getmaybe "shared-sop-profile")
, remoteAnnexRsyncUrl = notempty $ getmaybe "rsyncurl"
, remoteAnnexBupRepo = getmaybe "buprepo"
, remoteAnnexBorgRepo = getmaybe "borgrepo"
, remoteAnnexTahoe = getmaybe "tahoe"
, remoteAnnexBupSplitOptions = getoptions "bup-split-options"
, remoteAnnexDirectory = notempty $ getmaybe "directory"
, remoteAnnexAndroidDirectory = notempty $ getmaybe "androiddirectory"
, remoteAnnexAndroidSerial = notempty $ getmaybe "androidserial"
, remoteAnnexGCrypt = notempty $ getmaybe "gcrypt"
, remoteAnnexGitLFS = getbool "git-lfs" False
, remoteAnnexDdarRepo = getmaybe "ddarrepo"
, remoteAnnexHookType = notempty $ getmaybe "hooktype"
, remoteAnnexExternalType = notempty $ getmaybe "externaltype"
, remoteAnnexAllowEncryptedGitRepo = , remoteAnnexAllowEncryptedGitRepo =
getbool "allow-encrypted-gitrepo" False getbool AllowEncryptedGitRepoField False
, remoteAnnexProxy = getbool ProxyField False
, remoteAnnexProxiedBy = toUUID <$> getmaybe ProxiedByField
, remoteAnnexClusterNode =
(filter isLegalName . words)
<$> getmaybe ClusterNodeField
, remoteAnnexClusterGateway = fromMaybe [] $
(mapMaybe (mkClusterUUID . toUUID) . words)
<$> getmaybe ClusterGatewayField
, remoteUrl = , remoteUrl =
case Git.Config.getMaybe (remoteConfig remotename "url") r of case Git.Config.getMaybe (remoteConfig remotename (remoteGitConfigKey UrlField)) r of
Just (ConfigValue b) Just (ConfigValue b)
| B.null b -> Nothing | B.null b -> Nothing
| otherwise -> Just (decodeBS b) | otherwise -> Just (decodeBS b)
_ -> Nothing _ -> Nothing
, remoteAnnexShell = getmaybe ShellField
, remoteAnnexSshOptions = getoptions SshOptionsField
, remoteAnnexRsyncOptions = getoptions RsyncOptionsField
, remoteAnnexRsyncDownloadOptions = getoptions RsyncDownloadOptionsField
, remoteAnnexRsyncUploadOptions = getoptions RsyncUploadOptionsField
, remoteAnnexRsyncTransport = getoptions RsyncTransportField
, remoteAnnexGnupgOptions = getoptions GnupgOptionsField
, remoteAnnexGnupgDecryptOptions = getoptions GnupgDecryptOptionsField
, remoteAnnexSharedSOPCommand = SOPCmd <$>
notempty (getmaybe SharedSOPCommandField)
, remoteAnnexSharedSOPProfile = SOPProfile <$>
notempty (getmaybe SharedSOPProfileField)
, remoteAnnexRsyncUrl = notempty $ getmaybe RsyncUrlField
, remoteAnnexBupRepo = getmaybe BupRepoField
, remoteAnnexBorgRepo = getmaybe BorgRepoField
, remoteAnnexTahoe = getmaybe TahoeField
, remoteAnnexBupSplitOptions = getoptions BupSplitOptionsField
, remoteAnnexDirectory = notempty $ getmaybe DirectoryField
, remoteAnnexAndroidDirectory = notempty $ getmaybe AndroidDirectoryField
, remoteAnnexAndroidSerial = notempty $ getmaybe AndroidSerialField
, remoteAnnexGCrypt = notempty $ getmaybe GCryptField
, remoteAnnexGitLFS = getbool GitLFSField False
, remoteAnnexDdarRepo = getmaybe DdarRepoField
, remoteAnnexHookType = notempty $ getmaybe HookTypeField
, remoteAnnexExternalType = notempty $ getmaybe ExternalTypeField
} }
where where
getbool k d = fromMaybe d $ getmaybebool k getbool k d = fromMaybe d $ getmaybebool k
getmaybebool k = Git.Config.isTrueFalse' =<< getmaybe' k getmaybebool k = Git.Config.isTrueFalse' =<< getmaybe' k
getmayberead k = readish =<< getmaybe k getmayberead k = readish =<< getmaybe k
getmaybe = fmap fromConfigValue . getmaybe' getmaybe = fmap fromConfigValue . getmaybe'
getmaybe' k = getmaybe' :: RemoteGitConfigField -> Maybe ConfigValue
Git.Config.getMaybe (remoteAnnexConfig remotename k) r getmaybe' f =
<|> let k = remoteGitConfigKey f
Git.Config.getMaybe (annexConfig k) r in Git.Config.getMaybe (remoteAnnexConfig remotename k) r
<|> Git.Config.getMaybe (annexConfig k) r
getoptions k = fromMaybe [] $ words <$> getmaybe k getoptions k = fromMaybe [] $ words <$> getmaybe k
data RemoteGitConfigField
= CostField
| CostCommandField
| IgnoreField
| IgnoreCommandField
| SyncField
| SyncCommandField
| PullField
| PushField
| ReadOnlyField
| CheckUUIDField
| VerifyField
| TrackingBranchField
| ExportTrackingField
| TrustLevelField
| StartCommandField
| StopCommandField
| SpeculatePresentField
| BareField
| RetryField
| ForwardRetryField
| RetryDelayField
| StallDetectionField
| StallDetectionUploadField
| StallDetectionDownloadField
| BWLimitField
| BWLimitUploadField
| BWLimitDownloadField
| UUIDField
| ConfigUUIDField
| SecurityAllowUnverifiedDownloadsField
| MaxGitBundlesField
| AllowEncryptedGitRepoField
| ProxyField
| ProxiedByField
| ClusterNodeField
| ClusterGatewayField
| UrlField
| ShellField
| SshOptionsField
| RsyncOptionsField
| RsyncDownloadOptionsField
| RsyncUploadOptionsField
| RsyncTransportField
| GnupgOptionsField
| GnupgDecryptOptionsField
| SharedSOPCommandField
| SharedSOPProfileField
| RsyncUrlField
| BupRepoField
| BorgRepoField
| TahoeField
| BupSplitOptionsField
| DirectoryField
| AndroidDirectoryField
| AndroidSerialField
| GCryptField
| GitLFSField
| DdarRepoField
| HookTypeField
| ExternalTypeField
deriving (Enum, Bounded)
remoteGitConfigField :: RemoteGitConfigField -> (UnqualifiedConfigKey, ProxyInherited)
remoteGitConfigField = \case
-- Hard to know the true cost of accessing eg a slow special
-- remote via the proxy. The cost of the proxy is the best guess
-- so do inherit it.
CostField -> inherited "cost"
CostCommandField -> inherited "cost-command"
IgnoreField -> inherited "ignore"
IgnoreCommandField -> inherited "ignore-command"
SyncField -> inherited "sync"
SyncCommandField -> inherited "sync-command"
PullField -> inherited "pull"
PushField -> inherited "push"
ReadOnlyField -> inherited "readonly"
CheckUUIDField -> uninherited "checkuuid"
VerifyField -> inherited "verify"
TrackingBranchField -> uninherited "tracking-branch"
ExportTrackingField -> uninherited "export-tracking"
TrustLevelField -> uninherited "trustlevel"
StartCommandField -> uninherited "start-command"
StopCommandField -> uninherited "stop-command"
SpeculatePresentField -> inherited "speculate-present"
BareField -> inherited "bare"
RetryField -> inherited "retry"
ForwardRetryField -> inherited "forward-retry"
RetryDelayField -> inherited "retrydelay"
StallDetectionField -> inherited "stalldetection"
StallDetectionUploadField -> inherited "stalldetection-upload"
StallDetectionDownloadField -> inherited "stalldetection-download"
BWLimitField -> inherited "bwlimit"
BWLimitUploadField -> inherited "bwlimit-upload"
BWLimitDownloadField -> inherited "bwlimit-upload"
UUIDField -> uninherited "uuid"
ConfigUUIDField -> uninherited "config-uuid"
SecurityAllowUnverifiedDownloadsField -> inherited "security-allow-unverified-downloads"
MaxGitBundlesField -> inherited "max-git-bundles"
AllowEncryptedGitRepoField -> inherited "allow-encrypted-gitrepo"
-- Allow proxy chains.
ProxyField -> inherited "proxy"
ProxiedByField -> uninherited "proxied-by"
ClusterNodeField -> uninherited "cluster-node"
ClusterGatewayField -> uninherited "cluster-gateway"
UrlField -> uninherited "url"
ShellField -> inherited "shell"
SshOptionsField -> inherited "ssh-options"
RsyncOptionsField -> inherited "rsync-options"
RsyncDownloadOptionsField -> inherited "rsync-download-options"
RsyncUploadOptionsField -> inherited "rsync-upload-options"
RsyncTransportField -> inherited "rsync-transport"
GnupgOptionsField -> inherited "gnupg-options"
GnupgDecryptOptionsField -> inherited "gnupg-decrypt-options"
SharedSOPCommandField -> inherited "shared-sop-command"
SharedSOPProfileField -> inherited "shared-sop-profile"
RsyncUrlField -> uninherited "rsyncurl"
BupRepoField -> uninherited "buprepo"
BorgRepoField -> uninherited "borgrepo"
TahoeField -> uninherited "tahoe"
BupSplitOptionsField -> uninherited "bup-split-options"
DirectoryField -> uninherited "directory"
AndroidDirectoryField -> uninherited "androiddirectory"
AndroidSerialField -> uninherited "androidserial"
GCryptField -> uninherited "gcrypt"
GitLFSField -> uninherited "git-lfs"
DdarRepoField -> uninherited "ddarrepo"
HookTypeField -> uninherited "hooktype"
ExternalTypeField -> uninherited "externaltype"
where
inherited f = (f, ProxyInherited True)
uninherited f = (f, ProxyInherited False)
newtype ProxyInherited = ProxyInherited Bool
-- All remote config fields that are inherited from a proxy.
proxyInheritedFields :: [UnqualifiedConfigKey]
proxyInheritedFields =
map fst $
filter (\(_, ProxyInherited p) -> p) $
map remoteGitConfigField [minBound..maxBound]
remoteGitConfigKey :: RemoteGitConfigField -> UnqualifiedConfigKey
remoteGitConfigKey = fst . remoteGitConfigField
notempty :: Maybe String -> Maybe String notempty :: Maybe String -> Maybe String
notempty Nothing = Nothing notempty Nothing = Nothing
notempty (Just "") = Nothing notempty (Just "") = Nothing
@ -513,9 +687,12 @@ dummyRemoteGitConfig = atomically $
type UnqualifiedConfigKey = B.ByteString type UnqualifiedConfigKey = B.ByteString
annexConfigPrefix :: B.ByteString
annexConfigPrefix = "annex."
{- A global annex setting in git config. -} {- A global annex setting in git config. -}
annexConfig :: UnqualifiedConfigKey -> ConfigKey annexConfig :: UnqualifiedConfigKey -> ConfigKey
annexConfig key = ConfigKey ("annex." <> key) annexConfig key = ConfigKey (annexConfigPrefix <> key)
class RemoteNameable r where class RemoteNameable r where
getRemoteName :: r -> RemoteName getRemoteName :: r -> RemoteName

View file

@ -89,6 +89,12 @@ instance Observable (Maybe a) where
observeBool Nothing = False observeBool Nothing = False
observeFailure = Nothing observeFailure = Nothing
instance Observable (Either e (Maybe a)) where
observeBool (Left _) = False
observeBool (Right Nothing) = False
observeBool (Right (Just _)) = True
observeFailure = Right Nothing
class Transferrable t where class Transferrable t where
descTransfrerrable :: t -> Maybe String descTransfrerrable :: t -> Maybe String

View file

@ -1,6 +1,6 @@
{- Simple line-based protocols. {- Simple line-based protocols.
- -
- Copyright 2013-2020 Joey Hess <id@joeyh.name> - Copyright 2013-2024 Joey Hess <id@joeyh.name>
- -
- License: BSD-2-clause - License: BSD-2-clause
-} -}
@ -21,6 +21,7 @@ module Utility.SimpleProtocol (
parse3, parse3,
parse4, parse4,
parse5, parse5,
parseList,
dupIoHandles, dupIoHandles,
getProtocolLine, getProtocolLine,
) where ) where
@ -111,6 +112,10 @@ parse5 mk s = mk <$> deserialize p1 <*> deserialize p2 <*> deserialize p3 <*> de
splitWord :: String -> (String, String) splitWord :: String -> (String, String)
splitWord = separate isSpace splitWord = separate isSpace
{- Only safe to use when the serialization does not include whitespace. -}
parseList :: Serializable p => ([p] -> a) -> Parser a
parseList mk v = mk <$> mapM deserialize (words v)
{- When a program speaks a simple protocol over stdio, any other output {- When a program speaks a simple protocol over stdio, any other output
- to stdout (or anything that attempts to read from stdin) - to stdout (or anything that attempts to read from stdin)
- will mess up the protocol. To avoid that, close stdin, - will mess up the protocol. To avoid that, close stdin,

View file

@ -124,9 +124,16 @@ See [[todo/proving_preferred_content_behavior]].
## rebalancing ## rebalancing
In both the 3 of 5 use case and a split brain situation, it's possible for In both the 3 of 5 use case and a split brain situation, it's possible for
content to end up not optimally balanced between repositories. git-annex content to end up not optimally balanced between repositories.
can be made to operate in a mode where it does additional work to rebalance
repositories. (There are also situations where a cluster node ends up without a copy
of a file that is preferred content, or where adding a copy to a node
would satisfy numcopies. This can happen eg, when a client sends a file
to a single node rather than to the cluster. Rebalancing also will deal
with those.)
git-annex can be made to operate in a mode where it does additional work
to rebalance repositories.
This can be an option like --rebalance, that changes how the preferred content This can be an option like --rebalance, that changes how the preferred content
expression is evaluated. The user can choose where and when to run that. expression is evaluated. The user can choose where and when to run that.

View file

@ -40,8 +40,8 @@ The server responds with either its own UUID when authentication
is successful. Or, it can fail the authentication, and close the is successful. Or, it can fail the authentication, and close the
connection. connection.
AUTH_SUCCESS UUID AUTH-SUCCESS UUID
AUTH_FAILURE AUTH-FAILURE
Note that authentication does not guarantee that the client is talking to Note that authentication does not guarantee that the client is talking to
who they expect to be talking to. This, and encryption of the connection, who they expect to be talking to. This, and encryption of the connection,
@ -64,6 +64,19 @@ that is less than or equal to the version the client sent:
Now both client and server should use version 1. Now both client and server should use version 1.
## Cluster cycle prevention
In protocol version 2, immediately after VERSION, the
client can send an additional message that is used to
prevent cycles when accessing clusters.
BYPASS UUID1 UUID2 ...
The UUIDs are cluster gateways to avoid connecting to when
serving a cluster.
The server makes no response to this message.
## Binary data ## Binary data
The protocol allows raw binary data to be sent. This is done The protocol allows raw binary data to be sent. This is done
@ -117,6 +130,10 @@ To remove a key's content from the server, the client sends:
The server responds with either SUCCESS or FAILURE. The server responds with either SUCCESS or FAILURE.
In protocol version 2, the server can optionally reply with SUCCESS-PLUS
or FAILURE-PLUS. Each has a subsequent list of UUIDs of repositories
that the content was removed from.
## Storing content on the server ## Storing content on the server
To store content on the server, the client sends: To store content on the server, the client sends:
@ -132,7 +149,14 @@ spaces, since it's not the last token in the line. Use '%' to indicate
whitespace.) whitespace.)
The server may respond with ALREADY-HAVE if it already The server may respond with ALREADY-HAVE if it already
had the conent of that key. Otherwise, it responds with: had the conent of that key.
In protocol version 2, the server can optionally reply with
ALREADY-HAVE-PLUS. The subsequent list of UUIDs are additional
UUIDs where the content is stored, in addition to the UUID where
the client was going to send it.
Otherwise, it responds with:
PUT-FROM Offset PUT-FROM Offset
@ -152,6 +176,9 @@ was being sent.
If the server successfully receives the data and stores the content, If the server successfully receives the data and stores the content,
it replies with SUCCESS. Otherwise, FAILURE. it replies with SUCCESS. Otherwise, FAILURE.
In protocol version 2, the server can optionally reply with SUCCESS-PLUS
and a list of UUIDs where the content was stored.
## Getting content from the server ## Getting content from the server
To get content from the server, the client sends: To get content from the server, the client sends:
@ -192,6 +219,8 @@ its exit code.
CONNECTDONE ExitCode CONNECTDONE ExitCode
After that, the server closes the connection.
## Change notification ## Change notification
The client can request to be notified when a ref in The client can request to be notified when a ref in

View file

@ -35,7 +35,7 @@ For example (eliding the full HTTP responses, only showing the data):
> Content-Length: ... > Content-Length: ...
> >
> AUTH 79a5a1f4-07e8-11ef-873d-97f93ca91925 > AUTH 79a5a1f4-07e8-11ef-873d-97f93ca91925
< AUTH_SUCCESS ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 < AUTH-SUCCESS ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6
> POST /git-annex HTTP/1.0 > POST /git-annex HTTP/1.0
> Content-Type: x-git-annex-p2p > Content-Type: x-git-annex-p2p
@ -80,7 +80,7 @@ correspond to each action in the P2P protocol.
Something like this: Something like this:
> GET /git-annex/v1/AUTH?clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.0 > GET /git-annex/v1/AUTH?clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925 HTTP/1.0
< AUTH_SUCCESS ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 < AUTH-SUCCESS ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6
> GET /git-annex/v1/CHECKPRESENT?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.0 > GET /git-annex/v1/CHECKPRESENT?key=SHA1--foo&clientuuid=79a5a1f4-07e8-11ef-873d-97f93ca91925&serveruuid=ecf6d4ca-07e8-11ef-8990-9b8c1f696bf6 HTTP/1.0
> SUCCESS > SUCCESS

View file

@ -219,11 +219,6 @@ And, if the proxy repository itself contains the requested key, it can send
it directly. This allows the proxy repository to be primed with frequently it directly. This allows the proxy repository to be primed with frequently
accessed files when it has the space. accessed files when it has the space.
(Should uploads check preferred content of the proxy repository and also
store a copy there when allowed? I think this would be ok, so long as when
preferred content is not set, it does not default to storing content
there.)
When a drop is requested from the cluster's UUID, git-annex-shell drops When a drop is requested from the cluster's UUID, git-annex-shell drops
from all nodes, as well as from the proxy itself. Only indicating success from all nodes, as well as from the proxy itself. Only indicating success
if it is able to delete all copies from the cluster. This needs if it is able to delete all copies from the cluster. This needs
@ -238,6 +233,14 @@ always fail. Also, when constructing a drop proof for a cluster's UUID,
the nodes of that cluster should be omitted, otherwise a drop from the the nodes of that cluster should be omitted, otherwise a drop from the
cluster can lock content on individual nodes, causing the drop to fail. cluster can lock content on individual nodes, causing the drop to fail.
Moving from a cluster is a special case because it may reduce the number
of copies. So move's `willDropMakeItWorse` check needs to special case
clusters. Since dropping from the cluster may remove content from any of
its nodes, which may include copies on nodes that the local location log does
not know about yet, the special case probably needs to always assume
that dropping from a cluster in a move risks reducing numcopies,
and so only allow it when a drop proof can be constructed.
Some commands like `git-annex whereis` will list content as being stored in Some commands like `git-annex whereis` will list content as being stored in
the cluster, as well as on whichever of its nodes, and whereis currently the cluster, as well as on whichever of its nodes, and whereis currently
says "n copies", but since the cluster doesn't count as a copy, that says "n copies", but since the cluster doesn't count as a copy, that
@ -279,9 +282,9 @@ configuration of the cluster. But the cluster is configured via the
git-annex branch, particularly preferred content, and the proxy log, and git-annex branch, particularly preferred content, and the proxy log, and
the cluster log. the cluster log.
A user could, for example, make the cluster's frontend want all A user could, for example, make a small cluster node want all content, and
content, and so fill up its small disk. They could make a particular node so fill up its small disk. They could make a particular node not want any
not want any content. They could remove nodes from the cluster. content. They could remove nodes from the cluster.
One way to deal with this is for the cluster to reject git-annex branch One way to deal with this is for the cluster to reject git-annex branch
pushes that make such changes. Or only allow them if they are signed with a pushes that make such changes. Or only allow them if they are signed with a
@ -296,24 +299,43 @@ A remote will only be treated as a node of a cluster when the git
configuration remote.name.annex-cluster-node is set, which will prevent configuration remote.name.annex-cluster-node is set, which will prevent
creating clusters in places where they are not intended to be. creating clusters in places where they are not intended to be.
## distributed clusters
A cluster's nodes may be geographically distributed amoung several
locations, which are effectivly subclusters. To support this, an upload
or removal sent to one frontend proxy of the cluster will be repeated to
other frontend proxies that are remotes of that one and have the cluster's
UUID.
This is better than supporting a cluster that is a node of another cluster,
because rather than a hierarchical structure, this allows for organic
structures of any shape. For example, there could be two frontends to a
cluster, in different locations. An upload to either frontend fans out to
its local nodes as well as over to the other frontend, and to its local
nodes.
This does mean that cycles need to be prevented. See section below.
## speed ## speed
A passthrough proxy should be as fast as possible so as not to add overhead A proxy should be as fast as possible so as not to add overhead
to a file retrieve, store, or checkpresent. This probably means that to a file retrieve, store, or checkpresent. This probably means that
it keeps TCP connections open to each host in the cluster. It might use a it keeps TCP connections open to each host. It might use a
protocol with less overhead than ssh. protocol with less overhead than ssh.
In the case of checkpresent, it would be possible for the proxy to not In the case of checkpresent, it would be possible for the gateway to not
communicate with the cluster to check that the data is still present on it. communicate with cluster nodes to check that the data is still present
As long as all access is intermediated via the proxy, its git-annex branch in the cluster. As long as all access is intermediated via a single gateway,
could be relied on to always be correct, in theory. Proving that theory, its git-annex branch could be relied on to always be correct, in theory.
making sure to account for all possible race conditions and other scenarios, Proving that theory, making sure to account for all possible race conditions
would be necessary for such an optimisation. and other scenarios, would be necessary for such an optimisation. This
would not work for multi-gateway clusters unless the gateways were kept in
sync about locations, which they currently are not.
Another way the proxy could speed things up is to cache some subset of Another way the cluster gateway could speed things up is to cache some
content. Eg, analize what files are typically requested, and store another subset of content. Eg, analize what files are typically requested, and
copy of those on the proxy. Perhaps prioritize storing smaller files, where store another copy of those on the proxy. Perhaps prioritize storing
latency tends to swamp transfer speed. smaller files, where latency tends to swamp transfer speed.
## proxying to special remotes ## proxying to special remotes
@ -446,7 +468,7 @@ So overall, it seems better to do proxy-side encryption. But it may be
worth adding a special remote that does its own client-side encryption worth adding a special remote that does its own client-side encryption
in front of the proxy. in front of the proxy.
## cycles ## cycles of proxies
A repo can advertise that it proxies for a repo which has the same uuid as A repo can advertise that it proxies for a repo which has the same uuid as
itself. Or there can be a larger cycle involving a proxy that proxies to a itself. Or there can be a larger cycle involving a proxy that proxies to a
@ -454,36 +476,43 @@ proxy, etc.
Since the proxied repo uuid is communicated to git-annex-shell via Since the proxied repo uuid is communicated to git-annex-shell via
--uuid, a repo that advertises proxying for itself will be connected to --uuid, a repo that advertises proxying for itself will be connected to
with its own uuid. No proxying is done in this case. Same happens with a with its own uuid. No proxying is done in that case.
larger cycle.
Instantiating remotes needs to identity cycles and break them. Otherwise
it would construct an infinite number of proxied remotes with names
like "foo-foo-foo-foo-..." or "foo-bar-foo-bar-..."
Once `git-annex copy --to proxy` is implemented, and the proxy decides
where to send content that is being sent directly to it, cycles will
become an issue with that as well.
What if repo A is a proxy and has repo B as a remote. Meanwhile, repo B is What if repo A is a proxy and has repo B as a remote. Meanwhile, repo B is
a proxy and has repo A as a remote? a proxy and has repo A as a remote? git-annex-shell on repo A will get
A's uuid, and so will operate on it directly without proxying. So larger
cycles are also not a problem on the proxy side.
An upload to repo A will start by checking if repo B wants the content and if so, On the client side, instantiating remotes needs to identity cycles and
start an upload to repo B. Then the same happens on repo B, leading it to break them. Otherwise it would construct an infinite number of proxied
start an upload to repo A. remotes with names like "foo-foo-foo-foo-..." or "foo-bar-foo-bar-..."
At this point, it might be possible for git-annex to detect the cycle, ## cycles of cluster proxies
if the proxy uses a transfer lock file. If repo B or repo A had some other
remote that is not part of a cycle, they could deposit the upload there and
the upload still succeed. Otherwise the upload would fail, which is
probably the best that can be done with such a broken configuration.
So, it seems like proxies would need to take transfer locks for uploads, If an PUT or REMOVE message is sent to a proxy for a cluster, and that
even though the content is being proxied to elsewhere. repository has a remote that is also a proxy for the same cluster,
the message gets repeated on to it. This can lead to cycles, which have to
be broken.
Dropping could have similar cycles with content presence locking, which To break the cycle, extend the P2P protocol with an additional message,
needs to be thought through as well. A cycle of the actual dropContent like:
operation might also be possible.
VIA uuid1 uuid2
This indicates to a proxy that the message has been received via the other
listed proxies. It can then avoid repeating the message out via any of
those proxies. When repeating a message out to another proxy, just add
the UUID of the local repository to the list.
This will be an extension to the protocol, but so long as it's added in
the same git-annex version that adds support for proxies, every cluster
proxy will support it.
This avoids cycles, but it does not avoid situations where there are
multiple paths through a proxy network that reach the same node. In such a
situation, a REMOVE might happen twice (no problem) or a PUT be received
twice from different paths (one of them would fail due to the other one
taking the transfer lock).
## exporttree=yes ## exporttree=yes

View file

@ -0,0 +1,44 @@
# NAME
git-annex extendcluster - add an additional gateway to a cluster
# SYNOPSIS
git-annex extendcluster gateway clustername
# DESCRIPTION
This command is used to configure a repository to serve as an additional
gateway to a cluster. It is run in that repository.
The repository this command is run in should have a remote that is a
gateway to the cluster. The `gateway` parameter is the name of that remote.
The `clustername` parameter is the name of the cluster.
The next step after running this command is to configure
any additional cluster nodes that this gateway serves to the cluster,
then run [[git-annex-updatecluster]]. See the documentation of that
command for details about configuring nodes.
After running this command in the new gateway repository, it typically
also needs to be run in the other gateway repositories as well,
after adding the new gateway repository as a remote.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
# SEE ALSO
[[git-annex]](1)
[[git-annex-initcluster]](1)
[[git-annex-updatecluster]](1)
[[git-annex-updateproxy]](1)
<https://git-annex.branchable.com/tips/clusters/>
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -0,0 +1,39 @@
# NAME
git-annex initcluster - initialize a new cluster
# SYNOPSIS
git-annex initcluster name [description]
# DESCRIPTION
This command initializes a new cluster with the specified name. If no
description is provided, one will be set automatically.
This command should be run in the repository that will serve as the gateway
to the cluster.
The next step after running this command is to configure
the cluster nodes, then run [[git-annex-updatecluster]]. See the
documentation of that command for details about configuring nodes.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
# SEE ALSO
[[git-annex]](1)
[[git-annex-updatecluster]](1)
[[git-annex-extendcluster]](1)
[[git-annex-preferred-content]](1)
[[git-annex-updateproxy]](1)
<https://git-annex.branchable.com/tips/clusters/>
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -8,7 +8,7 @@ Each repository has a preferred content setting, which specifies content
that the repository wants to have present. These settings can be configured that the repository wants to have present. These settings can be configured
using `git annex vicfg` or `git annex wanted`. using `git annex vicfg` or `git annex wanted`.
They are used by the `--auto` option, by `git annex sync --content`, They are used by the `--auto` option, by `git annex sync --content`,
and by the git-annex assistant. by clusters, and by the git-annex assistant.
While preferred content expresses a preference, it can be overridden While preferred content expresses a preference, it can be overridden
by simply using `git annex drop`. On the other hand, required content by simply using `git annex drop`. On the other hand, required content

View file

@ -9,7 +9,7 @@ git annex required `repository [expression]`
# DESCRIPTION # DESCRIPTION
When run with an expression, configures the content that is required When run with an expression, configures the content that is required
to be held in the archive. to be held in the repository.
For example: For example:

View file

@ -86,7 +86,9 @@ first "/~/" or "/~user/" is expanded to the specified home directory.
* --uuid=UUID * --uuid=UUID
git-annex uses this to specify the UUID of the repository it was expecting git-annex uses this to specify the UUID of the repository it was expecting
git-annex-shell to access, as a sanity check. git-annex-shell to access. This is both a sanity check, and allows
git-annex shell to proxy access to remotes, when configured
by [[git-annex-update-proxy]].
* Also the [[git-annex-common-options]](1) can be used. * Also the [[git-annex-common-options]](1) can be used.

View file

@ -0,0 +1,43 @@
# NAME
git-annex updatecluster - update records of cluster nodes
# SYNOPSIS
git-annex updatecluster
# DESCRIPTION
This command is used to record the nodes of a cluster in the git-annex
branch, and set up proxying to the nodes. It should be run in the
repository that will serve as a gateway to the cluster.
It looks at the git config `remote.name.annex-cluster-node` of
each remote. When that is set to the name of a cluster that has been
initialized with `git-annex initcluster`, the node will be recorded in the
git-annex branch.
To remove a node from a cluster, unset `remote.name.annex-cluster-node`
and run this command.
To add additional gateways to a cluster, after running this command,
use [[git-annex-extendcluster]].
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
# SEE ALSO
[[git-annex]](1)
[[git-annex-initcluster]](1)
[[git-annex-extendcluster]](1)
[[git-annex-updateproxy]](1)
<https://git-annex.branchable.com/tips/clusters/>
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -0,0 +1,44 @@
# NAME
git-annex updateproxy - update records with proxy configuration
# SYNOPSIS
git annex updateproxy
# DESCRIPTION
A git-annex repository can act as a proxy for its remotes. That allows
annexed content to be stored and removed from the proxy's remotes, by
repositories that do not have a direct connection to the remotes.
By default, no proxying is done. To configure the local repository to act
as a proxy for its remote named "foo", run `git config remote.foo.annex-proxy`
true`.
After setting or unsetting `remote.<name>.annex-proxy` git configurations,
run `git-annex updateproxy` to record the proxy configuration in the
git-annex branch. That tells other repositories about the proxy
configuration.
Suppose, for example, that remote "work" has had this command run in
it. Then after pulling from "work", git-annex will know about an
additional remote, "work-foo". That remote will be accessed using "work" as
a proxy.
Proxies can only be accessed via ssh.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
# SEE ALSO
[[git-annex]](1)
[[git-annex-updatecluster]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -9,7 +9,7 @@ git annex wanted `repository [expression]`
# DESCRIPTION # DESCRIPTION
When run with an expression, configures the content that is preferred When run with an expression, configures the content that is preferred
to be held in the archive. See [[git-annex-preferred-content]](1) to be held in the repository. See [[git-annex-preferred-content]](1)
For example: For example:

View file

@ -252,7 +252,6 @@ content from the key-value store.
See [[git-annex-configremote]](1) for details. See [[git-annex-configremote]](1) for details.
* `renameremote` * `renameremote`
Renames a special remote. Renames a special remote.
@ -327,6 +326,31 @@ content from the key-value store.
See [[git-annex-required]](1) for details. See [[git-annex-required]](1) for details.
* `initcluster`
Initializes a new cluster.
See [[git-annex-initcluster](1) for details.
* `updatecluster`
Update records of cluster nodes.
See [[git-annex-updatecluster](1) for details.
* `extendcluster`
Adds an additional gateway to a cluster.
See [[git-annex-extendcluster](1) for details.
* `updateproxy`
Update records with proxy configuration.
See [[git-annex-updateproxy](1) for details.
* `schedule repository [expression]` * `schedule repository [expression]`
Get or set scheduled jobs. Get or set scheduled jobs.
@ -1372,6 +1396,15 @@ repository, using [[git-annex-config]]. See its man page for a list.)
set in global git configuration. set in global git configuration.
For details, see <https://git-annex.branchable.com/tuning/>. For details, see <https://git-annex.branchable.com/tuning/>.
* `annex.cluster.<name>`
This is set to make the repository be a gateway to a cluster.
The value is the cluster UUID. Note that cluster UUIDs are not
the same as repository UUIDs, and a repository UUID cannot be used here.
Usually this is set up by running [[git-annex-initcluster]] or
[[git-annex-extendcluster]].
# CONFIGURATION OF REMOTES # CONFIGURATION OF REMOTES
Remotes are configured using these settings in `.git/config`. Remotes are configured using these settings in `.git/config`.
@ -1640,6 +1673,38 @@ Remotes are configured using these settings in `.git/config`.
content of any file, even though its normal location tracking does not content of any file, even though its normal location tracking does not
indicate that it does. This will cause git-annex to try to get all file indicate that it does. This will cause git-annex to try to get all file
contents from the remote. Can be useful in setting up a caching remote. contents from the remote. Can be useful in setting up a caching remote.
* `remote.<name>.annex-proxy`
Set to "true" to make the local repository able to act as a proxy to this
remote.
After configuring this, run [[git-annex-updateproxy](1) to store
the new configuration in the git-annex branch.
* `remote.<name>.annex-proxied-by`
Usually this is used internally, when git-annex sets up proxied remotes,
and will not need to be configured. The value is the UUID of the
git-annex repository that proxies access to this remote.
* `remote.<name>.annex-cluster-node`
Set to the name of a cluster to make this remote be part of
the cluster. Names of multiple clusters can be separated by
whitespace to make a remote be part of more than one cluster.
After configuring this, run [[git-annex-updatecluster](1) to store
the new configuration in the git-annex branch.
* `remote.<name>.annex-cluster-gateway`
Set to the UUID of a cluster that this remote serves as a gateway for.
Multiple UUIDs can be listed, separated by whitespace. When the local
repository is also a gateway for that cluster, it will proxy for the
nodes of the remote gateway.
Usually this is set up by running [[git-annex-extendcluster]].
* `remote.<name>.annex-private` * `remote.<name>.annex-private`

View file

@ -288,7 +288,7 @@ For example:
These log files store per-remote content identifiers for keys. These log files store per-remote content identifiers for keys.
A given key may have any number of content identifiers. A given key may have any number of content identifiers.
The format is a timestamp, followed by the uuid of the remote, The format is a timestamp, followed by the UUID of the remote,
followed by the content identifiers which are separated by colons. followed by the content identifiers which are separated by colons.
If a content identifier contains a colon or \r or \n, it will be base64 If a content identifier contains a colon or \r or \n, it will be base64
encoded. Base64 encoded values are indicated by prefixing them with "!". encoded. Base64 encoded values are indicated by prefixing them with "!".
@ -308,6 +308,33 @@ For example, this logs that a remote has an object stored using both
(When those chunks are removed from the remote, the 9 is changed to 0.) (When those chunks are removed from the remote, the 9 is changed to 0.)
## `proxy.log`
Used to record what repositories are accessible via a proxy.
Each line starts with a timestamp, then the UUID of the repository
that can serve as a proxy, and then a list of the remotes that it can
proxy to, separated by spaces.
Each remote in the list consists of a repository's UUID,
followed by a colon (`:`) and then a remote name.
For example:
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 26339d22-446b-11e0-9101-002170d25c55:foo c076460c-2290-11ef-be53-b7f0d194c863:bar
## `cluster.log`
Used to record the UUIDs of clusters, and the UUIDs of the nodes
comprising each cluster.
Each line starts with a timestamp, then the UUID the cluster,
followed by a list of the UUIDs of its nodes, separated by spaces.
For example:
1317929100.012345s 5b070cc8-29b8-11ef-80e1-0fd524be241b 5c0c97d2-29b8-11ef-b1d2-5f3d1c80940d 5c40375e-29b8-11ef-814d-872959d2c013
## `schedule.log` ## `schedule.log`
Used to record scheduled events, such as periodic fscks. Used to record scheduled events, such as periodic fscks.

View file

@ -4,4 +4,10 @@
* [[how_it_works]] * [[how_it_works]]
* [[special_remotes]] * [[special_remotes]]
* [[workflows|workflow]] * [[workflows|workflow]]
* [[preferred_content]]
* [[sync]] * [[sync]]
### new features
* [[tips/clusters]]
* [[git-remote-annex|tips/storing_a_git_repository_on_any_special_remote]]

217
doc/tips/clusters.mdwn Normal file
View file

@ -0,0 +1,217 @@
A cluster is a collection of git-annex repositories which are combined to
form a single logical repository.
A cluster is accessed via a gateway repository. The gateway is not itself
a node of the cluster.
[[!toc ]]
## using a cluster
To use a cluster, your repository needs to have its gateway configured as a
remote. Clusters can currently only be accessed via ssh. This gateway
remote is added the same as any other remote:
git remote add bigserver me@bigserver:annex
The gateway publishes information about the cluster to the git-annex
branch. So you may need to fetch from it to learn about the cluster:
git fetch bigserver
That will make available an additional remote for the cluster, eg
"bigserver-mycluster", as well as some remotes for each node eg
"bigserver-node1", "bigserver-node2", etc.
You can get files from the cluster without caring which node it comes
from:
$ git-annex get foo --from bigserver-mycluster
copy foo (from bigserver-mycluster...) ok
And you can send files to the cluster, without caring what nodes
they are stored to:
$ git-annex move bar --to bigserver-mycluster
move bar (to bigserver-mycluster...) ok
In fact, a single upload like that can be sent to every node of the cluster
at once, very efficiently.
$ git-annex whereis bar
whereis bar (3 copies)
acae2ff6-6c1e-8bec-b8b9-397a3755f397 -- [bigserver-mycluster]
9f514001-6dc0-4d83-9af3-c64c96626892 -- node 1 [bigserver-node1]
d81e0b28-612e-4d73-a4e6-6dabbb03aba1 -- node 2 [bigserver-node2]
5657baca-2f11-11ef-ae1a-5b68c6321dd9 -- node 3 [bigserver-node3]
Notice that the file is shown as present in the cluster, as well as on
individual nodes. But the cluster itself does not count as a copy of the file,
so the 3 copies are the copies on individual nodes.
Most other git-annex commands that operate on repositories can also operate on
clusters.
A cluster is not a git repository, and so `git pull bigserver-mycluster`
will not work.
## preferred content of clusters
The preferred content of the cluster can be configured. This tells
users what files the cluster as a whole should contain.
To configure the preferred content of a cluster, as well as other related
things like [[groups|git-annex-group]] and [[required_content]], it's easiest
to do the configuration in a repository that has the cluster as a remote.
For example:
$ git-annex wanted bigserver-mycluster standard
$ git-annex group bigserver-mycluster archive
By default, when a file is uploaded to a cluster, it is stored on every node of
the cluster. To control which nodes to store to, the [[preferred_content]] of
each node can be configured.
It's also a good idea to configure the preferred content of the cluster's
gateway. To avoid files redundantly being stored on the gateway
(which remember, is not a node of the cluster), you might make it not want
any files:
$ git-annex wanted bigserver nothing
## setting up a cluster
A new cluster first needs to be initialized. Run [[git-annex-initcluster]] in
the repository that will serve as the cluster's gateway. In the example above,
this was the "bigserver" repository.
$ git-annex initcluster mycluster
Once a cluster is initialized, the next step is to add nodes to it.
To make a remote be a node of the cluster, configure
`git config remote.name.annex-cluster-node`, setting it to the
name of the cluster.
In the example above, the three cluster nodes were configured like this:
$ git remote add node1 /media/disk1/repo
$ git remote add node2 /media/disk2/repo
$ git remote add node3 /media/disk3/repo
$ git config remote.node1.annex-cluster-node mycluster
$ git config remote.node2.annex-cluster-node mycluster
$ git config remote.node3.annex-cluster-node mycluster
Finally, run `git-annex updatecluster` to record the cluster configuration
in the git-annex branch. That tells other repositories about the cluster.
$ git-annex updatecluster
Added node node1 to cluster: mycluster
Added node node2 to cluster: mycluster
Added node node3 to cluster: mycluster
Started proxying for node1
Started proxying for node2
Started proxying for node3
Operations that affect multiple nodes of a cluster can often be sped up by
configuring annex.jobs in the repository that will serve the cluster to
clients. In the example above, the nodes are all disk bound, so operating
on more than one at a time will likely be faster.
$ git config annex.jobs cpus
## adding additional gateways to a cluster
A cluster can have more than one gateway. One way to use this is to
make a cluster that is distributed across several locations.
Suppose you have a datacenter in AMS, and one in NYC. There
will be a gateway in each datacenter which provides access to the nodes
there. And the gateways will relay data between each other as well.
Start by setting up the cluster in Amsterdam. The process is the same
as in the previous section.
AMS$ git-annex initcluster mycluster
AMS$ git remote add node1 /media/disk1/repo
AMS$ git remote add node2 /media/disk2/repo
AMS$ git config remote.node1.annex-cluster-node mycluster
AMS$ git config remote.node2.annex-cluster-node mycluster
AMS$ git-annex updatecluster
AMS$ git config annex.jobs cpus
Now in a clone of the same repository in NYC, add AMS as a git remote
accessed with ssh:
NYC$ git remote add AMS me@amsterdam.example.com:annex
NYC$ git fetch AMS
Setting up the cluster in NYC is different, rather than using
`git-annex initcluster` again (which would make a new, different
cluster), we ask git-annex to extend the cluster from AMS:
NYC$ git-annex extendcluster AMS mycluster
The rest of the setup process for NYC is the same, of course different
nodes are added.
NYC$ git remote add node3 /media/disk3/repo
NYC$ git remote add node4 /media/disk4/repo
NYC$ git config remote.node3.annex-cluster-node mycluster
NYC$ git config remote.node4.annex-cluster-node mycluster
NYC$ git-annex updatecluster
NYC$ git config annex.jobs cpus
Finally, the AMS side of the cluster has to be updated, adding a git remote
for NYC, and extending the cluster to there as well:
AMS$ git remote add NYC me@nyc.example.com:annex
AMS$ git-annex sync NYC
NYC$ git-annex extendcluster NYC mycluster
A user can now add either AMS or NYC as a remote, and will have access
to the entire cluster as either `AMS-mycluster` or `NYC-mycluster`.
user$ git-annex move foo --to AMS-mycluster
move foo (to AMS-mycluster...) ok
Looking at where files end up, all the nodes are visible, not only those
served by the current gateway.
user$ git-annex whereis foo
whereis foo (4 copies)
acfc1cb2-b8d5-8393-b8dc-4a419ea38183 -- cluster mycluster [AMS-mycluster]
11ab09a9-7448-45bd-ab81-3997780d00b3 -- node4 [AMS-NYC-node4]
36197d0e-6d49-4213-8440-71cbb121e670 -- node2 [AMS-node2]
43652651-1efa-442a-8333-eb346db31553 -- node3 [AMS-NYC-node3]
7fb5a77b-77a3-4032-b3e5-536698e308b3 -- node1 [AMS-node1]
ok
Notice that remotes for cluster nodes have names indicating the path through
the cluster used to access them. For example, "AMS-NYC-node3" is accessed via
the AMS gateway, which then relays to NYC where node3 is located.
## considerations for multi-gateway clusters
When a cluster has multiple gateways, nothing keeps the git repositories on
the gateways in sync. A branch pushed to one gateway will not be able to
be pulled from another one. And gateways only learn about the locations of
keys that are uploaded to the cluster via them. So in the example above,
after an upload to AMS-mycluster, NYC-mycluster will only know that the
key is stored in its nodes, but won't know that it's stored in nodes
behind AMS. So, it's best to have a single git repository that is synced
with, or perhaps run [[git-annex-remotedaemon]] on each gateway to keep
its git repository in sync with the other gateways.
Clusters can be constructed with any number of gateways, and any internal
topology of connections between gateways. But there must always be a path
from any gateway to all nodes of the cluster, otherwise a key won't
be able to be stored from, or retrieved from some nodes.
It's best to avoid there being multiple paths to a node that go via
different gateways, since all paths will be tried in parallel when eg,
uploading a key to the cluster.
A breakdown in communication between gateways will temporarily split the
cluster. When communication resumes, some keys may need to be copied to
additional nodes.

View file

@ -11,7 +11,7 @@ repositories.
Joey has received funding to work on this. Joey has received funding to work on this.
Planned schedule of work: Planned schedule of work:
* June: git-annex proxy * June: git-annex proxies and clusters
* July, part 1: git-annex proxy support for exporttree * July, part 1: git-annex proxy support for exporttree
* July, part 2: p2p protocol over http * July, part 2: p2p protocol over http
* August: balanced preferred content * August: balanced preferred content
@ -24,7 +24,49 @@ Planned schedule of work:
In development on the `proxy` branch. In development on the `proxy` branch.
For June's work on [[design/passthrough_proxy]], implementation plan: For June's work on [[design/passthrough_proxy]], remaining todos:
* Since proxying to special remotes is not supported yet, and won't be for
the first release, make it fail in a reasonable way.
- or -
* Proxying for special remotes.
Including encryption and chunking. See design for issues.
# items deferred until later for [[design/passthrough_proxy]]
* Indirect uploads when proxying for special remote
(to be considered). See design.
* Getting a key from a cluster currently picks from amoung
the lowest cost remotes at random. This could be smarter,
eg prefer to avoid using remotes that are doing other transfers at the
same time.
* The cost of a proxied node that is accessed via an intermediate gateway
is currently the same as a node accessed via the cluster gateway.
To fix this, there needs to be some way to tell how many hops through
gateways it takes to get to a node. Currently the only way is to
guess based on number of dashes in the node name, which is not satisfying.
Even counting hops is not very satisfying, one cluster gateway could
be much more expensive to traverse than another one.
If seriously tackling this, it might be worth making enough information
available to use spanning tree protocol for routing inside clusters.
* Optimise proxy speed. See design for ideas.
* Use `sendfile()` to avoid data copying overhead when
`receiveBytes` is being fed right into `sendBytes`.
Library to use:
<https://hackage.haskell.org/package/hsyscall-0.4/docs/System-Syscall.html>
* Support using a proxy when its url is a P2P address.
(Eg tor-annex remotes.)
# completed items for June's work on [[design/passthrough_proxy]]:
* UUID discovery via git-annex branch. Add a log file listing UUIDs * UUID discovery via git-annex branch. Add a log file listing UUIDs
accessible via proxy UUIDs. It also will contain the names accessible via proxy UUIDs. It also will contain the names
@ -40,7 +82,7 @@ For June's work on [[design/passthrough_proxy]], implementation plan:
* Proxy should update location tracking information for proxied remotes, * Proxy should update location tracking information for proxied remotes,
so it is available to other users who sync with it. (done) so it is available to other users who sync with it. (done)
* Implement `git-annex updatecluster` command (done) * Implement `git-annex initcluster` and `git-annex updatecluster` commands (done)
* Implement cluster UUID insertation on location log load, and removal * Implement cluster UUID insertation on location log load, and removal
on location log store. (done) on location log store. (done)
@ -48,66 +90,39 @@ For June's work on [[design/passthrough_proxy]], implementation plan:
* Omit cluster UUIDs when constructing drop proofs, since lockcontent will * Omit cluster UUIDs when constructing drop proofs, since lockcontent will
always fail on a cluster. (done) always fail on a cluster. (done)
* Don't count cluster UUID as a copy. (done) * Don't count cluster UUID as a copy in numcopies checking etc. (done)
* Tab complete proxied remotes and clusters in eg --from option. (done) * Tab complete proxied remotes and clusters in eg --from option. (done)
* Getting a key from a cluster should proxy from one of the nodes that has * Getting a key from a cluster should proxy from one of the nodes that has
it. (done) it. (done)
* Getting a key from a cluster currently always selects the lowest cost * Implement upload with fanout to multiple cluster nodes and reporting back
remote, and always the same remote if cost is the same. Should additional UUIDs over P2P protocol. (done)
round-robin amoung remotes, and prefer to avoid using remotes that
other git-annex processes are currently using.
* Implement upload with fanout and reporting back additional UUIDs over P2P
protocol. (done, but need to check for fencepost errors on resume of
incomplete upload with remotes at different points)
* On upload to cluster, send to nodes where it's preferred content, and not
to other nodes.
* Implement cluster drops, trying to remove from all nodes, and returning * Implement cluster drops, trying to remove from all nodes, and returning
which UUIDs it was dropped from. which UUIDs it was dropped from. (done)
Problem: May lock content on cluster * `git-annex testremote` works against proxied remote and cluster. (done)
nodes to satisfy numcopies (rather than locking elsewhere) and so not be
able to drop from nodes. Avoid using cluster nodes when constructing drop
proof for cluster.
Problem: When nodes are special remotes, may * Avoid `git-annex sync --content` etc from operating on cluster nodes by
treat nodes as copies while dropping from cluster, and so violate default since syncing with a cluster implicitly syncs with its nodes. (done)
numcopies. (But not mincopies.)
Problem: `move --from cluster` in "does this make it worse" * On upload to cluster, send to nodes where its preferred content, and not
check may fail to realize that dropping from multiple nodes does in fact to other nodes. (done)
make it worse.
* On upload to a cluster, as well as fanout to nodes, if the key is * Support annex.jobs for clusters. (done)
preferred content of the proxy repository, store it there.
(But not when preferred content is not configured.)
And on download from a cluster, if the proxy repository has the content,
get it from there to avoid the overhead of proxying to a node.
* Basic proxying to special remote support (non-streaming). * Add `git-annex extendcluster` command and extend `git-annex updatecluster`
to support clusters with multiple gateways. (done)
* Support proxies-of-proxies better, eg foo-bar-baz. * Support proxying for a remote that is proxied by another gateway of
Currently, it does work, but have to run `git-annex updateproxy` a cluster. (done)
on foo in order for it to notice the bar-baz proxied remote exists,
and record it as foo-bar-baz. Make it skip recording proxies of
proxies like that, and instead automatically generate those from the log.
(With cycle prevention there of course.)
* Cycle prevention including cluster-in-cluster cycles. See design. * Support distributed clusters: Make a proxy for a cluster repeat
protocol messages on to any remotes that have the same UUID as
the cluster. Needs extension to P2P protocol to avoid cycles.
(done)
* Optimise proxy speed. See design for ideas. * Proxied cluster nodes should have slightly higher cost than the cluster
gateway. (done)
* Use `sendfile()` to avoid data copying overhead when
`receiveBytes` is being fed right into `sendBytes`.
* Encryption and chunking. See design for issues.
* Indirect uploads (to be considered). See design.
* Support using a proxy when its url is a P2P address.
(Eg tor-annex remotes.)

View file

@ -6,7 +6,7 @@ remotes.
So this todo remains open, but is now only concerned with So this todo remains open, but is now only concerned with
streaming an object that is being received from one remote out to another streaming an object that is being received from one remote out to another
remote without first needing to buffer the whole object on disk. repository without first needing to buffer the whole object on disk.
git-annex's remote interface does not currently support that. git-annex's remote interface does not currently support that.
`retrieveKeyFile` stores the object into a file. And `storeKey` `retrieveKeyFile` stores the object into a file. And `storeKey`
@ -27,3 +27,7 @@ Recieving to a file, and sending from the same file as it grows is one
possibility, since that would handle buffering, and it might avoid needing possibility, since that would handle buffering, and it might avoid needing
to change interfaces as much. It would still need a new interface since the to change interfaces as much. It would still need a new interface since the
current one does not guarantee the file is written in-order. current one does not guarantee the file is written in-order.
A fifo is a possibility, but would certianly not work with remotes
that don't write to the file in-order. Also resuming a download would not
work with a fifo, the sending remote wouldn't know where to resume from.

View file

@ -508,6 +508,7 @@ Executable git-annex
Annex.ChangedRefs Annex.ChangedRefs
Annex.CheckAttr Annex.CheckAttr
Annex.CheckIgnore Annex.CheckIgnore
Annex.Cluster
Annex.Common Annex.Common
Annex.Concurrent Annex.Concurrent
Annex.Concurrent.Utility Annex.Concurrent.Utility
@ -549,6 +550,7 @@ Executable git-annex
Annex.Path Annex.Path
Annex.Perms Annex.Perms
Annex.PidLock Annex.PidLock
Annex.Proxy
Annex.Queue Annex.Queue
Annex.ReplaceFile Annex.ReplaceFile
Annex.RemoteTrackingBranch Annex.RemoteTrackingBranch
@ -556,6 +558,7 @@ Executable git-annex
Annex.SpecialRemote.Config Annex.SpecialRemote.Config
Annex.Ssh Annex.Ssh
Annex.StallDetection Annex.StallDetection
Annex.Startup
Annex.TaggedPush Annex.TaggedPush
Annex.Tmp Annex.Tmp
Annex.Transfer Annex.Transfer
@ -635,6 +638,7 @@ Executable git-annex
Command.EnableRemote Command.EnableRemote
Command.EnableTor Command.EnableTor
Command.ExamineKey Command.ExamineKey
Command.ExtendCluster
Command.Expire Command.Expire
Command.Export Command.Export
Command.FilterBranch Command.FilterBranch
@ -658,6 +662,7 @@ Executable git-annex
Command.Indirect Command.Indirect
Command.Info Command.Info
Command.Init Command.Init
Command.InitCluster
Command.InitRemote Command.InitRemote
Command.Inprogress Command.Inprogress
Command.List Command.List
@ -720,6 +725,8 @@ Executable git-annex
Command.UnregisterUrl Command.UnregisterUrl
Command.Untrust Command.Untrust
Command.Unused Command.Unused
Command.UpdateCluster
Command.UpdateProxy
Command.Upgrade Command.Upgrade
Command.VAdd Command.VAdd
Command.VCycle Command.VCycle
@ -814,6 +821,8 @@ Executable git-annex
Logs.AdjustedBranchUpdate Logs.AdjustedBranchUpdate
Logs.Chunk Logs.Chunk
Logs.Chunk.Pure Logs.Chunk.Pure
Logs.Cluster
Logs.Cluster.Basic
Logs.Config Logs.Config
Logs.ContentIdentifier Logs.ContentIdentifier
Logs.ContentIdentifier.Pure Logs.ContentIdentifier.Pure
@ -838,6 +847,7 @@ Executable git-annex
Logs.PreferredContent.Raw Logs.PreferredContent.Raw
Logs.Presence Logs.Presence
Logs.Presence.Pure Logs.Presence.Pure
Logs.Proxy
Logs.Remote Logs.Remote
Logs.Remote.Pure Logs.Remote.Pure
Logs.RemoteState Logs.RemoteState
@ -868,6 +878,7 @@ Executable git-annex
P2P.Auth P2P.Auth
P2P.IO P2P.IO
P2P.Protocol P2P.Protocol
P2P.Proxy
Remote Remote
Remote.Adb Remote.Adb
Remote.BitTorrent Remote.BitTorrent
@ -930,6 +941,7 @@ Executable git-annex
Types.BranchState Types.BranchState
Types.CatFileHandles Types.CatFileHandles
Types.CleanupActions Types.CleanupActions
Types.Cluster
Types.Command Types.Command
Types.Concurrency Types.Concurrency
Types.Creds Types.Creds