honor preferred content settings of cluster nodes
Except when no nodes want a file, it has to be stored somewhere, so store it on all. Which is not really desirable, but neither is having to pick one. ProtoAssociatedFile deserialization is rather broken, and this could possibly affect preferred content expressions that match on filenames. The inability to roundtrip whitespace like tabs and newlines through is not a problem because preferred content expressions can't be written that match on whitespace such as a tab. For example: joey@darkstar:~/tmp/bench/z>git-annex wanted origin-node2 'exclude=*CTRL-VTab*' wanted origin-node2 git-annex: Parse error: Parse failure: near "*" But, the filtering of control characters could perhaps be a problem. I think that filtering is now obsolete, git-annex has comprehensive filtering of control characters when displaying filenames, that happens at a higher level. However, I don't want to risk a security hole so am leaving in that filtering in ProtoAssociatedFile deserialization for now.
This commit is contained in:
parent
a23b0abf28
commit
1bfe7f8a53
4 changed files with 48 additions and 28 deletions
|
@ -18,6 +18,7 @@ import P2P.Protocol
|
||||||
import P2P.IO
|
import P2P.IO
|
||||||
import Annex.Proxy
|
import Annex.Proxy
|
||||||
import Logs.Location
|
import Logs.Location
|
||||||
|
import Logs.PreferredContent
|
||||||
import Types.Command
|
import Types.Command
|
||||||
import Remote.List
|
import Remote.List
|
||||||
import qualified Remote
|
import qualified Remote
|
||||||
|
@ -59,28 +60,30 @@ proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do
|
||||||
|
|
||||||
clusterProxySelector :: ClusterUUID -> ProtocolVersion -> Annex ProxySelector
|
clusterProxySelector :: ClusterUUID -> ProtocolVersion -> Annex ProxySelector
|
||||||
clusterProxySelector clusteruuid protocolversion = do
|
clusterProxySelector clusteruuid protocolversion = do
|
||||||
nodes <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs)
|
nodeuuids <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs)
|
||||||
<$> getClusters
|
<$> getClusters
|
||||||
clusternames <- annexClusters <$> Annex.getGitConfig
|
clusternames <- annexClusters <$> Annex.getGitConfig
|
||||||
remotes <- filter (isnode nodes clusternames) <$> remoteList
|
remotes <- filter (isnode nodeuuids clusternames) <$> remoteList
|
||||||
remotesides <- mapM (proxySshRemoteSide protocolversion) remotes
|
nodes <- mapM (proxySshRemoteSide protocolversion) remotes
|
||||||
return $ ProxySelector
|
return $ ProxySelector
|
||||||
{ proxyCHECKPRESENT = nodecontaining remotesides
|
{ proxyCHECKPRESENT = nodecontaining nodes
|
||||||
, proxyGET = nodecontaining remotesides
|
, proxyGET = nodecontaining nodes
|
||||||
-- The key is sent to multiple nodes at the same time,
|
-- The key is sent to multiple nodes at the same time,
|
||||||
-- skipping nodes where it's known/expected to already be
|
-- skipping nodes where it's known/expected to already be
|
||||||
-- present to avoid needing to connect to those.
|
-- present to avoid needing to connect to those, and
|
||||||
, proxyPUT = \k -> do
|
-- skipping nodes where it's not preferred content.
|
||||||
|
, proxyPUT = \af k -> do
|
||||||
locs <- S.fromList <$> loggedLocations k
|
locs <- S.fromList <$> loggedLocations k
|
||||||
let l = filter (flip S.notMember locs . remoteUUID) remotesides
|
let l = filter (flip S.notMember locs . remoteUUID) nodes
|
||||||
return $ if null l
|
l' <- filterM (\n -> isPreferredContent (Just (remoteUUID n)) mempty (Just k) af True) l
|
||||||
then remotesides
|
-- PUT to no nodes doesn't work, so fall
|
||||||
else l
|
-- back to all nodes.
|
||||||
|
return $ nonempty [l', l] nodes
|
||||||
-- Remove the key from every node that contains it.
|
-- Remove the key from every node that contains it.
|
||||||
-- But, since it's possible the location log for some nodes
|
-- But, since it's possible the location log for some nodes
|
||||||
-- could be out of date, actually try to remove from every
|
-- could be out of date, actually try to remove from every
|
||||||
-- node.
|
-- node.
|
||||||
, proxyREMOVE = const (pure remotesides)
|
, proxyREMOVE = const (pure nodes)
|
||||||
-- Content is not locked on the cluster as a whole,
|
-- Content is not locked on the cluster as a whole,
|
||||||
-- instead it can be locked on individual nodes that are
|
-- instead it can be locked on individual nodes that are
|
||||||
-- proxied to the client.
|
-- proxied to the client.
|
||||||
|
@ -90,22 +93,26 @@ clusterProxySelector clusteruuid protocolversion = do
|
||||||
where
|
where
|
||||||
-- Nodes of the cluster have remote.name.annex-cluster-node
|
-- Nodes of the cluster have remote.name.annex-cluster-node
|
||||||
-- containing its name.
|
-- containing its name.
|
||||||
isnode nodes clusternames r =
|
isnode nodeuuids clusternames r =
|
||||||
case remoteAnnexClusterNode (Remote.gitconfig r) of
|
case remoteAnnexClusterNode (Remote.gitconfig r) of
|
||||||
Nothing -> False
|
Nothing -> False
|
||||||
Just names
|
Just names
|
||||||
| any (isclustername clusternames) names ->
|
| any (isclustername clusternames) names ->
|
||||||
flip S.member nodes $
|
flip S.member nodeuuids $
|
||||||
ClusterNodeUUID $ Remote.uuid r
|
ClusterNodeUUID $ Remote.uuid r
|
||||||
| otherwise -> False
|
| otherwise -> False
|
||||||
|
|
||||||
isclustername clusternames name =
|
isclustername clusternames name =
|
||||||
M.lookup name clusternames == Just clusteruuid
|
M.lookup name clusternames == Just clusteruuid
|
||||||
|
|
||||||
nodecontaining remotesides k = do
|
nodecontaining nodes k = do
|
||||||
locs <- S.fromList <$> loggedLocations k
|
locs <- S.fromList <$> loggedLocations k
|
||||||
case filter (flip S.member locs . remoteUUID) remotesides of
|
case filter (flip S.member locs . remoteUUID) nodes of
|
||||||
-- TODO: Avoid always using same remote
|
-- TODO: Avoid always using same remote
|
||||||
(r:_) -> return (Just r)
|
(r:_) -> return (Just r)
|
||||||
[] -> return Nothing
|
[] -> return Nothing
|
||||||
|
|
||||||
|
nonempty (l:ls) fallback
|
||||||
|
| null l = nonempty ls fallback
|
||||||
|
| otherwise = l
|
||||||
|
nonempty [] fallback = fallback
|
||||||
|
|
|
@ -173,12 +173,15 @@ instance Proto.Serializable Service where
|
||||||
-- its serialization cannot contain any whitespace. This is handled
|
-- its serialization cannot contain any whitespace. This is handled
|
||||||
-- by replacing whitespace with '%' (and '%' with '%%')
|
-- by replacing whitespace with '%' (and '%' with '%%')
|
||||||
--
|
--
|
||||||
-- When deserializing an AssociatedFile from a peer, it's sanitized,
|
-- When deserializing an AssociatedFile from a peer, that escaping is
|
||||||
-- to avoid any unusual characters that might cause problems when it's
|
-- reversed. Unfortunately, an input tab will be deescaped to a space
|
||||||
-- displayed to the user.
|
-- though. And it's sanitized, to avoid any control characters that might
|
||||||
|
-- cause problems when it's displayed to the user.
|
||||||
--
|
--
|
||||||
-- These mungings are ok, because a ProtoAssociatedFile is only ever displayed
|
-- These mungings are ok, because a ProtoAssociatedFile is normally
|
||||||
-- to the user and does not need to match a file on disk.
|
-- only displayed to the user and so does not need to match a file on disk.
|
||||||
|
-- It may also be used in checking preferred content, which is very
|
||||||
|
-- unlikely to care about spaces vs tabs or control characters.
|
||||||
instance Proto.Serializable ProtoAssociatedFile where
|
instance Proto.Serializable ProtoAssociatedFile where
|
||||||
serialize (ProtoAssociatedFile (AssociatedFile Nothing)) = ""
|
serialize (ProtoAssociatedFile (AssociatedFile Nothing)) = ""
|
||||||
serialize (ProtoAssociatedFile (AssociatedFile (Just af))) =
|
serialize (ProtoAssociatedFile (AssociatedFile (Just af))) =
|
||||||
|
|
18
P2P/Proxy.hs
18
P2P/Proxy.hs
|
@ -14,6 +14,7 @@ import Annex.Common
|
||||||
import P2P.Protocol
|
import P2P.Protocol
|
||||||
import P2P.IO
|
import P2P.IO
|
||||||
import Utility.Metered
|
import Utility.Metered
|
||||||
|
import Git.FilePath
|
||||||
|
|
||||||
import Data.Either
|
import Data.Either
|
||||||
import Control.Concurrent.STM
|
import Control.Concurrent.STM
|
||||||
|
@ -63,7 +64,7 @@ data ProxySelector = ProxySelector
|
||||||
, proxyREMOVE :: Key -> Annex [RemoteSide]
|
, proxyREMOVE :: Key -> Annex [RemoteSide]
|
||||||
-- ^ remove from all of these remotes
|
-- ^ remove from all of these remotes
|
||||||
, proxyGET :: Key -> Annex (Maybe RemoteSide)
|
, proxyGET :: Key -> Annex (Maybe RemoteSide)
|
||||||
, proxyPUT :: Key -> Annex [RemoteSide]
|
, proxyPUT :: AssociatedFile -> Key -> Annex [RemoteSide]
|
||||||
-- ^ put to some/all of these remotes
|
-- ^ put to some/all of these remotes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,7 +75,7 @@ singleProxySelector r = ProxySelector
|
||||||
, proxyUNLOCKCONTENT = pure (Just r)
|
, proxyUNLOCKCONTENT = pure (Just r)
|
||||||
, proxyREMOVE = const (pure [r])
|
, proxyREMOVE = const (pure [r])
|
||||||
, proxyGET = const (pure (Just r))
|
, proxyGET = const (pure (Just r))
|
||||||
, proxyPUT = const (pure [r])
|
, proxyPUT = const (const (pure [r]))
|
||||||
}
|
}
|
||||||
|
|
||||||
{- To keep this module limited to P2P protocol actions,
|
{- To keep this module limited to P2P protocol actions,
|
||||||
|
@ -196,8 +197,9 @@ proxy proxydone proxymethods servermode (ClientSide clientrunst clientconn) remo
|
||||||
protoerrhandler proxynextclientmessage $
|
protoerrhandler proxynextclientmessage $
|
||||||
client $ net $ sendMessage $
|
client $ net $ sendMessage $
|
||||||
ERROR "content not present"
|
ERROR "content not present"
|
||||||
PUT _ k -> do
|
PUT paf k -> do
|
||||||
remotesides <- proxyPUT proxyselector k
|
af <- getassociatedfile paf
|
||||||
|
remotesides <- proxyPUT proxyselector af k
|
||||||
servermodechecker checkPUTServerMode $
|
servermodechecker checkPUTServerMode $
|
||||||
handlePUT remotesides k message
|
handlePUT remotesides k message
|
||||||
-- These messages involve the git repository, not the
|
-- These messages involve the git repository, not the
|
||||||
|
@ -481,3 +483,11 @@ proxy proxydone proxymethods servermode (ClientSide clientrunst clientconn) remo
|
||||||
| protocolversion < 2 -> SUCCESS
|
| protocolversion < 2 -> SUCCESS
|
||||||
| otherwise -> SUCCESS_PLUS us
|
| otherwise -> SUCCESS_PLUS us
|
||||||
|
|
||||||
|
-- The associated file received from the P2P protocol
|
||||||
|
-- is relative to the top of the git repository. But this process
|
||||||
|
-- may be running with a different cwd.
|
||||||
|
getassociatedfile (ProtoAssociatedFile (AssociatedFile (Just f))) =
|
||||||
|
AssociatedFile . Just
|
||||||
|
<$> fromRepo (fromTopFilePath (asTopFilePath f))
|
||||||
|
getassociatedfile (ProtoAssociatedFile (AssociatedFile Nothing)) =
|
||||||
|
return $ AssociatedFile Nothing
|
||||||
|
|
|
@ -26,9 +26,6 @@ In development on the `proxy` branch.
|
||||||
|
|
||||||
For June's work on [[design/passthrough_proxy]], remaining todos:
|
For June's work on [[design/passthrough_proxy]], remaining todos:
|
||||||
|
|
||||||
* On upload to cluster, send to nodes where its preferred content, and not
|
|
||||||
to other nodes. Unless no nodes prefer it, then what?
|
|
||||||
|
|
||||||
* Getting a key from a cluster currently always selects the lowest cost
|
* Getting a key from a cluster currently always selects the lowest cost
|
||||||
remote, and always the same remote if cost is the same. Should
|
remote, and always the same remote if cost is the same. Should
|
||||||
round-robin amoung remotes, and prefer to avoid using remotes that
|
round-robin amoung remotes, and prefer to avoid using remotes that
|
||||||
|
@ -110,3 +107,6 @@ For June's work on [[design/passthrough_proxy]], remaining todos:
|
||||||
|
|
||||||
* Avoid `git-annex sync --content` etc from operating on cluster nodes by
|
* Avoid `git-annex sync --content` etc from operating on cluster nodes by
|
||||||
default since syncing with a cluster implicitly syncs with its nodes. (done)
|
default since syncing with a cluster implicitly syncs with its nodes. (done)
|
||||||
|
|
||||||
|
* On upload to cluster, send to nodes where its preferred content, and not
|
||||||
|
to other nodes. (done)
|
||||||
|
|
Loading…
Reference in a new issue