Merge commit '71f0dd5a32' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next into for-6.15/io_uring-rx-zc

Merge the networking zerocopy receive tree to get the prep patches for
the io_uring rx zc support.

* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (63 commits)
  net: add helpers for setting a memory provider on an rx queue
  net: page_pool: add memory provider helpers
  net: prepare for non devmem TCP memory providers
  net: page_pool: add a mp hook to unregister_netdevice*
  net: page_pool: add callback for mp info printing
  netdev: add io_uring memory provider info
  net: page_pool: create hooks for custom memory providers
  net: generalise net_iov chunk owners
  net: prefix devmem specific helpers
  net: page_pool: don't cast mp param to devmem
  tools: ynl: add all headers to makefile deps
  eth: fbnic: set IFF_UNICAST_FLT to avoid enabling promiscuous mode when adding unicast addrs
  eth: fbnic: add MAC address TCAM to debugfs
  tools: ynl-gen: support limits using definitions
  tools: ynl-gen: don't output external constants
  net/mlx5e: Avoid WARN_ON when configuring MQPRIO with HTB offload enabled
  net/mlx5e: Remove unused mlx5e_tc_flow_action struct
  net/mlx5: Remove stray semicolon in LAG port selection table creation
  net/mlx5e: Support FEC settings for 200G per lane link modes
  net/mlx5: Add support for 200Gbps per lane link modes
  ...
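
The page_pool commits listed above add hook points so that a custom memory provider (io_uring zero-copy rx being the intended user) can supply the receive buffers a queue's page pool hands out, instead of the pool's default allocator. As a rough illustration of that shape only, here is a self-contained toy model in plain userspace C; every name in it is invented for this sketch and does not match the kernel's actual structures or helpers.

/*
 * Illustrative sketch only: a toy model of the "memory provider" idea.
 * All names are invented for illustration and are NOT the upstream
 * kernel API; the real hooks live in the page_pool/netdev code merged here.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_pool;

/* Pluggable provider: the pool calls these instead of its default allocator. */
struct toy_provider_ops {
	void *(*alloc)(struct toy_pool *pool);
	void (*release)(struct toy_pool *pool, void *buf);
};

struct toy_pool {
	const struct toy_provider_ops *mp_ops;	/* NULL means default path */
	void *mp_priv;				/* provider-private state   */
};

/* Default path: plain heap buffers when no provider is installed. */
static void *toy_pool_alloc(struct toy_pool *pool)
{
	if (pool->mp_ops)
		return pool->mp_ops->alloc(pool);
	return malloc(4096);
}

static void toy_pool_release(struct toy_pool *pool, void *buf)
{
	if (pool->mp_ops)
		pool->mp_ops->release(pool, buf);
	else
		free(buf);
}

/* A trivial provider that hands out one preallocated buffer it owns. */
static void *one_buf_alloc(struct toy_pool *pool)
{
	return pool->mp_priv;
}

static void one_buf_release(struct toy_pool *pool, void *buf)
{
	(void)pool;
	(void)buf;	/* buffer stays owned by the provider */
}

static const struct toy_provider_ops one_buf_ops = {
	.alloc = one_buf_alloc,
	.release = one_buf_release,
};

int main(void)
{
	struct toy_pool pool = { .mp_ops = &one_buf_ops, .mp_priv = malloc(4096) };
	void *buf = toy_pool_alloc(&pool);

	printf("buffer %p came from the installed provider\n", buf);
	toy_pool_release(&pool, buf);
	free(pool.mp_priv);
	return 0;
}

In the kernel series proper the provider is attached per rx queue and is reported over netlink (the io-uring-provider-info nest added to the netdev spec in one of the hunks below), which this toy necessarily omits.
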
Jens Axboe 2025-02-17 05:38:28 -07:00
commit 5c496ff11d
85 changed files with 1570 additions and 3359 deletions

View file

@ -244,7 +244,7 @@ information about the interrupt from the irb parameter.
--------------------
The ccwgroup mechanism is designed to handle devices consisting of multiple ccw
devices, like lcs or ctc.
devices, like qeth or ctc.
The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to
this attribute creates a ccwgroup device consisting of these ccw devices (if

View file

@ -44,6 +44,9 @@ properties:
phy-mode:
enum:
- rgmii
- rgmii-id
- rgmii-rxid
- rgmii-txid
- rmii
phy-handle: true

View file

@ -14,9 +14,10 @@ $defs:
pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
len-or-limit:
# literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc.
# literal int, const name, or limit based on fixed-width type
# e.g. u8-min, u16-max, etc.
type: [ string, integer ]
pattern: ^[su](8|16|32|64)-(min|max)$
pattern: ^[0-9A-Za-z_-]+$
minimum: 0
# Schema for specs

View file

@ -14,9 +14,10 @@ $defs:
pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
len-or-limit:
# literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc.
# literal int, const name, or limit based on fixed-width type
# e.g. u8-min, u16-max, etc.
type: [ string, integer ]
pattern: ^[su](8|16|32|64)-(min|max)$
pattern: ^[0-9A-Za-z_-]+$
minimum: 0
# Schema for specs

View file

@ -14,9 +14,10 @@ $defs:
pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
len-or-limit:
# literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc.
# literal int, const name, or limit based on fixed-width type
# e.g. u8-min, u16-max, etc.
type: [ string, integer ]
pattern: ^[su](8|16|32|64)-(min|max)$
pattern: ^[0-9A-Za-z_-]+$
minimum: 0
# Schema for specs

View file

@ -114,6 +114,9 @@ attribute-sets:
doc: Bitmask of enabled AF_XDP features.
type: u64
enum: xsk-flags
-
name: io-uring-provider-info
attributes: []
-
name: page-pool
attributes:
@ -171,6 +174,11 @@ attribute-sets:
name: dmabuf
doc: ID of the dmabuf this page-pool is attached to.
type: u32
-
name: io-uring
doc: io-uring memory provider information.
type: nest
nested-attributes: io-uring-provider-info
-
name: page-pool-info
subset-of: page-pool
@ -296,6 +304,11 @@ attribute-sets:
name: dmabuf
doc: ID of the dmabuf attached to this queue, if any.
type: u32
-
name: io-uring
doc: io_uring memory provider information.
type: nest
nested-attributes: io-uring-provider-info
-
name: qstats
@ -572,6 +585,7 @@ operations:
- inflight-mem
- detach-time
- dmabuf
- io-uring
dump:
reply: *pp-reply
config-cond: page-pool
@ -637,6 +651,7 @@ operations:
- napi-id
- ifindex
- dmabuf
- io-uring
dump:
request:
attributes:

View file

@ -54,7 +54,6 @@ enum interruption_class {
IRQIO_C70,
IRQIO_TAP,
IRQIO_VMR,
IRQIO_LCS,
IRQIO_CTC,
IRQIO_ADM,
IRQIO_CSC,

View file

@ -84,7 +84,6 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
{.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"},
{.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
{.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"},
{.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
{.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
{.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},

View file

@ -432,9 +432,6 @@ static struct net_device *bond_ipsec_dev(struct xfrm_state *xs)
struct bonding *bond;
struct slave *slave;
if (!bond_dev)
return NULL;
bond = netdev_priv(bond_dev);
if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)
return NULL;

View file

@ -226,7 +226,6 @@ struct __packed offload_info {
struct offload_port_info ports;
struct offload_ka_info kas;
struct offload_rr_info rrs;
u8 buf[];
};
struct __packed hw_atl_utils_fw_rpc {

View file

@ -1433,22 +1433,6 @@ int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout)
}
EXPORT_SYMBOL_GPL(octeon_wait_for_ddr_init);
/* Get the octeon id assigned to the octeon device passed as argument.
* This function is exported to other modules.
* @param dev - octeon device pointer passed as a void *.
* @return octeon device id
*/
int lio_get_device_id(void *dev)
{
struct octeon_device *octeon_dev = (struct octeon_device *)dev;
u32 i;
for (i = 0; i < MAX_OCTEON_DEVICES; i++)
if (octeon_device[i] == octeon_dev)
return octeon_dev->octeon_id;
return -1;
}
void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq)
{
u64 instr_cnt;

View file

@ -705,13 +705,6 @@ octeon_get_dispatch(struct octeon_device *octeon_dev, u16 opcode,
*/
struct octeon_device *lio_get_device(u32 octeon_id);
/** Get the octeon id assigned to the octeon device passed as argument.
* This function is exported to other modules.
* @param dev - octeon device pointer passed as a void *.
* @return octeon device id
*/
int lio_get_device_id(void *dev);
/** Read windowed register.
* @param oct - pointer to the Octeon device.
* @param addr - Address of the register to read.

View file

@ -1211,9 +1211,6 @@ struct adapter {
struct timer_list flower_stats_timer;
struct work_struct flower_stats_work;
/* Ethtool Dump */
struct ethtool_dump eth_dump;
/* HMA */
struct hma_data hma;
@ -1233,6 +1230,10 @@ struct adapter {
/* Ethtool n-tuple */
struct cxgb4_ethtool_filter *ethtool_filters;
/* Ethtool Dump */
/* Must be last - ends in a flex-array member. */
struct ethtool_dump eth_dump;
};
/* Support for "sched-class" command to allow a TX Scheduling Class to be

View file

@ -526,28 +526,6 @@ out:
return res;
}
u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, u32 uid, u32 obj, u32 count)
{
struct mlx4_zone_entry *zone;
int res = 0;
spin_lock(&zones->lock);
zone = __mlx4_find_zone_by_uid(zones, uid);
if (NULL == zone) {
res = -1;
goto out;
}
__mlx4_free_from_zone(zone, obj, count);
out:
spin_unlock(&zones->lock);
return res;
}
u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count)
{
struct mlx4_zone_entry *zone;

View file

@ -1478,12 +1478,6 @@ void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc);
u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count,
int align, u32 skip_mask, u32 *puid);
/* Free <count> objects, start from <obj> of the uid <uid> from zone_allocator
* <zones>.
*/
u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones,
u32 uid, u32 obj, u32 count);
/* If <zones> was allocated with MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP, instead of
* specifying the uid when freeing an object, zone allocator could figure it by
* itself. Other parameters are similar to mlx4_zone_free.

View file

@ -147,26 +147,6 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
return err;
}
int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx)
{
struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
struct mlx4_mac_table *table = &info->mac_table;
int i;
for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
if (!table->refs[i])
continue;
if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
*idx = i;
return 0;
}
}
return -ENOENT;
}
EXPORT_SYMBOL_GPL(mlx4_find_cached_mac);
static bool mlx4_need_mf_bond(struct mlx4_dev *dev)
{
int i, num_eth_ports = 0;

View file

@ -296,11 +296,16 @@ enum mlx5e_fec_supported_link_mode {
MLX5E_FEC_SUPPORTED_LINK_MODE_200G_2X,
MLX5E_FEC_SUPPORTED_LINK_MODE_400G_4X,
MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X,
MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X,
MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X,
MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X,
MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X,
MLX5E_MAX_FEC_SUPPORTED_LINK_MODE,
};
#define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X
#define MLX5E_FEC_FIRST_100G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_100G_1X
#define MLX5E_FEC_FIRST_200G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X
#define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \
do { \
@ -320,8 +325,10 @@ static bool mlx5e_is_fec_supported_link_mode(struct mlx5_core_dev *dev,
return link_mode < MLX5E_FEC_FIRST_50G_PER_LANE_MODE ||
(link_mode < MLX5E_FEC_FIRST_100G_PER_LANE_MODE &&
MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm)) ||
(link_mode >= MLX5E_FEC_FIRST_100G_PER_LANE_MODE &&
MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm));
(link_mode < MLX5E_FEC_FIRST_200G_PER_LANE_MODE &&
MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm)) ||
(link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE &&
MLX5_CAP_PCAM_FEATURE(dev, fec_200G_per_lane_in_pplm));
}
/* get/set FEC admin field for a given speed */
@ -368,6 +375,18 @@ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write,
case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X:
MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_8x);
break;
case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X:
MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_1x);
break;
case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X:
MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_2x);
break;
case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X:
MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_4x);
break;
case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X:
MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 1600g_8x);
break;
default:
return -EINVAL;
}
@ -421,6 +440,18 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap,
case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X:
*fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_8x);
break;
case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X:
*fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_1x);
break;
case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X:
*fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_2x);
break;
case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X:
*fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_4x);
break;
case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X:
*fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 1600g_8x);
break;
default:
return -EINVAL;
}
@ -494,6 +525,26 @@ out:
return 0;
}
static u16 mlx5e_remap_fec_conf_mode(enum mlx5e_fec_supported_link_mode link_mode,
u16 conf_fec)
{
/* RS fec in ethtool is originally mapped to MLX5E_FEC_RS_528_514.
* For link modes up to 25G per lane, the value is kept.
* For 50G or 100G per lane, it's remapped to MLX5E_FEC_RS_544_514.
* For 200G per lane, remapped to MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD.
*/
if (conf_fec != BIT(MLX5E_FEC_RS_528_514))
return conf_fec;
if (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE)
return BIT(MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD);
if (link_mode >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE)
return BIT(MLX5E_FEC_RS_544_514);
return conf_fec;
}
int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy)
{
bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm);
@ -530,14 +581,7 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy)
if (!mlx5e_is_fec_supported_link_mode(dev, i))
break;
/* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514
* to link modes up to 25G per lane and to
* MLX5E_FEC_RS_544_514 in the new link modes based on
* 50G or 100G per lane
*/
if (conf_fec == (1 << MLX5E_FEC_RS_528_514) &&
i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE)
conf_fec = (1 << MLX5E_FEC_RS_544_514);
conf_fec = mlx5e_remap_fec_conf_mode(i, conf_fec);
mlx5e_get_fec_cap_field(out, &fec_caps, i);

View file

@ -61,6 +61,7 @@ enum {
MLX5E_FEC_NOFEC,
MLX5E_FEC_FIRECODE,
MLX5E_FEC_RS_528_514,
MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD = 4,
MLX5E_FEC_RS_544_514 = 7,
MLX5E_FEC_LLRS_272_257_1 = 9,
};

View file

@ -326,7 +326,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
int node;
sq->pdev = c->pdev;
sq->clock = &mdev->clock;
sq->clock = mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
sq->priv = c->priv;
@ -696,7 +696,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
rq->pdev = c->pdev;
rq->netdev = priv->netdev;
rq->priv = priv;
rq->clock = &mdev->clock;
rq->clock = mdev->clock;
rq->tstamp = &priv->tstamp;
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);

View file

@ -73,11 +73,6 @@ struct mlx5e_tc_act {
bool is_terminating_action;
};
struct mlx5e_tc_flow_action {
unsigned int num_entries;
struct flow_action_entry **entries;
};
extern struct mlx5e_tc_act mlx5e_tc_act_drop;
extern struct mlx5e_tc_act mlx5e_tc_act_trap;
extern struct mlx5e_tc_act mlx5e_tc_act_accept;

View file

@ -46,7 +46,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params
rq->pdev = t->pdev;
rq->netdev = priv->netdev;
rq->priv = priv;
rq->clock = &mdev->clock;
rq->clock = mdev->clock;
rq->tstamp = &priv->tstamp;
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);

View file

@ -289,9 +289,9 @@ static u64 mlx5e_xsk_fill_timestamp(void *_priv)
ts = get_cqe_ts(priv->cqe);
if (mlx5_is_real_time_rq(priv->cq->mdev) || mlx5_is_real_time_sq(priv->cq->mdev))
return mlx5_real_time_cyc2time(&priv->cq->mdev->clock, ts);
return mlx5_real_time_cyc2time(priv->cq->mdev->clock, ts);
return mlx5_timecounter_cyc2time(&priv->cq->mdev->clock, ts);
return mlx5_timecounter_cyc2time(priv->cq->mdev->clock, ts);
}
static void mlx5e_xsk_request_checksum(u16 csum_start, u16 csum_offset, void *priv)

View file

@ -72,7 +72,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
rq->netdev = c->netdev;
rq->priv = c->priv;
rq->tstamp = c->tstamp;
rq->clock = &mdev->clock;
rq->clock = mdev->clock;
rq->icosq = &c->icosq;
rq->ix = c->ix;
rq->channel = c;

View file

@ -237,6 +237,27 @@ void mlx5e_build_ptys2ethtool_map(void)
ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT,
ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT,
ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT);
MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_1_200GBASE_CR1_KR1, ext,
ETHTOOL_LINK_MODE_200000baseCR_Full_BIT,
ETHTOOL_LINK_MODE_200000baseKR_Full_BIT,
ETHTOOL_LINK_MODE_200000baseDR_Full_BIT,
ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT,
ETHTOOL_LINK_MODE_200000baseSR_Full_BIT,
ETHTOOL_LINK_MODE_200000baseVR_Full_BIT);
MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_2_400GBASE_CR2_KR2, ext,
ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT,
ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT,
ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT,
ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT,
ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT,
ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT);
MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_800GAUI_4_800GBASE_CR4_KR4, ext,
ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT,
ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT,
ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT,
ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT,
ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT,
ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT);
}
static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev,
@ -931,6 +952,7 @@ static const u32 pplm_fec_2_ethtool[] = {
[MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS,
[MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS,
[MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS,
[MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD] = ETHTOOL_FEC_RS,
};
static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size)

View file

@ -737,7 +737,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
rq->netdev = c->netdev;
rq->priv = c->priv;
rq->tstamp = c->tstamp;
rq->clock = &mdev->clock;
rq->clock = mdev->clock;
rq->icosq = &c->icosq;
rq->ix = c->ix;
rq->channel = c;
@ -1614,7 +1614,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
int err;
sq->pdev = c->pdev;
sq->clock = &mdev->clock;
sq->clock = mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
sq->mdev = c->mdev;
@ -3816,8 +3816,11 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
/* MQPRIO is another toplevel qdisc that can't be attached
* simultaneously with the offloaded HTB.
*/
if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq)))
return -EINVAL;
if (mlx5e_selq_is_htb_enabled(&priv->selq)) {
NL_SET_ERR_MSG_MOD(mqprio->extack,
"MQPRIO cannot be configured when HTB offload is enabled.");
return -EOPNOTSUPP;
}
switch (mqprio->mode) {
case TC_MQPRIO_MODE_DCB:

View file

@ -97,7 +97,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
mlx5_del_flow_rules(lag_definer->rules[idx]);
}
j = ldev->buckets;
};
}
goto destroy_fg;
}
}

View file

@ -43,6 +43,8 @@
#include <linux/cpufeature.h>
#endif /* CONFIG_X86 */
#define MLX5_RT_CLOCK_IDENTITY_SIZE MLX5_FLD_SZ_BYTES(mrtcq_reg, rt_clock_identity)
enum {
MLX5_PIN_MODE_IN = 0x0,
MLX5_PIN_MODE_OUT = 0x1,
@ -77,6 +79,56 @@ enum {
MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX = 200000,
};
struct mlx5_clock_dev_state {
struct mlx5_core_dev *mdev;
struct mlx5_devcom_comp_dev *compdev;
struct mlx5_nb pps_nb;
struct work_struct out_work;
};
struct mlx5_clock_priv {
struct mlx5_clock clock;
struct mlx5_core_dev *mdev;
struct mutex lock; /* protect mdev and used in PTP callbacks */
struct mlx5_core_dev *event_mdev;
};
static struct mlx5_clock_priv *clock_priv(struct mlx5_clock *clock)
{
return container_of(clock, struct mlx5_clock_priv, clock);
}
static void mlx5_clock_lockdep_assert(struct mlx5_clock *clock)
{
if (!clock->shared)
return;
lockdep_assert(lockdep_is_held(&clock_priv(clock)->lock));
}
static struct mlx5_core_dev *mlx5_clock_mdev_get(struct mlx5_clock *clock)
{
mlx5_clock_lockdep_assert(clock);
return clock_priv(clock)->mdev;
}
static void mlx5_clock_lock(struct mlx5_clock *clock)
{
if (!clock->shared)
return;
mutex_lock(&clock_priv(clock)->lock);
}
static void mlx5_clock_unlock(struct mlx5_clock *clock)
{
if (!clock->shared)
return;
mutex_unlock(&clock_priv(clock)->lock);
}
static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev)
{
return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev));
@ -94,6 +146,22 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev)
return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify);
}
static int mlx5_clock_identity_get(struct mlx5_core_dev *mdev,
u8 identify[MLX5_RT_CLOCK_IDENTITY_SIZE])
{
u32 out[MLX5_ST_SZ_DW(mrtcq_reg)] = {};
u32 in[MLX5_ST_SZ_DW(mrtcq_reg)] = {};
int err;
err = mlx5_core_access_reg(mdev, in, sizeof(in),
out, sizeof(out), MLX5_REG_MRTCQ, 0, 0);
if (!err)
memcpy(identify, MLX5_ADDR_OF(mrtcq_reg, out, rt_clock_identity),
MLX5_RT_CLOCK_IDENTITY_SIZE);
return err;
}
static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz)
{
/* Optimal shift constant leads to corrections above just 1 scaled ppm.
@ -119,21 +187,30 @@ static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz)
ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz));
}
static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp)
static s32 mlx5_clock_getmaxphase(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_core_dev *mdev;
mdev = container_of(clock, struct mlx5_core_dev, clock);
return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ?
MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX :
MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX;
}
static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_core_dev *mdev;
s32 ret;
mlx5_clock_lock(clock);
mdev = mlx5_clock_mdev_get(clock);
ret = mlx5_clock_getmaxphase(mdev);
mlx5_clock_unlock(clock);
return ret;
}
static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta)
{
s64 max = mlx5_ptp_getmaxphase(&mdev->clock.ptp_info);
s64 max = mlx5_clock_getmaxphase(mdev);
if (delta < -max || delta > max)
return false;
@ -209,7 +286,7 @@ static int mlx5_mtctr_syncdevicetime(ktime_t *device_time,
if (real_time_mode)
*device_time = ns_to_ktime(REAL_TIME_TO_NS(device >> 32, device & U32_MAX));
else
*device_time = mlx5_timecounter_cyc2time(&mdev->clock, device);
*device_time = mlx5_timecounter_cyc2time(mdev->clock, device);
return 0;
}
@ -220,16 +297,23 @@ static int mlx5_ptp_getcrosststamp(struct ptp_clock_info *ptp,
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct system_time_snapshot history_begin = {0};
struct mlx5_core_dev *mdev;
int err;
mdev = container_of(clock, struct mlx5_core_dev, clock);
mlx5_clock_lock(clock);
mdev = mlx5_clock_mdev_get(clock);
if (!mlx5_is_ptm_source_time_available(mdev))
return -EBUSY;
if (!mlx5_is_ptm_source_time_available(mdev)) {
err = -EBUSY;
goto unlock;
}
ktime_get_snapshot(&history_begin);
return get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev,
&history_begin, cts);
err = get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev,
&history_begin, cts);
unlock:
mlx5_clock_unlock(clock);
return err;
}
#endif /* CONFIG_X86 */
@ -263,8 +347,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
{
struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles);
struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer);
struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
clock);
struct mlx5_core_dev *mdev = mlx5_clock_mdev_get(clock);
return mlx5_read_time(mdev, NULL, false) & cc->mask;
}
@ -272,7 +355,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
struct mlx5_clock *clock = &mdev->clock;
struct mlx5_clock *clock = mdev->clock;
struct mlx5_timer *timer;
u32 sign;
@ -295,12 +378,10 @@ static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
static void mlx5_pps_out(struct work_struct *work)
{
struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
out_work);
struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock,
pps_info);
struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
clock);
struct mlx5_clock_dev_state *clock_state = container_of(work, struct mlx5_clock_dev_state,
out_work);
struct mlx5_core_dev *mdev = clock_state->mdev;
struct mlx5_clock *clock = mdev->clock;
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
unsigned long flags;
int i;
@ -330,7 +411,8 @@ static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info)
unsigned long flags;
clock = container_of(ptp_info, struct mlx5_clock, ptp_info);
mdev = container_of(clock, struct mlx5_core_dev, clock);
mlx5_clock_lock(clock);
mdev = mlx5_clock_mdev_get(clock);
timer = &clock->timer;
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
@ -342,6 +424,7 @@ static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info)
write_sequnlock_irqrestore(&clock->lock, flags);
out:
mlx5_clock_unlock(clock);
return timer->overflow_period;
}
@ -361,15 +444,12 @@ static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev,
return mlx5_set_mtutc(mdev, in, sizeof(in));
}
static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts)
static int mlx5_clock_settime(struct mlx5_core_dev *mdev, struct mlx5_clock *clock,
const struct timespec64 *ts)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_timer *timer = &clock->timer;
struct mlx5_core_dev *mdev;
unsigned long flags;
mdev = container_of(clock, struct mlx5_core_dev, clock);
if (mlx5_modify_mtutc_allowed(mdev)) {
int err = mlx5_ptp_settime_real_time(mdev, ts);
@ -385,6 +465,20 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64
return 0;
}
static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_core_dev *mdev;
int err;
mlx5_clock_lock(clock);
mdev = mlx5_clock_mdev_get(clock);
err = mlx5_clock_settime(mdev, clock, ts);
mlx5_clock_unlock(clock);
return err;
}
static
struct timespec64 mlx5_ptp_gettimex_real_time(struct mlx5_core_dev *mdev,
struct ptp_system_timestamp *sts)
@ -404,7 +498,8 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
struct mlx5_core_dev *mdev;
u64 cycles, ns;
mdev = container_of(clock, struct mlx5_core_dev, clock);
mlx5_clock_lock(clock);
mdev = mlx5_clock_mdev_get(clock);
if (mlx5_real_time_mode(mdev)) {
*ts = mlx5_ptp_gettimex_real_time(mdev, sts);
goto out;
@ -414,6 +509,7 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
ns = mlx5_timecounter_cyc2time(clock, cycles);
*ts = ns_to_timespec64(ns);
out:
mlx5_clock_unlock(clock);
return 0;
}
@ -444,14 +540,16 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
struct mlx5_timer *timer = &clock->timer;
struct mlx5_core_dev *mdev;
unsigned long flags;
int err = 0;
mdev = container_of(clock, struct mlx5_core_dev, clock);
mlx5_clock_lock(clock);
mdev = mlx5_clock_mdev_get(clock);
if (mlx5_modify_mtutc_allowed(mdev)) {
int err = mlx5_ptp_adjtime_real_time(mdev, delta);
err = mlx5_ptp_adjtime_real_time(mdev, delta);
if (err)
return err;
goto unlock;
}
write_seqlock_irqsave(&clock->lock, flags);
@ -459,17 +557,23 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
unlock:
mlx5_clock_unlock(clock);
return err;
}
static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_core_dev *mdev;
int err;
mdev = container_of(clock, struct mlx5_core_dev, clock);
mlx5_clock_lock(clock);
mdev = mlx5_clock_mdev_get(clock);
err = mlx5_ptp_adjtime_real_time(mdev, delta);
mlx5_clock_unlock(clock);
return mlx5_ptp_adjtime_real_time(mdev, delta);
return err;
}
static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm)
@ -498,15 +602,17 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
struct mlx5_timer *timer = &clock->timer;
struct mlx5_core_dev *mdev;
unsigned long flags;
int err = 0;
u32 mult;
mdev = container_of(clock, struct mlx5_core_dev, clock);
mlx5_clock_lock(clock);
mdev = mlx5_clock_mdev_get(clock);
if (mlx5_modify_mtutc_allowed(mdev)) {
int err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm);
err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm);
if (err)
return err;
goto unlock;
}
mult = (u32)adjust_by_scaled_ppm(timer->nominal_c_mult, scaled_ppm);
@ -518,7 +624,9 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
write_sequnlock_irqrestore(&clock->lock, flags);
ptp_schedule_worker(clock->ptp, timer->overflow_period);
return 0;
unlock:
mlx5_clock_unlock(clock);
return err;
}
static int mlx5_extts_configure(struct ptp_clock_info *ptp,
@ -527,18 +635,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
{
struct mlx5_clock *clock =
container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_core_dev *mdev =
container_of(clock, struct mlx5_core_dev, clock);
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
struct mlx5_core_dev *mdev;
u32 field_select = 0;
u8 pin_mode = 0;
u8 pattern = 0;
int pin = -1;
int err = 0;
if (!MLX5_PPS_CAP(mdev))
return -EOPNOTSUPP;
/* Reject requests with unsupported flags */
if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
PTP_RISING_EDGE |
@ -569,6 +673,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
field_select = MLX5_MTPPS_FS_ENABLE;
}
mlx5_clock_lock(clock);
mdev = mlx5_clock_mdev_get(clock);
if (!MLX5_PPS_CAP(mdev)) {
err = -EOPNOTSUPP;
goto unlock;
}
MLX5_SET(mtpps_reg, in, pin, pin);
MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
MLX5_SET(mtpps_reg, in, pattern, pattern);
@ -577,15 +689,23 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
err = mlx5_set_mtpps(mdev, in, sizeof(in));
if (err)
return err;
goto unlock;
return mlx5_set_mtppse(mdev, pin, 0,
MLX5_EVENT_MODE_REPETETIVE & on);
err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on);
if (err)
goto unlock;
clock->pps_info.pin_armed[pin] = on;
clock_priv(clock)->event_mdev = mdev;
unlock:
mlx5_clock_unlock(clock);
return err;
}
static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns)
{
struct mlx5_clock *clock = &mdev->clock;
struct mlx5_clock *clock = mdev->clock;
u64 cycles_now, cycles_delta;
u64 nsec_now, nsec_delta;
struct mlx5_timer *timer;
@ -644,7 +764,7 @@ static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev,
struct ptp_clock_request *rq,
u32 *out_pulse_duration_ns)
{
struct mlx5_pps *pps_info = &mdev->clock.pps_info;
struct mlx5_pps *pps_info = &mdev->clock->pps_info;
u32 out_pulse_duration;
struct timespec64 ts;
@ -677,7 +797,7 @@ static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clo
u32 *field_select, u32 *out_pulse_duration_ns,
u64 *period, u64 *time_stamp)
{
struct mlx5_pps *pps_info = &mdev->clock.pps_info;
struct mlx5_pps *pps_info = &mdev->clock->pps_info;
struct ptp_clock_time *time = &rq->perout.start;
struct timespec64 ts;
@ -712,26 +832,18 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
{
struct mlx5_clock *clock =
container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_core_dev *mdev =
container_of(clock, struct mlx5_core_dev, clock);
bool rt_mode = mlx5_real_time_mode(mdev);
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
u32 out_pulse_duration_ns = 0;
struct mlx5_core_dev *mdev;
u32 field_select = 0;
u64 npps_period = 0;
u64 time_stamp = 0;
u8 pin_mode = 0;
u8 pattern = 0;
bool rt_mode;
int pin = -1;
int err = 0;
if (!MLX5_PPS_CAP(mdev))
return -EOPNOTSUPP;
/* Reject requests with unsupported flags */
if (mlx5_perout_verify_flags(mdev, rq->perout.flags))
return -EOPNOTSUPP;
if (rq->perout.index >= clock->ptp_info.n_pins)
return -EINVAL;
@ -740,14 +852,29 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
if (pin < 0)
return -EBUSY;
if (on) {
bool rt_mode = mlx5_real_time_mode(mdev);
mlx5_clock_lock(clock);
mdev = mlx5_clock_mdev_get(clock);
rt_mode = mlx5_real_time_mode(mdev);
if (!MLX5_PPS_CAP(mdev)) {
err = -EOPNOTSUPP;
goto unlock;
}
/* Reject requests with unsupported flags */
if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) {
err = -EOPNOTSUPP;
goto unlock;
}
if (on) {
pin_mode = MLX5_PIN_MODE_OUT;
pattern = MLX5_OUT_PATTERN_PERIODIC;
if (rt_mode && rq->perout.start.sec > U32_MAX)
return -EINVAL;
if (rt_mode && rq->perout.start.sec > U32_MAX) {
err = -EINVAL;
goto unlock;
}
field_select |= MLX5_MTPPS_FS_PIN_MODE |
MLX5_MTPPS_FS_PATTERN |
@ -760,7 +887,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
else
err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode);
if (err)
return err;
goto unlock;
}
MLX5_SET(mtpps_reg, in, pin, pin);
@ -773,13 +900,16 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns);
err = mlx5_set_mtpps(mdev, in, sizeof(in));
if (err)
return err;
goto unlock;
if (rt_mode)
return 0;
goto unlock;
return mlx5_set_mtppse(mdev, pin, 0,
MLX5_EVENT_MODE_REPETETIVE & on);
err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on);
unlock:
mlx5_clock_unlock(clock);
return err;
}
static int mlx5_pps_configure(struct ptp_clock_info *ptp,
@ -866,10 +996,8 @@ static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin,
mtpps_size, MLX5_REG_MTPPS, 0, 0);
}
static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin)
static int mlx5_get_pps_pin_mode(struct mlx5_core_dev *mdev, u8 pin)
{
struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {};
u8 mode;
int err;
@ -888,8 +1016,9 @@ static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin)
return PTP_PF_NONE;
}
static void mlx5_init_pin_config(struct mlx5_clock *clock)
static void mlx5_init_pin_config(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = mdev->clock;
int i;
if (!clock->ptp_info.n_pins)
@ -910,15 +1039,15 @@ static void mlx5_init_pin_config(struct mlx5_clock *clock)
sizeof(clock->ptp_info.pin_config[i].name),
"mlx5_pps%d", i);
clock->ptp_info.pin_config[i].index = i;
clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i);
clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(mdev, i);
clock->ptp_info.pin_config[i].chan = 0;
}
}
static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = &mdev->clock;
u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
struct mlx5_clock *clock = mdev->clock;
mlx5_query_mtpps(mdev, out, sizeof(out));
@ -968,16 +1097,16 @@ static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev,
static int mlx5_pps_event(struct notifier_block *nb,
unsigned long type, void *data)
{
struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
struct mlx5_clock_dev_state *clock_state = mlx5_nb_cof(nb, struct mlx5_clock_dev_state,
pps_nb);
struct mlx5_core_dev *mdev = clock_state->mdev;
struct mlx5_clock *clock = mdev->clock;
struct ptp_clock_event ptp_event;
struct mlx5_eqe *eqe = data;
int pin = eqe->data.pps.pin;
struct mlx5_core_dev *mdev;
unsigned long flags;
u64 ns;
mdev = container_of(clock, struct mlx5_core_dev, clock);
switch (clock->ptp_info.pin_config[pin].func) {
case PTP_PF_EXTTS:
ptp_event.index = pin;
@ -997,11 +1126,15 @@ static int mlx5_pps_event(struct notifier_block *nb,
ptp_clock_event(clock->ptp, &ptp_event);
break;
case PTP_PF_PEROUT:
if (clock->shared) {
mlx5_core_warn(mdev, " Received unexpected PPS out event\n");
break;
}
ns = perout_conf_next_event_timer(mdev, clock);
write_seqlock_irqsave(&clock->lock, flags);
clock->pps_info.start[pin] = ns;
write_sequnlock_irqrestore(&clock->lock, flags);
schedule_work(&clock->pps_info.out_work);
schedule_work(&clock_state->out_work);
break;
default:
mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
@ -1013,7 +1146,7 @@ static int mlx5_pps_event(struct notifier_block *nb,
static void mlx5_timecounter_init(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = &mdev->clock;
struct mlx5_clock *clock = mdev->clock;
struct mlx5_timer *timer = &clock->timer;
u32 dev_freq;
@ -1029,10 +1162,10 @@ static void mlx5_timecounter_init(struct mlx5_core_dev *mdev)
ktime_to_ns(ktime_get_real()));
}
static void mlx5_init_overflow_period(struct mlx5_clock *clock)
static void mlx5_init_overflow_period(struct mlx5_core_dev *mdev)
{
struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
struct mlx5_clock *clock = mdev->clock;
struct mlx5_timer *timer = &clock->timer;
u64 overflow_cycles;
u64 frac = 0;
@ -1065,7 +1198,7 @@ static void mlx5_init_overflow_period(struct mlx5_clock *clock)
static void mlx5_init_clock_info(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = &mdev->clock;
struct mlx5_clock *clock = mdev->clock;
struct mlx5_ib_clock_info *info;
struct mlx5_timer *timer;
@ -1088,7 +1221,7 @@ static void mlx5_init_clock_info(struct mlx5_core_dev *mdev)
static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = &mdev->clock;
struct mlx5_clock *clock = mdev->clock;
u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {};
u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
u8 log_max_freq_adjustment = 0;
@ -1107,7 +1240,7 @@ static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev)
static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = &mdev->clock;
struct mlx5_clock *clock = mdev->clock;
/* Configure the PHC */
clock->ptp_info = mlx5_ptp_clock_info;
@ -1123,38 +1256,30 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev)
mlx5_timecounter_init(mdev);
mlx5_init_clock_info(mdev);
mlx5_init_overflow_period(clock);
mlx5_init_overflow_period(mdev);
if (mlx5_real_time_mode(mdev)) {
struct timespec64 ts;
ktime_get_real_ts64(&ts);
mlx5_ptp_settime(&clock->ptp_info, &ts);
mlx5_clock_settime(mdev, clock, &ts);
}
}
static void mlx5_init_pps(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = &mdev->clock;
if (!MLX5_PPS_CAP(mdev))
return;
mlx5_get_pps_caps(mdev);
mlx5_init_pin_config(clock);
mlx5_init_pin_config(mdev);
}
void mlx5_init_clock(struct mlx5_core_dev *mdev)
static void mlx5_init_clock_dev(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = &mdev->clock;
if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) {
mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
return;
}
struct mlx5_clock *clock = mdev->clock;
seqlock_init(&clock->lock);
INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
/* Initialize the device clock */
mlx5_init_timer_clock(mdev);
@ -1163,35 +1288,27 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
mlx5_init_pps(mdev);
clock->ptp = ptp_clock_register(&clock->ptp_info,
&mdev->pdev->dev);
clock->shared ? NULL : &mdev->pdev->dev);
if (IS_ERR(clock->ptp)) {
mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n",
mlx5_core_warn(mdev, "%sptp_clock_register failed %ld\n",
clock->shared ? "shared clock " : "",
PTR_ERR(clock->ptp));
clock->ptp = NULL;
}
MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
mlx5_eq_notifier_register(mdev, &clock->pps_nb);
if (clock->ptp)
ptp_schedule_worker(clock->ptp, 0);
}
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
static void mlx5_destroy_clock_dev(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = &mdev->clock;
struct mlx5_clock *clock = mdev->clock;
if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
return;
mlx5_eq_notifier_unregister(mdev, &clock->pps_nb);
if (clock->ptp) {
ptp_clock_unregister(clock->ptp);
clock->ptp = NULL;
}
cancel_work_sync(&clock->pps_info.out_work);
if (mdev->clock_info) {
free_page((unsigned long)mdev->clock_info);
mdev->clock_info = NULL;
@ -1199,3 +1316,248 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
kfree(clock->ptp_info.pin_config);
}
static void mlx5_clock_free(struct mlx5_core_dev *mdev)
{
struct mlx5_clock_priv *cpriv = clock_priv(mdev->clock);
mlx5_destroy_clock_dev(mdev);
mutex_destroy(&cpriv->lock);
kfree(cpriv);
mdev->clock = NULL;
}
static int mlx5_clock_alloc(struct mlx5_core_dev *mdev, bool shared)
{
struct mlx5_clock_priv *cpriv;
struct mlx5_clock *clock;
cpriv = kzalloc(sizeof(*cpriv), GFP_KERNEL);
if (!cpriv)
return -ENOMEM;
mutex_init(&cpriv->lock);
cpriv->mdev = mdev;
clock = &cpriv->clock;
clock->shared = shared;
mdev->clock = clock;
mlx5_clock_lock(clock);
mlx5_init_clock_dev(mdev);
mlx5_clock_unlock(clock);
if (!clock->shared)
return 0;
if (!clock->ptp) {
mlx5_core_warn(mdev, "failed to create ptp dev shared by multiple functions");
mlx5_clock_free(mdev);
return -EINVAL;
}
return 0;
}
static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, u64 key)
{
struct mlx5_core_dev *peer_dev, *next = NULL;
struct mlx5_devcom_comp_dev *pos;
mdev->clock_state->compdev = mlx5_devcom_register_component(mdev->priv.devc,
MLX5_DEVCOM_SHARED_CLOCK,
key, NULL, mdev);
if (IS_ERR(mdev->clock_state->compdev))
return;
mlx5_devcom_comp_lock(mdev->clock_state->compdev);
mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) {
if (peer_dev->clock) {
next = peer_dev;
break;
}
}
if (next) {
mdev->clock = next->clock;
/* clock info is shared among all the functions using the same clock */
mdev->clock_info = next->clock_info;
} else {
mlx5_clock_alloc(mdev, true);
}
mlx5_devcom_comp_unlock(mdev->clock_state->compdev);
if (!mdev->clock) {
mlx5_devcom_unregister_component(mdev->clock_state->compdev);
mdev->clock_state->compdev = NULL;
}
}
static void mlx5_shared_clock_unregister(struct mlx5_core_dev *mdev)
{
struct mlx5_core_dev *peer_dev, *next = NULL;
struct mlx5_clock *clock = mdev->clock;
struct mlx5_devcom_comp_dev *pos;
mlx5_devcom_comp_lock(mdev->clock_state->compdev);
mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) {
if (peer_dev->clock && peer_dev != mdev) {
next = peer_dev;
break;
}
}
if (next) {
struct mlx5_clock_priv *cpriv = clock_priv(clock);
mlx5_clock_lock(clock);
if (mdev == cpriv->mdev)
cpriv->mdev = next;
mlx5_clock_unlock(clock);
} else {
mlx5_clock_free(mdev);
}
mdev->clock = NULL;
mdev->clock_info = NULL;
mlx5_devcom_comp_unlock(mdev->clock_state->compdev);
mlx5_devcom_unregister_component(mdev->clock_state->compdev);
}
static void mlx5_clock_arm_pps_in_event(struct mlx5_clock *clock,
struct mlx5_core_dev *new_mdev,
struct mlx5_core_dev *old_mdev)
{
struct ptp_clock_info *ptp_info = &clock->ptp_info;
struct mlx5_clock_priv *cpriv = clock_priv(clock);
int i;
for (i = 0; i < ptp_info->n_pins; i++) {
if (ptp_info->pin_config[i].func != PTP_PF_EXTTS ||
!clock->pps_info.pin_armed[i])
continue;
if (new_mdev) {
mlx5_set_mtppse(new_mdev, i, 0, MLX5_EVENT_MODE_REPETETIVE);
cpriv->event_mdev = new_mdev;
} else {
cpriv->event_mdev = NULL;
}
if (old_mdev)
mlx5_set_mtppse(old_mdev, i, 0, MLX5_EVENT_MODE_DISABLE);
}
}
void mlx5_clock_load(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = mdev->clock;
struct mlx5_clock_priv *cpriv;
if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
return;
INIT_WORK(&mdev->clock_state->out_work, mlx5_pps_out);
MLX5_NB_INIT(&mdev->clock_state->pps_nb, mlx5_pps_event, PPS_EVENT);
mlx5_eq_notifier_register(mdev, &mdev->clock_state->pps_nb);
if (!clock->shared) {
mlx5_clock_arm_pps_in_event(clock, mdev, NULL);
return;
}
cpriv = clock_priv(clock);
mlx5_devcom_comp_lock(mdev->clock_state->compdev);
mlx5_clock_lock(clock);
if (mdev == cpriv->mdev && mdev != cpriv->event_mdev)
mlx5_clock_arm_pps_in_event(clock, mdev, cpriv->event_mdev);
mlx5_clock_unlock(clock);
mlx5_devcom_comp_unlock(mdev->clock_state->compdev);
}
void mlx5_clock_unload(struct mlx5_core_dev *mdev)
{
struct mlx5_core_dev *peer_dev, *next = NULL;
struct mlx5_clock *clock = mdev->clock;
struct mlx5_devcom_comp_dev *pos;
if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
return;
if (!clock->shared) {
mlx5_clock_arm_pps_in_event(clock, NULL, mdev);
goto out;
}
mlx5_devcom_comp_lock(mdev->clock_state->compdev);
mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) {
if (peer_dev->clock && peer_dev != mdev) {
next = peer_dev;
break;
}
}
mlx5_clock_lock(clock);
if (mdev == clock_priv(clock)->event_mdev)
mlx5_clock_arm_pps_in_event(clock, next, mdev);
mlx5_clock_unlock(clock);
mlx5_devcom_comp_unlock(mdev->clock_state->compdev);
out:
mlx5_eq_notifier_unregister(mdev, &mdev->clock_state->pps_nb);
cancel_work_sync(&mdev->clock_state->out_work);
}
static struct mlx5_clock null_clock;
int mlx5_init_clock(struct mlx5_core_dev *mdev)
{
u8 identity[MLX5_RT_CLOCK_IDENTITY_SIZE];
struct mlx5_clock_dev_state *clock_state;
u64 key;
int err;
if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) {
mdev->clock = &null_clock;
mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
return 0;
}
clock_state = kzalloc(sizeof(*clock_state), GFP_KERNEL);
if (!clock_state)
return -ENOMEM;
clock_state->mdev = mdev;
mdev->clock_state = clock_state;
if (MLX5_CAP_MCAM_REG3(mdev, mrtcq) && mlx5_real_time_mode(mdev)) {
if (mlx5_clock_identity_get(mdev, identity)) {
mlx5_core_warn(mdev, "failed to get rt clock identity, create ptp dev per function\n");
} else {
memcpy(&key, &identity, sizeof(key));
mlx5_shared_clock_register(mdev, key);
}
}
if (!mdev->clock) {
err = mlx5_clock_alloc(mdev, false);
if (err) {
kfree(clock_state);
mdev->clock_state = NULL;
return err;
}
}
return 0;
}
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
{
if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
return;
if (mdev->clock->shared)
mlx5_shared_clock_unregister(mdev);
else
mlx5_clock_free(mdev);
kfree(mdev->clock_state);
mdev->clock_state = NULL;
}

View file

@ -33,6 +33,35 @@
#ifndef __LIB_CLOCK_H__
#define __LIB_CLOCK_H__
#include <linux/ptp_clock_kernel.h>
#define MAX_PIN_NUM 8
struct mlx5_pps {
u8 pin_caps[MAX_PIN_NUM];
u64 start[MAX_PIN_NUM];
u8 enabled;
u64 min_npps_period;
u64 min_out_pulse_duration_ns;
bool pin_armed[MAX_PIN_NUM];
};
struct mlx5_timer {
struct cyclecounter cycles;
struct timecounter tc;
u32 nominal_c_mult;
unsigned long overflow_period;
};
struct mlx5_clock {
seqlock_t lock;
struct hwtstamp_config hwtstamp_config;
struct ptp_clock *ptp;
struct ptp_clock_info ptp_info;
struct mlx5_pps pps_info;
struct mlx5_timer timer;
bool shared;
};
static inline bool mlx5_is_real_time_rq(struct mlx5_core_dev *mdev)
{
u8 rq_ts_format_cap = MLX5_CAP_GEN(mdev, rq_ts_format);
@ -54,12 +83,14 @@ static inline bool mlx5_is_real_time_sq(struct mlx5_core_dev *mdev)
typedef ktime_t (*cqe_ts_to_ns)(struct mlx5_clock *, u64);
#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
void mlx5_init_clock(struct mlx5_core_dev *mdev);
int mlx5_init_clock(struct mlx5_core_dev *mdev);
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
void mlx5_clock_load(struct mlx5_core_dev *mdev);
void mlx5_clock_unload(struct mlx5_core_dev *mdev);
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1;
return mdev->clock->ptp ? ptp_clock_index(mdev->clock->ptp) : -1;
}
static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
@ -87,8 +118,10 @@ static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock,
return ns_to_ktime(time);
}
#else
static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
static inline int mlx5_init_clock(struct mlx5_core_dev *mdev) { return 0; }
static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
static inline void mlx5_clock_load(struct mlx5_core_dev *mdev) {}
static inline void mlx5_clock_unload(struct mlx5_core_dev *mdev) {}
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
return -1;

View file

@ -11,6 +11,7 @@ enum mlx5_devcom_component {
MLX5_DEVCOM_MPV,
MLX5_DEVCOM_HCA_PORTS,
MLX5_DEVCOM_SD_GROUP,
MLX5_DEVCOM_SHARED_CLOCK,
MLX5_DEVCOM_NUM_COMPONENTS,
};

View file

@ -1038,7 +1038,11 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
mlx5_init_reserved_gids(dev);
mlx5_init_clock(dev);
err = mlx5_init_clock(dev);
if (err) {
mlx5_core_err(dev, "failed to initialize hardware clock\n");
goto err_tables_cleanup;
}
dev->vxlan = mlx5_vxlan_create(dev);
dev->geneve = mlx5_geneve_create(dev);
@ -1046,7 +1050,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
err = mlx5_init_rl_table(dev);
if (err) {
mlx5_core_err(dev, "Failed to init rate limiting\n");
goto err_tables_cleanup;
goto err_clock_cleanup;
}
err = mlx5_mpfs_init(dev);
@ -1123,10 +1127,11 @@ err_mpfs_cleanup:
mlx5_mpfs_cleanup(dev);
err_rl_cleanup:
mlx5_cleanup_rl_table(dev);
err_tables_cleanup:
err_clock_cleanup:
mlx5_geneve_destroy(dev->geneve);
mlx5_vxlan_destroy(dev->vxlan);
mlx5_cleanup_clock(dev);
err_tables_cleanup:
mlx5_cleanup_reserved_gids(dev);
mlx5_cq_debugfs_cleanup(dev);
mlx5_fw_reset_cleanup(dev);
@ -1359,6 +1364,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_eq_table;
}
mlx5_clock_load(dev);
err = mlx5_fw_tracer_init(dev->tracer);
if (err) {
mlx5_core_err(dev, "Failed to init FW tracer %d\n", err);
@ -1442,6 +1449,7 @@ err_fpga_start:
mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_reset_events_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
mlx5_clock_unload(dev);
mlx5_eq_table_destroy(dev);
err_eq_table:
mlx5_irq_table_destroy(dev);
@ -1468,6 +1476,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_reset_events_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
mlx5_clock_unload(dev);
mlx5_eq_table_destroy(dev);
mlx5_irq_table_destroy(dev);
mlx5_pagealloc_stop(dev);

View file

@ -1105,6 +1105,9 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
[MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000,
[MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000,
[MLX5E_800GAUI_8_800GBASE_CR8_KR8] = 800000,
[MLX5E_200GAUI_1_200GBASE_CR1_KR1] = 200000,
[MLX5E_400GAUI_2_400GBASE_CR2_KR2] = 400000,
[MLX5E_800GAUI_4_800GBASE_CR4_KR4] = 800000,
};
int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,

View file

@ -516,30 +516,6 @@ def_xa_destroy:
return NULL;
}
/* Assure synchronization of the device steering tables with updates made by SW
* insertion.
*/
int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags)
{
int ret = 0;
if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) {
mlx5dr_domain_lock(dmn);
ret = mlx5dr_send_ring_force_drain(dmn);
mlx5dr_domain_unlock(dmn);
if (ret) {
mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n",
flags, ret);
return ret;
}
}
if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW)
ret = mlx5dr_cmd_sync_steering(dmn->mdev);
return ret;
}
int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
{
if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1))

View file

@ -1331,36 +1331,3 @@ void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
kfree(send_ring->sync_buff);
kfree(send_ring);
}
int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
{
struct mlx5dr_send_ring *send_ring = dmn->send_ring;
struct postsend_info send_info = {};
u8 data[DR_STE_SIZE];
int num_of_sends_req;
int ret;
int i;
/* Sending this amount of requests makes sure we will get drain */
num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
/* Send fake requests forcing the last to be signaled */
send_info.write.addr = (uintptr_t)data;
send_info.write.length = DR_STE_SIZE;
send_info.write.lkey = 0;
/* Using the sync_mr in order to write/read */
send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
send_info.rkey = send_ring->sync_mr->mkey;
for (i = 0; i < num_of_sends_req; i++) {
ret = dr_postsend_icm_data(dmn, &send_info);
if (ret)
return ret;
}
spin_lock(&send_ring->lock);
ret = dr_handle_pending_wc(dmn, send_ring);
spin_unlock(&send_ring->lock);
return ret;
}

View file

@ -1473,7 +1473,6 @@ struct mlx5dr_send_ring {
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn);
void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
struct mlx5dr_send_ring *send_ring);
int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn);
int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn,
struct mlx5dr_ste *ste,
u8 *data,

View file

@ -45,8 +45,6 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type);
int mlx5dr_domain_destroy(struct mlx5dr_domain *domain);
int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags);
void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
struct mlx5dr_domain *peer_dmn,
u16 peer_vhca_id);

View file

@ -754,9 +754,6 @@ void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
struct net_device *dev);
bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev);
u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev);
u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
enum mlxsw_sp_l3proto ul_proto,

View file

@ -8184,41 +8184,6 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
return NULL;
}
bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev)
{
struct mlxsw_sp_rif *rif;
mutex_lock(&mlxsw_sp->router->lock);
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
mutex_unlock(&mlxsw_sp->router->lock);
return rif;
}
u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
{
struct mlxsw_sp_rif *rif;
u16 vid = 0;
mutex_lock(&mlxsw_sp->router->lock);
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
if (!rif)
goto out;
/* We only return the VID for VLAN RIFs. Otherwise we return an
* invalid value (0).
*/
if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
goto out;
vid = mlxsw_sp_fid_8021q_vid(rif->fid);
out:
mutex_unlock(&mlxsw_sp->router->lock);
return vid;
}
static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
char ritr_pl[MLXSW_REG_RITR_LEN];
@ -8417,19 +8382,6 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
return lb_rif->common.rif_index;
}
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
struct mlxsw_sp_vr *ul_vr;
ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
if (WARN_ON(IS_ERR(ul_vr)))
return 0;
return ul_vr->id;
}
u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
return lb_rif->ul_rif_id;

View file

@ -90,7 +90,6 @@ struct mlxsw_sp_ipip_entry;
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
u16 rif_index);
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif);
u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev);
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);

View file

@ -10,6 +10,40 @@
static struct dentry *fbnic_dbg_root;
static void fbnic_dbg_desc_break(struct seq_file *s, int i)
{
while (i--)
seq_putc(s, '-');
seq_putc(s, '\n');
}
static int fbnic_dbg_mac_addr_show(struct seq_file *s, void *v)
{
struct fbnic_dev *fbd = s->private;
char hdr[80];
int i;
/* Generate Header */
snprintf(hdr, sizeof(hdr), "%3s %s %-17s %s\n",
"Idx", "S", "TCAM Bitmap", "Addr/Mask");
seq_puts(s, hdr);
fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr)));
for (i = 0; i < FBNIC_RPC_TCAM_MACDA_NUM_ENTRIES; i++) {
struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i];
seq_printf(s, "%02d %d %64pb %pm\n",
i, mac_addr->state, mac_addr->act_tcam,
mac_addr->value.addr8);
seq_printf(s, " %pm\n",
mac_addr->mask.addr8);
}
return 0;
}
DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_mac_addr);
static int fbnic_dbg_pcie_stats_show(struct seq_file *s, void *v)
{
struct fbnic_dev *fbd = s->private;
@ -48,6 +82,8 @@ void fbnic_dbg_fbd_init(struct fbnic_dev *fbd)
fbd->dbg_fbd = debugfs_create_dir(name, fbnic_dbg_root);
debugfs_create_file("pcie_stats", 0400, fbd->dbg_fbd, fbd,
&fbnic_dbg_pcie_stats_fops);
debugfs_create_file("mac_addr", 0400, fbd->dbg_fbd, fbd,
&fbnic_dbg_mac_addr_fops);
}
void fbnic_dbg_fbd_exit(struct fbnic_dev *fbd)

View file

@ -628,6 +628,8 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd)
fbnic_rss_key_fill(fbn->rss_key);
fbnic_rss_init_en_mask(fbn);
netdev->priv_flags |= IFF_UNICAST_FLT;
netdev->features |=
NETIF_F_RXHASH |
NETIF_F_SG |

View file

@ -3033,7 +3033,7 @@ static void qed_iov_vf_mbx_vport_update(struct qed_hwfn *p_hwfn,
u16 length;
int rc;
/* Valiate PF can send such a request */
/* Validate PF can send such a request */
if (!vf->vport_instance) {
DP_VERBOSE(p_hwfn,
QED_MSG_IOV,
@ -3312,7 +3312,7 @@ static void qed_iov_vf_mbx_ucast_filter(struct qed_hwfn *p_hwfn,
goto out;
}
/* Determine if the unicast filtering is acceptible by PF */
/* Determine if the unicast filtering is acceptable by PF */
if ((p_bulletin->valid_bitmap & BIT(VLAN_ADDR_FORCED)) &&
(params.type == QED_FILTER_VLAN ||
params.type == QED_FILTER_MAC_VLAN)) {
@ -3729,7 +3729,7 @@ qed_iov_execute_vf_flr_cleanup(struct qed_hwfn *p_hwfn,
rc = qed_iov_enable_vf_access(p_hwfn, p_ptt, p_vf);
if (rc) {
DP_ERR(p_hwfn, "Failed to re-enable VF[%d] acces\n",
DP_ERR(p_hwfn, "Failed to re-enable VF[%d] access\n",
vfid);
return rc;
}
@ -4480,7 +4480,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
/* Failure to acquire the ptt in 100g creates an odd error
* where the first engine has already relased IOV.
* where the first engine has already released IOV.
*/
if (!ptt) {
DP_ERR(hwfn, "Failed to acquire ptt\n");

View file

@ -114,7 +114,8 @@ config R8169
will be called r8169. This is recommended.
config R8169_LEDS
def_bool R8169 && LEDS_TRIGGER_NETDEV
bool "Support for controlling the NIC LEDs"
depends on R8169 && LEDS_TRIGGER_NETDEV
depends on !(R8169=y && LEDS_CLASS=m)
help
Optional support for controlling the NIC LED's with the netdev

View file

@ -5222,6 +5222,7 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
new_bus->priv = tp;
new_bus->parent = &pdev->dev;
new_bus->irq[0] = PHY_MAC_INTERRUPT;
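/* Only address 0 hosts the integrated PHY; mask the remaining addresses
* so the MDIO bus scan does not probe them.
*/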
new_bus->phy_mask = GENMASK(31, 1);
snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x-%x",
pci_domain_nr(pdev->bus), pci_dev_id(pdev));

View file

@ -13,7 +13,7 @@
*/
const char *phy_speed_to_str(int speed)
{
BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 103,
BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 121,
"Enum ethtool_link_mode_bit_indices and phylib are out of sync. "
"If a speed or mode has been added please update phy_speed_to_str "
"and the PHY settings array.\n");
@ -169,6 +169,12 @@ static const struct phy_setting settings[] = {
PHY_SETTING( 800000, FULL, 800000baseDR8_2_Full ),
PHY_SETTING( 800000, FULL, 800000baseSR8_Full ),
PHY_SETTING( 800000, FULL, 800000baseVR8_Full ),
PHY_SETTING( 800000, FULL, 800000baseCR4_Full ),
PHY_SETTING( 800000, FULL, 800000baseKR4_Full ),
PHY_SETTING( 800000, FULL, 800000baseDR4_Full ),
PHY_SETTING( 800000, FULL, 800000baseDR4_2_Full ),
PHY_SETTING( 800000, FULL, 800000baseSR4_Full ),
PHY_SETTING( 800000, FULL, 800000baseVR4_Full ),
/* 400G */
PHY_SETTING( 400000, FULL, 400000baseCR8_Full ),
PHY_SETTING( 400000, FULL, 400000baseKR8_Full ),
@ -180,6 +186,12 @@ static const struct phy_setting settings[] = {
PHY_SETTING( 400000, FULL, 400000baseLR4_ER4_FR4_Full ),
PHY_SETTING( 400000, FULL, 400000baseDR4_Full ),
PHY_SETTING( 400000, FULL, 400000baseSR4_Full ),
PHY_SETTING( 400000, FULL, 400000baseCR2_Full ),
PHY_SETTING( 400000, FULL, 400000baseKR2_Full ),
PHY_SETTING( 400000, FULL, 400000baseDR2_Full ),
PHY_SETTING( 400000, FULL, 400000baseDR2_2_Full ),
PHY_SETTING( 400000, FULL, 400000baseSR2_Full ),
PHY_SETTING( 400000, FULL, 400000baseVR2_Full ),
/* 200G */
PHY_SETTING( 200000, FULL, 200000baseCR4_Full ),
PHY_SETTING( 200000, FULL, 200000baseKR4_Full ),
@ -191,6 +203,12 @@ static const struct phy_setting settings[] = {
PHY_SETTING( 200000, FULL, 200000baseLR2_ER2_FR2_Full ),
PHY_SETTING( 200000, FULL, 200000baseDR2_Full ),
PHY_SETTING( 200000, FULL, 200000baseSR2_Full ),
PHY_SETTING( 200000, FULL, 200000baseCR_Full ),
PHY_SETTING( 200000, FULL, 200000baseKR_Full ),
PHY_SETTING( 200000, FULL, 200000baseDR_Full ),
PHY_SETTING( 200000, FULL, 200000baseDR_2_Full ),
PHY_SETTING( 200000, FULL, 200000baseSR_Full ),
PHY_SETTING( 200000, FULL, 200000baseVR_Full ),
/* 100G */
PHY_SETTING( 100000, FULL, 100000baseCR4_Full ),
PHY_SETTING( 100000, FULL, 100000baseKR4_Full ),

View file

@ -4,8 +4,12 @@ config REALTEK_PHY
help
Currently supports RTL821x/RTL822x and fast ethernet PHYs
if REALTEK_PHY
config REALTEK_PHY_HWMON
def_bool REALTEK_PHY && HWMON
depends on !(REALTEK_PHY=y && HWMON=m)
bool "HWMON support for Realtek PHYs"
depends on HWMON && !(REALTEK_PHY=y && HWMON=m)
help
Optional hwmon support for the temperature sensor
endif # REALTEK_PHY

View file

@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/clk.h>
#include <linux/string_choices.h>
#include "realtek.h"
@ -422,11 +423,11 @@ static int rtl8211f_config_init(struct phy_device *phydev)
} else if (ret) {
dev_dbg(dev,
"%s 2ns TX delay (and changing the value from pin-strapping RXD1 or the bootloader)\n",
val_txdly ? "Enabling" : "Disabling");
str_enable_disable(val_txdly));
} else {
dev_dbg(dev,
"2ns TX delay was already %s (by pin-strapping RXD1 or bootloader configuration)\n",
val_txdly ? "enabled" : "disabled");
str_enabled_disabled(val_txdly));
}
ret = phy_modify_paged_changed(phydev, 0xd08, 0x15, RTL8211F_RX_DELAY,
@ -437,11 +438,11 @@ static int rtl8211f_config_init(struct phy_device *phydev)
} else if (ret) {
dev_dbg(dev,
"%s 2ns RX delay (and changing the value from pin-strapping RXD0 or the bootloader)\n",
val_rxdly ? "Enabling" : "Disabling");
str_enable_disable(val_rxdly));
} else {
dev_dbg(dev,
"2ns RX delay was already %s (by pin-strapping RXD0 or bootloader configuration)\n",
val_rxdly ? "enabled" : "disabled");
str_enabled_disabled(val_rxdly));
}
if (priv->has_phycr2) {

View file

@ -227,9 +227,9 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
be32_to_cpu(fdb->vni)))
goto nla_put_failure;
ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
ci.ndm_used = jiffies_to_clock_t(now - READ_ONCE(fdb->used));
ci.ndm_confirmed = 0;
ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
ci.ndm_updated = jiffies_to_clock_t(now - READ_ONCE(fdb->updated));
ci.ndm_refcnt = 0;
if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
@ -434,8 +434,12 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
struct vxlan_fdb *f;
f = __vxlan_find_mac(vxlan, mac, vni);
if (f && f->used != jiffies)
f->used = jiffies;
if (f) {
unsigned long now = jiffies;
if (READ_ONCE(f->used) != now)
WRITE_ONCE(f->used, now);
}
return f;
}
@ -1009,12 +1013,10 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
!(f->flags & NTF_VXLAN_ADDED_BY_USER)) {
if (f->state != state) {
f->state = state;
f->updated = jiffies;
notify = 1;
}
if (f->flags != fdb_flags) {
f->flags = fdb_flags;
f->updated = jiffies;
notify = 1;
}
}
@ -1048,12 +1050,13 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
}
if (ndm_flags & NTF_USE)
f->used = jiffies;
WRITE_ONCE(f->updated, jiffies);
if (notify) {
if (rd == NULL)
rd = first_remote_rtnl(f);
WRITE_ONCE(f->updated, jiffies);
err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH,
swdev_notify, extack);
if (err)
@ -1292,7 +1295,7 @@ int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
struct vxlan_fdb *f;
int err = -ENOENT;
f = vxlan_find_mac(vxlan, addr, src_vni);
f = __vxlan_find_mac(vxlan, addr, src_vni);
if (!f)
return err;
@ -1459,9 +1462,13 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
ifindex = src_ifindex;
#endif
f = vxlan_find_mac(vxlan, src_mac, vni);
f = __vxlan_find_mac(vxlan, src_mac, vni);
if (likely(f)) {
struct vxlan_rdst *rdst = first_remote_rcu(f);
unsigned long now = jiffies;
if (READ_ONCE(f->updated) != now)
WRITE_ONCE(f->updated, now);
if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
rdst->remote_ifindex == ifindex))
@ -1481,7 +1488,6 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
src_mac, &rdst->remote_ip.sa, &src_ip->sa);
rdst->remote_ip = *src_ip;
f->updated = jiffies;
vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
} else {
u32 hash_index = fdb_head_index(vxlan, src_mac, vni);
@ -2852,7 +2858,7 @@ static void vxlan_cleanup(struct timer_list *t)
if (f->flags & NTF_EXT_LEARNED)
continue;
timeout = f->used + vxlan->cfg.age_interval * HZ;
timeout = READ_ONCE(f->updated) + vxlan->cfg.age_interval * HZ;
if (time_before_eq(timeout, jiffies)) {
netdev_dbg(vxlan->dev,
"garbage collect %pM\n",
@ -4768,7 +4774,7 @@ vxlan_fdb_offloaded_set(struct net_device *dev,
spin_lock_bh(&vxlan->hash_lock[hash_index]);
f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
if (!f)
goto out;
@ -4824,7 +4830,7 @@ vxlan_fdb_external_learn_del(struct net_device *dev,
hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
spin_lock_bh(&vxlan->hash_lock[hash_index]);
f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
if (!f)
err = -ENOENT;
else if (f->flags & NTF_EXT_LEARNED)

View file

@ -2,15 +2,6 @@
menu "S/390 network device drivers"
depends on NETDEVICES && S390
config LCS
def_tristate m
prompt "Lan Channel Station Interface"
depends on CCW && NETDEVICES && ETHERNET
help
Select this option if you want to use LCS networking on IBM System z.
To compile as a module, choose M. The module name is lcs.
If you do not use LCS, choose N.
config CTCM
def_tristate m
prompt "CTC and MPC SNA device support"
@ -98,7 +89,7 @@ config QETH_OSX
config CCWGROUP
tristate
default (LCS || CTCM || QETH || SMC)
default (CTCM || QETH || SMC)
config ISM
tristate "Support for ISM vPCI Adapter"

View file

@ -8,7 +8,6 @@ obj-$(CONFIG_CTCM) += ctcm.o fsm.o
obj-$(CONFIG_NETIUCV) += netiucv.o fsm.o
obj-$(CONFIG_SMSGIUCV) += smsgiucv.o
obj-$(CONFIG_SMSGIUCV_EVENT) += smsgiucv_app.o
obj-$(CONFIG_LCS) += lcs.o
qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o qeth_ethtool.o
obj-$(CONFIG_QETH) += qeth.o
qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o

File diff suppressed because it is too large

View file

@ -1,342 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*lcs.h*/
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <asm/ccwdev.h>
#define LCS_DBF_TEXT(level, name, text) \
do { \
debug_text_event(lcs_dbf_##name, level, text); \
} while (0)
#define LCS_DBF_HEX(level,name,addr,len) \
do { \
debug_event(lcs_dbf_##name,level,(void*)(addr),len); \
} while (0)
#define LCS_DBF_TEXT_(level,name,text...) \
do { \
if (debug_level_enabled(lcs_dbf_##name, level)) { \
scnprintf(debug_buffer, sizeof(debug_buffer), text); \
debug_text_event(lcs_dbf_##name, level, debug_buffer); \
} \
} while (0)
/**
* sysfs related stuff
*/
#define CARD_FROM_DEV(cdev) \
(struct lcs_card *) dev_get_drvdata( \
&((struct ccwgroup_device *)dev_get_drvdata(&cdev->dev))->dev);
/**
* Enum for classifying detected devices.
*/
enum lcs_channel_types {
/* Device is not a channel */
lcs_channel_type_none,
/* Device is a 2216 channel */
lcs_channel_type_parallel,
/* Device is a 2216 channel */
lcs_channel_type_2216,
/* Device is a OSA2 card */
lcs_channel_type_osa2
};
/**
* CCW commands used in this driver
*/
#define LCS_CCW_WRITE 0x01
#define LCS_CCW_READ 0x02
#define LCS_CCW_TRANSFER 0x08
/**
* LCS device status primitives
*/
#define LCS_CMD_STARTLAN 0x01
#define LCS_CMD_STOPLAN 0x02
#define LCS_CMD_LANSTAT 0x04
#define LCS_CMD_STARTUP 0x07
#define LCS_CMD_SHUTDOWN 0x08
#define LCS_CMD_QIPASSIST 0xb2
#define LCS_CMD_SETIPM 0xb4
#define LCS_CMD_DELIPM 0xb5
#define LCS_INITIATOR_TCPIP 0x00
#define LCS_INITIATOR_LGW 0x01
#define LCS_STD_CMD_SIZE 16
#define LCS_MULTICAST_CMD_SIZE 404
/**
* LCS IPASSIST MASKS,only used when multicast is switched on
*/
/* Not supported by LCS */
#define LCS_IPASS_ARP_PROCESSING 0x0001
#define LCS_IPASS_IN_CHECKSUM_SUPPORT 0x0002
#define LCS_IPASS_OUT_CHECKSUM_SUPPORT 0x0004
#define LCS_IPASS_IP_FRAG_REASSEMBLY 0x0008
#define LCS_IPASS_IP_FILTERING 0x0010
/* Supported by lcs 3172 */
#define LCS_IPASS_IPV6_SUPPORT 0x0020
#define LCS_IPASS_MULTICAST_SUPPORT 0x0040
/**
* LCS sense byte definitions
*/
#define LCS_SENSE_BYTE_0 0
#define LCS_SENSE_BYTE_1 1
#define LCS_SENSE_BYTE_2 2
#define LCS_SENSE_BYTE_3 3
#define LCS_SENSE_INTERFACE_DISCONNECT 0x01
#define LCS_SENSE_EQUIPMENT_CHECK 0x10
#define LCS_SENSE_BUS_OUT_CHECK 0x20
#define LCS_SENSE_INTERVENTION_REQUIRED 0x40
#define LCS_SENSE_CMD_REJECT 0x80
#define LCS_SENSE_RESETTING_EVENT 0x80
#define LCS_SENSE_DEVICE_ONLINE 0x20
/**
* LCS packet type definitions
*/
#define LCS_FRAME_TYPE_CONTROL 0
#define LCS_FRAME_TYPE_ENET 1
#define LCS_FRAME_TYPE_TR 2
#define LCS_FRAME_TYPE_FDDI 7
#define LCS_FRAME_TYPE_AUTO -1
/**
* some more definitions,we will sort them later
*/
#define LCS_ILLEGAL_OFFSET 0xffff
#define LCS_IOBUFFERSIZE 0x5000
#define LCS_NUM_BUFFS 32 /* needs to be power of 2 */
#define LCS_MAC_LENGTH 6
#define LCS_INVALID_PORT_NO -1
#define LCS_LANCMD_TIMEOUT_DEFAULT 5
/**
* Multicast state
*/
#define LCS_IPM_STATE_SET_REQUIRED 0
#define LCS_IPM_STATE_DEL_REQUIRED 1
#define LCS_IPM_STATE_ON_CARD 2
/**
* LCS IP Assist declarations
* seems to be only used for multicast
*/
#define LCS_IPASS_ARP_PROCESSING 0x0001
#define LCS_IPASS_INBOUND_CSUM_SUPP 0x0002
#define LCS_IPASS_OUTBOUND_CSUM_SUPP 0x0004
#define LCS_IPASS_IP_FRAG_REASSEMBLY 0x0008
#define LCS_IPASS_IP_FILTERING 0x0010
#define LCS_IPASS_IPV6_SUPPORT 0x0020
#define LCS_IPASS_MULTICAST_SUPPORT 0x0040
/**
* LCS Buffer states
*/
enum lcs_buffer_states {
LCS_BUF_STATE_EMPTY, /* buffer is empty */
LCS_BUF_STATE_LOCKED, /* buffer is locked, don't touch */
LCS_BUF_STATE_READY, /* buffer is ready for read/write */
LCS_BUF_STATE_PROCESSED,
};
/**
* LCS Channel State Machine declarations
*/
enum lcs_channel_states {
LCS_CH_STATE_INIT,
LCS_CH_STATE_HALTED,
LCS_CH_STATE_STOPPED,
LCS_CH_STATE_RUNNING,
LCS_CH_STATE_SUSPENDED,
LCS_CH_STATE_CLEARED,
LCS_CH_STATE_ERROR,
};
/**
* LCS device state machine
*/
enum lcs_dev_states {
DEV_STATE_DOWN,
DEV_STATE_UP,
DEV_STATE_RECOVER,
};
enum lcs_threads {
LCS_SET_MC_THREAD = 1,
LCS_RECOVERY_THREAD = 2,
};
/**
* LCS struct declarations
*/
struct lcs_header {
__u16 offset;
__u8 type;
__u8 slot;
} __attribute__ ((packed));
struct lcs_ip_mac_pair {
__be32 ip_addr;
__u8 mac_addr[LCS_MAC_LENGTH];
__u8 reserved[2];
} __attribute__ ((packed));
struct lcs_ipm_list {
struct list_head list;
struct lcs_ip_mac_pair ipm;
__u8 ipm_state;
};
struct lcs_cmd {
__u16 offset;
__u8 type;
__u8 slot;
__u8 cmd_code;
__u8 initiator;
__u16 sequence_no;
__u16 return_code;
union {
struct {
__u8 lan_type;
__u8 portno;
__u16 parameter_count;
__u8 operator_flags[3];
__u8 reserved[3];
} lcs_std_cmd;
struct {
__u16 unused1;
__u16 buff_size;
__u8 unused2[6];
} lcs_startup;
struct {
__u8 lan_type;
__u8 portno;
__u8 unused[10];
__u8 mac_addr[LCS_MAC_LENGTH];
__u32 num_packets_deblocked;
__u32 num_packets_blocked;
__u32 num_packets_tx_on_lan;
__u32 num_tx_errors_detected;
__u32 num_tx_packets_disgarded;
__u32 num_packets_rx_from_lan;
__u32 num_rx_errors_detected;
__u32 num_rx_discarded_nobuffs_avail;
__u32 num_rx_packets_too_large;
} lcs_lanstat_cmd;
#ifdef CONFIG_IP_MULTICAST
struct {
__u8 lan_type;
__u8 portno;
__u16 num_ip_pairs;
__u16 ip_assists_supported;
__u16 ip_assists_enabled;
__u16 version;
struct {
struct lcs_ip_mac_pair
ip_mac_pair[32];
__u32 response_data;
} lcs_ipass_ctlmsg __attribute ((packed));
} lcs_qipassist __attribute__ ((packed));
#endif /*CONFIG_IP_MULTICAST */
} cmd __attribute__ ((packed));
} __attribute__ ((packed));
/**
* Forward declarations.
*/
struct lcs_card;
struct lcs_channel;
/**
* Definition of an lcs buffer.
*/
struct lcs_buffer {
enum lcs_buffer_states state;
void *data;
int count;
/* Callback for completion notification. */
void (*callback)(struct lcs_channel *, struct lcs_buffer *);
};
struct lcs_reply {
struct list_head list;
__u16 sequence_no;
refcount_t refcnt;
/* Callback for completion notification. */
void (*callback)(struct lcs_card *, struct lcs_cmd *);
wait_queue_head_t wait_q;
struct lcs_card *card;
struct timer_list timer;
int received;
int rc;
};
/**
* Definition of an lcs channel
*/
struct lcs_channel {
enum lcs_channel_states state;
struct ccw_device *ccwdev;
struct ccw1 ccws[LCS_NUM_BUFFS + 1];
wait_queue_head_t wait_q;
struct tasklet_struct irq_tasklet;
struct lcs_buffer iob[LCS_NUM_BUFFS];
int io_idx;
int buf_idx;
};
/**
* definition of the lcs card
*/
struct lcs_card {
spinlock_t lock;
spinlock_t ipm_lock;
enum lcs_dev_states state;
struct net_device *dev;
struct net_device_stats stats;
__be16 (*lan_type_trans)(struct sk_buff *skb,
struct net_device *dev);
struct ccwgroup_device *gdev;
struct lcs_channel read;
struct lcs_channel write;
struct lcs_buffer *tx_buffer;
int tx_emitted;
struct list_head lancmd_waiters;
int lancmd_timeout;
struct work_struct kernel_thread_starter;
spinlock_t mask_lock;
unsigned long thread_start_mask;
unsigned long thread_running_mask;
unsigned long thread_allowed_mask;
wait_queue_head_t wait_q;
#ifdef CONFIG_IP_MULTICAST
struct list_head ipm_list;
#endif
__u8 mac[LCS_MAC_LENGTH];
__u16 ip_assists_supported;
__u16 ip_assists_enabled;
__s8 lan_type;
__u32 pkt_seq;
__u16 sequence_no;
__s16 portno;
/* Some info copied from probeinfo */
u8 device_forced;
u8 max_port_no;
u8 hint_port_no;
s16 port_protocol_no;
} __attribute__ ((aligned(8)));

View file

@ -1415,7 +1415,6 @@ int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
bool *vlan_offload_disabled);
void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
struct _rule_hw *eth_header);
int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);

View file

@ -54,7 +54,6 @@
#include <linux/mlx5/doorbell.h>
#include <linux/mlx5/eq.h>
#include <linux/timecounter.h>
#include <linux/ptp_clock_kernel.h>
#include <net/devlink.h>
#define MLX5_ADEV_NAME "mlx5_core"
@ -679,33 +678,8 @@ struct mlx5_rsvd_gids {
struct ida ida;
};
#define MAX_PIN_NUM 8
struct mlx5_pps {
u8 pin_caps[MAX_PIN_NUM];
struct work_struct out_work;
u64 start[MAX_PIN_NUM];
u8 enabled;
u64 min_npps_period;
u64 min_out_pulse_duration_ns;
};
struct mlx5_timer {
struct cyclecounter cycles;
struct timecounter tc;
u32 nominal_c_mult;
unsigned long overflow_period;
};
struct mlx5_clock {
struct mlx5_nb pps_nb;
seqlock_t lock;
struct hwtstamp_config hwtstamp_config;
struct ptp_clock *ptp;
struct ptp_clock_info ptp_info;
struct mlx5_pps pps_info;
struct mlx5_timer timer;
};
struct mlx5_clock;
struct mlx5_clock_dev_state;
struct mlx5_dm;
struct mlx5_fw_tracer;
struct mlx5_vxlan;
@ -789,7 +763,8 @@ struct mlx5_core_dev {
#ifdef CONFIG_MLX5_FPGA
struct mlx5_fpga_device *fpga;
#endif
struct mlx5_clock clock;
struct mlx5_clock *clock;
struct mlx5_clock_dev_state *clock_state;
struct mlx5_ib_clock_info *clock_info;
struct mlx5_fw_tracer *tracer;
struct mlx5_rsc_dump *rsc_dump;

View file

@ -115,9 +115,12 @@ enum mlx5e_ext_link_mode {
MLX5E_100GAUI_1_100GBASE_CR_KR = 11,
MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12,
MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13,
MLX5E_200GAUI_1_200GBASE_CR1_KR1 = 14,
MLX5E_400GAUI_8_400GBASE_CR8 = 15,
MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16,
MLX5E_400GAUI_2_400GBASE_CR2_KR2 = 17,
MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19,
MLX5E_800GAUI_4_800GBASE_CR4_KR4 = 20,
MLX5E_EXT_LINK_MODES_NUMBER,
};

View file

@ -658,6 +658,7 @@ struct netdev_queue {
struct Qdisc __rcu *qdisc_sleeping;
#ifdef CONFIG_SYSFS
struct kobject kobj;
const struct attribute_group **groups;
#endif
unsigned long tx_maxrate;
/*

View file

@ -43,6 +43,7 @@ extern void rtnl_lock(void);
extern void rtnl_unlock(void);
extern int rtnl_trylock(void);
extern int rtnl_is_locked(void);
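/* Like rtnl_lock(), but gives up and returns an error if the task is
* interrupted by a signal.
*/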
extern int rtnl_lock_interruptible(void);
extern int rtnl_lock_killable(void);
extern bool refcount_dec_and_rtnl_lock(refcount_t *r);

View file

@ -16,6 +16,7 @@ struct netdev_rx_queue {
struct rps_dev_flow_table __rcu *rps_flow_table;
#endif
struct kobject kobj;
const struct attribute_group **groups;
struct net_device *dev;
netdevice_tracker dev_tracker;

View file

@ -24,11 +24,20 @@ struct net_iov {
unsigned long __unused_padding;
unsigned long pp_magic;
struct page_pool *pp;
struct dmabuf_genpool_chunk_owner *owner;
struct net_iov_area *owner;
unsigned long dma_addr;
atomic_long_t pp_ref_count;
};
struct net_iov_area {
/* Array of net_iovs for this area. */
struct net_iov *niovs;
size_t num_niovs;
/* Offset into the dma-buf where this chunk starts. */
unsigned long base_virtual;
};
/* These fields in struct page are used by the page_pool and net stack:
*
* struct {
@ -54,6 +63,16 @@ NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
#undef NET_IOV_ASSERT_OFFSET
static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov)
{
return niov->owner;
}
static inline unsigned int net_iov_idx(const struct net_iov *niov)
{
return niov - net_iov_owner(niov)->niovs;
}
/* netmem */
/**

View file

@ -0,0 +1,45 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NET_PAGE_POOL_MEMORY_PROVIDER_H
#define _NET_PAGE_POOL_MEMORY_PROVIDER_H
#include <net/netmem.h>
#include <net/page_pool/types.h>
struct netdev_rx_queue;
struct sk_buff;
struct memory_provider_ops {
netmem_ref (*alloc_netmems)(struct page_pool *pool, gfp_t gfp);
bool (*release_netmem)(struct page_pool *pool, netmem_ref netmem);
int (*init)(struct page_pool *pool);
void (*destroy)(struct page_pool *pool);
int (*nl_fill)(void *mp_priv, struct sk_buff *rsp,
struct netdev_rx_queue *rxq);
void (*uninstall)(void *mp_priv, struct netdev_rx_queue *rxq);
};
bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr);
void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov);
void net_mp_niov_clear_page_pool(struct net_iov *niov);
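/* Install or remove a memory provider on an rx queue. Both paths restart
* the queue so its page pool is recreated with the new parameters.
*/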
int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
struct pp_memory_provider_params *p);
void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
struct pp_memory_provider_params *old_p);
/**
* net_mp_netmem_place_in_cache() - give a netmem to a page pool
* @pool: the page pool to place the netmem into
* @netmem: netmem to give
*
* Push an accounted netmem into the page pool's allocation cache. The caller
* must ensure that there is space in the cache. It should only be called off
* the mp_ops->alloc_netmems() path.
*/
static inline void net_mp_netmem_place_in_cache(struct page_pool *pool,
netmem_ref netmem)
{
pool->alloc.cache[pool->alloc.count++] = netmem;
}
#endif
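A minimal sketch of how a provider is expected to fill this ops table; the my_mp_* names below are hypothetical placeholders and not part of this merge (the dmabuf provider later in this diff wires up the same hooks with its mp_dmabuf_devmem_* functions):

#include <net/page_pool/memory_provider.h>

/* Placeholder callbacks -- a real provider supplies these. */
static netmem_ref my_mp_alloc_netmems(struct page_pool *pool, gfp_t gfp);
static bool my_mp_release_netmem(struct page_pool *pool, netmem_ref netmem);
static int my_mp_init(struct page_pool *pool);
static void my_mp_destroy(struct page_pool *pool);
static int my_mp_nl_fill(void *mp_priv, struct sk_buff *rsp,
struct netdev_rx_queue *rxq);
static void my_mp_uninstall(void *mp_priv, struct netdev_rx_queue *rxq);

static const struct memory_provider_ops my_mp_ops = {
.init = my_mp_init,
.destroy = my_mp_destroy,
.alloc_netmems = my_mp_alloc_netmems,
.release_netmem = my_mp_release_netmem,
.nl_fill = my_mp_nl_fill,
.uninstall = my_mp_uninstall,
};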

View file

@ -152,8 +152,11 @@ struct page_pool_stats {
*/
#define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long))
struct memory_provider_ops;
struct pp_memory_provider_params {
void *mp_priv;
const struct memory_provider_ops *mp_ops;
};
struct page_pool {
@ -216,6 +219,7 @@ struct page_pool {
struct ptr_ring ring;
void *mp_priv;
const struct memory_provider_ops *mp_ops;
#ifdef CONFIG_PAGE_POOL_STATS
/* recycle stats are per-cpu to avoid locking */

View file

@ -2059,6 +2059,24 @@ enum ethtool_link_mode_bit_indices {
ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100,
ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101,
ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102,
ETHTOOL_LINK_MODE_200000baseCR_Full_BIT = 103,
ETHTOOL_LINK_MODE_200000baseKR_Full_BIT = 104,
ETHTOOL_LINK_MODE_200000baseDR_Full_BIT = 105,
ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT = 106,
ETHTOOL_LINK_MODE_200000baseSR_Full_BIT = 107,
ETHTOOL_LINK_MODE_200000baseVR_Full_BIT = 108,
ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT = 109,
ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT = 110,
ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT = 111,
ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT = 112,
ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT = 113,
ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT = 114,
ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT = 115,
ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT = 116,
ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT = 117,
ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT = 118,
ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT = 119,
ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT = 120,
/* must be last entry */
__ETHTOOL_LINK_MODE_MASK_NBITS

View file

@ -86,6 +86,11 @@ enum {
NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1)
};
enum {
__NETDEV_A_IO_URING_PROVIDER_INFO_MAX,
NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1)
};
enum {
NETDEV_A_PAGE_POOL_ID = 1,
NETDEV_A_PAGE_POOL_IFINDEX,
@ -94,6 +99,7 @@ enum {
NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
NETDEV_A_PAGE_POOL_DETACH_TIME,
NETDEV_A_PAGE_POOL_DMABUF,
NETDEV_A_PAGE_POOL_IO_URING,
__NETDEV_A_PAGE_POOL_MAX,
NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@ -136,6 +142,7 @@ enum {
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
NETDEV_A_QUEUE_IO_URING,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)

View file

@ -1040,7 +1040,7 @@ static int br_mdb_add_group(const struct br_mdb_config *cfg,
/* host join */
if (!port) {
if (mp->host_joined) {
if (mp->host_joined && !(cfg->nlflags & NLM_F_REPLACE)) {
NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host");
return -EEXIST;
}

View file

@ -159,6 +159,7 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/types.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/memory_provider.h>
#include <net/rps.h>
#include <linux/phy_link_topology.h>
@ -6119,16 +6120,18 @@ EXPORT_SYMBOL(netif_receive_skb_list);
static void flush_backlog(struct work_struct *work)
{
struct sk_buff *skb, *tmp;
struct sk_buff_head list;
struct softnet_data *sd;
__skb_queue_head_init(&list);
local_bh_disable();
sd = this_cpu_ptr(&softnet_data);
backlog_lock_irq_disable(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
dev_kfree_skb_irq(skb);
__skb_queue_tail(&list, skb);
rps_input_queue_head_incr(sd);
}
}
@ -6136,14 +6139,16 @@ static void flush_backlog(struct work_struct *work)
local_lock_nested_bh(&softnet_data.process_queue_bh_lock);
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->process_queue);
kfree_skb(skb);
__skb_queue_tail(&list, skb);
rps_input_queue_head_incr(sd);
}
}
local_unlock_nested_bh(&softnet_data.process_queue_bh_lock);
local_bh_enable();
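/* Free the skbs unlinked above outside of the IRQ- and BH-disabled
* sections.
*/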
__skb_queue_purge_reason(&list, SKB_DROP_REASON_DEV_READY);
}
static bool flush_required(int cpu)
@ -7071,6 +7076,9 @@ void __netif_napi_del_locked(struct napi_struct *napi)
if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
return;
/* Make sure NAPI is disabled (or was never enabled). */
WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state));
if (napi->config) {
napi->index = -1;
napi->config = NULL;
@ -11738,6 +11746,19 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
}
EXPORT_SYMBOL(unregister_netdevice_queue);
static void dev_memory_provider_uninstall(struct net_device *dev)
{
unsigned int i;
for (i = 0; i < dev->real_num_rx_queues; i++) {
struct netdev_rx_queue *rxq = &dev->_rx[i];
struct pp_memory_provider_params *p = &rxq->mp_params;
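/* Give the provider a chance to tear down its per-queue state. */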
if (p->mp_ops && p->mp_ops->uninstall)
p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq);
}
}
void unregister_netdevice_many_notify(struct list_head *head,
u32 portid, const struct nlmsghdr *nlh)
{
@ -11792,7 +11813,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_tcx_uninstall(dev);
dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);
dev_dmabuf_uninstall(dev);
dev_memory_provider_uninstall(dev);
netdev_offload_xstats_disable_all(dev);

View file

@ -16,6 +16,7 @@
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/memory_provider.h>
#include <trace/events/page_pool.h>
#include "devmem.h"
@ -27,20 +28,28 @@
/* Protected by rtnl_lock() */
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
static const struct memory_provider_ops dmabuf_devmem_ops;
bool net_is_devmem_iov(struct net_iov *niov)
{
return niov->pp->mp_ops == &dmabuf_devmem_ops;
}
static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
struct gen_pool_chunk *chunk,
void *not_used)
{
struct dmabuf_genpool_chunk_owner *owner = chunk->owner;
kvfree(owner->niovs);
kvfree(owner->area.niovs);
kfree(owner);
}
static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
{
struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
struct dmabuf_genpool_chunk_owner *owner;
owner = net_devmem_iov_to_chunk_owner(niov);
return owner->base_dma_addr +
((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
}
@ -83,7 +92,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
offset = dma_addr - owner->base_dma_addr;
index = offset / PAGE_SIZE;
niov = &owner->niovs[index];
niov = &owner->area.niovs[index];
niov->pp_magic = 0;
niov->pp = NULL;
@ -94,7 +103,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
void net_devmem_free_dmabuf(struct net_iov *niov)
{
struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov);
struct net_devmem_dmabuf_binding *binding = net_devmem_iov_binding(niov);
unsigned long dma_addr = net_devmem_get_dma_addr(niov);
if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
@ -117,6 +126,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
WARN_ON(rxq->mp_params.mp_priv != binding);
rxq->mp_params.mp_priv = NULL;
rxq->mp_params.mp_ops = NULL;
rxq_idx = get_netdev_rx_queue_index(rxq);
@ -152,7 +162,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
}
rxq = __netif_get_rx_queue(dev, rxq_idx);
if (rxq->mp_params.mp_priv) {
if (rxq->mp_params.mp_ops) {
NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
return -EEXIST;
}
@ -170,6 +180,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
return err;
rxq->mp_params.mp_priv = binding;
rxq->mp_params.mp_ops = &dmabuf_devmem_ops;
err = netdev_rx_queue_restart(dev, rxq_idx);
if (err)
@ -179,6 +190,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
err_xa_erase:
rxq->mp_params.mp_priv = NULL;
rxq->mp_params.mp_ops = NULL;
xa_erase(&binding->bound_rxqs, xa_idx);
return err;
@ -261,9 +273,9 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
goto err_free_chunks;
}
owner->base_virtual = virtual;
owner->area.base_virtual = virtual;
owner->base_dma_addr = dma_addr;
owner->num_niovs = len / PAGE_SIZE;
owner->area.num_niovs = len / PAGE_SIZE;
owner->binding = binding;
err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
@ -275,17 +287,17 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
goto err_free_chunks;
}
owner->niovs = kvmalloc_array(owner->num_niovs,
sizeof(*owner->niovs),
GFP_KERNEL);
if (!owner->niovs) {
owner->area.niovs = kvmalloc_array(owner->area.num_niovs,
sizeof(*owner->area.niovs),
GFP_KERNEL);
if (!owner->area.niovs) {
err = -ENOMEM;
goto err_free_chunks;
}
for (i = 0; i < owner->num_niovs; i++) {
niov = &owner->niovs[i];
niov->owner = owner;
for (i = 0; i < owner->area.num_niovs; i++) {
niov = &owner->area.niovs[i];
niov->owner = &owner->area;
page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
net_devmem_get_dma_addr(niov));
}
@ -313,26 +325,6 @@ err_put_dmabuf:
return ERR_PTR(err);
}
void dev_dmabuf_uninstall(struct net_device *dev)
{
struct net_devmem_dmabuf_binding *binding;
struct netdev_rx_queue *rxq;
unsigned long xa_idx;
unsigned int i;
for (i = 0; i < dev->real_num_rx_queues; i++) {
binding = dev->_rx[i].mp_params.mp_priv;
if (!binding)
continue;
xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
if (rxq == &dev->_rx[i]) {
xa_erase(&binding->bound_rxqs, xa_idx);
break;
}
}
}
/*** "Dmabuf devmem memory provider" ***/
int mp_dmabuf_devmem_init(struct page_pool *pool)
@ -398,3 +390,36 @@ bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
/* We don't want the page pool put_page()ing our net_iovs. */
return false;
}
static int mp_dmabuf_devmem_nl_fill(void *mp_priv, struct sk_buff *rsp,
struct netdev_rx_queue *rxq)
{
const struct net_devmem_dmabuf_binding *binding = mp_priv;
int type = rxq ? NETDEV_A_QUEUE_DMABUF : NETDEV_A_PAGE_POOL_DMABUF;
return nla_put_u32(rsp, type, binding->id);
}
static void mp_dmabuf_devmem_uninstall(void *mp_priv,
struct netdev_rx_queue *rxq)
{
struct net_devmem_dmabuf_binding *binding = mp_priv;
struct netdev_rx_queue *bound_rxq;
unsigned long xa_idx;
xa_for_each(&binding->bound_rxqs, xa_idx, bound_rxq) {
if (bound_rxq == rxq) {
xa_erase(&binding->bound_rxqs, xa_idx);
break;
}
}
}
static const struct memory_provider_ops dmabuf_devmem_ops = {
.init = mp_dmabuf_devmem_init,
.destroy = mp_dmabuf_devmem_destroy,
.alloc_netmems = mp_dmabuf_devmem_alloc_netmems,
.release_netmem = mp_dmabuf_devmem_release_page,
.nl_fill = mp_dmabuf_devmem_nl_fill,
.uninstall = mp_dmabuf_devmem_uninstall,
};

View file

@ -10,6 +10,8 @@
#ifndef _NET_DEVMEM_H
#define _NET_DEVMEM_H
#include <net/netmem.h>
struct netlink_ext_ack;
struct net_devmem_dmabuf_binding {
@ -51,17 +53,11 @@ struct net_devmem_dmabuf_binding {
* allocations from this chunk.
*/
struct dmabuf_genpool_chunk_owner {
/* Offset into the dma-buf where this chunk starts. */
unsigned long base_virtual;
struct net_iov_area area;
struct net_devmem_dmabuf_binding *binding;
/* dma_addr of the start of the chunk. */
dma_addr_t base_dma_addr;
/* Array of net_iovs for this chunk. */
struct net_iov *niovs;
size_t num_niovs;
struct net_devmem_dmabuf_binding *binding;
};
void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding);
@ -72,38 +68,34 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding);
int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
struct net_devmem_dmabuf_binding *binding,
struct netlink_ext_ack *extack);
void dev_dmabuf_uninstall(struct net_device *dev);
static inline struct dmabuf_genpool_chunk_owner *
net_iov_owner(const struct net_iov *niov)
net_devmem_iov_to_chunk_owner(const struct net_iov *niov)
{
return niov->owner;
}
struct net_iov_area *owner = net_iov_owner(niov);
static inline unsigned int net_iov_idx(const struct net_iov *niov)
{
return niov - net_iov_owner(niov)->niovs;
return container_of(owner, struct dmabuf_genpool_chunk_owner, area);
}
static inline struct net_devmem_dmabuf_binding *
net_iov_binding(const struct net_iov *niov)
net_devmem_iov_binding(const struct net_iov *niov)
{
return net_iov_owner(niov)->binding;
return net_devmem_iov_to_chunk_owner(niov)->binding;
}
static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov)
{
return net_devmem_iov_binding(niov)->id;
}
static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
{
struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
struct net_iov_area *owner = net_iov_owner(niov);
return owner->base_virtual +
((unsigned long)net_iov_idx(niov) << PAGE_SHIFT);
}
static inline u32 net_iov_binding_id(const struct net_iov *niov)
{
return net_iov_owner(niov)->binding->id;
}
static inline void
net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding)
{
@ -123,6 +115,8 @@ struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding);
void net_devmem_free_dmabuf(struct net_iov *ppiov);
bool net_is_devmem_iov(struct net_iov *niov);
#else
struct net_devmem_dmabuf_binding;
@ -152,10 +146,6 @@ net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
return -EOPNOTSUPP;
}
static inline void dev_dmabuf_uninstall(struct net_device *dev)
{
}
static inline struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
@ -171,10 +161,15 @@ static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
return 0;
}
static inline u32 net_iov_binding_id(const struct net_iov *niov)
static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov)
{
return 0;
}
static inline bool net_is_devmem_iov(struct net_iov *niov)
{
return false;
}
#endif
#endif /* _NET_DEVMEM_H */

View file

@ -832,12 +832,10 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
return -ENOENT;
}
static void neigh_parms_destroy(struct neigh_parms *parms);
static inline void neigh_parms_put(struct neigh_parms *parms)
{
if (refcount_dec_and_test(&parms->refcnt))
neigh_parms_destroy(parms);
kfree(parms);
}
/*
@ -1713,11 +1711,6 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
}
EXPORT_SYMBOL(neigh_parms_release);
static void neigh_parms_destroy(struct neigh_parms *parms)
{
kfree(parms);
}
static struct lock_class_key neigh_table_proxy_queue_class;
static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

View file

@ -42,6 +42,87 @@ static inline int dev_isalive(const struct net_device *dev)
return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
}
/* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active,
* when unregistering a net device and accessing associated sysfs files. The
* potential deadlock is as follows:
*
* CPU 0                                          CPU 1
*
* rtnl_lock                                      vfs_read
* unregister_netdevice_many                      kernfs_seq_start
*   device_del / kobject_put                       kernfs_get_active (kn->active++)
*     kernfs_drain                                  sysfs_kf_seq_show
*       wait_event(                                   rtnl_lock
*         kn->active == KN_DEACTIVATED_BIAS)            -> waits on CPU 0 to release
*           -> waits on CPU 1 to decrease kn->active        the rtnl lock.
*
* The historical fix was to use rtnl_trylock with restart_syscall to bail out
* of sysfs operations when the lock couldn't be taken. This fixed the above
* issue as it allowed CPU 1 to bail out of the ABBA situation.
*
* But it came with performance issues, as syscalls were restarted in a
* loop whenever there was contention on the rtnl lock, with huge slowdowns in
* specific scenarios (e.g. lots of virtual interfaces created and userspace
* daemons querying their attributes).
*
* The idea below is to bail out of the active kernfs_node protection
* (kn->active) while trying to take the rtnl lock.
*
* This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The
* net device is guaranteed to be alive if this returns successfully.
*/
static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr,
struct net_device *ndev)
{
struct kernfs_node *kn;
int ret = 0;
/* First, we hold a reference to the net device as the unregistration
* path might run in parallel. This will ensure the net device and the
* associated sysfs objects won't be freed while we try to take the rtnl
* lock.
*/
dev_hold(ndev);
/* sysfs_break_active_protection was introduced to allow self-removal of
* devices and their associated sysfs files by bailing out of the
* sysfs/kernfs protection. We do this here to allow the unregistration
* path to complete in parallel. The following takes a reference on the
* kobject and the kernfs_node being accessed.
*
* This works because we hold a reference on the net device and the
* unregistration path will wait for us eventually in netdev_run_todo
* (outside an rtnl lock section).
*/
kn = sysfs_break_active_protection(kobj, attr);
/* We can now try to take the rtnl lock. This can't deadlock us as the
* unregistration path is able to drain sysfs files (kernfs_node) thanks
* to the above dance.
*/
if (rtnl_lock_interruptible()) {
ret = -ERESTARTSYS;
goto unbreak;
}
/* Check that device dismantling hasn't started; otherwise, deny the
* operation.
*/
if (!dev_isalive(ndev)) {
rtnl_unlock();
ret = -ENODEV;
goto unbreak;
}
* We are now sure the device dismantle hasn't started, nor can it
* start before we exit the locking section, as we hold the rtnl lock.
* There's no need to keep the sysfs protection broken nor to hold a
* net device reference from this point on; both were only needed to
* take the rtnl lock.
*/
unbreak:
sysfs_unbreak_active_protection(kn);
dev_put(ndev);
return ret;
}
/* use same locking rules as GIF* ioctl's */
static ssize_t netdev_show(const struct device *dev,
struct device_attribute *attr, char *buf,
@ -95,14 +176,14 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
if (ret)
goto err;
if (!rtnl_trylock())
return restart_syscall();
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
goto err;
ret = (*set)(netdev, new);
if (ret == 0)
ret = len;
if (dev_isalive(netdev)) {
ret = (*set)(netdev, new);
if (ret == 0)
ret = len;
}
rtnl_unlock();
err:
return ret;
@ -220,7 +301,7 @@ static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
struct net_device *netdev = to_net_dev(dev);
/* The check is also done in change_carrier; this helps returning early
* without hitting the trylock/restart in netdev_store.
* without hitting the locking section in netdev_store.
*/
if (!netdev->netdev_ops->ndo_change_carrier)
return -EOPNOTSUPP;
@ -234,8 +315,9 @@ static ssize_t carrier_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
int ret = -EINVAL;
if (!rtnl_trylock())
return restart_syscall();
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
if (netif_running(netdev)) {
/* Synchronize carrier state with link watch,
@ -245,8 +327,8 @@ static ssize_t carrier_show(struct device *dev,
ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
}
rtnl_unlock();
rtnl_unlock();
return ret;
}
static DEVICE_ATTR_RW(carrier);
@ -258,13 +340,14 @@ static ssize_t speed_show(struct device *dev,
int ret = -EINVAL;
/* The check is also done in __ethtool_get_link_ksettings; this helps
* returning early without hitting the trylock/restart below.
* returning early without hitting the locking section below.
*/
if (!netdev->ethtool_ops->get_link_ksettings)
return ret;
if (!rtnl_trylock())
return restart_syscall();
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
if (netif_running(netdev)) {
struct ethtool_link_ksettings cmd;
@ -284,13 +367,14 @@ static ssize_t duplex_show(struct device *dev,
int ret = -EINVAL;
/* The check is also done in __ethtool_get_link_ksettings; this helps
* returning early without hitting the trylock/restart below.
* returning early without hitting the locking section below.
*/
if (!netdev->ethtool_ops->get_link_ksettings)
return ret;
if (!rtnl_trylock())
return restart_syscall();
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
if (netif_running(netdev)) {
struct ethtool_link_ksettings cmd;
@ -490,16 +574,15 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
if (len > 0 && buf[len - 1] == '\n')
--count;
if (!rtnl_trylock())
return restart_syscall();
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
if (dev_isalive(netdev)) {
ret = dev_set_alias(netdev, buf, count);
if (ret < 0)
goto err;
ret = len;
netdev_state_change(netdev);
}
ret = dev_set_alias(netdev, buf, count);
if (ret < 0)
goto err;
ret = len;
netdev_state_change(netdev);
err:
rtnl_unlock();
@ -551,24 +634,23 @@ static ssize_t phys_port_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
struct netdev_phys_item_id ppid;
ssize_t ret = -EINVAL;
/* The check is also done in dev_get_phys_port_id; this helps returning
* early without hitting the trylock/restart below.
* early without hitting the locking section below.
*/
if (!netdev->netdev_ops->ndo_get_phys_port_id)
return -EOPNOTSUPP;
if (!rtnl_trylock())
return restart_syscall();
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
if (dev_isalive(netdev)) {
struct netdev_phys_item_id ppid;
ret = dev_get_phys_port_id(netdev, &ppid);
if (!ret)
ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
ret = dev_get_phys_port_id(netdev, &ppid);
if (!ret)
ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
}
rtnl_unlock();
return ret;
@ -580,24 +662,23 @@ static ssize_t phys_port_name_show(struct device *dev,
{
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
char name[IFNAMSIZ];
/* The checks are also done in dev_get_phys_port_name; this helps
* returning early without hitting the trylock/restart below.
* returning early without hitting the locking section below.
*/
if (!netdev->netdev_ops->ndo_get_phys_port_name &&
!netdev->devlink_port)
return -EOPNOTSUPP;
if (!rtnl_trylock())
return restart_syscall();
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
if (dev_isalive(netdev)) {
char name[IFNAMSIZ];
ret = dev_get_phys_port_name(netdev, name, sizeof(name));
if (!ret)
ret = sysfs_emit(buf, "%s\n", name);
ret = dev_get_phys_port_name(netdev, name, sizeof(name));
if (!ret)
ret = sysfs_emit(buf, "%s\n", name);
}
rtnl_unlock();
return ret;
@ -608,26 +689,25 @@ static ssize_t phys_switch_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
struct netdev_phys_item_id ppid = { };
ssize_t ret = -EINVAL;
/* The checks are also done in dev_get_phys_port_name; this helps
* returning early without hitting the trylock/restart below. This works
* returning early without hitting the locking section below. This works
* because recurse is false when calling dev_get_port_parent_id.
*/
if (!netdev->netdev_ops->ndo_get_port_parent_id &&
!netdev->devlink_port)
return -EOPNOTSUPP;
if (!rtnl_trylock())
return restart_syscall();
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
if (dev_isalive(netdev)) {
struct netdev_phys_item_id ppid = { };
ret = dev_get_port_parent_id(netdev, &ppid, false);
if (!ret)
ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
ret = dev_get_port_parent_id(netdev, &ppid, false);
if (!ret)
ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
}
rtnl_unlock();
return ret;
@ -1108,7 +1188,6 @@ static void rx_queue_get_ownership(const struct kobject *kobj,
static const struct kobj_type rx_queue_ktype = {
.sysfs_ops = &rx_queue_sysfs_ops,
.release = rx_queue_release,
.default_groups = rx_queue_default_groups,
.namespace = rx_queue_namespace,
.get_ownership = rx_queue_get_ownership,
};
@ -1131,6 +1210,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
struct kobject *kobj = &queue->kobj;
int error = 0;
/* Rx queues are cleared in rx_queue_release to allow later
* re-registration. This is triggered when their kobj refcount is
* dropped.
*
* If a queue is removed while both a read (or write) operation and the
* re-addition of the same queue are pending (both waiting on rtnl_lock),
* the re-addition may execute before the read, so the initial removal
* never happens (the queue's kobj refcount won't drop enough because of
* the pending read). In such a rare case, return to allow the removal
* operation to complete.
*/
if (unlikely(kobj->state_initialized)) {
netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed");
return -EAGAIN;
}
/* Kobject_put later will trigger rx_queue_release call which
* decreases dev refcount: Take that reference here
*/
@ -1142,20 +1237,27 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
if (error)
goto err;
queue->groups = rx_queue_default_groups;
error = sysfs_create_groups(kobj, queue->groups);
if (error)
goto err;
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
if (error)
goto err;
goto err_default_groups;
}
error = rx_queue_default_mask(dev, queue);
if (error)
goto err;
goto err_default_groups;
kobject_uevent(kobj, KOBJ_ADD);
return error;
err_default_groups:
sysfs_remove_groups(kobj, queue->groups);
err:
kobject_put(kobj);
return error;
@ -1200,12 +1302,14 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
}
while (--i >= new_num) {
struct kobject *kobj = &dev->_rx[i].kobj;
struct netdev_rx_queue *queue = &dev->_rx[i];
struct kobject *kobj = &queue->kobj;
if (!refcount_read(&dev_net(dev)->ns.count))
kobj->uevent_suppress = 1;
if (dev->sysfs_rx_queue_group)
sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
sysfs_remove_groups(kobj, queue->groups);
kobject_put(kobj);
}
@ -1244,9 +1348,11 @@ static int net_rx_queue_change_owner(struct net_device *dev, int num,
*/
struct netdev_queue_attribute {
struct attribute attr;
ssize_t (*show)(struct netdev_queue *queue, char *buf);
ssize_t (*store)(struct netdev_queue *queue,
const char *buf, size_t len);
ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, char *buf);
ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, const char *buf,
size_t len);
};
#define to_netdev_queue_attr(_attr) \
container_of(_attr, struct netdev_queue_attribute, attr)
@ -1263,7 +1369,7 @@ static ssize_t netdev_queue_attr_show(struct kobject *kobj,
if (!attribute->show)
return -EIO;
return attribute->show(queue, buf);
return attribute->show(kobj, attr, queue, buf);
}
static ssize_t netdev_queue_attr_store(struct kobject *kobj,
@ -1277,7 +1383,7 @@ static ssize_t netdev_queue_attr_store(struct kobject *kobj,
if (!attribute->store)
return -EIO;
return attribute->store(queue, buf, count);
return attribute->store(kobj, attr, queue, buf, count);
}
static const struct sysfs_ops netdev_queue_sysfs_ops = {
@ -1285,7 +1391,8 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
.store = netdev_queue_attr_store,
};
static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, char *buf)
{
unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout);
@ -1303,18 +1410,18 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
return i;
}
static ssize_t traffic_class_show(struct netdev_queue *queue,
char *buf)
static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
int num_tc, tc;
int index;
int num_tc, tc, index, ret;
if (!netif_is_multiqueue(dev))
return -ENOENT;
if (!rtnl_trylock())
return restart_syscall();
ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
if (ret)
return ret;
index = get_netdev_queue_index(queue);
@ -1341,24 +1448,25 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
}
#ifdef CONFIG_XPS
static ssize_t tx_maxrate_show(struct netdev_queue *queue,
char *buf)
static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, char *buf)
{
return sysfs_emit(buf, "%lu\n", queue->tx_maxrate);
}
static ssize_t tx_maxrate_store(struct netdev_queue *queue,
const char *buf, size_t len)
static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, const char *buf,
size_t len)
{
struct net_device *dev = queue->dev;
int err, index = get_netdev_queue_index(queue);
struct net_device *dev = queue->dev;
u32 rate = 0;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
/* The check is also done later; this helps returning early without
* hitting the trylock/restart below.
* hitting the locking section below.
*/
if (!dev->netdev_ops->ndo_set_tx_maxrate)
return -EOPNOTSUPP;
@ -1367,18 +1475,21 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
if (err < 0)
return err;
if (!rtnl_trylock())
return restart_syscall();
err = sysfs_rtnl_lock(kobj, attr, dev);
if (err)
return err;
err = -EOPNOTSUPP;
if (dev->netdev_ops->ndo_set_tx_maxrate)
err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
rtnl_unlock();
if (!err) {
queue->tx_maxrate = rate;
rtnl_unlock();
return len;
}
rtnl_unlock();
return err;
}
@ -1422,16 +1533,17 @@ static ssize_t bql_set(const char *buf, const size_t count,
return count;
}
static ssize_t bql_show_hold_time(struct netdev_queue *queue,
char *buf)
static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
}
static ssize_t bql_set_hold_time(struct netdev_queue *queue,
const char *buf, size_t len)
static ssize_t bql_set_hold_time(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, const char *buf,
size_t len)
{
struct dql *dql = &queue->dql;
unsigned int value;
@ -1450,15 +1562,17 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
= __ATTR(hold_time, 0644,
bql_show_hold_time, bql_set_hold_time);
static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf)
static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
}
static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
const char *buf, size_t len)
static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, const char *buf,
size_t len)
{
struct dql *dql = &queue->dql;
unsigned int value;
@ -1484,13 +1598,15 @@ static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
__ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);
static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf)
static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, char *buf)
{
return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
}
static ssize_t bql_set_stall_max(struct netdev_queue *queue,
const char *buf, size_t len)
static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, const char *buf,
size_t len)
{
WRITE_ONCE(queue->dql.stall_max, 0);
return len;
@ -1499,7 +1615,8 @@ static ssize_t bql_set_stall_max(struct netdev_queue *queue,
static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
__ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);
static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
static ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
@ -1509,8 +1626,8 @@ static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
__ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);
static ssize_t bql_show_inflight(struct netdev_queue *queue,
char *buf)
static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
@ -1521,13 +1638,16 @@ static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
__ATTR(inflight, 0444, bql_show_inflight, NULL);
#define BQL_ATTR(NAME, FIELD) \
static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
char *buf) \
static ssize_t bql_show_ ## NAME(struct kobject *kobj, \
struct attribute *attr, \
struct netdev_queue *queue, char *buf) \
{ \
return bql_show(buf, queue->dql.FIELD); \
} \
\
static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
static ssize_t bql_set_ ## NAME(struct kobject *kobj, \
struct attribute *attr, \
struct netdev_queue *queue, \
const char *buf, size_t len) \
{ \
return bql_set(buf, len, &queue->dql.FIELD); \
@ -1613,19 +1733,21 @@ out_no_maps:
return len < PAGE_SIZE ? len : -EINVAL;
}
static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
unsigned int index;
int len, tc;
int len, tc, ret;
if (!netif_is_multiqueue(dev))
return -ENOENT;
index = get_netdev_queue_index(queue);
if (!rtnl_trylock())
return restart_syscall();
ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
if (ret)
return ret;
/* If queue belongs to subordinate dev use its map */
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
@ -1636,18 +1758,21 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
return -EINVAL;
}
/* Make sure the subordinate device can't be freed */
get_device(&dev->dev);
/* Increase the net device refcnt to make sure it won't be freed while
* xps_queue_show is running.
*/
dev_hold(dev);
rtnl_unlock();
len = xps_queue_show(dev, index, tc, buf, XPS_CPUS);
put_device(&dev->dev);
dev_put(dev);
return len;
}
static ssize_t xps_cpus_store(struct netdev_queue *queue,
const char *buf, size_t len)
static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, const char *buf,
size_t len)
{
struct net_device *dev = queue->dev;
unsigned int index;
@ -1671,9 +1796,10 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
return err;
}
if (!rtnl_trylock()) {
err = sysfs_rtnl_lock(kobj, attr, dev);
if (err) {
free_cpumask_var(mask);
return restart_syscall();
return err;
}
err = netif_set_xps_queue(dev, mask, index);
@ -1687,26 +1813,34 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
= __ATTR_RW(xps_cpus);
static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
unsigned int index;
int tc;
int tc, ret;
index = get_netdev_queue_index(queue);
if (!rtnl_trylock())
return restart_syscall();
ret = sysfs_rtnl_lock(kobj, attr, dev);
if (ret)
return ret;
tc = netdev_txq_to_tc(dev, index);
rtnl_unlock();
if (tc < 0)
return -EINVAL;
return xps_queue_show(dev, index, tc, buf, XPS_RXQS);
/* Increase the net device refcnt to make sure it won't be freed while
* xps_queue_show is running.
*/
dev_hold(dev);
rtnl_unlock();
ret = tc >= 0 ? xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL;
dev_put(dev);
return ret;
}
static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr,
struct netdev_queue *queue, const char *buf,
size_t len)
{
struct net_device *dev = queue->dev;
@ -1730,9 +1864,10 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
return err;
}
if (!rtnl_trylock()) {
err = sysfs_rtnl_lock(kobj, attr, dev);
if (err) {
bitmap_free(mask);
return restart_syscall();
return err;
}
cpus_read_lock();
@ -1792,7 +1927,6 @@ static void netdev_queue_get_ownership(const struct kobject *kobj,
static const struct kobj_type netdev_queue_ktype = {
.sysfs_ops = &netdev_queue_sysfs_ops,
.release = netdev_queue_release,
.default_groups = netdev_queue_default_groups,
.namespace = netdev_queue_namespace,
.get_ownership = netdev_queue_get_ownership,
};
@ -1811,6 +1945,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
struct kobject *kobj = &queue->kobj;
int error = 0;
/* Tx queues are cleared in netdev_queue_release to allow later
* re-registration. This is triggered when their kobj refcount is
* dropped.
*
* If a queue is removed while both a read (or write) operation and the
* re-addition of the same queue are pending (waiting on rtnl_lock), the
* re-addition may execute before the read, preventing the initial
* removal from ever completing (the queue's kobj refcount won't drop
* enough because of the pending read). In such a rare case, return to
* allow the removal operation to complete.
*/
if (unlikely(kobj->state_initialized)) {
netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed");
return -EAGAIN;
}
/* Kobject_put later will trigger netdev_queue_release call
* which decreases dev refcount: Take that reference here
*/
@ -1822,15 +1972,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
if (error)
goto err;
queue->groups = netdev_queue_default_groups;
error = sysfs_create_groups(kobj, queue->groups);
if (error)
goto err;
if (netdev_uses_bql(dev)) {
error = sysfs_create_group(kobj, &dql_group);
if (error)
goto err;
goto err_default_groups;
}
kobject_uevent(kobj, KOBJ_ADD);
return 0;
err_default_groups:
sysfs_remove_groups(kobj, queue->groups);
err:
kobject_put(kobj);
return error;
@ -1885,6 +2042,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
if (netdev_uses_bql(dev))
sysfs_remove_group(&queue->kobj, &dql_group);
sysfs_remove_groups(&queue->kobj, queue->groups);
kobject_put(&queue->kobj);
}

View file

@ -10,6 +10,7 @@
#include <net/sock.h>
#include <net/xdp.h>
#include <net/xdp_sock.h>
#include <net/page_pool/memory_provider.h>
#include "dev.h"
#include "devmem.h"
@ -368,7 +369,7 @@ static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
u32 q_idx, u32 q_type, const struct genl_info *info)
{
struct net_devmem_dmabuf_binding *binding;
struct pp_memory_provider_params *params;
struct netdev_rx_queue *rxq;
struct netdev_queue *txq;
void *hdr;
@ -385,15 +386,15 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
switch (q_type) {
case NETDEV_QUEUE_TYPE_RX:
rxq = __netif_get_rx_queue(netdev, q_idx);
if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID,
rxq->napi->napi_id))
goto nla_put_failure;
binding = rxq->mp_params.mp_priv;
if (binding &&
nla_put_u32(rsp, NETDEV_A_QUEUE_DMABUF, binding->id))
params = &rxq->mp_params;
if (params->mp_ops &&
params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
goto nla_put_failure;
break;
case NETDEV_QUEUE_TYPE_TX:
txq = netdev_get_tx_queue(netdev, q_idx);

View file

@ -3,6 +3,7 @@
#include <linux/netdevice.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/memory_provider.h>
#include "page_pool_priv.h"
@ -80,3 +81,71 @@ err_free_new_mem:
return err;
}
EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
struct pp_memory_provider_params *p)
{
struct netdev_rx_queue *rxq;
int ret;
if (ifq_idx >= dev->real_num_rx_queues)
return -EINVAL;
ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues);
rxq = __netif_get_rx_queue(dev, ifq_idx);
if (rxq->mp_params.mp_ops)
return -EEXIST;
rxq->mp_params = *p;
ret = netdev_rx_queue_restart(dev, ifq_idx);
if (ret) {
rxq->mp_params.mp_ops = NULL;
rxq->mp_params.mp_priv = NULL;
}
return ret;
}
int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
struct pp_memory_provider_params *p)
{
int ret;
rtnl_lock();
ret = __net_mp_open_rxq(dev, ifq_idx, p);
rtnl_unlock();
return ret;
}
static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
struct pp_memory_provider_params *old_p)
{
struct netdev_rx_queue *rxq;
if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
return;
rxq = __netif_get_rx_queue(dev, ifq_idx);
/* Callers holding a netdev ref may get here after we already
* went through shutdown via dev_memory_provider_uninstall().
*/
if (dev->reg_state > NETREG_REGISTERED &&
!rxq->mp_params.mp_ops)
return;
if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops ||
rxq->mp_params.mp_priv != old_p->mp_priv))
return;
rxq->mp_params.mp_ops = NULL;
rxq->mp_params.mp_priv = NULL;
WARN_ON(netdev_rx_queue_restart(dev, ifq_idx));
}
void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
struct pp_memory_provider_params *old_p)
{
rtnl_lock();
__net_mp_close_rxq(dev, ifq_idx, old_p);
rtnl_unlock();
}
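
The two exported wrappers above are the intended entry points for a memory provider to attach to or detach from an RX queue: they take rtnl_lock, refuse to stack on a queue that already has a provider, and restart the queue so the freshly created page_pool picks up the new parameters. The following is a minimal sketch of a hypothetical caller, assuming <net/page_pool/memory_provider.h>; my_provider_attach(), my_provider_detach(), my_mp_ops and the context pointer are illustrative placeholders, not part of this patch (see the ops-table sketch in the page_pool.c section below).

/* Hypothetical provider glue; 'my_mp_ops' is a const ops table defined
 * elsewhere. All my_* names are illustrative only.
 */
static int my_provider_attach(struct net_device *dev, unsigned int qid,
			      void *ctx)
{
	struct pp_memory_provider_params p = {
		.mp_ops	 = &my_mp_ops,
		.mp_priv = ctx,
	};

	/* Takes rtnl_lock, returns -EINVAL for an out-of-range queue,
	 * -EEXIST if the queue already has a provider, and restarts the
	 * queue so its new page_pool sees 'p'.
	 */
	return net_mp_open_rxq(dev, qid, &p);
}

static void my_provider_detach(struct net_device *dev, unsigned int qid,
			       void *ctx)
{
	struct pp_memory_provider_params p = {
		.mp_ops	 = &my_mp_ops,
		.mp_priv = ctx,
	};

	/* Clears the queue's params only if they still match ours, then
	 * restarts the queue; tolerates being called after the device has
	 * already been unregistered.
	 */
	net_mp_close_rxq(dev, qid, &p);
}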

View file

@ -13,6 +13,7 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/memory_provider.h>
#include <net/xdp.h>
#include <linux/dma-direction.h>
@ -285,13 +286,19 @@ static int page_pool_init(struct page_pool *pool,
rxq = __netif_get_rx_queue(pool->slow.netdev,
pool->slow.queue_idx);
pool->mp_priv = rxq->mp_params.mp_priv;
pool->mp_ops = rxq->mp_params.mp_ops;
}
if (pool->mp_priv) {
if (pool->mp_ops) {
if (!pool->dma_map || !pool->dma_sync)
return -EOPNOTSUPP;
err = mp_dmabuf_devmem_init(pool);
if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) {
err = -EFAULT;
goto free_ptr_ring;
}
err = pool->mp_ops->init(pool);
if (err) {
pr_warn("%s() mem-provider init failed %d\n", __func__,
err);
@ -587,8 +594,8 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp)
return netmem;
/* Slow-path: cache empty, do real allocation */
if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp);
if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
netmem = pool->mp_ops->alloc_netmems(pool, gfp);
else
netmem = __page_pool_alloc_pages_slow(pool, gfp);
return netmem;
@ -679,8 +686,8 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
bool put;
put = true;
if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
put = mp_dmabuf_devmem_release_page(pool, netmem);
if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
put = pool->mp_ops->release_netmem(pool, netmem);
else
__page_pool_release_page_dma(pool, netmem);
@ -1048,8 +1055,8 @@ static void __page_pool_destroy(struct page_pool *pool)
page_pool_unlist(pool);
page_pool_uninit(pool);
if (pool->mp_priv) {
mp_dmabuf_devmem_destroy(pool);
if (pool->mp_ops) {
pool->mp_ops->destroy(pool);
static_branch_dec(&page_pool_mem_providers);
}
@ -1190,3 +1197,31 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
}
}
EXPORT_SYMBOL(page_pool_update_nid);
bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr)
{
return page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), addr);
}
/* Associate a niov with a page pool. Must be followed by a matching
* net_mp_niov_clear_page_pool().
*/
void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov)
{
netmem_ref netmem = net_iov_to_netmem(niov);
page_pool_set_pp_info(pool, netmem);
pool->pages_state_hold_cnt++;
trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
}
/* Disassociate a niov from a page pool. Should only be used in the
* ->release_netmem() path.
*/
void net_mp_niov_clear_page_pool(struct net_iov *niov)
{
netmem_ref netmem = net_iov_to_netmem(niov);
page_pool_clear_pp_info(netmem);
}
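
With the devmem-specific calls replaced by indirection through pool->mp_ops, a provider supplies one const ops table (it must live in kernel rodata, or page_pool_init() rejects it) and uses the two niov helpers above to associate and disassociate its net_iovs with the pool. Below is a rough, hypothetical shape of such a table, assuming the ops structure exported by <net/page_pool/memory_provider.h>; it is not exhaustive, and every my_* symbol, including my_take_free_niov()/my_return_free_niov() standing in for the provider's own free list, is illustrative only.

static int my_mp_init(struct page_pool *pool)
{
	/* Validate pool->mp_priv, set up caches, etc. (provider specific). */
	return 0;
}

static void my_mp_destroy(struct page_pool *pool)
{
	/* Release anything taken in ->init(). */
}

static netmem_ref my_mp_alloc_netmems(struct page_pool *pool, gfp_t gfp)
{
	struct net_iov *niov = my_take_free_niov(pool->mp_priv);	/* hypothetical */

	if (!niov)
		return 0;
	/* Pairs with net_mp_niov_clear_page_pool() in ->release_netmem(). */
	net_mp_niov_set_page_pool(pool, niov);
	return net_iov_to_netmem(niov);
}

static bool my_mp_release_netmem(struct page_pool *pool, netmem_ref netmem)
{
	struct net_iov *niov = netmem_to_net_iov(netmem);

	net_mp_niov_clear_page_pool(niov);
	my_return_free_niov(pool->mp_priv, niov);			/* hypothetical */
	return false;	/* provider handled the release itself */
}

static int my_mp_nl_fill(void *mp_priv, struct sk_buff *rsp,
			 struct netdev_rx_queue *rxq)
{
	/* Emit provider-specific attrs; rxq is NULL for page-pool dumps. */
	return 0;
}

static const struct memory_provider_ops my_mp_ops = {
	.init		= my_mp_init,
	.destroy	= my_mp_destroy,
	.alloc_netmems	= my_mp_alloc_netmems,
	.release_netmem	= my_mp_release_netmem,
	.nl_fill	= my_mp_nl_fill,
};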

View file

@ -8,9 +8,9 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/types.h>
#include <net/page_pool/memory_provider.h>
#include <net/sock.h>
#include "devmem.h"
#include "page_pool_priv.h"
#include "netdev-genl-gen.h"
@ -216,7 +216,6 @@ static int
page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
const struct genl_info *info)
{
struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
size_t inflight, refsz;
unsigned int napi_id;
void *hdr;
@ -249,7 +248,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
pool->user.detach_time))
goto err_cancel;
if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id))
if (pool->mp_ops && pool->mp_ops->nl_fill(pool->mp_priv, rsp, NULL))
goto err_cancel;
genlmsg_end(rsp, hdr);
@ -356,7 +355,7 @@ void page_pool_unlist(struct page_pool *pool)
int page_pool_check_memory_provider(struct net_device *dev,
struct netdev_rx_queue *rxq)
{
struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv;
void *binding = rxq->mp_params.mp_priv;
struct page_pool *pool;
struct hlist_node *n;

View file

@ -80,6 +80,11 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
int rtnl_lock_interruptible(void)
{
return mutex_lock_interruptible(&rtnl_mutex);
}
int rtnl_lock_killable(void)
{
return mutex_lock_killable(&rtnl_mutex);

View file

@ -213,6 +213,24 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
__DEFINE_LINK_MODE_NAME(10, T1S, Half),
__DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half),
__DEFINE_LINK_MODE_NAME(10, T1BRR, Full),
__DEFINE_LINK_MODE_NAME(200000, CR, Full),
__DEFINE_LINK_MODE_NAME(200000, KR, Full),
__DEFINE_LINK_MODE_NAME(200000, DR, Full),
__DEFINE_LINK_MODE_NAME(200000, DR_2, Full),
__DEFINE_LINK_MODE_NAME(200000, SR, Full),
__DEFINE_LINK_MODE_NAME(200000, VR, Full),
__DEFINE_LINK_MODE_NAME(400000, CR2, Full),
__DEFINE_LINK_MODE_NAME(400000, KR2, Full),
__DEFINE_LINK_MODE_NAME(400000, DR2, Full),
__DEFINE_LINK_MODE_NAME(400000, DR2_2, Full),
__DEFINE_LINK_MODE_NAME(400000, SR2, Full),
__DEFINE_LINK_MODE_NAME(400000, VR2, Full),
__DEFINE_LINK_MODE_NAME(800000, CR4, Full),
__DEFINE_LINK_MODE_NAME(800000, KR4, Full),
__DEFINE_LINK_MODE_NAME(800000, DR4, Full),
__DEFINE_LINK_MODE_NAME(800000, DR4_2, Full),
__DEFINE_LINK_MODE_NAME(800000, SR4, Full),
__DEFINE_LINK_MODE_NAME(800000, VR4, Full),
};
static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
@ -221,8 +239,11 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
#define __LINK_MODE_LANES_CR4 4
#define __LINK_MODE_LANES_CR8 8
#define __LINK_MODE_LANES_DR 1
#define __LINK_MODE_LANES_DR_2 1
#define __LINK_MODE_LANES_DR2 2
#define __LINK_MODE_LANES_DR2_2 2
#define __LINK_MODE_LANES_DR4 4
#define __LINK_MODE_LANES_DR4_2 4
#define __LINK_MODE_LANES_DR8 8
#define __LINK_MODE_LANES_KR 1
#define __LINK_MODE_LANES_KR2 2
@ -251,6 +272,9 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
#define __LINK_MODE_LANES_T1L 1
#define __LINK_MODE_LANES_T1S 1
#define __LINK_MODE_LANES_T1S_P2MP 1
#define __LINK_MODE_LANES_VR 1
#define __LINK_MODE_LANES_VR2 2
#define __LINK_MODE_LANES_VR4 4
#define __LINK_MODE_LANES_VR8 8
#define __LINK_MODE_LANES_DR8_2 8
#define __LINK_MODE_LANES_T1BRR 1
@ -378,6 +402,24 @@ const struct link_mode_info link_mode_params[] = {
__DEFINE_LINK_MODE_PARAMS(10, T1S, Half),
__DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half),
__DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full),
__DEFINE_LINK_MODE_PARAMS(200000, CR, Full),
__DEFINE_LINK_MODE_PARAMS(200000, KR, Full),
__DEFINE_LINK_MODE_PARAMS(200000, DR, Full),
__DEFINE_LINK_MODE_PARAMS(200000, DR_2, Full),
__DEFINE_LINK_MODE_PARAMS(200000, SR, Full),
__DEFINE_LINK_MODE_PARAMS(200000, VR, Full),
__DEFINE_LINK_MODE_PARAMS(400000, CR2, Full),
__DEFINE_LINK_MODE_PARAMS(400000, KR2, Full),
__DEFINE_LINK_MODE_PARAMS(400000, DR2, Full),
__DEFINE_LINK_MODE_PARAMS(400000, DR2_2, Full),
__DEFINE_LINK_MODE_PARAMS(400000, SR2, Full),
__DEFINE_LINK_MODE_PARAMS(400000, VR2, Full),
__DEFINE_LINK_MODE_PARAMS(800000, CR4, Full),
__DEFINE_LINK_MODE_PARAMS(800000, KR4, Full),
__DEFINE_LINK_MODE_PARAMS(800000, DR4, Full),
__DEFINE_LINK_MODE_PARAMS(800000, DR4_2, Full),
__DEFINE_LINK_MODE_PARAMS(800000, SR4, Full),
__DEFINE_LINK_MODE_PARAMS(800000, VR4, Full),
};
static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS);

View file

@ -141,7 +141,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
const struct iphdr *iph;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
unsigned int data_len = 0;
struct ip_tunnel *t;
if (tpi->proto == htons(ETH_P_TEB))
@ -182,7 +181,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
return 0;
data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
break;
case ICMP_REDIRECT:
@ -190,10 +188,16 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
}
#if IS_ENABLED(CONFIG_IPV6)
if (tpi->proto == htons(ETH_P_IPV6) &&
!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
type, data_len))
return 0;
if (tpi->proto == htons(ETH_P_IPV6)) {
unsigned int data_len = 0;
if (type == ICMP_TIME_EXCEEDED)
data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
type, data_len))
return 0;
}
#endif
if (t->parms.iph.daddr == 0 ||

View file

@ -2476,6 +2476,11 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
}
niov = skb_frag_net_iov(frag);
if (!net_is_devmem_iov(niov)) {
err = -ENODEV;
goto out;
}
end = start + skb_frag_size(frag);
copy = end - offset;
@ -2494,7 +2499,7 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
/* Will perform the exchange later */
dmabuf_cmsg.frag_token = tcp_xa_pool.tokens[tcp_xa_pool.idx];
dmabuf_cmsg.dmabuf_id = net_iov_binding_id(niov);
dmabuf_cmsg.dmabuf_id = net_devmem_iov_binding_id(niov);
offset += copy;
remaining_len -= copy;

View file

@ -86,6 +86,11 @@ enum {
NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1)
};
enum {
__NETDEV_A_IO_URING_PROVIDER_INFO_MAX,
NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1)
};
enum {
NETDEV_A_PAGE_POOL_ID = 1,
NETDEV_A_PAGE_POOL_IFINDEX,
@ -94,6 +99,7 @@ enum {
NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
NETDEV_A_PAGE_POOL_DETACH_TIME,
NETDEV_A_PAGE_POOL_DMABUF,
NETDEV_A_PAGE_POOL_IO_URING,
__NETDEV_A_PAGE_POOL_MAX,
NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@ -136,6 +142,7 @@ enum {
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
NETDEV_A_QUEUE_IO_URING,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)

View file

@ -17,9 +17,11 @@ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2)
CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h)
CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h)
CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \
$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h)
$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \
$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h)
CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h)
CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h)
CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h)
CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)

View file

@ -100,7 +100,7 @@ class Type(SpecAttr):
if isinstance(value, int):
return value
if value in self.family.consts:
raise Exception("Resolving family constants not implemented, yet")
return self.family.consts[value]["value"]
return limit_to_number(value)
def get_limit_str(self, limit, default=None, suffix=''):
@ -110,6 +110,9 @@ class Type(SpecAttr):
if isinstance(value, int):
return str(value) + suffix
if value in self.family.consts:
const = self.family.consts[value]
if const.get('header'):
return c_upper(value)
return c_upper(f"{self.family['name']}-{value}")
return c_upper(value)
@ -2549,6 +2552,9 @@ def render_uapi(family, cw):
defines = []
for const in family['definitions']:
if const.get('header'):
continue
if const['type'] != 'const':
cw.writes_defines(defines)
defines = []

View file

@ -7,6 +7,7 @@ TEST_INCLUDES := $(wildcard lib/py/*.py) \
TEST_PROGS := \
netcons_basic.sh \
netcons_fragmented_msg.sh \
netcons_overflow.sh \
ping.py \
queues.py \

View file

@ -110,6 +110,13 @@ function create_dynamic_target() {
echo 1 > "${NETCONS_PATH}"/enabled
}
# Do not append the release to the header of the message
function disable_release_append() {
echo 0 > "${NETCONS_PATH}"/enabled
echo 0 > "${NETCONS_PATH}"/release
echo 1 > "${NETCONS_PATH}"/enabled
}
function cleanup() {
local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"

View file

@ -0,0 +1,122 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-2.0
# Test netconsole's message fragmentation functionality.
#
# When a message exceeds the maximum packet size, netconsole splits it into
# multiple fragments for transmission. This test verifies:
# - Correct fragmentation of large messages
# - Proper reassembly of fragments at the receiver
# - Preservation of userdata across fragments
# - Behavior with and without kernel release version appending
#
# Author: Breno Leitao <leitao@debian.org>
set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
modprobe netdevsim 2> /dev/null || true
modprobe netconsole 2> /dev/null || true
# The content of kmsg will be saved to the following file
OUTPUT_FILE="/tmp/${TARGET}"
# Set userdata to a long value. In this case, it is "1-2-3-4...60-"
USERDATA_VALUE=$(printf -- '%.2s-' {1..60})
# Convert the header string into a regexp, so we can remove
# the second header as well.
# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If the
# release is appended, you might find something like:
# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;"
function header_to_regex() {
# header is everything before ;
local HEADER="${1}"
REGEX=$(echo "${HEADER}" | cut -d'=' -f1)
echo "${REGEX}=[0-9]*\/[0-9]*;"
}
# We have two headers in the message. Remove both to extract the
# full message.
function extract_msg() {
local MSGFILE="${1}"
# Extract the header, which is the very first thing that arrives in the
# first line.
HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1)
HEADER_REGEX=$(header_to_regex "${HEADER}")
# Remove the two headers from the received message.
# This returns the message without any headers, matching what
# was sent.
sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}"
}
# Validate the message, which arrives as two fragments glued together.
# Unwrap them to make sure all the characters were transmitted.
# File will look like the following:
# 13,468,514729715,-,ncfrag=0/1135;<message>
# key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key>
function validate_fragmented_result() {
# Discard the netconsole headers, and assemble the full message
RCVMSG=$(extract_msg "${1}")
# check for the main message
if ! echo "${RCVMSG}" | grep -q "${MSG}"; then
echo "Message body doesn't match." >&2
echo "msg received=" "${RCVMSG}" >&2
exit "${ksft_fail}"
fi
# check userdata
if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then
echo "message userdata doesn't match" >&2
echo "msg received=" "${RCVMSG}" >&2
exit "${ksft_fail}"
fi
# test passed. hooray
}
# Check for basic system dependency and exit if not found
check_for_dependencies
# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
echo "6 5" > /proc/sys/kernel/printk
# Remove the namespace, interfaces and netconsole target on exit
trap cleanup EXIT
# Create one namespace and two interfaces
set_network
# Create a dynamic target for netconsole
create_dynamic_target
# Set userdata "key" to the value "value"
set_user_data
# TEST 1: Send message and userdata. They will fragment
# =======
MSG=$(printf -- 'MSG%.3s=' {1..150})
# Listen on the netconsole port inside the namespace, on the destination interface
listen_port_and_save_to "${OUTPUT_FILE}" &
# Wait for socat to start and listen to the port.
wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
# Send the message
echo "${MSG}: ${TARGET}" > /dev/kmsg
# Wait until socat saves the file to disk
busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
# Check that the message was not corrupted
validate_fragmented_result "${OUTPUT_FILE}"
# TEST 2: Test with a smaller message, and without the release appended
# =======
MSG=$(printf -- 'FOOBAR%.3s=' {1..100})
# Let's disable release and test again.
disable_release_append
listen_port_and_save_to "${OUTPUT_FILE}" &
wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
echo "${MSG}: ${TARGET}" > /dev/kmsg
busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
validate_fragmented_result "${OUTPUT_FILE}"
exit "${ksft_pass}"

View file

@ -149,7 +149,7 @@ cfg_test_host_common()
check_err $? "Failed to add $name host entry"
bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null
check_fail $? "Managed to replace $name host entry"
check_err $? "Failed to replace $name host entry"
bridge mdb del dev br0 port br0 grp $grp $state vid 10
bridge mdb get dev br0 grp $grp vid 10 &> /dev/null

View file

@ -740,6 +740,8 @@ test_learning()
vxlan_flood_test $mac $dst 0 10 0
# The entry should age out when it only forwards traffic
$MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q &
sleep 60
bridge fdb show brport vx1 | grep $mac | grep -q self

View file

@ -27,7 +27,8 @@ $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig:
$(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig
$(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a
$(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a
$(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \
GENS="$(YNL_GENS)" RSTS="" libynl.a
$(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a
EXTRA_CLEAN += \