Skip to content

Commit

Permalink
Merge branch 'mlx5-next'
Browse files Browse the repository at this point in the history
Saeed Mahameed says:

====================
Mellanox 100G mlx5 CQE compression

Introducing ConnectX-4 CQE (Completion Queue Entry) compression feature
for mlx5 etherent driver.

CQE Compressing reduces PCI overhead by coalescing and compressing multiple CQEs into a
single merged CQE.  Successful compressing improves message rate especially for small packet
traffic.

CQE Compressing in details:

Instead of writing full CQEs to memory, multiple almost identical CQEs are merged and compressed.
Information that is shared between the CQEs is written once, regardless of the number of
compressed CQEs.  In addition, only the unique information (small amount of bytes compared to
full CQE size) is written per CQE.

CQE Compression Block:

This block contains multiple compressed CQEs.  CQE Compression Block contains a single copy
of CQEs properties which are shared between all the compressed CQEs (called Title, see below)
and multiple mini CQEs (CQEs in compressed form).

Title:

The Title holds information which is shared between all the compressed CQEs in the CQE Compression
Block.  In each Compression Block there is only a single Title regardless of the number
of compressed CQEs.

Mini CQE:

A CQE in compressed form that holds some data needed to extract a single full CQE, for example
8 Bytes instead of 64 Bytes.
The shared information between all compressed CQEs, which belong to the same CQE Compression
Block called Title, is written once, and only the unique information in each compressed
CQE, for example 8 bytes, is written per compressed CQE, called mini CQE.

Since CQE Compression can add overhead to the software (CPU),
it will be only enabled on "weak/slow" PCI slots, where it can actually help.

Applied on top: c047c3b ('netfilter: conntrack: remove uninitialized shadow variable')
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed May 11, 2016
2 parents c1869d5 + b797a68 commit 6a47a57
Show file tree
Hide file tree
Showing 7 changed files with 328 additions and 30 deletions.
24 changes: 18 additions & 6 deletions drivers/net/ethernet/mellanox/mlx5/core/en.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,9 @@
#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4
#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6

#define MLX5_MPWRQ_LOG_NUM_STRIDES 11 /* >= 9, HW restriction */
#define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */
#define MLX5_MPWRQ_NUM_STRIDES BIT(MLX5_MPWRQ_LOG_NUM_STRIDES)
#define MLX5_MPWRQ_STRIDE_SIZE BIT(MLX5_MPWRQ_LOG_STRIDE_SIZE)
#define MLX5_MPWRQ_LOG_WQE_SZ (MLX5_MPWRQ_LOG_NUM_STRIDES +\
MLX5_MPWRQ_LOG_STRIDE_SIZE)
#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */
#define MLX5_MPWRQ_LOG_WQE_SZ 17
#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
Expand Down Expand Up @@ -154,9 +151,13 @@ struct mlx5e_umr_wqe {
struct mlx5e_params {
u8 log_sq_size;
u8 rq_wq_type;
u8 mpwqe_log_stride_sz;
u8 mpwqe_log_num_strides;
u8 log_rq_size;
u16 num_channels;
u8 num_tc;
bool rx_cqe_compress_admin;
bool rx_cqe_compress;
u16 rx_cq_moderation_usec;
u16 rx_cq_moderation_pkts;
u16 tx_cq_moderation_usec;
Expand Down Expand Up @@ -202,6 +203,13 @@ struct mlx5e_cq {
struct mlx5e_channel *channel;
struct mlx5e_priv *priv;

/* cqe decompression */
struct mlx5_cqe64 title;
struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE];
u8 mini_arr_idx;
u16 decmprs_left;
u16 decmprs_wqe_counter;

/* control */
struct mlx5_wq_ctrl wq_ctrl;
} ____cacheline_aligned_in_smp;
Expand Down Expand Up @@ -240,6 +248,8 @@ struct mlx5e_rq {
/* control */
struct mlx5_wq_ctrl wq_ctrl;
u8 wq_type;
u32 mpwqe_stride_sz;
u32 mpwqe_num_strides;
u32 rqn;
struct mlx5e_channel *channel;
struct mlx5e_priv *priv;
Expand All @@ -263,7 +273,7 @@ struct mlx5e_mpw_info {
void (*dma_pre_sync)(struct device *pdev,
struct mlx5e_mpw_info *wi,
u32 wqe_offset, u32 len);
void (*add_skb_frag)(struct device *pdev,
void (*add_skb_frag)(struct mlx5e_rq *rq,
struct sk_buff *skb,
struct mlx5e_mpw_info *wi,
u32 page_idx, u32 frag_offset, u32 len);
Expand Down Expand Up @@ -616,6 +626,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv);
void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv);
int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr);
int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr);
void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val);

int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
u16 vid);
Expand All @@ -634,6 +645,7 @@ int mlx5e_close_locked(struct net_device *netdev);
void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
u32 *indirection_rqt, int len,
int num_channels);
int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);

static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz)
Expand Down
4 changes: 4 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
/* RX HW timestamp */
switch (config.rx_filter) {
case HWTSTAMP_FILTER_NONE:
/* Reset CQE compression to Admin default */
mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_admin);
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_SOME:
Expand All @@ -108,6 +110,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
/* Disable CQE compression */
mlx5e_modify_rx_cqe_compression(priv, false);
config.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
Expand Down
19 changes: 19 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,25 @@ static u32 ptys2ethtool_supported_port(u32 eth_proto_cap)
return 0;
}

int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
{
u32 max_speed = 0;
u32 proto_cap;
int err;
int i;

err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN);
if (err)
return err;

for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i)
if (proto_cap & MLX5E_PROT_MASK(i))
max_speed = max(max_speed, ptys2ethtool_table[i].speed);

*speed = max_speed;
return 0;
}

static void get_speed_duplex(struct net_device *netdev,
u32 eth_proto_oper,
struct ethtool_cmd *cmd)
Expand Down
81 changes: 77 additions & 4 deletions drivers/net/ethernet/mellanox/mlx5/core/en_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
s->rx_mpwqe_frag += rq_stats->mpwqe_frag;
s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;

for (j = 0; j < priv->params.num_tc; j++) {
sq_stats = &priv->channel[i]->sq[j].stats;
Expand Down Expand Up @@ -305,7 +307,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;

rq->wqe_sz = MLX5_MPWRQ_NUM_STRIDES * MLX5_MPWRQ_STRIDE_SIZE;
rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
byte_count = rq->wqe_sz;
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
Expand Down Expand Up @@ -1128,9 +1132,9 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
switch (priv->params.rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
MLX5_SET(wq, wq, log_wqe_num_of_strides,
MLX5_MPWRQ_LOG_NUM_STRIDES - 9);
priv->params.mpwqe_log_num_strides - 9);
MLX5_SET(wq, wq, log_wqe_stride_size,
MLX5_MPWRQ_LOG_STRIDE_SIZE - 6);
priv->params.mpwqe_log_stride_sz - 6);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
Expand Down Expand Up @@ -1197,13 +1201,17 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
switch (priv->params.rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
log_cq_size = priv->params.log_rq_size +
MLX5_MPWRQ_LOG_NUM_STRIDES;
priv->params.mpwqe_log_num_strides;
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
log_cq_size = priv->params.log_rq_size;
}

MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
if (priv->params.rx_cqe_compress) {
MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
MLX5_SET(cqc, cqc, cqe_comp_en, 1);
}

mlx5e_build_common_cq_param(priv, param);
}
Expand Down Expand Up @@ -2708,27 +2716,92 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
MLX5_CAP_ETH(mdev, reg_umr_sq);
}

static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
{
enum pcie_link_width width;
enum pci_bus_speed speed;
int err = 0;

err = pcie_get_minimum_link(mdev->pdev, &speed, &width);
if (err)
return err;

if (speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN)
return -EINVAL;

switch (speed) {
case PCIE_SPEED_2_5GT:
*pci_bw = 2500 * width;
break;
case PCIE_SPEED_5_0GT:
*pci_bw = 5000 * width;
break;
case PCIE_SPEED_8_0GT:
*pci_bw = 8000 * width;
break;
default:
return -EINVAL;
}

return 0;
}

static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw)
{
return (link_speed && pci_bw &&
(pci_bw < 40000) && (pci_bw < link_speed));
}

static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
struct net_device *netdev,
int num_channels)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
u32 link_speed = 0;
u32 pci_bw = 0;

priv->params.log_sq_size =
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ?
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
MLX5_WQ_TYPE_LINKED_LIST;

/* set CQE compression */
priv->params.rx_cqe_compress_admin = false;
if (MLX5_CAP_GEN(mdev, cqe_compression) &&
MLX5_CAP_GEN(mdev, vport_group_manager)) {
mlx5e_get_max_linkspeed(mdev, &link_speed);
mlx5e_get_pci_bw(mdev, &pci_bw);
mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n",
link_speed, pci_bw);
priv->params.rx_cqe_compress_admin =
cqe_compress_heuristic(link_speed, pci_bw);
}

priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin;

switch (priv->params.rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
priv->params.mpwqe_log_stride_sz =
priv->params.rx_cqe_compress ?
MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
MLX5_MPWRQ_LOG_STRIDE_SIZE;
priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
priv->params.mpwqe_log_stride_sz;
priv->params.lro_en = true;
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
}

mlx5_core_info(mdev,
"MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
BIT(priv->params.log_rq_size),
BIT(priv->params.mpwqe_log_stride_sz),
priv->params.rx_cqe_compress_admin);

priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
BIT(priv->params.log_rq_size));
priv->params.rx_cq_moderation_usec =
Expand Down
Loading

0 comments on commit 6a47a57

Please sign in to comment.