linux_dsm_epyc7002/drivers/infiniband/hw/mlx5/cong.c

422 lines
12 KiB
C
Raw Normal View History

IB/mlx5: Add debug control parameters for congestion control This patch adds debug control parameters for congestion control which can be read or written through debugfs. They are for reaction point and notification point nodes. These control parameters are as below: +------------------------------+-----------------------------------------+ | Name | Description | |------------------------------+-----------------------------------------| |rp_clamp_tgt_rate | When set target rate is updated to | | | current rate | |------------------------------+-----------------------------------------| |rp_clamp_tgt_rate_ati | When set update target rate based on | | | timer as well | |------------------------------+-----------------------------------------| |rp_time_reset | time between rate increase if no | | | CNP is received unit in usec | |------------------------------+-----------------------------------------| |rp_byte_reset | Number of bytes between rate increase if | | | no CNP is received | |------------------------------+-----------------------------------------| |rp_threshold | Threshold for reaction point rate | | | control | |------------------------------+-----------------------------------------| |rp_ai_rate | Rate for target rate, unit in Mbps | |------------------------------+-----------------------------------------| |rp_hai_rate | Rate for hyper increase state | | | unit in Mbps | |------------------------------+-----------------------------------------| |rp_min_dec_fac | Minimum factor by which the current | | | transmit rate can be changed when | | | processing a CNP, unit is percentage | |------------------------------+-----------------------------------------| |rp_min_rate | Minimum value for rate limit, | | | unit in Mbps | |------------------------------+-----------------------------------------| |rp_rate_to_set_on_first_cnp | Rate that is set when first CNP is | | | received, unit is Mbps | 
|------------------------------+-----------------------------------------| |rp_dce_tcp_g | Used to calculate alpha | |------------------------------+-----------------------------------------| |rp_dce_tcp_rtt | Time between updates of alpha value, | | | unit is usec | |------------------------------+-----------------------------------------| |rp_rate_reduce_monitor_period | Minimum time between consecutive rate | | | reductions | |------------------------------+-----------------------------------------| |rp_initial_alpha_value | Initial value of alpha | |------------------------------+-----------------------------------------| |rp_gd | When CNP is received, flow rate is | | | reduced based on gd, rp_gd is given as | | | log2(rp_gd) | |------------------------------+-----------------------------------------| |np_cnp_dscp | dscp code point for generated cnp | |------------------------------+-----------------------------------------| |np_cnp_prio_mode | cnp priority mode | |------------------------------+-----------------------------------------| |np_cnp_prio | 802.1p priority for generated cnp | +------------------------------+-----------------------------------------+ Signed-off-by: Parav Pandit <parav@mellanox.com> Reviewed-by: Daniel Jurgens <danielj@mellanox.com> Reviewed-by: Eli Cohen <eli@mellanox.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Doug Ledford <dledford@redhat.com>
2017-05-30 14:05:15 +07:00
/*
* Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/debugfs.h>
#include "mlx5_ib.h"
#include "cmd.h"
/*
 * Congestion-control node selector. The value is handed to the query
 * command and written into the cong_protocol field of the modify command.
 */
enum mlx5_ib_cong_node_type {
	MLX5_IB_RROCE_ECN_RP = 1,	/* reaction point parameters */
	MLX5_IB_RROCE_ECN_NP = 2,	/* notification point parameters */
};
/*
 * debugfs file names, one per congestion-control parameter. The table is
 * indexed by the same value that is stored as the parameter offset when
 * the files are created, so the order here must not change.
 */
static const char * const mlx5_ib_dbg_cc_name[] = {
	/* reaction point (rp_*) parameters */
	"rp_clamp_tgt_rate",
	"rp_clamp_tgt_rate_ati",
	"rp_time_reset",
	"rp_byte_reset",
	"rp_threshold",
	"rp_ai_rate",
	"rp_hai_rate",
	"rp_min_dec_fac",
	"rp_min_rate",
	"rp_rate_to_set_on_first_cnp",
	"rp_dce_tcp_g",
	"rp_dce_tcp_rtt",
	"rp_rate_reduce_monitor_period",
	"rp_initial_alpha_value",
	"rp_gd",
	/* notification point (np_*) parameters */
	"np_cnp_dscp",
	"np_cnp_prio_mode",
	"np_cnp_prio",
};
/*
 * Field-select bits for the modify command: each bit marks one parameter
 * as being updated. Reaction point bits first; note that bit 6 is not
 * assigned to any attribute in this list.
 */
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR			BIT(1)
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR		BIT(2)
#define MLX5_IB_RP_TIME_RESET_ATTR			BIT(3)
#define MLX5_IB_RP_BYTE_RESET_ATTR			BIT(4)
#define MLX5_IB_RP_THRESHOLD_ATTR			BIT(5)
#define MLX5_IB_RP_AI_RATE_ATTR				BIT(7)
#define MLX5_IB_RP_HAI_RATE_ATTR			BIT(8)
#define MLX5_IB_RP_MIN_DEC_FAC_ATTR			BIT(9)
#define MLX5_IB_RP_MIN_RATE_ATTR			BIT(10)
#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR	BIT(11)
#define MLX5_IB_RP_DCE_TCP_G_ATTR			BIT(12)
#define MLX5_IB_RP_DCE_TCP_RTT_ATTR			BIT(13)
#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR	BIT(14)
#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR		BIT(15)
#define MLX5_IB_RP_GD_ATTR				BIT(16)

/*
 * Notification point bits. These reuse bit positions 3 and 4; they are
 * only ever combined with other NP bits because a modify command targets
 * a single node type.
 */
#define MLX5_IB_NP_CNP_DSCP_ATTR			BIT(3)
#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR			BIT(4)
/*
 * Map a debug parameter index to the congestion node type it belongs to.
 *
 * Indices in the inclusive range [RP_CLAMP_TGT_RATE, RP_GD] are reaction
 * point parameters; every other index is treated as notification point.
 */
static enum mlx5_ib_cong_node_type
mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset)
{
	if (param_offset < MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE ||
	    param_offset > MLX5_IB_DBG_CC_RP_GD)
		return MLX5_IB_RROCE_ECN_NP;

	return MLX5_IB_RROCE_ECN_RP;
}
static u32 mlx5_get_cc_param_val(void *field, int offset)
{
switch (offset) {
case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
clamp_tgt_rate);
case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
clamp_tgt_rate_after_time_inc);
case MLX5_IB_DBG_CC_RP_TIME_RESET:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_time_reset);
case MLX5_IB_DBG_CC_RP_BYTE_RESET:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_byte_reset);
case MLX5_IB_DBG_CC_RP_THRESHOLD:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_threshold);
case MLX5_IB_DBG_CC_RP_AI_RATE:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_ai_rate);
case MLX5_IB_DBG_CC_RP_HAI_RATE:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_hai_rate);
case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_min_dec_fac);
case MLX5_IB_DBG_CC_RP_MIN_RATE:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_min_rate);
case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rate_to_set_on_first_cnp);
case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
dce_tcp_g);
case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
dce_tcp_rtt);
case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rate_reduce_monitor_period);
case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
initial_alpha_value);
case MLX5_IB_DBG_CC_RP_GD:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_gd);
case MLX5_IB_DBG_CC_NP_CNP_DSCP:
return MLX5_GET(cong_control_r_roce_ecn_np, field,
cnp_dscp);
case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
return MLX5_GET(cong_control_r_roce_ecn_np, field,
cnp_prio_mode);
case MLX5_IB_DBG_CC_NP_CNP_PRIO:
return MLX5_GET(cong_control_r_roce_ecn_np, field,
cnp_802p_prio);
default:
return 0;
}
}
/*
 * Store one congestion-control parameter into a parameter block and flag
 * it in the field-select mask.
 *
 * @field:     start of the congestion parameters area being built;
 *             interpreted with the RP layout for rp_* selectors and the
 *             NP layout for np_* ones.
 * @offset:    MLX5_IB_DBG_CC_* selector of the parameter to write.
 * @var:       new value for the parameter.
 * @attr_mask: in/out; the select bit of the written parameter is OR-ed in.
 *
 * An unknown @offset leaves both @field and @attr_mask untouched.
 */
static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
					  u32 var, u32 *attr_mask)
{
	u32 select = 0;

	switch (offset) {
	/* reaction point parameters */
	case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
		select = MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 clamp_tgt_rate, var);
		break;
	case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
		select = MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 clamp_tgt_rate_after_time_inc, var);
		break;
	case MLX5_IB_DBG_CC_RP_TIME_RESET:
		select = MLX5_IB_RP_TIME_RESET_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_time_reset, var);
		break;
	case MLX5_IB_DBG_CC_RP_BYTE_RESET:
		select = MLX5_IB_RP_BYTE_RESET_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_byte_reset, var);
		break;
	case MLX5_IB_DBG_CC_RP_THRESHOLD:
		select = MLX5_IB_RP_THRESHOLD_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_threshold, var);
		break;
	case MLX5_IB_DBG_CC_RP_AI_RATE:
		select = MLX5_IB_RP_AI_RATE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_ai_rate, var);
		break;
	case MLX5_IB_DBG_CC_RP_HAI_RATE:
		select = MLX5_IB_RP_HAI_RATE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_hai_rate, var);
		break;
	case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
		select = MLX5_IB_RP_MIN_DEC_FAC_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_min_dec_fac, var);
		break;
	case MLX5_IB_DBG_CC_RP_MIN_RATE:
		select = MLX5_IB_RP_MIN_RATE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_min_rate, var);
		break;
	case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
		select = MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rate_to_set_on_first_cnp, var);
		break;
	case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
		select = MLX5_IB_RP_DCE_TCP_G_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 dce_tcp_g, var);
		break;
	case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
		select = MLX5_IB_RP_DCE_TCP_RTT_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 dce_tcp_rtt, var);
		break;
	case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
		select = MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rate_reduce_monitor_period, var);
		break;
	case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
		select = MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 initial_alpha_value, var);
		break;
	case MLX5_IB_DBG_CC_RP_GD:
		select = MLX5_IB_RP_GD_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_gd, var);
		break;
	/* notification point parameters */
	case MLX5_IB_DBG_CC_NP_CNP_DSCP:
		select = MLX5_IB_NP_CNP_DSCP_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
		break;
	case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
		select = MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
		break;
	case MLX5_IB_DBG_CC_NP_CNP_PRIO:
		/*
		 * The 802.1p priority is written together with the prio
		 * mode: the mode is forced to 0 and the PRIO_MODE select
		 * bit is the one that gets flagged.
		 */
		select = MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
		break;
	default:
		/* unknown selector: nothing to write, nothing to flag */
		break;
	}

	*attr_mask |= select;
}
/*
 * Query the device for one congestion-control parameter.
 *
 * @dev:    IB device whose underlying mdev is queried.
 * @offset: MLX5_IB_DBG_CC_* selector of the parameter.
 * @var:    receives the parameter value; written only on success.
 *
 * Returns 0 on success, -ENOMEM if the reply buffer cannot be allocated,
 * or the error returned by the query command.
 */
static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var)
{
	int reply_len = MLX5_ST_SZ_BYTES(query_cong_params_out);
	enum mlx5_ib_cong_node_type node;
	void *reply;
	void *params;
	int rc;

	reply = kvzalloc(reply_len, GFP_KERNEL);
	if (!reply)
		return -ENOMEM;

	/* The query is issued per node type, so derive it from the selector. */
	node = mlx5_ib_param_to_node(offset);

	rc = mlx5_cmd_query_cong_params(dev->mdev, node, reply, reply_len);
	if (!rc) {
		params = MLX5_ADDR_OF(query_cong_params_out, reply,
				      congestion_parameters);
		*var = mlx5_get_cc_param_val(params, offset);
	}

	kvfree(reply);
	return rc;
}
/*
 * Program one congestion-control parameter into the device.
 *
 * Builds a MODIFY_CONG_PARAMS command that carries the new value together
 * with a field-select mask naming the parameter being changed.
 *
 * @dev:    IB device whose underlying mdev receives the command.
 * @offset: MLX5_IB_DBG_CC_* selector of the parameter.
 * @var:    new value.
 *
 * Returns 0 on success, -ENOMEM if the command buffer cannot be
 * allocated, or the error returned by the modify command.
 */
static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var)
{
	int cmd_len = MLX5_ST_SZ_BYTES(modify_cong_params_in);
	enum mlx5_ib_cong_node_type node;
	u32 select_mask = 0;
	void *cmd;
	void *params;
	void *select;
	int rc;

	cmd = kvzalloc(cmd_len, GFP_KERNEL);
	if (!cmd)
		return -ENOMEM;

	/* Command header: opcode and the node type being modified. */
	MLX5_SET(modify_cong_params_in, cmd, opcode,
		 MLX5_CMD_OP_MODIFY_CONG_PARAMS);
	node = mlx5_ib_param_to_node(offset);
	MLX5_SET(modify_cong_params_in, cmd, cong_protocol, node);

	/* Payload: the new value, which also yields the select bit. */
	params = MLX5_ADDR_OF(modify_cong_params_in, cmd,
			      congestion_parameters);
	mlx5_ib_set_cc_param_mask_val(params, offset, var, &select_mask);

	/* The RP field-select layout is used for both node types here. */
	select = MLX5_ADDR_OF(modify_cong_params_in, cmd, field_select);
	MLX5_SET(field_select_r_roce_rp, select, field_select_r_roce_rp,
		 select_mask);

	rc = mlx5_cmd_modify_cong_params(dev->mdev, cmd, cmd_len);

	kvfree(cmd);
	return rc;
}
/*
 * debugfs write handler: parse an unsigned 32-bit number from user space
 * and program it as the congestion-control parameter bound to this file.
 *
 * The input may be decimal, octal or hex (base auto-detected) and may end
 * with a single newline.
 *
 * Returns @count on success, -EINVAL if the input is too long or is not a
 * valid u32, -EFAULT if the user buffer cannot be read, or the error from
 * the modify command.
 */
static ssize_t set_param(struct file *filp, const char __user *buf,
			 size_t count, loff_t *pos)
{
	struct mlx5_ib_dbg_param *param = filp->private_data;
	int offset = param->offset;
	/* Longest valid input: "4294967295" + '\n', plus the terminator. */
	char lbuf[12] = { };
	u32 var;
	int ret;

	/*
	 * Always leave room for the terminator. Accepting a write that
	 * fills the whole buffer would make the terminator overwrite the
	 * last input byte, silently parsing a truncated (different) number.
	 */
	if (count >= sizeof(lbuf))
		return -EINVAL;

	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;

	/* Already guaranteed by the zero-init and size check; kept as a guard. */
	lbuf[sizeof(lbuf) - 1] = '\0';

	if (kstrtou32(lbuf, 0, &var))
		return -EINVAL;

	ret = mlx5_ib_set_cc_params(param->dev, offset, var);
	return ret ? ret : count;
}
/*
 * debugfs read handler: query the congestion-control parameter bound to
 * this file and return it to user space as "<decimal value>\n".
 *
 * Returns the number of bytes copied (0 at end of file), -EFAULT if the
 * user buffer cannot be written, or the error from the query.
 */
static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_ib_dbg_param *param = filp->private_data;
	int offset = param->offset;
	/* Largest u32 is 10 digits; add '\n' and the terminator. */
	char lbuf[12];
	u32 var = 0;
	int len;
	int ret;

	ret = mlx5_ib_get_cc_params(param->dev, offset, &var);
	if (ret)
		return ret;

	/*
	 * var is unsigned, so format it with %u. scnprintf() returns the
	 * number of bytes actually stored, so len can never exceed the
	 * buffer even if the format is changed later.
	 */
	len = scnprintf(lbuf, sizeof(lbuf), "%u\n", var);

	/*
	 * simple_read_from_buffer() honours both the caller's buffer size
	 * and the file position, and reports EOF once *pos reaches len.
	 */
	return simple_read_from_buffer(buf, count, pos, lbuf, len);
}
/*
 * File operations shared by every congestion-control debugfs file.
 * simple_open() is used as the open handler; both read and write handlers
 * take their per-parameter context from filp->private_data.
 */
static const struct file_operations dbg_cc_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.read	= get_param,
	.write	= set_param,
};
/*
 * Tear down the congestion-control debugfs directory of @dev and release
 * the bookkeeping allocated by mlx5_ib_init_cong_debugfs().
 *
 * Safe to call when nothing was set up, and when setup failed part-way.
 */
void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root || !dev->dbg_cc_params)
		return;

	/*
	 * Do not bail out when ->root is NULL: that is exactly the state
	 * left behind when directory creation failed during init, and
	 * returning early there would leak dbg_cc_params and leave a
	 * stale pointer in dev. debugfs_remove_recursive() ignores a NULL
	 * dentry, so the call is safe in that case.
	 */
	debugfs_remove_recursive(dev->dbg_cc_params->root);
	kfree(dev->dbg_cc_params);
	dev->dbg_cc_params = NULL;
}
int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev)
{
struct mlx5_ib_dbg_cc_params *dbg_cc_params;
int i;
if (!mlx5_debugfs_root)
goto out;
if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed) ||
!MLX5_CAP_GEN(dev->mdev, cc_modify_allowed))
goto out;
dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL);
if (!dbg_cc_params)
goto out;
dev->dbg_cc_params = dbg_cc_params;
dbg_cc_params->root = debugfs_create_dir("cc_params",
dev->mdev->priv.dbg_root);
if (!dbg_cc_params->root)
goto err;
for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
dbg_cc_params->params[i].offset = i;
dbg_cc_params->params[i].dev = dev;
dbg_cc_params->params[i].dentry =
debugfs_create_file(mlx5_ib_dbg_cc_name[i],
0600, dbg_cc_params->root,
&dbg_cc_params->params[i],
&dbg_cc_fops);
if (!dbg_cc_params->params[i].dentry)
goto err;
}
out: return 0;
err:
mlx5_ib_warn(dev, "cong debugfs failure\n");
mlx5_ib_cleanup_cong_debugfs(dev);
/*
* We don't want to fail driver if debugfs failed to initialize,
* so we are not forwarding error to the user.
*/
return 0;
}