RDMA netlink infrastructure v2

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEkhr/r4Op1/04yqaB5GN7iDZyWKcFAlmMOrQQHGxlb25Aa2Vy
 bmVsLm9yZwAKCRDkY3uINnJYp2jeD/4gSNBJLwHewOpmwN1rK21vSEP4rRSEcevL
 T1EbkB7+mIKwePljbwze2UCxbehW+xrsqNJwy8Ng6EdyWc7C5DRN4LhWwz7YcQFJ
 o1ljPgZ2N41GTIPx5tClugry+naFbNRcXYCQvmVqgO8a/J+xeZGAPKfKgqdfcz69
 Z+IneCUuMNIsuhWoPJB898uwbTpMGdytUd7j6UcuTn7QhSCI1I2BUUnaPiFN1o8C
 KI3g0NFrya0K0WMLLxg+g0hxTUiioChmheI23pJ2d5XeY3/tRgz5a28KBnaNhaMX
 9t84OXk/9oKVEoMedbLrTLnT4yc133Hv5i4ky1K1c0lwveYqFSVCBLcMg+MXJvFV
 CooAeOxG36JKj2VoUa+a8Fqw5ggTL92QKr3lmzD4UMFzBFD0u4XzGBY6N49tN7OQ
 SC/GfGmR183J+CU9qTOI5P8CtE8JisO4RdA0lrqti2yAf/x7qoYp7/BaP/4Nw0OB
 jYOwjMkStuqQAm9gpRY8YQzxke28aJL9l78V2Dac3BzpvQmfLCASZxwEHlr0P4QB
 9sJlgV49PSt3tWvLt7KaUgJo63/Z9RzhVmuXYZDCMOTLhLL0NO6KRPNq081osCUf
 YpBT1QPTVE+wNC7Ra9w0gtJWXbzWBglm1ous2M/6l4scW6vx/+HJuLrwsuUHMW5C
 erfgT1JpRQ==
 =twtZ
 -----END PGP SIGNATURE-----

Merge tag 'rdma-next-2017-08-10' of git://git.kernel.org/pub/scm/linux/kernel/git/leon/linux-rdma into rdma-netlink

RDMA netlink infrastructure v2

Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
Doug Ledford 2017-08-10 13:43:11 -04:00
commit db14dff174
169 changed files with 6975 additions and 2604 deletions

View File

@ -11,7 +11,8 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
security.o
security.o nldev.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o

View File

@ -129,13 +129,11 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
}
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
struct netlink_callback *cb)
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
!(NETLINK_CB(skb).sk) ||
!netlink_capable(skb, CAP_NET_ADMIN))
!(NETLINK_CB(skb).sk))
return -EPERM;
if (ib_nl_is_good_ip_resp(nlh))
@ -185,7 +183,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);
rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
/* Make the request retry, so when we get the response from userspace
* we will have something.
@ -326,7 +324,7 @@ static void queue_req(struct addr_req *req)
static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
const void *daddr, u32 seq, u16 family)
{
if (ibnl_chk_listeners(RDMA_NL_GROUP_LS))
if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
return -EADDRNOTAVAIL;
/* We fill in what we can, the response will fill the rest */

View File

@ -72,6 +72,7 @@ MODULE_LICENSE("Dual BSD/GPL");
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18
#define CMA_PREFERRED_ROCE_GID_TYPE (1 << IB_GID_TYPE_ROCE_UDP_ENCAP)
static const char * const cma_events[] = {
[RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved",
@ -3998,7 +3999,8 @@ static void iboe_mcast_work_handler(struct work_struct *work)
kfree(mw);
}
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
enum ib_gid_type gid_type)
{
struct sockaddr_in *sin = (struct sockaddr_in *)addr;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
@ -4008,8 +4010,8 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
} else if (addr->sa_family == AF_INET6) {
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else {
mgid->raw[0] = 0xff;
mgid->raw[1] = 0x0e;
mgid->raw[0] = (gid_type == IB_GID_TYPE_IB) ? 0xff : 0;
mgid->raw[1] = (gid_type == IB_GID_TYPE_IB) ? 0x0e : 0;
mgid->raw[2] = 0;
mgid->raw[3] = 0;
mgid->raw[4] = 0;
@ -4050,7 +4052,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
goto out1;
}
cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
if (id_priv->id.ps == RDMA_PS_UDP)
@ -4066,8 +4070,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
mc->multicast.ib->rec.hop_limit = 1;
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
@ -4280,8 +4282,12 @@ static void cma_add_one(struct ib_device *device)
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
supported_gids = roce_gid_type_mask_support(device, i);
WARN_ON(!supported_gids);
cma_dev->default_gid_type[i - rdma_start_port(device)] =
find_first_bit(&supported_gids, BITS_PER_LONG);
if (supported_gids & CMA_PREFERRED_ROCE_GID_TYPE)
cma_dev->default_gid_type[i - rdma_start_port(device)] =
CMA_PREFERRED_ROCE_GID_TYPE;
else
cma_dev->default_gid_type[i - rdma_start_port(device)] =
find_first_bit(&supported_gids, BITS_PER_LONG);
cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
}
@ -4452,9 +4458,8 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
static const struct ibnl_client_cbs cma_cb_table[] = {
[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats,
.module = THIS_MODULE },
static const struct rdma_nl_cbs cma_cb_table[] = {
[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
};
static int cma_init_net(struct net *net)
@ -4506,9 +4511,7 @@ static int __init cma_init(void)
if (ret)
goto err;
if (ibnl_add_client(RDMA_NL_RDMA_CM, ARRAY_SIZE(cma_cb_table),
cma_cb_table))
pr_warn("RDMA CMA: failed to add netlink callback\n");
rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table);
cma_configfs_init();
return 0;
@ -4525,7 +4528,7 @@ static int __init cma_init(void)
static void __exit cma_cleanup(void)
{
cma_configfs_exit();
ibnl_remove_client(RDMA_NL_RDMA_CM);
rdma_nl_unregister(RDMA_NL_RDMA_CM);
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);

View File

@ -102,6 +102,14 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
roce_netdev_callback cb,
void *cookie);
typedef int (*nldev_callback)(struct ib_device *device,
struct sk_buff *skb,
struct netlink_callback *cb,
unsigned int idx);
int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
struct netlink_callback *cb);
enum ib_cache_gid_default_mode {
IB_CACHE_GID_DEFAULT_MODE_SET,
IB_CACHE_GID_DEFAULT_MODE_DELETE
@ -179,8 +187,8 @@ void ib_mad_cleanup(void);
int ib_sa_init(void);
void ib_sa_cleanup(void);
int ibnl_init(void);
void ibnl_cleanup(void);
int rdma_nl_init(void);
void rdma_nl_exit(void);
/**
* Check if there are any listeners to the netlink group
@ -190,11 +198,14 @@ void ibnl_cleanup(void);
int ibnl_chk_listeners(unsigned int group);
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_callback *cb);
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
int ib_nl_handle_set_timeout(struct sk_buff *skb,
struct netlink_callback *cb);
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
struct netlink_callback *cb);
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
int ib_get_cached_subnet_prefix(struct ib_device *device,
u8 port_num,
@ -301,4 +312,9 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
return 0;
}
#endif
struct ib_device *__ib_device_get_by_index(u32 ifindex);
/* RDMA device netlink */
void nldev_init(void);
void nldev_exit(void);
#endif /* _CORE_PRIV_H */

View File

@ -134,6 +134,17 @@ static int ib_device_check_mandatory(struct ib_device *device)
return 0;
}
/*
 * Look up a device on device_list by its index.
 *
 * Returns the first list entry whose ->index equals @index, or NULL when
 * no entry matches. No lock is taken inside this helper.
 * NOTE(review): callers presumably serialize against list updates - confirm.
 */
struct ib_device *__ib_device_get_by_index(u32 index)
{
	struct ib_device *cur;
	struct ib_device *match = NULL;

	list_for_each_entry(cur, &device_list, core_list) {
		if (cur->index == index) {
			match = cur;
			break;
		}
	}

	return match;
}
static struct ib_device *__ib_device_get_by_name(const char *name)
{
struct ib_device *device;
@ -145,7 +156,6 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
return NULL;
}
static int alloc_name(char *name)
{
unsigned long *inuse;
@ -326,10 +336,10 @@ static int read_port_immutable(struct ib_device *device)
return 0;
}
void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len)
void ib_get_device_fw_str(struct ib_device *dev, char *str)
{
if (dev->get_dev_fw_str)
dev->get_dev_fw_str(dev, str, str_len);
dev->get_dev_fw_str(dev, str);
else
str[0] = '\0';
}
@ -394,6 +404,30 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event,
return NOTIFY_OK;
}
/**
 * __dev_new_index - allocate a device index
 *
 * Returns a unique, non-zero index for a new device. It assumes that
 * fewer than 2^32-1 ib devices are present in the system; if every
 * value were in use, the search below would never terminate.
 */
static u32 __dev_new_index(void)
{
	/*
	 * Last index handed out, kept across calls. The device index
	 * allows stable naming, similar to struct net -> ifindex.
	 */
	static u32 index;

	do {
		index++;
		/* 0 is never handed out: skip it when the counter wraps. */
		if (index == 0)
			index = 1;
	} while (__ib_device_get_by_index(index));

	return index;
}
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
@ -492,6 +526,7 @@ int ib_register_device(struct ib_device *device,
if (client->add && !add_client_context(device, client))
client->add(device);
device->index = __dev_new_index();
down_write(&lists_rwsem);
list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem);
@ -892,6 +927,31 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
up_read(&lists_rwsem);
}
/**
 * ib_enum_all_devs - enumerate all ib_devices
 * @nldev_cb: Callback to call for each found ib_device
 * @skb: Passed unchanged to @nldev_cb
 * @cb: Passed unchanged to @nldev_cb
 *
 * Walks device_list while holding lists_rwsem for reading and calls
 * @nldev_cb for each device together with its zero-based position in the
 * list. The walk stops at the first non-zero value returned by @nldev_cb.
 *
 * Returns 0 when every callback returned 0 (or the list is empty),
 * otherwise the first non-zero callback result.
 */
int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
		     struct netlink_callback *cb)
{
	struct ib_device *device;
	unsigned int pos = 0;
	int rc = 0;

	down_read(&lists_rwsem);
	list_for_each_entry(device, &device_list, core_list) {
		rc = nldev_cb(device, skb, cb, pos++);
		if (rc)
			break;
	}
	up_read(&lists_rwsem);

	return rc;
}
/**
* ib_query_pkey - Get P_Key table entry
* @device:Device to query
@ -1086,29 +1146,21 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);
static struct ibnl_client_cbs ibnl_ls_cb_table[] = {
static const struct rdma_nl_cbs ibnl_ls_cb_table[] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.dump = ib_nl_handle_resolve_resp,
.module = THIS_MODULE },
.doit = ib_nl_handle_resolve_resp,
.flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NL_LS_OP_SET_TIMEOUT] = {
.dump = ib_nl_handle_set_timeout,
.module = THIS_MODULE },
.doit = ib_nl_handle_set_timeout,
.flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NL_LS_OP_IP_RESOLVE] = {
.dump = ib_nl_handle_ip_res_resp,
.module = THIS_MODULE },
.doit = ib_nl_handle_ip_res_resp,
.flags = RDMA_NL_ADMIN_PERM,
},
};
static int ib_add_ibnl_clients(void)
{
return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table),
ibnl_ls_cb_table);
}
static void ib_remove_ibnl_clients(void)
{
ibnl_remove_client(RDMA_NL_LS);
}
static int __init ib_core_init(void)
{
int ret;
@ -1130,9 +1182,9 @@ static int __init ib_core_init(void)
goto err_comp;
}
ret = ibnl_init();
ret = rdma_nl_init();
if (ret) {
pr_warn("Couldn't init IB netlink interface\n");
pr_warn("Couldn't init IB netlink interface: err %d\n", ret);
goto err_sysfs;
}
@ -1154,24 +1206,18 @@ static int __init ib_core_init(void)
goto err_mad;
}
ret = ib_add_ibnl_clients();
if (ret) {
pr_warn("Couldn't register ibnl clients\n");
goto err_sa;
}
ret = register_lsm_notifier(&ibdev_lsm_nb);
if (ret) {
pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
goto err_ibnl_clients;
goto err_sa;
}
nldev_init();
rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
ib_cache_setup();
return 0;
err_ibnl_clients:
ib_remove_ibnl_clients();
err_sa:
ib_sa_cleanup();
err_mad:
@ -1179,7 +1225,7 @@ static int __init ib_core_init(void)
err_addr:
addr_cleanup();
err_ibnl:
ibnl_cleanup();
rdma_nl_exit();
err_sysfs:
class_unregister(&ib_class);
err_comp:
@ -1191,13 +1237,14 @@ static int __init ib_core_init(void)
static void __exit ib_core_cleanup(void)
{
unregister_lsm_notifier(&ibdev_lsm_nb);
ib_cache_cleanup();
ib_remove_ibnl_clients();
nldev_exit();
rdma_nl_unregister(RDMA_NL_LS);
unregister_lsm_notifier(&ibdev_lsm_nb);
ib_sa_cleanup();
ib_mad_cleanup();
addr_cleanup();
ibnl_cleanup();
rdma_nl_exit();
class_unregister(&ib_class);
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */

View File

@ -80,7 +80,7 @@ const char *__attribute_const__ iwcm_reject_msg(int reason)
}
EXPORT_SYMBOL(iwcm_reject_msg);
static struct ibnl_client_cbs iwcm_nl_cb_table[] = {
static struct rdma_nl_cbs iwcm_nl_cb_table[] = {
[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
@ -1175,12 +1175,8 @@ static int __init iw_cm_init(void)
ret = iwpm_init(RDMA_NL_IWCM);
if (ret)
pr_err("iw_cm: couldn't init iwpm\n");
ret = ibnl_add_client(RDMA_NL_IWCM, ARRAY_SIZE(iwcm_nl_cb_table),
iwcm_nl_cb_table);
if (ret)
pr_err("iw_cm: couldn't register netlink callbacks\n");
else
rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM);
if (!iwcm_wq)
return -ENOMEM;
@ -1200,7 +1196,7 @@ static void __exit iw_cm_cleanup(void)
{
unregister_net_sysctl_table(iwcm_ctl_table_hdr);
destroy_workqueue(iwcm_wq);
ibnl_remove_client(RDMA_NL_IWCM);
rdma_nl_unregister(RDMA_NL_IWCM);
iwpm_exit(RDMA_NL_IWCM);
}

View File

@ -42,7 +42,6 @@ int iwpm_valid_pid(void)
{
return iwpm_user_pid > 0;
}
EXPORT_SYMBOL(iwpm_valid_pid);
/*
* iwpm_register_pid - Send a netlink query to user space
@ -104,7 +103,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
__func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNAVAILABLE;
@ -122,7 +121,6 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
}
EXPORT_SYMBOL(iwpm_register_pid);
/*
* iwpm_add_mapping - Send a netlink add mapping message
@ -174,7 +172,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
goto add_mapping_error;
nlmsg_request->req_buffer = pm_msg;
ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNDEFINED;
@ -191,7 +189,6 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
}
EXPORT_SYMBOL(iwpm_add_mapping);
/*
* iwpm_add_and_query_mapping - Send a netlink add and query
@ -251,7 +248,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
goto query_mapping_error;
nlmsg_request->req_buffer = pm_msg;
ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
err_str = "Unable to send a nlmsg";
@ -267,7 +264,6 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
}
EXPORT_SYMBOL(iwpm_add_and_query_mapping);
/*
* iwpm_remove_mapping - Send a netlink remove mapping message
@ -312,7 +308,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
if (ret)
goto remove_mapping_error;
ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNDEFINED;
@ -328,7 +324,6 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
dev_kfree_skb_any(skb);
return ret;
}
EXPORT_SYMBOL(iwpm_remove_mapping);
/* netlink attribute policy for the received response to register pid request */
static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
@ -397,7 +392,6 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
up(&nlmsg_request->sem);
return 0;
}
EXPORT_SYMBOL(iwpm_register_pid_cb);
/* netlink attribute policy for the received response to add mapping request */
static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
@ -466,7 +460,6 @@ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
up(&nlmsg_request->sem);
return 0;
}
EXPORT_SYMBOL(iwpm_add_mapping_cb);
/* netlink attribute policy for the response to add and query mapping request
* and response with remote address info */
@ -558,7 +551,6 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
up(&nlmsg_request->sem);
return 0;
}
EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
/*
* iwpm_remote_info_cb - Process a port mapper message, containing
@ -627,7 +619,6 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
"remote_info: Mapped remote sockaddr:");
return ret;
}
EXPORT_SYMBOL(iwpm_remote_info_cb);
/* netlink attribute policy for the received request for mapping info */
static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
@ -677,7 +668,6 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
ret = iwpm_send_mapinfo(nl_client, iwpm_user_pid);
return ret;
}
EXPORT_SYMBOL(iwpm_mapping_info_cb);
/* netlink attribute policy for the received mapping info ack */
static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
@ -707,7 +697,6 @@ int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
return 0;
}
EXPORT_SYMBOL(iwpm_ack_mapping_info_cb);
/* netlink attribute policy for the received port mapper error message */
static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
@ -751,4 +740,3 @@ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
up(&nlmsg_request->sem);
return 0;
}
EXPORT_SYMBOL(iwpm_mapping_error_cb);

View File

@ -54,8 +54,6 @@ static struct iwpm_admin_data iwpm_admin;
int iwpm_init(u8 nl_client)
{
int ret = 0;
if (iwpm_valid_client(nl_client))
return -EINVAL;
mutex_lock(&iwpm_admin_lock);
if (atomic_read(&iwpm_admin.refcount) == 0) {
iwpm_hash_bucket = kzalloc(IWPM_MAPINFO_HASH_SIZE *
@ -83,7 +81,6 @@ int iwpm_init(u8 nl_client)
}
return ret;
}
EXPORT_SYMBOL(iwpm_init);
static void free_hash_bucket(void);
static void free_reminfo_bucket(void);
@ -109,7 +106,6 @@ int iwpm_exit(u8 nl_client)
iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
return 0;
}
EXPORT_SYMBOL(iwpm_exit);
static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
struct sockaddr_storage *);
@ -148,7 +144,6 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
return ret;
}
EXPORT_SYMBOL(iwpm_create_mapinfo);
int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
struct sockaddr_storage *mapped_local_addr)
@ -184,7 +179,6 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
return ret;
}
EXPORT_SYMBOL(iwpm_remove_mapinfo);
static void free_hash_bucket(void)
{
@ -297,7 +291,6 @@ int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
spin_unlock_irqrestore(&iwpm_reminfo_lock, flags);
return ret;
}
EXPORT_SYMBOL(iwpm_get_remote_info);
struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
u8 nl_client, gfp_t gfp)
@ -383,15 +376,11 @@ int iwpm_get_nlmsg_seq(void)
int iwpm_valid_client(u8 nl_client)
{
if (nl_client >= RDMA_NL_NUM_CLIENTS)
return 0;
return iwpm_admin.client_list[nl_client];
}
void iwpm_set_valid(u8 nl_client, int valid)
{
if (nl_client >= RDMA_NL_NUM_CLIENTS)
return;
iwpm_admin.client_list[nl_client] = valid;
}
@ -608,7 +597,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
&mapping_num, IWPM_NLA_MAPINFO_SEND_NUM);
if (ret)
goto mapinfo_num_error;
ret = ibnl_unicast(skb, nlh, iwpm_pid);
ret = rdma_nl_unicast(skb, iwpm_pid);
if (ret) {
skb = NULL;
err_str = "Unable to send a nlmsg";
@ -637,7 +626,7 @@ static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
return -ENOMEM;
}
nlh->nlmsg_type = NLMSG_DONE;
ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid);
ret = rdma_nl_unicast(skb, iwpm_pid);
if (ret)
pr_warn("%s Unable to send a nlmsg\n", __func__);
return ret;

View File

@ -1,4 +1,5 @@
/*
* Copyright (c) 2017 Mellanox Technologies Inc. All rights reserved.
* Copyright (c) 2010 Voltaire Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@ -39,237 +40,253 @@
#include <rdma/rdma_netlink.h>
#include "core_priv.h"
struct ibnl_client {
struct list_head list;
int index;
int nops;
const struct ibnl_client_cbs *cb_table;
};
#include "core_priv.h"
static DEFINE_MUTEX(ibnl_mutex);
static DEFINE_MUTEX(rdma_nl_mutex);
static struct sock *nls;
static LIST_HEAD(client_list);
static struct {
const struct rdma_nl_cbs *cb_table;
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
int ibnl_chk_listeners(unsigned int group)
int rdma_nl_chk_listeners(unsigned int group)
{
if (netlink_has_listeners(nls, group) == 0)
return -1;
return 0;
return (netlink_has_listeners(nls, group)) ? 0 : -1;
}
EXPORT_SYMBOL(rdma_nl_chk_listeners);
/*
 * Check that @type names a known RDMA netlink client and that @op is
 * below the number of operations defined for that client.
 *
 * Valid client numbers start at 1, so max_num_ops[] is indexed by
 * @type - 1 and holds RDMA_NL_NUM_CLIENTS - 1 entries.
 *
 * Returns true when both @type and @op are in range, false otherwise.
 */
static bool is_nl_msg_valid(unsigned int type, unsigned int op)
{
	static const unsigned int max_num_ops[RDMA_NL_NUM_CLIENTS - 1] = {
		RDMA_NL_RDMA_CM_NUM_OPS,
		RDMA_NL_IWPM_NUM_OPS,
		0,
		RDMA_NL_LS_NUM_OPS,
		RDMA_NLDEV_NUM_OPS };

	/*
	 * This BUILD_BUG_ON is intended to catch addition of new
	 * RDMA netlink protocol without updating the array above.
	 */
	BUILD_BUG_ON(RDMA_NL_NUM_CLIENTS != 6);

	/*
	 * @type may be 0 (e.g. when it is derived from a received message
	 * type). Reject it explicitly: "type - 1" would otherwise wrap
	 * around to UINT_MAX and index far outside max_num_ops[].
	 */
	if (type == 0 || type >= RDMA_NL_NUM_CLIENTS)
		return false;

	return op < max_num_ops[type - 1];
}
int ibnl_add_client(int index, int nops,
const struct ibnl_client_cbs cb_table[])
static bool is_nl_valid(unsigned int type, unsigned int op)
{
struct ibnl_client *cur;
struct ibnl_client *nl_client;
const struct rdma_nl_cbs *cb_table;
nl_client = kmalloc(sizeof *nl_client, GFP_KERNEL);
if (!nl_client)
return -ENOMEM;
if (!is_nl_msg_valid(type, op))
return false;
nl_client->index = index;
nl_client->nops = nops;
nl_client->cb_table = cb_table;
cb_table = rdma_nl_types[type].cb_table;
if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
return false;
return true;
}
mutex_lock(&ibnl_mutex);
list_for_each_entry(cur, &client_list, list) {
if (cur->index == index) {
pr_warn("Client for %d already exists\n", index);
mutex_unlock(&ibnl_mutex);
kfree(nl_client);
return -EINVAL;
}
void rdma_nl_register(unsigned int index,
const struct rdma_nl_cbs cb_table[])
{
mutex_lock(&rdma_nl_mutex);
if (!is_nl_msg_valid(index, 0)) {
/*
* None of the clients is interested in the success/failure of
* this call. They want to see the error printed to the log and
* continue their initialization. Print a warning for them,
* because it is a programmer's error to be here.
*/
mutex_unlock(&rdma_nl_mutex);
WARN(true,
"The not-valid %u index was supplied to RDMA netlink\n",
index);
return;
}
list_add_tail(&nl_client->list, &client_list);
mutex_unlock(&ibnl_mutex);
return 0;
}
EXPORT_SYMBOL(ibnl_add_client);
int ibnl_remove_client(int index)
{
struct ibnl_client *cur, *next;
mutex_lock(&ibnl_mutex);
list_for_each_entry_safe(cur, next, &client_list, list) {
if (cur->index == index) {
list_del(&(cur->list));
mutex_unlock(&ibnl_mutex);
kfree(cur);
return 0;
}
if (rdma_nl_types[index].cb_table) {
mutex_unlock(&rdma_nl_mutex);
WARN(true,
"The %u index is already registered in RDMA netlink\n",
index);
return;
}
pr_warn("Can't remove callback for client idx %d. Not found\n", index);
mutex_unlock(&ibnl_mutex);
return -EINVAL;
rdma_nl_types[index].cb_table = cb_table;
mutex_unlock(&rdma_nl_mutex);
}
EXPORT_SYMBOL(ibnl_remove_client);
EXPORT_SYMBOL(rdma_nl_register);
void rdma_nl_unregister(unsigned int index)
{
mutex_lock(&rdma_nl_mutex);
rdma_nl_types[index].cb_table = NULL;
mutex_unlock(&rdma_nl_mutex);
}
EXPORT_SYMBOL(rdma_nl_unregister);
void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
int len, int client, int op, int flags)
{
unsigned char *prev_tail;
prev_tail = skb_tail_pointer(skb);
*nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
len, flags);
*nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), len, flags);
if (!*nlh)
goto out_nlmsg_trim;
(*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
return NULL;
return nlmsg_data(*nlh);
out_nlmsg_trim:
nlmsg_trim(skb, prev_tail);
return NULL;
}
EXPORT_SYMBOL(ibnl_put_msg);
int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
int len, void *data, int type)
{
unsigned char *prev_tail;
prev_tail = skb_tail_pointer(skb);
if (nla_put(skb, type, len, data))
goto nla_put_failure;
nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail;
if (nla_put(skb, type, len, data)) {
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
return 0;
nla_put_failure:
nlmsg_trim(skb, prev_tail - nlh->nlmsg_len);
return -EMSGSIZE;
}
EXPORT_SYMBOL(ibnl_put_attr);
static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct ibnl_client *client;
int type = nlh->nlmsg_type;
int index = RDMA_NL_GET_CLIENT(type);
unsigned int index = RDMA_NL_GET_CLIENT(type);
unsigned int op = RDMA_NL_GET_OP(type);
const struct rdma_nl_cbs *cb_table;
list_for_each_entry(client, &client_list, list) {
if (client->index == index) {
if (op >= client->nops || !client->cb_table[op].dump)
return -EINVAL;
if (!is_nl_valid(index, op))
return -EINVAL;
/*
* For response or local service set_timeout request,
* there is no need to use netlink_dump_start.
*/
if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
(index == RDMA_NL_LS &&
op == RDMA_NL_LS_OP_SET_TIMEOUT)) {
struct netlink_callback cb = {
.skb = skb,
.nlh = nlh,
.dump = client->cb_table[op].dump,
.module = client->cb_table[op].module,
};
cb_table = rdma_nl_types[index].cb_table;
return cb.dump(skb, &cb);
}
if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
{
struct netlink_dump_control c = {
.dump = client->cb_table[op].dump,
.module = client->cb_table[op].module,
};
return netlink_dump_start(nls, skb, nlh, &c);
}
}
/* FIXME: Convert IWCM to properly handle doit callbacks */
if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_RDMA_CM ||
index == RDMA_NL_IWCM) {
struct netlink_dump_control c = {
.dump = cb_table[op].dump,
};
return netlink_dump_start(nls, skb, nlh, &c);
}
pr_info("Index %d wasn't found in client list\n", index);
return -EINVAL;
if (cb_table[op].doit)
return cb_table[op].doit(skb, nlh, extack);
return 0;
}
static void ibnl_rcv_reply_skb(struct sk_buff *skb)
/*
* This function is similar to netlink_rcv_skb with one exception:
* It calls to the callback for the netlink messages without NLM_F_REQUEST
* flag. These messages are intended for RDMA_NL_LS consumer, so it is allowed
* for that consumer only.
*/
static int rdma_nl_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
struct nlmsghdr *,
struct netlink_ext_ack *))
{
struct netlink_ext_ack extack = {};
struct nlmsghdr *nlh;
int msglen;
int err;
/*
* Process responses until there is no more message or the first
* request. Generally speaking, it is not recommended to mix responses
* with requests.
*/
while (skb->len >= nlmsg_total_size(0)) {
int msglen;
nlh = nlmsg_hdr(skb);
err = 0;
if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
return;
return 0;
/* Handle response only */
if (nlh->nlmsg_flags & NLM_F_REQUEST)
return;
/*
* Generally speaking, only requests are handled by the
* kernel, but RDMA_NL_LS is different, because it runs
* the netlink scheme backwards: the kernel initiates
* messages and waits for a reply with data to keep the
* pathrecord cache in sync.
*/
if (!(nlh->nlmsg_flags & NLM_F_REQUEST) &&
(RDMA_NL_GET_CLIENT(nlh->nlmsg_type) != RDMA_NL_LS))
goto ack;
ibnl_rcv_msg(skb, nlh, NULL);
/* Skip control messages */
if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
goto ack;
err = cb(skb, nlh, &extack);
if (err == -EINTR)
goto skip;
ack:
if (nlh->nlmsg_flags & NLM_F_ACK || err)
netlink_ack(skb, nlh, err, &extack);
skip:
msglen = NLMSG_ALIGN(nlh->nlmsg_len);
if (msglen > skb->len)
msglen = skb->len;
skb_pull(skb, msglen);
}
return 0;
}
static void ibnl_rcv(struct sk_buff *skb)
static void rdma_nl_rcv(struct sk_buff *skb)
{
mutex_lock(&ibnl_mutex);
ibnl_rcv_reply_skb(skb);
netlink_rcv_skb(skb, &ibnl_rcv_msg);
mutex_unlock(&ibnl_mutex);
mutex_lock(&rdma_nl_mutex);
rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
mutex_unlock(&rdma_nl_mutex);
}
int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
__u32 pid)
/*
 * Unicast @skb to @pid through the RDMA netlink socket (nls), passing
 * MSG_DONTWAIT to netlink_unicast().
 *
 * Returns the negative error code reported by netlink_unicast(), or 0 for
 * any non-negative result.
 */
int rdma_nl_unicast(struct sk_buff *skb, u32 pid)
{
	int ret = netlink_unicast(nls, skb, pid, MSG_DONTWAIT);

	if (ret < 0)
		return ret;

	return 0;
}
EXPORT_SYMBOL(rdma_nl_unicast);
/*
 * Unicast @skb to @pid through the RDMA netlink socket (nls) with no
 * flags set, i.e. without MSG_DONTWAIT.
 * NOTE(review): the send can presumably block up to the socket's send
 * timeout - confirm.
 *
 * Returns the negative error code reported by netlink_unicast(), or 0 for
 * any non-negative result.
 */
int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid)
{
	int ret = netlink_unicast(nls, skb, pid, 0);

	if (ret < 0)
		return ret;

	return 0;
}
EXPORT_SYMBOL(ibnl_unicast);
EXPORT_SYMBOL(rdma_nl_unicast_wait);
int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
unsigned int group, gfp_t flags)
int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags)
{
return nlmsg_multicast(nls, skb, 0, group, flags);
}
EXPORT_SYMBOL(ibnl_multicast);
EXPORT_SYMBOL(rdma_nl_multicast);
int __init ibnl_init(void)
int __init rdma_nl_init(void)
{
struct netlink_kernel_cfg cfg = {
.input = ibnl_rcv,
.input = rdma_nl_rcv,
};
nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
if (!nls) {
pr_warn("Failed to create netlink socket\n");
if (!nls)
return -ENOMEM;
}
nls->sk_sndtimeo = 10 * HZ;
return 0;
}
void ibnl_cleanup(void)
void rdma_nl_exit(void)
{
struct ibnl_client *cur, *next;
int idx;
mutex_lock(&ibnl_mutex);
list_for_each_entry_safe(cur, next, &client_list, list) {
list_del(&(cur->list));
kfree(cur);
}
mutex_unlock(&ibnl_mutex);
for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
rdma_nl_unregister(idx);
netlink_kernel_release(nls);
}

View File

@ -0,0 +1,322 @@
/*
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <net/netlink.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"
/*
 * Attribute validation policy for the RDMA_NL_NLDEV netlink client.
 *
 * Every doit/dump handler in this file passes this table to nlmsg_parse()
 * so that incoming attributes are type-checked before they are read.
 * String attributes are bounded by the matching IB_*_NAME_MAX constant
 * minus one.
 *
 * NOTE(review): RDMA_NLDEV_ATTR_CAP_FLAGS is emitted by the fill helpers
 * below but has no entry here - confirm that is intentional.
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
.len = IB_DEVICE_NAME_MAX - 1},
[RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
.len = IB_FW_VERSION_NAME_MAX - 1},
[RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
};
/*
 * fill_dev_info - append the attributes describing one IB device to @msg
 * @msg:    netlink message under construction
 * @device: device to describe
 *
 * Attributes are emitted in this order: device index, device name,
 * rdma_end_port() of the device (as RDMA_NLDEV_ATTR_PORT_INDEX), device
 * capability flags, firmware version (only when the string is non-empty),
 * node GUID, system image GUID and node type.
 *
 * Return: 0 on success, -EMSGSIZE as soon as one attribute does not fit.
 */
static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
	char fw_str[IB_FW_VERSION_NAME_MAX];

	/* The capability flags are exported as a 64-bit attribute. */
	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index) ||
	    nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX,
			rdma_end_port(device)) ||
	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      device->attrs.device_cap_flags, 0))
		return -EMSGSIZE;

	ib_get_device_fw_str(device, fw_str);
	/* A device without firmware yields an empty string: skip it. */
	if (fw_str[0] != '\0' &&
	    nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw_str))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
			      be64_to_cpu(device->node_guid), 0) ||
	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
			      be64_to_cpu(device->attrs.sys_image_guid), 0) ||
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
		return -EMSGSIZE;

	return 0;
}
/*
 * fill_port_info - append the attributes describing one port to @msg
 * @msg:    netlink message under construction
 * @device: device owning the port
 * @port:   port number to describe
 *
 * Emits device index, device name and port index, then queries the port
 * with ib_query_port() and adds its capability flags. Subnet prefix, LID,
 * SM LID and LMC are added only when rdma_protocol_ib() is true for the
 * port. Port state and physical state are always added last.
 *
 * Return: 0 on success, -EMSGSIZE when an attribute does not fit, or the
 * error returned by ib_query_port().
 */
static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port)
{
	struct ib_port_attr pattr;
	int rc;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index) ||
	    nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	rc = ib_query_port(device, port, &pattr);
	if (rc)
		return rc;

	/* Port capability flags are widened to the 64-bit attribute. */
	BUILD_BUG_ON(sizeof(pattr.port_cap_flags) > sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      (u64)pattr.port_cap_flags, 0))
		return -EMSGSIZE;

	/* These attributes are reported for IB-protocol ports only. */
	if (rdma_protocol_ib(device, port)) {
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
				      pattr.subnet_prefix, 0) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, pattr.lid) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, pattr.sm_lid) ||
		    nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, pattr.lmc))
			return -EMSGSIZE;
	}

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, pattr.state) ||
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE,
		       pattr.phys_state))
		return -EMSGSIZE;

	return 0;
}
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct ib_device *device;
struct sk_buff *msg;
u32 index;
int err;
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, extack);
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = __ib_device_get_by_index(index);
if (!device)
return -EINVAL;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
err = fill_dev_info(msg, device);
if (err) {
nlmsg_free(msg);
return err;
}
nlmsg_end(msg, nlh);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
}
/*
 * _nldev_get_dumpit - per-device callback used by nldev_get_dumpit()
 * @device: device to dump
 * @skb:    dump buffer being filled
 * @cb:     dump state; cb->args[0] holds the index to resume from
 * @idx:    position of @device in the enumeration
 *
 * Devices whose @idx is below the resume index are skipped. Otherwise one
 * NLM_F_MULTI message describing @device is appended. If the header or
 * the attributes do not fit, the partial message is cancelled and @idx is
 * stored unchanged so that this device is retried on the next pass.
 *
 * Return: 0 when the device is skipped, otherwise skb->len.
 */
static int _nldev_get_dumpit(struct ib_device *device,
			     struct sk_buff *skb,
			     struct netlink_callback *cb,
			     unsigned int idx)
{
	int start = cb->args[0];
	struct nlmsghdr *nlh;

	if (idx < start)
		return 0;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, NLM_F_MULTI);

	/*
	 * nlmsg_put() returns NULL when the dump buffer is full. Treat that
	 * like a fill failure so nlmsg_end() never sees a NULL header.
	 */
	if (!nlh || fill_dev_info(skb, device)) {
		nlmsg_cancel(skb, nlh);
		goto out;
	}

	nlmsg_end(skb, nlh);
	idx++;

out:
	cb->args[0] = idx;
	return skb->len;
}
/*
 * nldev_get_dumpit - dump handler for RDMA_NLDEV_CMD_GET
 *
 * Delegates to ib_enum_all_devs(), which invokes _nldev_get_dumpit() with
 * @skb and @cb; its return value is passed back unchanged.
 */
static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
/*
* No lock is taken here: the code relies on ib_core's lists_rwsem
* for protection of the device list.
*/
return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}
static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct ib_device *device;
struct sk_buff *msg;
u32 index;
u32 port;
int err;
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, extack);
if (err || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = __ib_device_get_by_index(index);
if (!device)
return -EINVAL;
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
if (!rdma_is_port_valid(device, port))
return -EINVAL;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
err = fill_port_info(msg, device, port);
if (err) {
nlmsg_free(msg);
return err;
}
nlmsg_end(msg, nlh);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
}
/*
 * nldev_port_get_dumpit - dump handler for RDMA_NLDEV_CMD_PORT_GET
 * @skb: dump buffer being filled
 * @cb:  dump state; cb->nlh is the original request and cb->args[0] the
 *       index of the first port entry still to be emitted
 *
 * The request must carry RDMA_NLDEV_ATTR_DEV_INDEX. One NLM_F_MULTI
 * message is appended per port of that device, from rdma_start_port() to
 * rdma_end_port().
 *
 * Return: -EINVAL for a malformed request or unknown device, otherwise
 * skb->len.
 */
static int nldev_port_get_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	int start = cb->args[0];
	struct nlmsghdr *nlh;
	u32 idx = 0;
	u32 ifindex;
	int err;
	u32 p;

	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, NULL);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = __ib_device_get_by_index(ifindex);
	if (!device)
		return -EINVAL;

	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
		/*
		 * cb->args[0] holds the value of idx stored at the end of
		 * the previous pass (see the out: label). Entries below it
		 * were already emitted and are skipped; a value of zero
		 * dumps every port.
		 */
		if (idx < start) {
			idx++;
			continue;
		}

		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq,
				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
						 RDMA_NLDEV_CMD_PORT_GET),
				0, NLM_F_MULTI);

		/*
		 * nlmsg_put() returns NULL when the dump buffer is full.
		 * Stop here so nlmsg_end() never sees a NULL header; this
		 * port is retried on the next pass because idx is not
		 * advanced.
		 */
		if (!nlh || fill_port_info(skb, device, p)) {
			nlmsg_cancel(skb, nlh);
			goto out;
		}
		idx++;
		nlmsg_end(skb, nlh);
	}

out:
	cb->args[0] = idx;
	return skb->len;
}
/*
 * Handler table for the RDMA_NL_NLDEV client, indexed by command.
 * Each command provides a .doit handler and a .dump handler.
 * Registered by nldev_init().
 */
static const struct rdma_nl_cbs nldev_cb_table[] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
.dump = nldev_get_dumpit,
},
[RDMA_NLDEV_CMD_PORT_GET] = {
.doit = nldev_port_get_doit,
.dump = nldev_port_get_dumpit,
},
};
/* Register the nldev handler table as the RDMA_NL_NLDEV netlink client. */
void __init nldev_init(void)
{
rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
/* Unregister the RDMA_NL_NLDEV netlink client registered by nldev_init(). */
void __exit nldev_exit(void)
{
rdma_nl_unregister(RDMA_NL_NLDEV);
}

View File

@ -44,6 +44,8 @@
static struct workqueue_struct *gid_cache_wq;
static struct workqueue_struct *gid_cache_wq;
enum gid_op_type {
GID_DEL = 0,
GID_ADD

View File

@ -861,7 +861,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, gfp_mask);
ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
if (!ret)
ret = len;
else
@ -1021,9 +1021,9 @@ static void ib_nl_request_timeout(struct work_struct *work)
}
int ib_nl_handle_set_timeout(struct sk_buff *skb,
struct netlink_callback *cb)
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
int timeout, delta, abs_delta;
const struct nlattr *attr;
unsigned long flags;
@ -1033,8 +1033,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
int ret;
if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
!(NETLINK_CB(skb).sk) ||
!netlink_capable(skb, CAP_NET_ADMIN))
!(NETLINK_CB(skb).sk))
return -EPERM;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
@ -1098,9 +1097,9 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
}
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_callback *cb)
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
unsigned long flags;
struct ib_sa_query *query;
struct ib_mad_send_buf *send_buf;
@ -1109,8 +1108,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
int ret;
if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
!(NETLINK_CB(skb).sk) ||
!netlink_capable(skb, CAP_NET_ADMIN))
!(NETLINK_CB(skb).sk))
return -EPERM;
spin_lock_irqsave(&ib_nl_request_lock, flags);
@ -1420,7 +1418,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
(!(query->flags & IB_SA_QUERY_OPA))) {
if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!ib_nl_make_request(query, gfp_mask))
return id;
}

View File

@ -1210,8 +1210,8 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
ib_get_device_fw_str(dev, buf, PAGE_SIZE);
strlcat(buf, "\n", PAGE_SIZE);
ib_get_device_fw_str(dev, buf);
strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
return strlen(buf);
}

View File

@ -1383,8 +1383,9 @@ static int create_qp(struct ib_uverbs_file *file,
attr.rwq_ind_tbl = ind_tbl;
}
if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) +
sizeof(cmd->reserved1)) && cmd->reserved1) {
if (cmd_sz > sizeof(*cmd) &&
!ib_is_udata_cleared(ucore, sizeof(*cmd),
cmd_sz - sizeof(*cmd))) {
ret = -EOPNOTSUPP;
goto err_put;
}
@ -1482,11 +1483,21 @@ static int create_qp(struct ib_uverbs_file *file,
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
IB_QP_CREATE_SCATTER_FCS |
IB_QP_CREATE_CVLAN_STRIPPING)) {
IB_QP_CREATE_CVLAN_STRIPPING |
IB_QP_CREATE_SOURCE_QPN)) {
ret = -EINVAL;
goto err_put;
}
if (attr.create_flags & IB_QP_CREATE_SOURCE_QPN) {
if (!capable(CAP_NET_RAW)) {
ret = -EPERM;
goto err_put;
}
attr.source_qpn = cmd->source_qpn;
}
buf = (void *)cmd + sizeof(*cmd);
if (cmd_sz > sizeof(*cmd))
if (!(buf[0] == 0 && !memcmp(buf, buf + 1,

View File

@ -1244,6 +1244,18 @@ int ib_resolve_eth_dmac(struct ib_device *device,
if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
ah_attr->roce.dmac);
return 0;
}
if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
__be32 addr = 0;
memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4);
ip_eth_mc_map(addr, (char *)ah_attr->roce.dmac);
} else {
ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw,
(char *)ah_attr->roce.dmac);
}
} else {
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
@ -1302,6 +1314,61 @@ int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr,
}
EXPORT_SYMBOL(ib_modify_qp_with_udata);
/*
 * ib_get_eth_speed - translate the Ethernet link speed of a port into an
 *                    IB speed/width pair
 * @dev:      IB device
 * @port_num: port to query; its link layer must be Ethernet
 * @speed:    output, one of the IB_SPEED_* values
 * @width:    output, one of the IB_WIDTH_* values
 *
 * The net_device behind the port is obtained through dev->get_netdev() and
 * queried with __ethtool_get_link_ksettings() under the RTNL lock. When
 * the query fails, or reports SPEED_UNKNOWN, SPEED_1000 is assumed and a
 * warning is logged.
 *
 * Return: 0 on success, -EINVAL when the port is not Ethernet, -EOPNOTSUPP
 * when the device has no get_netdev callback, -ENODEV when no net_device
 * is associated with the port.
 */
int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
{
	int rc;
	u32 netdev_speed;
	struct net_device *netdev;
	struct ethtool_link_ksettings lksettings;

	if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
		return -EINVAL;

	if (!dev->get_netdev)
		return -EOPNOTSUPP;

	netdev = dev->get_netdev(dev, port_num);
	if (!netdev)
		return -ENODEV;

	rtnl_lock();
	rc = __ethtool_get_link_ksettings(netdev, &lksettings);
	rtnl_unlock();

	/*
	 * SPEED_UNKNOWN is all ones when stored in a u32. Without this check
	 * it would fall through to the fastest branch below and be reported
	 * as 4X EDR.
	 */
	if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
		netdev_speed = lksettings.base.speed;
	} else {
		netdev_speed = SPEED_1000;
		pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name,
			netdev_speed);
	}

	/* Drop the reference only after the last use of netdev (its name). */
	dev_put(netdev);

	if (netdev_speed <= SPEED_1000) {
		*width = IB_WIDTH_1X;
		*speed = IB_SPEED_SDR;
	} else if (netdev_speed <= SPEED_10000) {
		*width = IB_WIDTH_1X;
		*speed = IB_SPEED_FDR10;
	} else if (netdev_speed <= SPEED_20000) {
		*width = IB_WIDTH_4X;
		*speed = IB_SPEED_DDR;
	} else if (netdev_speed <= SPEED_25000) {
		*width = IB_WIDTH_1X;
		*speed = IB_SPEED_EDR;
	} else if (netdev_speed <= SPEED_40000) {
		*width = IB_WIDTH_4X;
		*speed = IB_SPEED_FDR10;
	} else {
		*width = IB_WIDTH_4X;
		*speed = IB_SPEED_EDR;
	}

	return 0;
}
EXPORT_SYMBOL(ib_get_eth_speed);
int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
@ -1569,15 +1636,53 @@ EXPORT_SYMBOL(ib_dealloc_fmr);
/* Multicast groups */
/*
 * is_valid_mcast_lid - decide whether @lid may be used to attach/detach
 *                      @qp to/from a multicast group
 * @qp:  queue pair being attached or detached
 * @lid: multicast LID supplied by the caller
 *
 * The LID is only range-checked when the QP is known to sit on an
 * InfiniBand port, or when every port of the device is InfiniBand. In all
 * other cases the LID is accepted as is.
 *
 * Return: true when @lid is acceptable, false otherwise.
 */
static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
{
	struct ib_qp_init_attr init_attr = {};
	struct ib_qp_attr attr = {};
	int port;

	/* If QP state >= init, it is assigned to a port and we can check this
	 * port only.
	 *
	 * rdma_port_get_link_layer() is used rather than the driver's
	 * get_link_layer callback because that callback is optional and may
	 * be NULL.
	 */
	if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
		if (attr.qp_state >= IB_QPS_INIT) {
			if (rdma_port_get_link_layer(qp->device, attr.port_num) !=
			    IB_LINK_LAYER_INFINIBAND)
				return true;
			goto lid_check;
		}
	}

	/* Can't get a quick answer, iterate over all valid port numbers.
	 *
	 * If we have at least one Ethernet port, RoCE annex declares that
	 * multicast LID should be ignored. We can't tell at this step if the
	 * QP belongs to an IB or Ethernet port.
	 */
	for (port = rdma_start_port(qp->device);
	     port <= rdma_end_port(qp->device); port++)
		if (rdma_port_get_link_layer(qp->device, port) !=
		    IB_LINK_LAYER_INFINIBAND)
			return true;

	/* If all the ports are IB, we can check according to IB spec. */
lid_check:
	return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
		 lid == be16_to_cpu(IB_LID_PERMISSIVE));
}
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
int ret;
if (!qp->device->attach_mcast)
return -ENOSYS;
if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
lid == be16_to_cpu(IB_LID_PERMISSIVE))
if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
ret = qp->device->attach_mcast(qp, gid, lid);
@ -1593,9 +1698,9 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->detach_mcast)
return -ENOSYS;
if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
lid == be16_to_cpu(IB_LID_PERMISSIVE))
if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
ret = qp->device->detach_mcast(qp, gid, lid);

View File

@ -223,50 +223,6 @@ int bnxt_re_modify_device(struct ib_device *ibdev,
return 0;
}
static void __to_ib_speed_width(struct net_device *netdev, u8 *speed, u8 *width)
{
struct ethtool_link_ksettings lksettings;
u32 espeed;
if (netdev->ethtool_ops && netdev->ethtool_ops->get_link_ksettings) {
memset(&lksettings, 0, sizeof(lksettings));
rtnl_lock();
netdev->ethtool_ops->get_link_ksettings(netdev, &lksettings);
rtnl_unlock();
espeed = lksettings.base.speed;
} else {
espeed = SPEED_UNKNOWN;
}
switch (espeed) {
case SPEED_1000:
*speed = IB_SPEED_SDR;
*width = IB_WIDTH_1X;
break;
case SPEED_10000:
*speed = IB_SPEED_QDR;
*width = IB_WIDTH_1X;
break;
case SPEED_20000:
*speed = IB_SPEED_DDR;
*width = IB_WIDTH_4X;
break;
case SPEED_25000:
*speed = IB_SPEED_EDR;
*width = IB_WIDTH_1X;
break;
case SPEED_40000:
*speed = IB_SPEED_QDR;
*width = IB_WIDTH_4X;
break;
case SPEED_50000:
break;
default:
*speed = IB_SPEED_SDR;
*width = IB_WIDTH_1X;
break;
}
}
/* Port */
int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr *port_attr)
@ -308,25 +264,9 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
* IB stack to avoid race in the NETDEV_UNREG path
*/
if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
__to_ib_speed_width(rdev->netdev, &port_attr->active_speed,
&port_attr->active_width);
return 0;
}
int bnxt_re_modify_port(struct ib_device *ibdev, u8 port_num,
int port_modify_mask,
struct ib_port_modify *port_modify)
{
switch (port_modify_mask) {
case IB_PORT_SHUTDOWN:
break;
case IB_PORT_INIT_TYPE:
break;
case IB_PORT_RESET_QKEY_CNTR:
break;
default:
break;
}
if (!ib_get_eth_speed(ibdev, port_num, &port_attr->active_speed,
&port_attr->active_width))
return -EINVAL;
return 0;
}
@ -846,6 +786,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
struct bnxt_re_dev *rdev = qp->rdev;
int rc;
bnxt_qplib_del_flush_qp(&qp->qplib_qp);
rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP");
@ -860,6 +801,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
return rc;
}
bnxt_qplib_del_flush_qp(&qp->qplib_qp);
rc = bnxt_qplib_destroy_qp(&rdev->qplib_res,
&rdev->qp1_sqp->qplib_qp);
if (rc) {
@ -1404,6 +1346,21 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
}
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
qp->qplib_qp.state = __from_ib_qp_state(qp_attr->qp_state);
if (!qp->sumem &&
qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
dev_dbg(rdev_to_dev(rdev),
"Move QP = %p to flush list\n",
qp);
bnxt_qplib_add_flush_qp(&qp->qplib_qp);
}
if (!qp->sumem &&
qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
dev_dbg(rdev_to_dev(rdev),
"Move QP = %p out of flush list\n",
qp);
bnxt_qplib_del_flush_qp(&qp->qplib_qp);
}
}
if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
qp->qplib_qp.modify_flags |=
@ -2414,6 +2371,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
}
cq->qplib_cq.max_wqe = entries;
cq->qplib_cq.cnq_hw_ring_id = rdev->nq.ring_id;
cq->qplib_cq.nq = &rdev->nq;
rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq);
if (rc) {
@ -2921,6 +2879,10 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
sq->send_phantom = false;
}
}
if (ncqe < budget)
ncqe += bnxt_qplib_process_flush_list(&cq->qplib_cq,
cqe + ncqe,
budget - ncqe);
if (!ncqe)
break;

View File

@ -141,9 +141,6 @@ int bnxt_re_modify_device(struct ib_device *ibdev,
struct ib_device_modify *device_modify);
int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr *port_attr);
int bnxt_re_modify_port(struct ib_device *ibdev, u8 port_num,
int port_modify_mask,
struct ib_port_modify *port_modify);
int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable);
int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,

View File

@ -70,7 +70,6 @@ static char version[] =
MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(ROCE_DRV_MODULE_VERSION);
/* globals */
static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
@ -474,7 +473,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->modify_device = bnxt_re_modify_device;
ibdev->query_port = bnxt_re_query_port;
ibdev->modify_port = bnxt_re_modify_port;
ibdev->get_port_immutable = bnxt_re_get_port_immutable;
ibdev->query_pkey = bnxt_re_query_pkey;
ibdev->query_gid = bnxt_re_query_gid;
@ -835,6 +833,42 @@ static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
mutex_unlock(&rdev->qp_lock);
}
static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
{
struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
struct bnxt_qplib_gid gid;
u16 gid_idx, index;
int rc = 0;
if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
return 0;
if (!sgid_tbl) {
dev_err(rdev_to_dev(rdev), "QPLIB: SGID table not allocated");
return -EINVAL;
}
for (index = 0; index < sgid_tbl->active; index++) {
gid_idx = sgid_tbl->hw_id[index];
if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
sizeof(bnxt_qplib_gid_zero)))
continue;
/* need to modify the VLAN enable setting of non VLAN GID only
* as setting is done for VLAN GID while adding GID
*/
if (sgid_tbl->vlan[index])
continue;
memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid));
rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx,
rdev->qplib_res.netdev->dev_addr);
}
return rc;
}
static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
{
u32 prio_map = 0, tmp_map = 0;
@ -854,8 +888,6 @@ static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
tmp_map = dcb_ieee_getapp_mask(netdev, &app);
prio_map |= tmp_map;
if (!prio_map)
prio_map = -EFAULT;
return prio_map;
}
@ -881,10 +913,7 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
int rc;
/* Get priority for roce */
rc = bnxt_re_get_priority_mask(rdev);
if (rc < 0)
return rc;
prio_map = (u8)rc;
prio_map = bnxt_re_get_priority_mask(rdev);
if (prio_map == rdev->cur_prio_map)
return 0;
@ -906,6 +935,16 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
return rc;
}
/* Actual priorities are not programmed as they are already
* done by L2 driver; just enable or disable priority vlan tagging
*/
if ((prio_map == 0 && rdev->qplib_res.prio) ||
(prio_map != 0 && !rdev->qplib_res.prio)) {
rdev->qplib_res.prio = prio_map ? true : false;
bnxt_re_update_gid(rdev);
}
return 0;
}
@ -998,7 +1037,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
/* Establish RCFW Communication Channel to initialize the context
* memory for the function and all child VFs
*/
rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw);
rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw,
BNXT_RE_MAX_QPC_COUNT);
if (rc)
goto fail;

View File

@ -51,6 +51,168 @@
#include "qplib_fp.h"
static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq);
static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp);
/*
 * Clear the SQ flags condition, send_phantom and single on @qp.
 * NOTE(review): presumably these track the phantom-WQE workaround state -
 * confirm against the definition of struct bnxt_qplib_q.
 */
static void bnxt_qplib_cancel_phantom_processing(struct bnxt_qplib_qp *qp)
{
qp->sq.condition = false;
qp->sq.send_phantom = false;
qp->sq.single = false;
}
/* Flush list */
/*
 * Put @qp on the flush lists of its completion queues.
 *
 * The SQ is linked onto the send CQ's sqf_head list unless it is already
 * marked flushed; phantom processing is cancelled at the same time. The RQ
 * is linked onto the receive CQ's rqf_head list unless the QP uses an SRQ
 * or the RQ is already marked flushed.
 *
 * No locking is done here. bnxt_qplib_add_flush_qp() calls this with both
 * CQ locks held.
 */
static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
{
	struct bnxt_qplib_cq *send_cq = qp->scq;
	struct bnxt_qplib_cq *recv_cq = qp->rcq;

	if (!qp->sq.flushed) {
		dev_dbg(&send_cq->hwq.pdev->dev,
			"QPLIB: FP: Adding to SQ Flush list = %p",
			qp);
		bnxt_qplib_cancel_phantom_processing(qp);
		list_add_tail(&qp->sq_flush, &send_cq->sqf_head);
		qp->sq.flushed = true;
	}

	/* Nothing to do for the RQ when an SRQ is used or it is queued. */
	if (qp->srq || qp->rq.flushed)
		return;

	dev_dbg(&recv_cq->hwq.pdev->dev,
		"QPLIB: FP: Adding to RQ Flush list = %p",
		qp);
	list_add_tail(&qp->rq_flush, &recv_cq->rqf_head);
	qp->rq.flushed = true;
}
/*
 * Take the hwq locks of both CQs of @qp: first the send CQ with interrupts
 * saved into @flags, then the receive CQ. When both point to the same CQ
 * the lock is taken only once and just the sparse annotation is balanced
 * for the second one. Undone by bnxt_qplib_release_cq_locks().
 */
void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
unsigned long *flags)
__acquires(&qp->scq->hwq.lock) __acquires(&qp->rcq->hwq.lock)
{
spin_lock_irqsave(&qp->scq->hwq.lock, *flags);
if (qp->scq == qp->rcq)
__acquire(&qp->rcq->hwq.lock);
else
spin_lock(&qp->rcq->hwq.lock);
}
/*
 * Counterpart of bnxt_qplib_acquire_cq_locks(): release the receive CQ
 * lock first (annotation only when both CQs are the same), then the send
 * CQ lock, restoring the interrupt state from @flags.
 */
void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp,
unsigned long *flags)
__releases(&qp->scq->hwq.lock) __releases(&qp->rcq->hwq.lock)
{
if (qp->scq == qp->rcq)
__release(&qp->rcq->hwq.lock);
else
spin_unlock(&qp->rcq->hwq.lock);
spin_unlock_irqrestore(&qp->scq->hwq.lock, *flags);
}
/*
 * Return the "other" completion queue of @qp relative to @cq: the receive
 * CQ when @cq is the send CQ, the send CQ otherwise. Returns NULL when the
 * QP uses one CQ for both directions.
 */
static struct bnxt_qplib_cq *bnxt_qplib_find_buddy_cq(struct bnxt_qplib_qp *qp,
						      struct bnxt_qplib_cq *cq)
{
	/* A single shared CQ has no buddy. */
	if (qp->scq == qp->rcq)
		return NULL;

	return (qp->scq == cq) ? qp->rcq : qp->scq;
}
/*
 * Lock the buddy CQ of @cq for @qp, as found by bnxt_qplib_find_buddy_cq().
 * When there is no buddy only the sparse annotation on @cq's lock is
 * emitted and no lock is taken.
 */
static void bnxt_qplib_lock_buddy_cq(struct bnxt_qplib_qp *qp,
				     struct bnxt_qplib_cq *cq)
	__acquires(&buddy_cq->hwq.lock)
{
	struct bnxt_qplib_cq *buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq);

	if (buddy_cq)
		spin_lock(&buddy_cq->hwq.lock);
	else
		__acquire(&cq->hwq.lock);
}
/*
 * Counterpart of bnxt_qplib_lock_buddy_cq(): unlock the buddy CQ of @cq
 * for @qp, or only balance the sparse annotation when there is no buddy.
 */
static void bnxt_qplib_unlock_buddy_cq(struct bnxt_qplib_qp *qp,
				       struct bnxt_qplib_cq *cq)
	__releases(&buddy_cq->hwq.lock)
{
	struct bnxt_qplib_cq *buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq);

	if (buddy_cq)
		spin_unlock(&buddy_cq->hwq.lock);
	else
		__release(&cq->hwq.lock);
}
/*
 * Locked wrapper around __bnxt_qplib_add_flush_qp(): takes both CQ locks
 * of @qp, adds the QP to the flush lists and releases the locks.
 */
void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
{
unsigned long flags;
bnxt_qplib_acquire_cq_locks(qp, &flags);
__bnxt_qplib_add_flush_qp(qp);
bnxt_qplib_release_cq_locks(qp, &flags);
}
/*
 * Take @qp off the CQ flush lists.
 *
 * The SQ entry is unlinked when the SQ is marked flushed. The RQ entry is
 * unlinked when the QP does not use an SRQ and the RQ is marked flushed.
 * The flushed flags are cleared accordingly.
 *
 * No locking is done here. bnxt_qplib_del_flush_qp() calls this with both
 * CQ locks held.
 */
static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
{
	if (qp->sq.flushed) {
		qp->sq.flushed = false;
		list_del(&qp->sq_flush);
	}

	if (!qp->srq && qp->rq.flushed) {
		qp->rq.flushed = false;
		list_del(&qp->rq_flush);
	}
}
/*
 * Remove @qp from the flush lists and reset its queues.
 *
 * With both CQ locks held: __clean_cq() is called on the send and receive
 * CQs with the QP pointer as handle, the producer and consumer indices of
 * the SQ and RQ are reset to zero, and the QP is unlinked from the flush
 * lists.
 */
void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
{
unsigned long flags;
bnxt_qplib_acquire_cq_locks(qp, &flags);
__clean_cq(qp->scq, (u64)(unsigned long)qp);
qp->sq.hwq.prod = 0;
qp->sq.hwq.cons = 0;
__clean_cq(qp->rcq, (u64)(unsigned long)qp);
qp->rq.hwq.prod = 0;
qp->rq.hwq.cons = 0;
__bnxt_qplib_del_flush_qp(qp);
bnxt_qplib_release_cq_locks(qp, &flags);
}
/*
 * Work handler that delivers a CQ notification outside the NQ path.
 *
 * @work is embedded in a struct bnxt_qplib_nq_work, which carries the CQ
 * and NQ to notify. Under the CQ's compl_lock the NQ's cqn_handler is
 * invoked if one is set and the CQ's arm_state is non-zero. The work item
 * is always freed before returning.
 */
static void bnxt_qpn_cqn_sched_task(struct work_struct *work)
{
	struct bnxt_qplib_nq_work *item =
			container_of(work, struct bnxt_qplib_nq_work, work);
	struct bnxt_qplib_cq *cq = item->cq;
	struct bnxt_qplib_nq *nq = item->nq;

	if (!cq || !nq)
		goto free_item;

	spin_lock_bh(&cq->compl_lock);
	if (atomic_read(&cq->arm_state) && nq->cqn_handler) {
		dev_dbg(&nq->pdev->dev,
			"%s:Trigger cq = %p event nq = %p\n",
			__func__, cq, nq);
		nq->cqn_handler(nq, cq);
	}
	spin_unlock_bh(&cq->compl_lock);

free_item:
	kfree(item);
}
static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res,
struct bnxt_qplib_qp *qp)
@ -119,6 +281,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data;
struct bnxt_qplib_hwq *hwq = &nq->hwq;
struct nq_base *nqe, **nq_ptr;
struct bnxt_qplib_cq *cq;
int num_cqne_processed = 0;
u32 sw_cons, raw_cons;
u16 type;
@ -143,15 +306,17 @@ static void bnxt_qplib_service_nq(unsigned long data)
q_handle = le32_to_cpu(nqcne->cq_handle_low);
q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high)
<< 32;
bnxt_qplib_arm_cq_enable((struct bnxt_qplib_cq *)
((unsigned long)q_handle));
if (!nq->cqn_handler(nq, (struct bnxt_qplib_cq *)
((unsigned long)q_handle)))
cq = (struct bnxt_qplib_cq *)(unsigned long)q_handle;
bnxt_qplib_arm_cq_enable(cq);
spin_lock_bh(&cq->compl_lock);
atomic_set(&cq->arm_state, 0);
if (!nq->cqn_handler(nq, (cq)))
num_cqne_processed++;
else
dev_warn(&nq->pdev->dev,
"QPLIB: cqn - type 0x%x not handled",
type);
spin_unlock_bh(&cq->compl_lock);
break;
}
case NQ_BASE_TYPE_DBQ_EVENT:
@ -190,6 +355,10 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
{
if (nq->cqn_wq) {
destroy_workqueue(nq->cqn_wq);
nq->cqn_wq = NULL;
}
/* Make sure the HW is stopped! */
synchronize_irq(nq->vector);
tasklet_disable(&nq->worker);
@ -216,7 +385,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
void *, u8 event))
{
resource_size_t nq_base;
int rc;
int rc = -1;
nq->pdev = pdev;
nq->vector = msix_vector;
@ -227,6 +396,11 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
tasklet_init(&nq->worker, bnxt_qplib_service_nq, (unsigned long)nq);
/* Have a task to schedule CQ notifiers in post send case */
nq->cqn_wq = create_singlethread_workqueue("bnxt_qplib_nq");
if (!nq->cqn_wq)
goto fail;
nq->requested = false;
rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, "bnxt_qplib_nq", nq);
if (rc) {
@ -401,8 +575,8 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
qp->id = le32_to_cpu(resp.xid);
qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
sq->flush_in_progress = false;
rq->flush_in_progress = false;
rcfw->qp_tbl[qp->id].qp_id = qp->id;
rcfw->qp_tbl[qp->id].qp_handle = (void *)qp;
return 0;
@ -615,8 +789,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
qp->id = le32_to_cpu(resp.xid);
qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
sq->flush_in_progress = false;
rq->flush_in_progress = false;
INIT_LIST_HEAD(&qp->sq_flush);
INIT_LIST_HEAD(&qp->rq_flush);
rcfw->qp_tbl[qp->id].qp_id = qp->id;
rcfw->qp_tbl[qp->id].qp_handle = (void *)qp;
return 0;
@ -963,13 +1139,19 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
u16 cmd_flags = 0;
int rc;
rcfw->qp_tbl[qp->id].qp_id = BNXT_QPLIB_QP_ID_INVALID;
rcfw->qp_tbl[qp->id].qp_handle = NULL;
RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags);
req.qp_cid = cpu_to_le32(qp->id);
rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
(void *)&resp, NULL, 0);
if (rc)
if (rc) {
rcfw->qp_tbl[qp->id].qp_id = qp->id;
rcfw->qp_tbl[qp->id].qp_handle = qp;
return rc;
}
/* Must walk the associated CQs to nullified the QP ptr */
spin_lock_irqsave(&qp->scq->hwq.lock, flags);
@ -1074,14 +1256,21 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_swq *swq;
struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
struct sq_sge *hw_sge;
struct bnxt_qplib_nq_work *nq_work = NULL;
bool sch_handler = false;
u32 sw_prod;
u8 wqe_size16;
int i, rc = 0, data_len = 0, pkt_num = 0;
__le32 temp32;
if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) {
rc = -EINVAL;
goto done;
if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
sch_handler = true;
dev_dbg(&sq->hwq.pdev->dev,
"%s Error QP. Scheduling for poll_cq\n",
__func__);
goto queue_err;
}
}
if (bnxt_qplib_queue_full(sq)) {
@ -1301,12 +1490,35 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
SQ_PSN_SEARCH_NEXT_PSN_MASK));
}
queue_err:
if (sch_handler) {
/* Store the ULP info in the software structures */
sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
swq = &sq->swq[sw_prod];
swq->wr_id = wqe->wr_id;
swq->type = wqe->type;
swq->flags = wqe->flags;
if (qp->sig_type)
swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP;
swq->start_psn = sq->psn & BTH_PSN_MASK;
}
sq->hwq.prod++;
qp->wqe_cnt++;
done:
if (sch_handler) {
nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
if (nq_work) {
nq_work->cq = qp->scq;
nq_work->nq = qp->scq->nq;
INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task);
queue_work(qp->scq->nq->cqn_wq, &nq_work->work);
} else {
dev_err(&sq->hwq.pdev->dev,
"QPLIB: FP: Failed to allocate SQ nq_work!");
rc = -ENOMEM;
}
}
return rc;
}
@ -1334,15 +1546,17 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_q *rq = &qp->rq;
struct rq_wqe *rqe, **rqe_ptr;
struct sq_sge *hw_sge;
struct bnxt_qplib_nq_work *nq_work = NULL;
bool sch_handler = false;
u32 sw_prod;
int i, rc = 0;
if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
dev_err(&rq->hwq.pdev->dev,
"QPLIB: FP: QP (0x%x) is in the 0x%x state",
qp->id, qp->state);
rc = -EINVAL;
goto done;
sch_handler = true;
dev_dbg(&rq->hwq.pdev->dev,
"%s Error QP. Scheduling for poll_cq\n",
__func__);
goto queue_err;
}
if (bnxt_qplib_queue_full(rq)) {
dev_err(&rq->hwq.pdev->dev,
@ -1378,7 +1592,27 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
/* Supply the rqe->wr_id index to the wr_id_tbl for now */
rqe->wr_id[0] = cpu_to_le32(sw_prod);
queue_err:
if (sch_handler) {
/* Store the ULP info in the software structures */
sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
rq->swq[sw_prod].wr_id = wqe->wr_id;
}
rq->hwq.prod++;
if (sch_handler) {
nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
if (nq_work) {
nq_work->cq = qp->rcq;
nq_work->nq = qp->rcq->nq;
INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task);
queue_work(qp->rcq->nq->cqn_wq, &nq_work->work);
} else {
dev_err(&rq->hwq.pdev->dev,
"QPLIB: FP: Failed to allocate RQ nq_work!");
rc = -ENOMEM;
}
}
done:
return rc;
}
@ -1471,6 +1705,9 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
cq->period = BNXT_QPLIB_QUEUE_START_PERIOD;
init_waitqueue_head(&cq->waitq);
INIT_LIST_HEAD(&cq->sqf_head);
INIT_LIST_HEAD(&cq->rqf_head);
spin_lock_init(&cq->compl_lock);
bnxt_qplib_arm_cq_enable(cq);
return 0;
@ -1513,9 +1750,13 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
while (*budget) {
sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
if (sw_cons == sw_prod) {
sq->flush_in_progress = false;
break;
}
/* Skip the FENCE WQE completions */
if (sq->swq[sw_cons].wr_id == BNXT_QPLIB_FENCE_WRID) {
bnxt_qplib_cancel_phantom_processing(qp);
goto skip_compl;
}
memset(cqe, 0, sizeof(*cqe));
cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR;
cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
@ -1525,6 +1766,7 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
cqe->type = sq->swq[sw_cons].type;
cqe++;
(*budget)--;
skip_compl:
sq->hwq.cons++;
}
*pcqe = cqe;
@ -1536,11 +1778,24 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
}
static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
int opcode, struct bnxt_qplib_cqe **pcqe, int *budget)
struct bnxt_qplib_cqe **pcqe, int *budget)
{
struct bnxt_qplib_cqe *cqe;
u32 sw_prod, sw_cons;
int rc = 0;
int opcode = 0;
switch (qp->type) {
case CMDQ_CREATE_QP1_TYPE_GSI:
opcode = CQ_BASE_CQE_TYPE_RES_RAWETH_QP1;
break;
case CMDQ_CREATE_QP_TYPE_RC:
opcode = CQ_BASE_CQE_TYPE_RES_RC;
break;
case CMDQ_CREATE_QP_TYPE_UD:
opcode = CQ_BASE_CQE_TYPE_RES_UD;
break;
}
/* Flush the rest of the RQ */
sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
@ -1567,6 +1822,21 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
return rc;
}
void bnxt_qplib_mark_qp_error(void *qp_handle)
{
struct bnxt_qplib_qp *qp = qp_handle;
if (!qp)
return;
/* Must block new posting of SQ and RQ */
qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
bnxt_qplib_cancel_phantom_processing(qp);
/* Add qp to flush list of the CQ */
__bnxt_qplib_add_flush_qp(qp);
}
/* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive)
* CQE is tracked from sw_cq_cons to max_element but valid only if VALID=1
*/
@ -1694,10 +1964,12 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
cqe_sq_cons, sq->hwq.max_elements);
return -EINVAL;
}
/* If we were in the middle of flushing the SQ, continue */
if (sq->flush_in_progress)
goto flush;
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
/* Require to walk the sq's swq to fabricate CQEs for all previously
* signaled SWQEs due to CQE aggregation from the current sq cons
* to the cqe_sq_cons
@ -1733,11 +2005,9 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
sw_sq_cons, cqe->wr_id, cqe->status);
cqe++;
(*budget)--;
sq->flush_in_progress = true;
/* Must block new posting of SQ and RQ */
qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
sq->condition = false;
sq->single = false;
bnxt_qplib_lock_buddy_cq(qp, cq);
bnxt_qplib_mark_qp_error(qp);
bnxt_qplib_unlock_buddy_cq(qp, cq);
} else {
if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
/* Before we complete, do WA 9060 */
@ -1768,15 +2038,6 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
* the WC for this CQE
*/
sq->single = false;
if (!sq->flush_in_progress)
goto done;
flush:
/* Require to walk the sq's swq to fabricate CQEs for all
* previously posted SWQEs due to the error CQE received
*/
rc = __flush_sq(sq, qp, pcqe, budget);
if (!rc)
sq->flush_in_progress = false;
done:
return rc;
}
@ -1798,6 +2059,12 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL");
return -EINVAL;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
cqe = *pcqe;
cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
cqe->length = le32_to_cpu(hwcqe->length);
@ -1817,8 +2084,6 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
wr_id_idx, rq->hwq.max_elements);
return -EINVAL;
}
if (rq->flush_in_progress)
goto flush_rq;
cqe->wr_id = rq->swq[wr_id_idx].wr_id;
cqe++;
@ -1827,12 +2092,13 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
*pcqe = cqe;
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
rq->flush_in_progress = true;
flush_rq:
rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_RC, pcqe, budget);
if (!rc)
rq->flush_in_progress = false;
/* Add qp to flush list of the CQ */
bnxt_qplib_lock_buddy_cq(qp, cq);
__bnxt_qplib_add_flush_qp(qp);
bnxt_qplib_unlock_buddy_cq(qp, cq);
}
done:
return rc;
}
@ -1853,6 +2119,11 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL");
return -EINVAL;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
cqe = *pcqe;
cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
cqe->length = le32_to_cpu(hwcqe->length);
@ -1876,8 +2147,6 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
wr_id_idx, rq->hwq.max_elements);
return -EINVAL;
}
if (rq->flush_in_progress)
goto flush_rq;
cqe->wr_id = rq->swq[wr_id_idx].wr_id;
cqe++;
@ -1886,12 +2155,12 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
*pcqe = cqe;
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
rq->flush_in_progress = true;
flush_rq:
rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_UD, pcqe, budget);
if (!rc)
rq->flush_in_progress = false;
/* Add qp to flush list of the CQ */
bnxt_qplib_lock_buddy_cq(qp, cq);
__bnxt_qplib_add_flush_qp(qp);
bnxt_qplib_unlock_buddy_cq(qp, cq);
}
done:
return rc;
}
@ -1932,6 +2201,11 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
"QPLIB: process_cq Raw/QP1 qp is NULL");
return -EINVAL;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
cqe = *pcqe;
cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
cqe->flags = le16_to_cpu(hwcqe->flags);
@ -1960,8 +2234,6 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
wr_id_idx, rq->hwq.max_elements);
return -EINVAL;
}
if (rq->flush_in_progress)
goto flush_rq;
cqe->wr_id = rq->swq[wr_id_idx].wr_id;
cqe++;
@ -1970,13 +2242,13 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
*pcqe = cqe;
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
rq->flush_in_progress = true;
flush_rq:
rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_RAWETH_QP1, pcqe,
budget);
if (!rc)
rq->flush_in_progress = false;
/* Add qp to flush list of the CQ */
bnxt_qplib_lock_buddy_cq(qp, cq);
__bnxt_qplib_add_flush_qp(qp);
bnxt_qplib_unlock_buddy_cq(qp, cq);
}
done:
return rc;
}
@ -1990,7 +2262,6 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe *cqe;
u32 sw_cons = 0, cqe_cons;
int rc = 0;
u8 opcode = 0;
/* Check the Status */
if (hwcqe->status != CQ_TERMINAL_STATUS_OK)
@ -2005,6 +2276,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
"QPLIB: FP: CQ Process terminal qp is NULL");
return -EINVAL;
}
/* Must block new posting of SQ and RQ */
qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
@ -2023,9 +2295,12 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
cqe_cons, sq->hwq.max_elements);
goto do_rq;
}
/* If we were in the middle of flushing, continue */
if (sq->flush_in_progress)
goto flush_sq;
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
goto sq_done;
}
/* Terminal CQE can also include aggregated successful CQEs prior.
* So we must complete all CQEs from the current sq's cons to the
@ -2055,11 +2330,6 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
rc = -EAGAIN;
goto sq_done;
}
sq->flush_in_progress = true;
flush_sq:
rc = __flush_sq(sq, qp, pcqe, budget);
if (!rc)
sq->flush_in_progress = false;
sq_done:
if (rc)
return rc;
@ -2075,26 +2345,23 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
cqe_cons, rq->hwq.max_elements);
goto done;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
rc = 0;
goto done;
}
/* Terminal CQE requires all posted RQEs to complete with FLUSHED_ERR
* from the current rq->cons to the rq->prod regardless what the
* rq->cons the terminal CQE indicates
*/
rq->flush_in_progress = true;
switch (qp->type) {
case CMDQ_CREATE_QP1_TYPE_GSI:
opcode = CQ_BASE_CQE_TYPE_RES_RAWETH_QP1;
break;
case CMDQ_CREATE_QP_TYPE_RC:
opcode = CQ_BASE_CQE_TYPE_RES_RC;
break;
case CMDQ_CREATE_QP_TYPE_UD:
opcode = CQ_BASE_CQE_TYPE_RES_UD;
break;
}
rc = __flush_rq(rq, qp, opcode, pcqe, budget);
if (!rc)
rq->flush_in_progress = false;
/* Add qp to flush list of the CQ */
bnxt_qplib_lock_buddy_cq(qp, cq);
__bnxt_qplib_add_flush_qp(qp);
bnxt_qplib_unlock_buddy_cq(qp, cq);
done:
return rc;
}
@ -2115,6 +2382,33 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
return 0;
}
/*
 * bnxt_qplib_process_flush_list - fabricate flush completions for every QP
 * linked on this CQ's flush lists.
 * @cq:       CQ whose sqf_head / rqf_head lists are walked
 * @cqe:      output array that receives the fabricated completions
 * @num_cqes: number of entries available in @cqe
 *
 * Walks the SQ flush list first, then the RQ flush list, calling
 * __flush_sq() / __flush_rq() for each QP with a shared budget.  The whole
 * walk runs under cq->hwq.lock with interrupts saved/disabled.
 *
 * Return: @num_cqes minus the budget left over after the walk, i.e. the
 * number of budget units the flush helpers consumed.
 */
int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
				  struct bnxt_qplib_cqe *cqe,
				  int num_cqes)
{
	struct bnxt_qplib_qp *qp = NULL;
	/*
	 * __flush_rq() is declared with "int *budget" and __flush_sq() is
	 * handed the same variable, so keep the budget a plain int instead
	 * of passing a u32 * where an int * is expected.
	 */
	int budget = num_cqes;
	unsigned long flags;

	spin_lock_irqsave(&cq->hwq.lock, flags);
	list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
		dev_dbg(&cq->hwq.pdev->dev,
			"QPLIB: FP: Flushing SQ QP= %p",
			qp);
		__flush_sq(&qp->sq, qp, &cqe, &budget);
	}

	list_for_each_entry(qp, &cq->rqf_head, rq_flush) {
		dev_dbg(&cq->hwq.pdev->dev,
			"QPLIB: FP: Flushing RQ QP= %p",
			qp);
		__flush_rq(&qp->rq, qp, &cqe, &budget);
	}
	spin_unlock_irqrestore(&cq->hwq.lock, flags);

	return num_cqes - budget;
}
int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
int num_cqes, struct bnxt_qplib_qp **lib_qp)
{
@ -2205,6 +2499,7 @@ void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
spin_lock_irqsave(&cq->hwq.lock, flags);
if (arm_type)
bnxt_qplib_arm_cq(cq, arm_type);
/* Using cq->arm_state variable to track whether to issue cq handler */
atomic_set(&cq->arm_state, 1);
spin_unlock_irqrestore(&cq->hwq.lock, flags);
}

View File

@ -220,19 +220,20 @@ struct bnxt_qplib_q {
u16 q_full_delta;
u16 max_sge;
u32 psn;
bool flush_in_progress;
bool condition;
bool single;
bool send_phantom;
u32 phantom_wqe_cnt;
u32 phantom_cqe_cnt;
u32 next_cq_cons;
bool flushed;
};
struct bnxt_qplib_qp {
struct bnxt_qplib_pd *pd;
struct bnxt_qplib_dpi *dpi;
u64 qp_handle;
#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF
u32 id;
u8 type;
u8 sig_type;
@ -296,6 +297,8 @@ struct bnxt_qplib_qp {
dma_addr_t sq_hdr_buf_map;
void *rq_hdr_buf;
dma_addr_t rq_hdr_buf_map;
struct list_head sq_flush;
struct list_head rq_flush;
};
#define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base)
@ -351,6 +354,7 @@ struct bnxt_qplib_cq {
u16 period;
struct bnxt_qplib_hwq hwq;
u32 cnq_hw_ring_id;
struct bnxt_qplib_nq *nq;
bool resize_in_progress;
struct scatterlist *sghead;
u32 nmap;
@ -360,6 +364,9 @@ struct bnxt_qplib_cq {
unsigned long flags;
#define CQ_FLAGS_RESIZE_IN_PROG 1
wait_queue_head_t waitq;
struct list_head sqf_head, rqf_head;
atomic_t arm_state;
spinlock_t compl_lock; /* synch CQ handlers */
};
#define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq)
@ -417,6 +424,13 @@ struct bnxt_qplib_nq {
(struct bnxt_qplib_nq *nq,
void *srq,
u8 event);
struct workqueue_struct *cqn_wq;
};
/*
 * Deferred-work item tying a CQ to its NQ.  bnxt_qplib_post_recv()
 * allocates one for a QP in the error state, fills in cq/nq from the QP's
 * receive CQ and queues it on that NQ's cqn_wq workqueue.
 */
struct bnxt_qplib_nq_work {
struct work_struct work; /* embedded work item handed to queue_work() */
struct bnxt_qplib_nq *nq; /* NQ the CQ belongs to */
struct bnxt_qplib_cq *cq; /* CQ this work item was queued for */
};
void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq);
@ -453,4 +467,13 @@ bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq);
void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp);
void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp);
void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
unsigned long *flags);
void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp,
unsigned long *flags);
int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe *cqe,
int num_cqes);
#endif /* __BNXT_QPLIB_FP_H__ */

View File

@ -44,6 +44,9 @@
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_rcfw.h"
#include "qplib_sp.h"
#include "qplib_fp.h"
static void bnxt_qplib_service_creq(unsigned long data);
/* Hardware communication channel */
@ -279,16 +282,29 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
struct creq_qp_event *qp_event)
{
struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
struct creq_qp_error_notification *err_event;
struct bnxt_qplib_crsq *crsqe;
unsigned long flags;
struct bnxt_qplib_qp *qp;
u16 cbit, blocked = 0;
u16 cookie;
__le16 mcookie;
u32 qp_id;
switch (qp_event->event) {
case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
err_event = (struct creq_qp_error_notification *)qp_event;
qp_id = le32_to_cpu(err_event->xid);
qp = rcfw->qp_tbl[qp_id].qp_handle;
dev_dbg(&rcfw->pdev->dev,
"QPLIB: Received QP error notification");
dev_dbg(&rcfw->pdev->dev,
"QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
qp_id, err_event->req_err_state_reason,
err_event->res_err_state_reason);
bnxt_qplib_acquire_cq_locks(qp, &flags);
bnxt_qplib_mark_qp_error(qp);
bnxt_qplib_release_cq_locks(qp, &flags);
break;
default:
/* Command Response */
@ -507,6 +523,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
{
kfree(rcfw->qp_tbl);
kfree(rcfw->crsqe_tbl);
bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq);
bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq);
@ -514,7 +531,8 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
}
int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
struct bnxt_qplib_rcfw *rcfw)
struct bnxt_qplib_rcfw *rcfw,
int qp_tbl_sz)
{
rcfw->pdev = pdev;
rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT;
@ -541,6 +559,12 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
if (!rcfw->crsqe_tbl)
goto fail;
rcfw->qp_tbl_size = qp_tbl_sz;
rcfw->qp_tbl = kcalloc(qp_tbl_sz, sizeof(struct bnxt_qplib_qp_node),
GFP_KERNEL);
if (!rcfw->qp_tbl)
goto fail;
return 0;
fail:

View File

@ -148,6 +148,11 @@ struct bnxt_qplib_rcfw_sbuf {
u32 size;
};
/*
 * One slot of the rcfw->qp_tbl array (allocated with qp_tbl_sz entries in
 * bnxt_qplib_alloc_rcfw_channel()).  The QP error notification handler
 * indexes the table with the event's xid and reads qp_handle from the slot.
 */
struct bnxt_qplib_qp_node {
u32 qp_id; /* QP id */
void *qp_handle; /* ptr to qplib_qp */
};
/* RCFW Communication Channels */
struct bnxt_qplib_rcfw {
struct pci_dev *pdev;
@ -181,11 +186,13 @@ struct bnxt_qplib_rcfw {
/* Actual Cmd and Resp Queues */
struct bnxt_qplib_hwq cmdq;
struct bnxt_qplib_crsq *crsqe_tbl;
int qp_tbl_size;
struct bnxt_qplib_qp_node *qp_tbl;
};
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
struct bnxt_qplib_rcfw *rcfw);
struct bnxt_qplib_rcfw *rcfw, int qp_tbl_sz);
void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
struct bnxt_qplib_rcfw *rcfw,
@ -207,4 +214,5 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx, int is_virtfn);
void bnxt_qplib_mark_qp_error(void *qp_handle);
#endif /* __BNXT_QPLIB_RCFW_H__ */

View File

@ -468,9 +468,11 @@ static void bnxt_qplib_free_sgid_tbl(struct bnxt_qplib_res *res,
kfree(sgid_tbl->tbl);
kfree(sgid_tbl->hw_id);
kfree(sgid_tbl->ctx);
kfree(sgid_tbl->vlan);
sgid_tbl->tbl = NULL;
sgid_tbl->hw_id = NULL;
sgid_tbl->ctx = NULL;
sgid_tbl->vlan = NULL;
sgid_tbl->max = 0;
sgid_tbl->active = 0;
}
@ -491,8 +493,15 @@ static int bnxt_qplib_alloc_sgid_tbl(struct bnxt_qplib_res *res,
if (!sgid_tbl->ctx)
goto out_free2;
sgid_tbl->vlan = kcalloc(max, sizeof(u8), GFP_KERNEL);
if (!sgid_tbl->vlan)
goto out_free3;
sgid_tbl->max = max;
return 0;
out_free3:
kfree(sgid_tbl->ctx);
sgid_tbl->ctx = NULL;
out_free2:
kfree(sgid_tbl->hw_id);
sgid_tbl->hw_id = NULL;
@ -514,6 +523,7 @@ static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res,
}
memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
memset(sgid_tbl->vlan, 0, sizeof(u8) * sgid_tbl->max);
sgid_tbl->active = 0;
}

View File

@ -116,6 +116,7 @@ struct bnxt_qplib_sgid_tbl {
u16 max;
u16 active;
void *ctx;
u8 *vlan;
};
struct bnxt_qplib_pkey_tbl {
@ -188,6 +189,7 @@ struct bnxt_qplib_res {
struct bnxt_qplib_sgid_tbl sgid_tbl;
struct bnxt_qplib_pkey_tbl pkey_tbl;
struct bnxt_qplib_dpi_tbl dpi_tbl;
bool prio;
};
#define to_bnxt_qplib(ptr, type, member) \

View File

@ -213,6 +213,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
}
memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
sizeof(bnxt_qplib_gid_zero));
sgid_tbl->vlan[index] = 0;
sgid_tbl->active--;
dev_dbg(&res->pdev->dev,
"QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x",
@ -265,28 +266,32 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct cmdq_add_gid req;
struct creq_add_gid_resp resp;
u16 cmd_flags = 0;
u32 temp32[4];
u16 temp16[3];
int rc;
RCFW_CMD_PREP(req, ADD_GID, cmd_flags);
memcpy(temp32, gid->data, sizeof(struct bnxt_qplib_gid));
req.gid[0] = cpu_to_be32(temp32[3]);
req.gid[1] = cpu_to_be32(temp32[2]);
req.gid[2] = cpu_to_be32(temp32[1]);
req.gid[3] = cpu_to_be32(temp32[0]);
if (vlan_id != 0xFFFF)
req.vlan = cpu_to_le16((vlan_id &
CMDQ_ADD_GID_VLAN_VLAN_ID_MASK) |
CMDQ_ADD_GID_VLAN_TPID_TPID_8100 |
CMDQ_ADD_GID_VLAN_VLAN_EN);
req.gid[0] = cpu_to_be32(((u32 *)gid->data)[3]);
req.gid[1] = cpu_to_be32(((u32 *)gid->data)[2]);
req.gid[2] = cpu_to_be32(((u32 *)gid->data)[1]);
req.gid[3] = cpu_to_be32(((u32 *)gid->data)[0]);
/*
* driver should ensure that all RoCE traffic is always VLAN
* tagged if RoCE traffic is running on non-zero VLAN ID or
* RoCE traffic is running on non-zero Priority.
*/
if ((vlan_id != 0xFFFF) || res->prio) {
if (vlan_id != 0xFFFF)
req.vlan = cpu_to_le16
(vlan_id & CMDQ_ADD_GID_VLAN_VLAN_ID_MASK);
req.vlan |= cpu_to_le16
(CMDQ_ADD_GID_VLAN_TPID_TPID_8100 |
CMDQ_ADD_GID_VLAN_VLAN_EN);
}
/* MAC in network format */
memcpy(temp16, smac, 6);
req.src_mac[0] = cpu_to_be16(temp16[0]);
req.src_mac[1] = cpu_to_be16(temp16[1]);
req.src_mac[2] = cpu_to_be16(temp16[2]);
req.src_mac[0] = cpu_to_be16(((u16 *)smac)[0]);
req.src_mac[1] = cpu_to_be16(((u16 *)smac)[1]);
req.src_mac[2] = cpu_to_be16(((u16 *)smac)[2]);
rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
(void *)&resp, NULL, 0);
@ -297,6 +302,9 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
/* Add GID to the sgid_tbl */
memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
sgid_tbl->active++;
if (vlan_id != 0xFFFF)
sgid_tbl->vlan[free_idx] = 1;
dev_dbg(&res->pdev->dev,
"QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x",
free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active);
@ -306,6 +314,43 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
return 0;
}
/*
 * bnxt_qplib_update_sgid - send a MODIFY_GID command for an SGID entry.
 * @sgid_tbl: SGID table; used to reach the owning bnxt_qplib_res
 * @gid:      GID value to program
 * @gid_idx:  value placed in the request's gid_index field
 * @smac:     source MAC, read as three 16-bit words
 *
 * The four 32-bit words of @gid are stored in reverse order, each passed
 * through cpu_to_be32().  When res->prio is set, the TPID-8100 and VLAN
 * enable bits are OR-ed into the request's vlan field.
 *
 * Return: whatever bnxt_qplib_rcfw_send_message() returns.
 */
int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
			   struct bnxt_qplib_gid *gid, u16 gid_idx,
			   u8 *smac)
{
	struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
						   struct bnxt_qplib_res,
						   sgid_tbl);
	u32 *gid_words = (u32 *)gid->data;
	u16 *mac_words = (u16 *)smac;
	struct creq_modify_gid_resp resp;
	struct cmdq_modify_gid req;
	u16 cmd_flags = 0;
	int i;

	RCFW_CMD_PREP(req, MODIFY_GID, cmd_flags);

	/* GID words go out last-to-first */
	for (i = 0; i < 4; i++)
		req.gid[i] = cpu_to_be32(gid_words[3 - i]);

	if (res->prio)
		req.vlan |= cpu_to_le16(CMDQ_ADD_GID_VLAN_TPID_TPID_8100 |
					CMDQ_ADD_GID_VLAN_VLAN_EN);

	/* MAC in network format */
	for (i = 0; i < 3; i++)
		req.src_mac[i] = cpu_to_be16(mac_words[i]);

	req.gid_index = cpu_to_le16(gid_idx);

	return bnxt_qplib_rcfw_send_message(res->rcfw, (void *)&req,
					    (void *)&resp, NULL, 0);
}
/* pkeys */
int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index,

View File

@ -135,6 +135,8 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id,
bool update, u32 *index);
int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct bnxt_qplib_gid *gid, u16 gid_idx, u8 *smac);
int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index,
u16 *pkey);

View File

@ -1473,8 +1473,8 @@ struct cmdq_modify_gid {
u8 resp_size;
u8 reserved8;
__le64 resp_addr;
__le32 gid[4];
__le16 src_mac[3];
__be32 gid[4];
__be16 src_mac[3];
__le16 vlan;
#define CMDQ_MODIFY_GID_VLAN_VLAN_ID_MASK 0xfffUL
#define CMDQ_MODIFY_GID_VLAN_VLAN_ID_SFT 0

View File

@ -45,7 +45,6 @@
MODULE_AUTHOR("Boyd Faulkner, Steve Wise");
MODULE_DESCRIPTION("Chelsio T3 RDMA Driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
static void open_rnic_dev(struct t3cdev *);
static void close_rnic_dev(struct t3cdev *);

View File

@ -1336,8 +1336,7 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str,
size_t str_len)
static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
{
struct iwch_dev *iwch_dev = to_iwch_dev(ibdev);
struct ethtool_drvinfo info;
@ -1345,7 +1344,7 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str,
pr_debug("%s dev 0x%p\n", __func__, iwch_dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
snprintf(str, str_len, "%s", info.fw_version);
snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version);
}
int iwch_register_device(struct iwch_dev *dev)

View File

@ -44,7 +44,6 @@
MODULE_AUTHOR("Steve Wise");
MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
static int allow_db_fc_on_t5;
module_param(allow_db_fc_on_t5, int, 0644);

View File

@ -517,14 +517,13 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
static void get_dev_fw_str(struct ib_device *dev, char *str,
size_t str_len)
static void get_dev_fw_str(struct ib_device *dev, char *str)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev);
pr_debug("%s dev 0x%p\n", __func__, dev);
snprintf(str, str_len, "%u.%u.%u.%u",
snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u.%u",
FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers),
FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers),
FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers),

View File

@ -8,7 +8,7 @@
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
eprom.o file_ops.o firmware.o \
eprom.o exp_rcv.o file_ops.o firmware.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \

View File

@ -1,5 +1,5 @@
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -335,10 +335,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
sde->cpu = cpu;
cpumask_clear(&msix->mask);
cpumask_set_cpu(cpu, &msix->mask);
dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n",
msix->msix.vector, irq_type_names[msix->type],
dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
msix->irq, irq_type_names[msix->type],
sde->this_idx, cpu);
irq_set_affinity_hint(msix->msix.vector, &msix->mask);
irq_set_affinity_hint(msix->irq, &msix->mask);
/*
* Set the new cpu in the hfi1_affinity_node and clean
@ -387,7 +387,7 @@ static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
{
struct irq_affinity_notify *notify = &msix->notify;
notify->irq = msix->msix.vector;
notify->irq = msix->irq;
notify->notify = hfi1_irq_notifier_notify;
notify->release = hfi1_irq_notifier_release;
@ -472,10 +472,10 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
}
cpumask_set_cpu(cpu, &msix->mask);
dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
msix->msix.vector, irq_type_names[msix->type],
dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
msix->irq, irq_type_names[msix->type],
extra, cpu);
irq_set_affinity_hint(msix->msix.vector, &msix->mask);
irq_set_affinity_hint(msix->irq, &msix->mask);
if (msix->type == IRQ_SDMA) {
sde->cpu = cpu;
@ -533,7 +533,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
}
}
irq_set_affinity_hint(msix->msix.vector, NULL);
irq_set_affinity_hint(msix->irq, NULL);
cpumask_clear(&msix->mask);
mutex_unlock(&node_affinity.lock);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -75,24 +75,26 @@ struct hfi1_msix_entry;
/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void);
/* Initialize driver affinity data */
int hfi1_dev_affinity_init(struct hfi1_devdata *);
int hfi1_dev_affinity_init(struct hfi1_devdata *dd);
/*
* Set IRQ affinity to a CPU. The function will determine the
* CPU and set the affinity to it.
*/
int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
int hfi1_get_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_msix_entry *msix);
/*
* Remove the IRQ's CPU affinity. This function also updates
* any internal CPU tracking data
*/
void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_msix_entry *msix);
/*
* Determine a CPU affinity for a user process, if the process does not
* have an affinity set yet.
*/
int hfi1_get_proc_affinity(int);
int hfi1_get_proc_affinity(int node);
/* Release a CPU used by a user process. */
void hfi1_put_proc_affinity(int);
void hfi1_put_proc_affinity(int cpu);
struct hfi1_affinity_node {
int node;

View File

@ -237,7 +237,7 @@ static inline void aspm_disable_all(struct hfi1_devdata *dd)
{
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
u16 i;
for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
@ -256,7 +256,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
{
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
u16 i;
aspm_enable(dd);
@ -284,7 +284,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
static inline void aspm_init(struct hfi1_devdata *dd)
{
unsigned i;
u16 i;
spin_lock_init(&dd->aspm_lock);
dd->aspm_supported = aspm_hw_l1_supported(dd);

File diff suppressed because it is too large Load Diff

View File

@ -384,6 +384,7 @@
#define VERIFY_CAP_LOCAL_FABRIC 0x08
#define VERIFY_CAP_LOCAL_LINK_WIDTH 0x09
#define LOCAL_DEVICE_ID 0x0a
#define RESERVED_REGISTERS 0x0b
#define LOCAL_LNI_INFO 0x0c
#define REMOTE_LNI_INFO 0x0d
#define MISC_STATUS 0x0e
@ -506,6 +507,9 @@
#define DOWN_REMOTE_REASON_SHIFT 16
#define DOWN_REMOTE_REASON_MASK 0xff
#define HOST_INTERFACE_VERSION_SHIFT 16
#define HOST_INTERFACE_VERSION_MASK 0xff
/* verify capability PHY power management bits */
#define PWRM_BER_CONTROL 0x1
#define PWRM_BANDWIDTH_CONTROL 0x2
@ -605,11 +609,11 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 *data);
int write_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 data);
void __iomem *get_csr_addr(
struct hfi1_devdata *dd,
const struct hfi1_devdata *dd,
u32 offset);
static inline void __iomem *get_kctxt_csr_addr(
struct hfi1_devdata *dd,
const struct hfi1_devdata *dd,
int ctxt,
u32 offset0)
{
@ -704,6 +708,7 @@ int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
/* chip.c */
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
u8 *ver_patch);
int write_host_interface_version(struct hfi1_devdata *dd, u8 version);
void read_guid(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
@ -744,6 +749,7 @@ int is_bx(struct hfi1_devdata *dd);
u32 read_physical_state(struct hfi1_devdata *dd);
u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
u32 get_logical_state(struct hfi1_pportdata *ppd);
void cache_physical_state(struct hfi1_pportdata *ppd);
const char *opa_lstate_name(u32 lstate);
const char *opa_pstate_name(u32 pstate);
u32 driver_physical_state(struct hfi1_pportdata *ppd);
@ -1347,21 +1353,21 @@ enum {
u64 get_all_cpu_total(u64 __percpu *cntr);
void hfi1_start_cleanup(struct hfi1_devdata *dd);
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
struct ib_header *hfi1_get_msgheader(
struct hfi1_devdata *dd, __le32 *rhf_addr);
void hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order);
void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt);
void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
struct hfi1_ctxtdata *rcd);
u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp);
u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp);
u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd);
int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which);
int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);
int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey);
int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt);
int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey);
int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd,
u16 jkey);
int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt);
int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt,
u16 pkey);
int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt);
void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);

View File

@ -325,6 +325,7 @@ struct diag_pkt {
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
/* misc. */
#define SC15_PACKET 0xF
#define SIZE_OF_CRC 1
#define LIM_MGMT_P_KEY 0x7FFF

View File

@ -96,7 +96,6 @@ MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Intel Omni-Path Architecture driver");
MODULE_VERSION(HFI1_DRIVER_VERSION);
/*
* MAX_PKT_RCV is the max # of packets processed per receive interrupt.
@ -196,7 +195,7 @@ int hfi1_count_active_units(void)
spin_lock_irqsave(&hfi1_devs_lock, flags);
list_for_each_entry(dd, &hfi1_dev_list, list) {
if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase)
if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1)
continue;
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@ -224,6 +223,20 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
(offset * RCV_BUF_BLOCK_SIZE));
}
/*
 * Return a pointer to the header that belongs to the receive header flags
 * word at @rhf_addr.
 *
 * The result is @rhf_addr moved back by dd->rhf_offset and then forward by
 * the value rhf_hdrq_offset() extracts from the RHF, both counted in
 * __le32 elements.
 */
static inline void *hfi1_get_header(struct hfi1_devdata *dd,
				    __le32 *rhf_addr)
{
	__le32 *entry_base = rhf_addr - dd->rhf_offset;

	return (void *)(entry_base + rhf_hdrq_offset(rhf_to_cpu(rhf_addr)));
}
/*
 * Typed wrapper around hfi1_get_header(): returns the same address viewed
 * as a struct ib_header.
 */
static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd,
						   __le32 *rhf_addr)
{
	void *hdr = hfi1_get_header(dd, rhf_addr);

	return (struct ib_header *)hdr;
}
/*
* Validate and encode a given RcvArray Buffer size.
* The function will check whether the given size falls within
@ -249,7 +262,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
{
struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
int lnh = ib_get_lnh(rhdr);
u8 lnh = ib_get_lnh(rhdr);
bool has_grh = false;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@ -257,37 +271,42 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
return;
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &rhdr->u.oth;
} else if (lnh == HFI1_LRH_GRH) {
has_grh = true;
packet->ohdr = &rhdr->u.l.oth;
packet->grh = &rhdr->u.l.grh;
} else {
goto drop;
}
if (packet->rhf & RHF_TID_ERR) {
/* For TIDERR and RC QPs preemptively schedule a NAK */
struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
u16 lid = ib_get_dlid(rhdr);
u32 dlid = ib_get_dlid(rhdr);
u32 qp_num;
u32 rcv_flags = 0;
u32 mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE);
/* Sanity check packet */
if (tlen < 24)
goto drop;
/* Check for GRH */
if (lnh == HFI1_LRH_BTH) {
ohdr = &rhdr->u.oth;
} else if (lnh == HFI1_LRH_GRH) {
if (has_grh) {
u32 vtf;
struct ib_grh *grh = packet->grh;
ohdr = &rhdr->u.l.oth;
if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
if (grh->next_hdr != IB_GRH_NEXT_HDR)
goto drop;
vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow);
vtf = be32_to_cpu(grh->version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
rcv_flags |= HFI1_HAS_GRH;
} else {
goto drop;
}
/* Get the destination QP number. */
qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
qp_num = ib_bth_get_qpn(packet->ohdr);
if (dlid < mlid_base) {
struct rvt_qp *qp;
unsigned long flags;
@ -312,11 +331,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
hfi1_rc_hdrerr(
rcd,
rhdr,
rcv_flags,
qp);
hfi1_rc_hdrerr(rcd, packet, qp);
break;
default:
/* For now don't handle any other QP types */
@ -332,9 +347,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
switch (rte) {
case RHF_RTE_ERROR_OP_CODE_ERR:
{
u32 opcode;
void *ebuf = NULL;
__be32 *bth = NULL;
u8 opcode;
if (rhf_use_egr_bfr(packet->rhf))
ebuf = packet->ebuf;
@ -342,16 +356,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if (!ebuf)
goto drop; /* this should never happen */
if (lnh == HFI1_LRH_BTH)
bth = (__be32 *)ebuf;
else if (lnh == HFI1_LRH_GRH)
bth = (__be32 *)((char *)ebuf + sizeof(struct ib_grh));
else
goto drop;
opcode = be32_to_cpu(bth[0]) >> 24;
opcode &= 0xff;
opcode = ib_bth_get_opcode(packet->ohdr);
if (opcode == IB_OPCODE_CNP) {
/*
* Only in pre-B0 h/w is the CNP_OPCODE handled
@ -365,7 +370,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
lqpn = ib_bth_get_qpn(packet->ohdr);
rcu_read_lock();
qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
if (!qp) {
@ -415,7 +420,6 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd,
packet->rhf = rhf_to_cpu(packet->rhf_addr);
packet->rhqoff = rcd->head;
packet->numpkt = 0;
packet->rcv_flags = 0;
}
void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
@ -424,21 +428,18 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct ib_header *hdr = pkt->hdr;
struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = NULL;
struct ib_grh *grh = pkt->grh;
u32 rqpn = 0, bth1;
u16 rlid, dlid = ib_get_dlid(hdr);
u8 sc, svc_type;
bool is_mcast = false;
if (pkt->rcv_flags & HFI1_HAS_GRH)
grh = &hdr->u.l.grh;
switch (qp->ibqp.qp_type) {
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
rlid = ib_get_slid(hdr);
rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
rqpn = ib_get_sqpn(ohdr);
svc_type = IB_CC_SVCTYPE_UD;
is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(dlid != be16_to_cpu(IB_LID_PERMISSIVE));
@ -461,7 +462,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
bth1 = be32_to_cpu(ohdr->bth[1]);
if (do_cnp && (bth1 & IB_FECN_SMASK)) {
u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
u16 pkey = ib_bth_get_pkey(ohdr);
return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
}
@ -591,9 +592,10 @@ static void __prescan_rxq(struct hfi1_packet *packet)
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
packet->grh = NULL;
} else if (lnh == HFI1_LRH_GRH) {
packet->ohdr = &hdr->u.l.oth;
packet->rcv_flags |= HFI1_HAS_GRH;
packet->grh = &hdr->u.l.grh;
} else {
goto next; /* just in case */
}
@ -698,10 +700,8 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
int ret;
packet->hdr = hfi1_get_msgheader(packet->rcd->dd,
packet->rhf_addr);
packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
packet->etype = rhf_rcv_type(packet->rhf);
/* total length */
packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
/* retrieve eager buffer details */
@ -759,7 +759,7 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet)
packet->etail, 0, 0);
packet->updegr = 0;
}
packet->rcv_flags = 0;
packet->grh = NULL;
}
static inline void finish_packet(struct hfi1_packet *packet)
@ -837,9 +837,9 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
return last;
}
static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt)
static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt)
{
int i;
u16 i;
/*
* For dynamically allocated kernel contexts (like vnic) switch
@ -857,9 +857,9 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt)
&handle_receive_interrupt_nodma_rtail;
}
static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt)
static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt)
{
int i;
u16 i;
/*
* For dynamically allocated kernel contexts (like vnic) switch
@ -879,7 +879,7 @@ static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt)
void set_all_slowpath(struct hfi1_devdata *dd)
{
int i;
u16 i;
/* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
@ -896,20 +896,25 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
struct hfi1_devdata *dd)
{
struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
packet->rhf_addr);
u8 etype = rhf_rcv_type(packet->rhf);
u8 sc = SC15_PACKET;
if (etype == RHF_RCV_TYPE_IB &&
hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) {
int hwstate = read_logical_state(dd);
if (etype == RHF_RCV_TYPE_IB) {
struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
packet->rhf_addr);
sc = hfi1_9B_get_sc5(hdr, packet->rhf);
}
if (sc != SC15_PACKET) {
int hwstate = driver_lstate(rcd->ppd);
if (hwstate != LSTATE_ACTIVE) {
dd_dev_info(dd, "Unexpected link state %d\n", hwstate);
if (hwstate != IB_PORT_ACTIVE) {
dd_dev_info(dd,
"Unexpected link state %s\n",
opa_lstate_name(hwstate));
return 0;
}
queue_work(rcd->ppd->hfi1_wq, lsaw);
queue_work(rcd->ppd->link_wq, lsaw);
return 1;
}
return 0;
@ -1063,7 +1068,7 @@ void receive_interrupt_work(struct work_struct *work)
struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
linkstate_active_work);
struct hfi1_devdata *dd = ppd->dd;
int i;
u16 i;
/* Received non-SC15 packet implies neighbor_normal */
ppd->neighbor_normal = 1;
@ -1264,7 +1269,8 @@ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
*/
int hfi1_reset_device(int unit)
{
int ret, i;
int ret;
u16 i;
struct hfi1_devdata *dd = hfi1_lookup(unit);
struct hfi1_pportdata *ppd;
unsigned long flags;
@ -1277,7 +1283,7 @@ int hfi1_reset_device(int unit)
dd_dev_info(dd, "Reset on unit %u requested\n", unit);
if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) {
if (!dd->kregbase1 || !(dd->flags & HFI1_PRESENT)) {
dd_dev_info(dd,
"Invalid unit number %u or not initialized or not present\n",
unit);
@ -1321,6 +1327,58 @@ int hfi1_reset_device(int unit)
return ret;
}
/*
 * Fill in the header pointer and header length of @packet.
 *
 * packet->hdr is set to the message header located for packet->rhf_addr,
 * and packet->hlen to the byte distance from that header to the RHF.
 *
 * hfi1_get_msgheader() already returns a struct ib_header pointer, so no
 * cast is applied to its result.
 */
static inline void hfi1_setup_ib_header(struct hfi1_packet *packet)
{
	packet->hdr = hfi1_get_msgheader(packet->rcd->dd, packet->rhf_addr);
	packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
}
static int hfi1_setup_9B_packet(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct ib_header *hdr;
u8 lnh;
hfi1_setup_ib_header(packet);
hdr = packet->hdr;
lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
packet->grh = NULL;
} else if (lnh == HFI1_LRH_GRH) {
u32 vtf;
packet->ohdr = &hdr->u.l.oth;
packet->grh = &hdr->u.l.grh;
if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
goto drop;
vtf = be32_to_cpu(packet->grh->version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
} else {
goto drop;
}
/* Query commonly used fields from packet header */
packet->opcode = ib_bth_get_opcode(packet->ohdr);
packet->slid = ib_get_slid(hdr);
packet->dlid = ib_get_dlid(hdr);
packet->sl = ib_get_sl(hdr);
packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf);
packet->pad = ib_bth_get_pad(packet->ohdr);
packet->extra_byte = 0;
packet->fecn = ib_bth_get_fecn(packet->ohdr);
packet->becn = ib_bth_get_becn(packet->ohdr);
return 0;
drop:
ibp->rvp.n_pkt_drops++;
return -EINVAL;
}
void handle_eflags(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
@ -1351,6 +1409,9 @@ int process_receive_ib(struct hfi1_packet *packet)
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
if (hfi1_setup_9B_packet(packet))
return RHF_RCV_CONTINUE;
trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
packet->rcd->ctxt,
rhf_err_flags(packet->rhf),
@ -1422,6 +1483,7 @@ int process_receive_error(struct hfi1_packet *packet)
rhf_rcv_type_err(packet->rhf) == 3))
return RHF_RCV_CONTINUE;
hfi1_setup_ib_header(packet);
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
@ -1435,6 +1497,8 @@ int kdeth_process_expected(struct hfi1_packet *packet)
{
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
hfi1_setup_ib_header(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
@ -1445,6 +1509,7 @@ int kdeth_process_expected(struct hfi1_packet *packet)
int kdeth_process_eager(struct hfi1_packet *packet)
{
hfi1_setup_ib_header(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
if (unlikely(hfi1_dbg_fault_packet(packet)))

View File

@ -250,7 +250,6 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
{
void *buffer;
void *p;
u32 length;
int ret;
buffer = kmalloc(P1_SIZE, GFP_KERNEL);
@ -265,13 +264,13 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
/* scan for image magic that may trail the actual data */
p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
if (p)
length = p - buffer;
else
length = P1_SIZE;
if (!p) {
kfree(buffer);
return -ENOENT;
}
*data = buffer;
*size = length;
*size = p - buffer;
return 0;
}

View File

@ -0,0 +1,114 @@
/*
* Copyright(c) 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include "exp_rcv.h"
#include "trace.h"
/**
 * hfi1_exp_tid_group_init - initialize an exp_tid_set
 * @set: the set to initialize
 *
 * Leaves @set with a zero count and an empty list head.
 */
void hfi1_exp_tid_group_init(struct exp_tid_set *set)
{
	set->count = 0;
	INIT_LIST_HEAD(&set->list);
}
/**
 * hfi1_alloc_ctxt_rcv_groups - initialize expected receive groups
 * @rcd: the context to add the groupings to
 *
 * Allocates rcd->expected_count / dd->rcv_entries.group_size zeroed
 * tid_group structures and appends them to rcd->tid_group_list.  The
 * groups receive consecutive bases starting at rcd->expected_base and
 * stepping by the group size.
 *
 * Return: 0 on success.  -ENOMEM if an allocation fails; in that case
 * hfi1_free_ctxt_rcv_groups() is called on @rcd before returning.
 */
int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
{
	struct hfi1_devdata *dd = rcd->dd;
	struct tid_group *grp;
	u32 tidbase = rcd->expected_base;
	u32 ngroups;
	u32 i;

	/* The group count does not change while the list is being built. */
	ngroups = rcd->expected_count / dd->rcv_entries.group_size;

	for (i = 0; i < ngroups; i++) {
		grp = kzalloc(sizeof(*grp), GFP_KERNEL);
		if (!grp)
			goto bail;
		grp->size = dd->rcv_entries.group_size;
		grp->base = tidbase;
		tid_group_add_tail(grp, &rcd->tid_group_list);
		tidbase += dd->rcv_entries.group_size;
	}

	return 0;

bail:
	hfi1_free_ctxt_rcv_groups(rcd);
	return -ENOMEM;
}
/**
 * hfi1_free_ctxt_rcv_groups - free expected receive groups
 * @rcd: the context to free
 *
 * The routine dismantles the expected receive linked list and clears
 * any tids associated with the receive context.  It warns if the
 * context's full or used TID sets are not empty on entry.
 *
 * This should only be called for kernel contexts and the base user
 * context.
 */
void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
{
	struct tid_group *group, *next;

	WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list));
	WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list));

	/* _safe iteration: each group is unlinked and freed as we go */
	list_for_each_entry_safe(group, next, &rcd->tid_group_list.list,
				 list) {
		tid_group_remove(group, &rcd->tid_group_list);
		kfree(group);
	}

	hfi1_clear_tids(rcd);
}

View File

@ -0,0 +1,190 @@
#ifndef _HFI1_EXP_RCV_H
#define _HFI1_EXP_RCV_H
/*
* Copyright(c) 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include "hfi.h"
/*
 * True when an exp_tid_set holds no groups: its count is zero and its
 * list is empty.  @set is the set object itself (not a pointer to it)
 * and is evaluated twice.  The argument is parenthesized so that
 * expressions such as *ptr expand correctly.
 */
#define EXP_TID_SET_EMPTY(set) \
	((set).count == 0 && list_empty(&(set).list))
/*
 * Field layout of a TID value, as (mask, shift) pairs:
 *   LEN  - 11 bits starting at bit 0
 *   CTRL -  2 bits starting at bit 20
 *   IDX  - 10 bits starting at bit 22
 * The masks are unsigned long long constants, so expressions built from
 * them are evaluated in 64-bit arithmetic.
 */
#define EXP_TID_TIDLEN_MASK 0x7FFULL
#define EXP_TID_TIDLEN_SHIFT 0
#define EXP_TID_TIDCTRL_MASK 0x3ULL
#define EXP_TID_TIDCTRL_SHIFT 20
#define EXP_TID_TIDIDX_MASK 0x3FFULL
#define EXP_TID_TIDIDX_SHIFT 22
/*
 * Extract one field from @tid.  @field is the bare suffix LEN, CTRL or
 * IDX; it is token-pasted onto EXP_TID_TID to select the mask and shift.
 */
#define EXP_TID_GET(tid, field) \
(((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK)
/*
 * Build the bit pattern for @field holding @value: @value is masked to
 * the field width and shifted into position.  Does not touch any
 * existing TID value.
 */
#define EXP_TID_SET(field, value) \
(((value) & EXP_TID_TID##field##_MASK) << \
EXP_TID_TID##field##_SHIFT)
/*
 * Zero @field inside the lvalue @tid.  This is a GCC statement
 * expression whose value is the updated @tid.
 */
#define EXP_TID_CLEAR(tid, field) ({ \
(tid) &= ~(EXP_TID_TID##field##_MASK << \
EXP_TID_TID##field##_SHIFT); \
})
/*
 * Replace @field inside the lvalue @tid with @value: clear the field,
 * then OR in the new bits.  Statement form; @tid is evaluated twice.
 */
#define EXP_TID_RESET(tid, field, value) do { \
EXP_TID_CLEAR(tid, field); \
(tid) |= EXP_TID_SET(field, (value)); \
} while (0)
/*
 * Define fields in the KDETH header so we can update the header
 * template.
 *
 * Each field FOO is described by KDETH_FOO_SHIFT (bit position) and
 * KDETH_FOO_MASK (field width mask, applied after shifting).
 */
#define KDETH_OFFSET_SHIFT 0
#define KDETH_OFFSET_MASK 0x7fff
#define KDETH_OM_SHIFT 15
#define KDETH_OM_MASK 0x1
#define KDETH_TID_SHIFT 16
#define KDETH_TID_MASK 0x3ff
#define KDETH_TIDCTRL_SHIFT 26
#define KDETH_TIDCTRL_MASK 0x3
#define KDETH_INTR_SHIFT 28
#define KDETH_INTR_MASK 0x1
#define KDETH_SH_SHIFT 29
#define KDETH_SH_MASK 0x1
#define KDETH_KVER_SHIFT 30
#define KDETH_KVER_MASK 0x3
#define KDETH_JKEY_SHIFT 0x0
#define KDETH_JKEY_MASK 0xff
#define KDETH_HCRC_UPPER_SHIFT 16
#define KDETH_HCRC_UPPER_MASK 0xff
#define KDETH_HCRC_LOWER_SHIFT 24
#define KDETH_HCRC_LOWER_MASK 0xff
/*
 * Read @field from the little-endian 32-bit word @val: convert to CPU
 * byte order, shift down, and mask to the field width.
 */
#define KDETH_GET(val, field) \
(((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
/*
 * Write @val into @field of the little-endian 32-bit lvalue @dw, leaving
 * the other bits unchanged.  The word is converted to CPU order, updated,
 * and stored back in little-endian order.
 * NOTE(review): the expansion declares a local named dwval and uses @dw
 * unparenthesized, so callers should pass a plain lvalue and avoid
 * arguments that mention dwval.
 */
#define KDETH_SET(dw, field, val) do { \
u32 dwval = le32_to_cpu(dw); \
dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \
dwval |= (((val) & KDETH_##field##_MASK) << \
KDETH_##field##_SHIFT); \
dw = cpu_to_le32(dwval); \
} while (0)
/* Zero the whole word @dw, then set @field to @val. */
#define KDETH_RESET(dw, field, val) ({ dw = 0; KDETH_SET(dw, field, val); })
/* KDETH OM multipliers and switch over point */
#define KDETH_OM_SMALL 4
#define KDETH_OM_SMALL_SHIFT 2
#define KDETH_OM_LARGE 64
#define KDETH_OM_LARGE_SHIFT 6
/* Evaluates to 1 << ((64 / 4) + 1), i.e. 1 << 17. */
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
/*
 * One group of expected-receive entries.  Groups are linked into an
 * exp_tid_set through @list (see tid_group_add_tail()).
 */
struct tid_group {
/* linkage into the owning exp_tid_set's list */
struct list_head list;
/* first entry index covered by this group */
u32 base;
/* number of entries in the group */
u8 size;
/* NOTE(review): presumably the count of entries in use - confirm */
u8 used;
/* NOTE(review): presumably a per-entry in-use bitmap - confirm */
u8 map;
};
/*
 * Write an "empty" RcvArray entry.
 * This function exists so the TID registration code can use it
 * to write to unused/unneeded entries and still take advantage
 * of the WC performance improvements. The HFI will ignore this
 * write to the RcvArray entry.
 */
static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
{
	/*
	 * Nothing to do unless the device is present and the RcvArray
	 * has been mapped as WC memory.
	 */
	if (!(dd->flags & HFI1_PRESENT) || !dd->rcvarray_wc)
		return;

	/* each entry is 8 bytes wide */
	writeq(0, dd->rcvarray_wc + (index * 8));

	/* flush after the last entry of every aligned run of four */
	if ((index & 3) == 3)
		flush_wc();
}
/* Append @grp to the end of @set's list and account for it in the count. */
static inline void tid_group_add_tail(struct tid_group *grp,
				      struct exp_tid_set *set)
{
	set->count++;
	list_add_tail(&grp->list, &set->list);
}
/*
 * Unlink @grp from whatever list it is on (leaving its list node
 * re-initialized) and decrement @set's count.
 */
static inline void tid_group_remove(struct tid_group *grp,
				    struct exp_tid_set *set)
{
	set->count--;
	list_del_init(&grp->list);
}
/*
 * Move @group from set @s1 to the tail of set @s2, keeping both sets'
 * counts in step: the group is removed from @s1 first, then appended
 * to @s2.
 */
static inline void tid_group_move(struct tid_group *group,
struct exp_tid_set *s1,
struct exp_tid_set *s2)
{
tid_group_remove(group, s1);
tid_group_add_tail(group, s2);
}
/*
 * Detach and return the first group on @set's list, decrementing the
 * set's count.  No emptiness check is made here.
 * NOTE(review): callers presumably guarantee the set is non-empty -
 * confirm.
 */
static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
{
	struct tid_group *first;

	first = list_first_entry(&set->list, struct tid_group, list);
	set->count--;
	list_del_init(&first->list);

	return first;
}
/*
 * Convert a receive entry index into TID info bits.
 *
 * The IDX field receives the pair index (@rcventry / 2); the CTRL field
 * receives 1 for the even entry of the pair and 2 for the odd entry.
 */
static inline u32 rcventry2tidinfo(u32 rcventry)
{
	u32 pair_idx = rcventry >> 1;
	u32 odd = rcventry & 1;

	return EXP_TID_SET(IDX, pair_idx) |
		EXP_TID_SET(CTRL, 1 << odd);
}
int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
void hfi1_exp_tid_group_init(struct exp_tid_set *set);
#endif /* _HFI1_EXP_RCV_H */

View File

@ -81,19 +81,23 @@ static u64 kvirt_to_phys(void *addr);
static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo);
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
const struct hfi1_user_info *uinfo);
static int init_user_ctxt(struct hfi1_filedata *fd);
static int init_user_ctxt(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt);
static void user_init(struct hfi1_ctxtdata *uctxt);
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
__u32 len);
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
__u32 len);
static int setup_base_ctxt(struct hfi1_filedata *fd);
static int setup_base_ctxt(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt);
static int setup_subctxt(struct hfi1_ctxtdata *uctxt);
static int find_sub_ctxt(struct hfi1_filedata *fd,
const struct hfi1_user_info *uinfo);
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
struct hfi1_user_info *uinfo);
struct hfi1_user_info *uinfo,
struct hfi1_ctxtdata **cd);
static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt);
static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt);
static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
@ -181,7 +185,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
struct hfi1_devdata,
user_cdev);
if (!((dd->flags & HFI1_PRESENT) && dd->kregbase))
if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
return -EINVAL;
if (!atomic_inc_not_zero(&dd->user_refcount))
@ -267,12 +271,14 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
/*
* Copy the number of tidlist entries we used
* and the length of the buffer we registered.
* These fields are adjacent in the structure so
* we can copy them at the same time.
*/
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
sizeof(tinfo.tidcnt) +
sizeof(tinfo.tidcnt)))
return -EFAULT;
addr = arg + offsetof(struct hfi1_tid_info, length);
if (copy_to_user((void __user *)addr, &tinfo.length,
sizeof(tinfo.length)))
ret = -EFAULT;
}
@ -388,8 +394,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sc_disable(sc);
ret = sc_enable(sc);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
uctxt->ctxt);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt);
} else {
ret = sc_restart(sc);
}
@ -757,7 +762,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
flush_wc();
/* drain user sdma queue */
hfi1_user_sdma_free_queues(fdata);
hfi1_user_sdma_free_queues(fdata, uctxt);
/* release the cpu */
hfi1_put_proc_affinity(fdata->rec_cpu_num);
@ -774,6 +779,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
*ev = 0;
__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
fdata->uctxt = NULL;
hfi1_rcd_put(uctxt); /* fdata reference */
if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
mutex_unlock(&hfi1_mutex);
goto done;
@ -790,34 +797,26 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
HFI1_RCVCTRL_TAILUPD_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
/* Clear the context's J_KEY */
hfi1_clear_ctxt_jkey(dd, uctxt->ctxt);
hfi1_clear_ctxt_jkey(dd, uctxt);
/*
* Reset context integrity checks to default.
* (writes to CSRs probably belong in chip.c)
* If a send context is allocated, reset context integrity
* checks to default and disable the send context.
*/
write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type));
sc_disable(uctxt->sc);
if (uctxt->sc) {
set_pio_integrity(uctxt->sc);
sc_disable(uctxt->sc);
}
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
dd->rcd[uctxt->ctxt] = NULL;
hfi1_user_exp_rcv_grp_free(uctxt);
hfi1_free_ctxt_rcv_groups(uctxt);
hfi1_clear_ctxt_pkey(dd, uctxt);
uctxt->rcvwait_to = 0;
uctxt->piowait_to = 0;
uctxt->rcvnowait = 0;
uctxt->pionowait = 0;
uctxt->event_flags = 0;
hfi1_stats.sps_ctxts--;
if (++dd->freectxts == dd->num_user_contexts)
aspm_enable_all(dd);
mutex_unlock(&hfi1_mutex);
hfi1_free_ctxtdata(dd, uctxt);
deallocate_ctxt(uctxt);
done:
mmdrop(fdata->mm);
kobject_put(&dd->kobj);
@ -849,6 +848,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
{
int ret;
unsigned int swmajor, swminor;
struct hfi1_ctxtdata *uctxt = NULL;
swmajor = uinfo->userversion >> 16;
if (swmajor != HFI1_USER_SWMAJOR)
@ -874,7 +874,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
* couldn't find a sub context.
*/
if (!ret)
ret = allocate_ctxt(fd, fd->dd, uinfo);
ret = allocate_ctxt(fd, fd->dd, uinfo, &uctxt);
mutex_unlock(&hfi1_mutex);
@ -887,31 +887,38 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
ret = wait_event_interruptible(fd->uctxt->wait, !test_bit(
HFI1_CTXT_BASE_UNINIT,
&fd->uctxt->event_flags));
if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) {
clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
return -ENOMEM;
}
if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
ret = -ENOMEM;
/* The only thing a sub context needs is the user_xxx stuff */
if (!ret)
ret = init_user_ctxt(fd);
ret = init_user_ctxt(fd, fd->uctxt);
if (ret)
clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
} else if (!ret) {
ret = setup_base_ctxt(fd);
if (fd->uctxt->subctxt_cnt) {
ret = setup_base_ctxt(fd, uctxt);
if (uctxt->subctxt_cnt) {
/* If there is an error, set the failed bit. */
if (ret)
set_bit(HFI1_CTXT_BASE_FAILED,
&fd->uctxt->event_flags);
&uctxt->event_flags);
/*
* Base context is done, notify anybody using a
* sub-context that is waiting for this completion
*/
clear_bit(HFI1_CTXT_BASE_UNINIT,
&fd->uctxt->event_flags);
wake_up(&fd->uctxt->wait);
clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
wake_up(&uctxt->wait);
}
if (ret)
deallocate_ctxt(uctxt);
}
/* If an error occurred, clear the reference */
if (ret && fd->uctxt) {
hfi1_rcd_put(fd->uctxt);
fd->uctxt = NULL;
}
return ret;
@ -924,7 +931,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
static int find_sub_ctxt(struct hfi1_filedata *fd,
const struct hfi1_user_info *uinfo)
{
int i;
u16 i;
struct hfi1_devdata *dd = fd->dd;
u16 subctxt;
@ -961,6 +968,8 @@ static int find_sub_ctxt(struct hfi1_filedata *fd,
fd->uctxt = uctxt;
fd->subctxt = subctxt;
hfi1_rcd_get(uctxt);
__set_bit(fd->subctxt, uctxt->in_use_ctxts);
return 1;
@ -970,10 +979,11 @@ static int find_sub_ctxt(struct hfi1_filedata *fd,
}
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
struct hfi1_user_info *uinfo)
struct hfi1_user_info *uinfo,
struct hfi1_ctxtdata **cd)
{
struct hfi1_ctxtdata *uctxt;
unsigned int ctxt;
u16 ctxt;
int ret, numa;
if (dd->flags & HFI1_FROZEN) {
@ -1058,8 +1068,6 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
uctxt->jkey = generate_jkey(current_uid());
INIT_LIST_HEAD(&uctxt->sdma_queues);
spin_lock_init(&uctxt->sdma_qlock);
hfi1_stats.sps_ctxts++;
/*
* Disable ASPM when there are open user/PSM contexts to avoid
@ -1067,16 +1075,31 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
*/
if (dd->freectxts-- == dd->num_user_contexts)
aspm_disable_all(dd);
fd->uctxt = uctxt;
*cd = uctxt;
return 0;
ctxdata_free:
*cd = NULL;
dd->rcd[ctxt] = NULL;
hfi1_free_ctxtdata(dd, uctxt);
hfi1_rcd_put(uctxt);
return ret;
}
/*
 * Undo the device-level bookkeeping done when a user context was
 * allocated and drop the device's reference on it.
 *
 * Under hfi1_mutex: decrement the open-context statistic, return the
 * context to the free count (re-enabling ASPM once every user context
 * is free again), and clear the device's rcd[] slot.  The reference is
 * dropped only after the mutex has been released.
 */
static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt)
{
	mutex_lock(&hfi1_mutex);

	hfi1_stats.sps_ctxts--;

	uctxt->dd->freectxts++;
	if (uctxt->dd->freectxts == uctxt->dd->num_user_contexts)
		aspm_enable_all(uctxt->dd);

	uctxt->dd->rcd[uctxt->ctxt] = NULL;

	mutex_unlock(&hfi1_mutex);

	/* _rcd_put() should be done after releasing mutex */
	hfi1_rcd_put(uctxt); /* dd reference */
}
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
const struct hfi1_user_info *uinfo)
{
@ -1153,7 +1176,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
clear_rcvhdrtail(uctxt);
/* Setup J_KEY before enabling the context */
hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey);
hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);
rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
@ -1179,7 +1202,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
else
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
}
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
@ -1223,23 +1246,25 @@ static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
return ret;
}
static int init_user_ctxt(struct hfi1_filedata *fd)
static int init_user_ctxt(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
int ret;
ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
if (ret)
return ret;
ret = hfi1_user_exp_rcv_init(fd);
ret = hfi1_user_exp_rcv_init(fd, uctxt);
if (ret)
hfi1_user_sdma_free_queues(fd, uctxt);
return ret;
}
static int setup_base_ctxt(struct hfi1_filedata *fd)
static int setup_base_ctxt(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
@ -1260,19 +1285,24 @@ static int setup_base_ctxt(struct hfi1_filedata *fd)
if (ret)
goto setup_failed;
ret = hfi1_user_exp_rcv_grp_init(fd);
ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
if (ret)
goto setup_failed;
ret = init_user_ctxt(fd);
ret = init_user_ctxt(fd, uctxt);
if (ret)
goto setup_failed;
user_init(uctxt);
/* Now that the context is set up, the fd can get a reference. */
fd->uctxt = uctxt;
hfi1_rcd_get(uctxt);
return 0;
setup_failed:
/* Call _free_ctxtdata, not _rcd_put(). We still need the context. */
hfi1_free_ctxtdata(dd, uctxt);
return ret;
}
@ -1390,7 +1420,7 @@ static unsigned int poll_next(struct file *fp,
spin_lock_irq(&dd->uctxt_lock);
if (hdrqempty(uctxt)) {
set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt);
pollflag = 0;
} else {
pollflag = POLLIN | POLLRDNORM;
@ -1409,7 +1439,7 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
struct hfi1_ctxtdata *uctxt;
struct hfi1_devdata *dd = ppd->dd;
unsigned ctxt;
u16 ctxt;
int ret = 0;
unsigned long flags;
@ -1475,7 +1505,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
} else {
rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
}
hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
hfi1_rcvctrl(dd, rcvctrl_op, uctxt);
/* always; new head should be equal to new tail; see above */
bail:
return 0;
@ -1525,7 +1555,7 @@ static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey)
}
if (intable)
ret = hfi1_set_ctxt_pkey(dd, uctxt->ctxt, pkey);
ret = hfi1_set_ctxt_pkey(dd, uctxt, pkey);
done:
return ret;
}

View File

@ -69,6 +69,7 @@
#define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw"
#define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw"
#define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw"
#define HOST_INTERFACE_VERSION 1
static uint fw_8051_load = 1;
static uint fw_fabric_serdes_load = 1;
@ -615,6 +616,14 @@ static void __obtain_firmware(struct hfi1_devdata *dd)
fw_fabric_serdes_name = ALT_FW_FABRIC_NAME;
fw_sbus_name = ALT_FW_SBUS_NAME;
fw_pcie_serdes_name = ALT_FW_PCIE_NAME;
/*
* Add a delay before obtaining and loading debug firmware.
* Authorization will fail if the delay between firmware
* authorization events is shorter than 50us. Add 100us to
* make a delay time safe.
*/
usleep_range(100, 120);
}
if (fw_sbus_load) {
@ -1079,6 +1088,13 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
(int)ver_major, (int)ver_minor, (int)ver_patch);
dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd,
"Failed to set host interface version, return 0x%x\n",
ret);
return -EIO;
}
return 0;
}

View File

@ -213,13 +213,11 @@ struct hfi1_ctxtdata {
/* dynamic receive available interrupt timeout */
u32 rcvavail_timeout;
/*
* number of opens (including slave sub-contexts) on this instance
* (ignoring forks, dup, etc. for now)
*/
int cnt;
/* Reference count the base context usage */
struct kref kref;
/* Device context index */
unsigned ctxt;
u16 ctxt;
/*
* non-zero if ctxt can be shared, and defines the maximum number of
* sub-contexts for this device context.
@ -245,24 +243,10 @@ struct hfi1_ctxtdata {
/* lock protecting all Expected TID data */
struct mutex exp_lock;
/* number of pio bufs for this ctxt (all procs, if shared) */
u32 piocnt;
/* first pio buffer for this ctxt */
u32 pio_base;
/* chip offset of PIO buffers for this ctxt */
u32 piobufs;
/* per-context configuration flags */
unsigned long flags;
/* per-context event flags for fileops/intr communication */
unsigned long event_flags;
/* WAIT_RCV that timed out, no interrupt */
u32 rcvwait_to;
/* WAIT_PIO that timed out, no interrupt */
u32 piowait_to;
/* WAIT_RCV already happened, no wait */
u32 rcvnowait;
/* WAIT_PIO already happened, no wait */
u32 pionowait;
/* total number of polled urgent packets */
u32 urgent;
/* saved total number of polled urgent packets for poll edge trigger */
@ -292,7 +276,6 @@ struct hfi1_ctxtdata {
u8 redirect_seq_cnt;
/* ctxt rcvhdrq head offset */
u32 head;
u32 pkt_count;
/* QPs waiting for context processing */
struct list_head qp_wait_list;
/* interrupt handling */
@ -301,15 +284,6 @@ struct hfi1_ctxtdata {
unsigned numa_id; /* numa node of this context */
/* verbs stats per CTX */
struct hfi1_opcode_stats_perctx *opstats;
/*
* This is the kernel thread that will keep making
* progress on the user sdma requests behind the scenes.
* There is one per context (shared contexts use the master's).
*/
struct task_struct *progress;
struct list_head sdma_queues;
/* protect sdma queues */
spinlock_t sdma_qlock;
/* Is ASPM interrupt supported for this context */
bool aspm_intr_supported;
@ -356,17 +330,26 @@ struct hfi1_packet {
__le32 *rhf_addr;
struct rvt_qp *qp;
struct ib_other_headers *ohdr;
struct ib_grh *grh;
u64 rhf;
u32 maxcnt;
u32 rhqoff;
u32 dlid;
u32 slid;
u16 tlen;
s16 etail;
u8 hlen;
u8 numpkt;
u8 rsize;
u8 updegr;
u8 rcv_flags;
u8 etype;
u8 extra_byte;
u8 pad;
u8 sc;
u8 sl;
u8 opcode;
bool becn;
bool fecn;
};
struct rvt_sge_state;
@ -512,7 +495,7 @@ static inline void incr_cntr32(u32 *cntr)
#define MAX_NAME_SIZE 64
struct hfi1_msix_entry {
enum irq_type type;
struct msix_entry msix;
int irq;
void *arg;
char name[MAX_NAME_SIZE];
cpumask_t mask;
@ -593,6 +576,7 @@ struct hfi1_pportdata {
/* SendDMA related entries */
struct workqueue_struct *hfi1_wq;
struct workqueue_struct *link_wq;
/* move out of interrupt context */
struct work_struct link_vc_work;
@ -654,12 +638,13 @@ struct hfi1_pportdata {
u8 link_enabled; /* link enabled? */
u8 linkinit_reason;
u8 local_tx_rate; /* rate given to 8051 firmware */
u8 last_pstate; /* info only */
u8 pstate; /* info only */
u8 qsfp_retry_count;
/* placeholders for IB MAD packet settings */
u8 overrun_threshold;
u8 phy_error_threshold;
unsigned int is_link_down_queued;
/* Used to override LED behavior for things like maintenance beaconing */
/*
@ -860,12 +845,15 @@ struct hfi1_devdata {
struct device *diag_device;
struct device *ui_device;
/* mem-mapped pointer to base of chip regs */
u8 __iomem *kregbase;
/* end of mem-mapped chip space excluding sendbuf and user regs */
u8 __iomem *kregend;
/* physical address of chip for io_remap, etc. */
/* first mapping up to RcvArray */
u8 __iomem *kregbase1;
resource_size_t physaddr;
/* second uncached mapping from RcvArray to pio send buffers */
u8 __iomem *kregbase2;
/* for detecting offset above kregbase2 address */
u32 base2_start;
/* Per VL data. Enough for all VLs but not all elements are set/used. */
struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* send context data */
@ -1229,9 +1217,10 @@ static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
#define dc8051_ver_patch(a) ((a) & 0x0000ff)
/* f_put_tid types */
#define PT_EXPECTED 0
#define PT_EAGER 1
#define PT_INVALID 2
#define PT_EXPECTED 0
#define PT_EAGER 1
#define PT_INVALID_FLUSH 2
#define PT_INVALID 3
struct tid_rb_node;
struct mmu_rb_node;
@ -1277,12 +1266,13 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd);
int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd);
int hfi1_create_ctxts(struct hfi1_devdata *dd);
struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt,
int numa);
void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
int hfi1_rcd_put(struct hfi1_ctxtdata *rcd);
void hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
@ -1321,6 +1311,22 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
return ppd->lstate;
}
/*
 * driver_pstate - return the driver's idea of the physical OPA port state
 * @ppd: the port data
 *
 * The driver does some processing between the moment the physical link
 * state reaches LINKUP and the moment the SM can be told about it.  While
 * the cached physical state is PLS_LINKUP but the host link state has no
 * HLS_UP bit set, IB_PORTPHYSSTATE_TRAINING is reported.  In every other
 * case the cached physical state is translated by chip_to_opa_pstate().
 */
static inline u32 driver_pstate(struct hfi1_pportdata *ppd)
{
	/* Not at LINKUP, or software is ready too: report the real state. */
	if (ppd->pstate != PLS_LINKUP || (ppd->host_link_state & HLS_UP))
		return chip_to_opa_pstate(ppd->dd, ppd->pstate);

	/* Hardware is up but software is not ready yet. */
	return IB_PORTPHYSSTATE_TRAINING;
}
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
@ -1829,10 +1835,9 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev);
int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
int pcie_speeds(struct hfi1_devdata *dd);
void request_msix(struct hfi1_devdata *dd, u32 *nent,
struct hfi1_msix_entry *entry);
void hfi1_enable_intx(struct pci_dev *pdev);
void restore_pci_variables(struct hfi1_devdata *dd);
int request_msix(struct hfi1_devdata *dd, u32 msireq);
int restore_pci_variables(struct hfi1_devdata *dd);
int save_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
int get_platform_config_field(struct hfi1_devdata *dd,
@ -2087,52 +2092,13 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev))
#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
#define show_packettype(etype) \
__print_symbolic(etype, \
packettype_name(EXPECTED), \
packettype_name(EAGER), \
packettype_name(IB), \
packettype_name(ERROR), \
packettype_name(BYPASS))
#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode }
#define show_ib_opcode(opcode) \
__print_symbolic(opcode, \
ib_opcode_name(RC_SEND_FIRST), \
ib_opcode_name(RC_SEND_MIDDLE), \
ib_opcode_name(RC_SEND_LAST), \
ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \
ib_opcode_name(RC_SEND_ONLY), \
ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \
ib_opcode_name(RC_RDMA_WRITE_FIRST), \
ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \
ib_opcode_name(RC_RDMA_WRITE_LAST), \
ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
ib_opcode_name(RC_RDMA_WRITE_ONLY), \
ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
ib_opcode_name(RC_RDMA_READ_REQUEST), \
ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \
ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \
ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \
ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \
ib_opcode_name(RC_ACKNOWLEDGE), \
ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
ib_opcode_name(RC_COMPARE_SWAP), \
ib_opcode_name(RC_FETCH_ADD), \
ib_opcode_name(UC_SEND_FIRST), \
ib_opcode_name(UC_SEND_MIDDLE), \
ib_opcode_name(UC_SEND_LAST), \
ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \
ib_opcode_name(UC_SEND_ONLY), \
ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \
ib_opcode_name(UC_RDMA_WRITE_FIRST), \
ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \
ib_opcode_name(UC_RDMA_WRITE_LAST), \
ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
ib_opcode_name(UC_RDMA_WRITE_ONLY), \
ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
ib_opcode_name(UD_SEND_ONLY), \
ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
ib_opcode_name(CNP))
/*
 * hfi1_check_mcast - check if the given lid is in the IB multicast range
 * @lid: the LID to test
 *
 * Return: true when @lid is at or above IB_MULTICAST_LID_BASE and is not
 * IB_LID_PERMISSIVE (both converted with be16_to_cpu()), false otherwise.
 */
static inline bool hfi1_check_mcast(u16 lid)
{
	if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE))
		return false;

	return lid != be16_to_cpu(IB_LID_PERMISSIVE);
}
#endif /* _HFI1_KERNEL_H */

View File

@ -67,6 +67,7 @@
#include "aspm.h"
#include "affinity.h"
#include "vnic.h"
#include "exp_rcv.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@ -130,7 +131,7 @@ unsigned long *hfi1_cpulist;
*/
int hfi1_create_ctxts(struct hfi1_devdata *dd)
{
unsigned i;
u16 i;
int ret;
/* Control context has to be always 0 */
@ -190,19 +191,49 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
nomem:
ret = -ENOMEM;
if (dd->rcd) {
for (i = 0; i < dd->num_rcv_contexts; ++i)
hfi1_free_ctxtdata(dd, dd->rcd[i]);
}
for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i)
hfi1_rcd_put(dd->rcd[i]);
/* All the contexts should be freed, free the array */
kfree(dd->rcd);
dd->rcd = NULL;
return ret;
}
/*
* Helper routines for the receive context reference count (rcd and uctxt)
*/
/* Initialize the reference count embedded in @rcd (rcd->kref) with kref_init(). */
static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd)
{
kref_init(&rcd->kref);
}
/*
 * Release callback handed to kref_put() by hfi1_rcd_put().
 * @kref: the kref embedded in the receive context being released
 *
 * Recovers the enclosing hfi1_ctxtdata, frees the data hanging off it
 * with hfi1_free_ctxtdata(), and then frees the structure itself.
 */
static void hfi1_rcd_free(struct kref *kref)
{
	struct hfi1_ctxtdata *rcd;

	rcd = container_of(kref, struct hfi1_ctxtdata, kref);

	hfi1_free_ctxtdata(rcd->dd, rcd);
	kfree(rcd);
}
/*
 * hfi1_rcd_put - drop a reference on a receive context
 * @rcd: the context, may be NULL
 *
 * Return: 0 when @rcd is NULL, otherwise the value returned by kref_put()
 * (with hfi1_rcd_free() as the release callback).
 */
int hfi1_rcd_put(struct hfi1_ctxtdata *rcd)
{
	if (!rcd)
		return 0;

	return kref_put(&rcd->kref, hfi1_rcd_free);
}
/*
 * Take an additional reference on @rcd with kref_get().
 * @rcd is dereferenced unconditionally here, so it must not be NULL.
 */
void hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
{
kref_get(&rcd->kref);
}
/*
* Common code for user and kernel context setup.
*/
struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt,
int numa)
{
struct hfi1_devdata *dd = ppd->dd;
@ -221,6 +252,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
hfi1_cdbg(PROC, "setting up context %u\n", ctxt);
INIT_LIST_HEAD(&rcd->qp_wait_list);
hfi1_exp_tid_group_init(&rcd->tid_group_list);
hfi1_exp_tid_group_init(&rcd->tid_used_list);
hfi1_exp_tid_group_init(&rcd->tid_full_list);
rcd->ppd = ppd;
rcd->dd = dd;
__set_bit(0, rcd->in_use_ctxts);
@ -328,6 +362,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
if (!rcd->opstats)
goto bail;
}
hfi1_rcd_init(rcd);
}
return rcd;
bail:
@ -567,8 +603,8 @@ static int init_after_reset(struct hfi1_devdata *dd)
*/
for (i = 0; i < dd->num_rcv_contexts; i++)
hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
HFI1_RCVCTRL_INTRAVAIL_DIS |
HFI1_RCVCTRL_TAILUPD_DIS, i);
HFI1_RCVCTRL_INTRAVAIL_DIS |
HFI1_RCVCTRL_TAILUPD_DIS, dd->rcd[i]);
pio_send_control(dd, PSC_GLOBAL_DISABLE);
for (i = 0; i < dd->num_send_contexts; i++)
sc_disable(dd->send_contexts[i].sc);
@ -579,7 +615,7 @@ static int init_after_reset(struct hfi1_devdata *dd)
static void enable_chip(struct hfi1_devdata *dd)
{
u32 rcvmask;
u32 i;
u16 i;
/* enable PIO send */
pio_send_control(dd, PSC_GLOBAL_ENABLE);
@ -598,7 +634,7 @@ static void enable_chip(struct hfi1_devdata *dd)
rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
if (HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, NODROP_EGR_FULL))
rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
hfi1_rcvctrl(dd, rcvmask, i);
hfi1_rcvctrl(dd, rcvmask, dd->rcd[i]);
sc_enable(dd->rcd[i]->sc);
}
}
@ -624,6 +660,20 @@ static int create_workqueues(struct hfi1_devdata *dd)
if (!ppd->hfi1_wq)
goto wq_error;
}
if (!ppd->link_wq) {
/*
* Make the link workqueue single-threaded to enforce
* serialization.
*/
ppd->link_wq =
alloc_workqueue(
"hfi_link_%d_%d",
WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND,
1, /* max_active */
dd->unit, pidx);
if (!ppd->link_wq)
goto wq_error;
}
}
return 0;
wq_error:
@ -634,6 +684,10 @@ static int create_workqueues(struct hfi1_devdata *dd)
destroy_workqueue(ppd->hfi1_wq);
ppd->hfi1_wq = NULL;
}
if (ppd->link_wq) {
destroy_workqueue(ppd->link_wq);
ppd->link_wq = NULL;
}
}
return -ENOMEM;
}
@ -656,7 +710,8 @@ static int create_workqueues(struct hfi1_devdata *dd)
int hfi1_init(struct hfi1_devdata *dd, int reinit)
{
int ret = 0, pidx, lastfail = 0;
unsigned i, len;
unsigned long len;
u16 i;
struct hfi1_ctxtdata *rcd;
struct hfi1_pportdata *ppd;
@ -878,10 +933,10 @@ static void shutdown_device(struct hfi1_devdata *dd)
ppd = dd->pport + pidx;
for (i = 0; i < dd->num_rcv_contexts; i++)
hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS |
HFI1_RCVCTRL_CTXT_DIS |
HFI1_RCVCTRL_INTRAVAIL_DIS |
HFI1_RCVCTRL_PKEY_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS, i);
HFI1_RCVCTRL_CTXT_DIS |
HFI1_RCVCTRL_INTRAVAIL_DIS |
HFI1_RCVCTRL_PKEY_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS, dd->rcd[i]);
/*
* Gracefully stop all sends allowing any in progress to
* trickle out first.
@ -917,6 +972,10 @@ static void shutdown_device(struct hfi1_devdata *dd)
destroy_workqueue(ppd->hfi1_wq);
ppd->hfi1_wq = NULL;
}
if (ppd->link_wq) {
destroy_workqueue(ppd->link_wq);
ppd->link_wq = NULL;
}
}
sdma_exit(dd);
}
@ -927,14 +986,11 @@ static void shutdown_device(struct hfi1_devdata *dd)
* @rcd: the ctxtdata structure
*
* free up any allocated data for a context
* This should not touch anything that would affect a simultaneous
* re-allocation of context data, because it is called after hfi1_mutex
* is released (and can be called from reinit as well).
* It should never change any chip state, or global driver state.
*/
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
unsigned e;
u32 e;
if (!rcd)
return;
@ -953,6 +1009,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
/* all the RcvArray entries should have been cleared by now */
kfree(rcd->egrbufs.rcvtids);
rcd->egrbufs.rcvtids = NULL;
for (e = 0; e < rcd->egrbufs.alloced; e++) {
if (rcd->egrbufs.buffers[e].dma)
@ -962,13 +1019,21 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
rcd->egrbufs.buffers[e].dma);
}
kfree(rcd->egrbufs.buffers);
rcd->egrbufs.alloced = 0;
rcd->egrbufs.buffers = NULL;
sc_free(rcd->sc);
rcd->sc = NULL;
vfree(rcd->subctxt_uregbase);
vfree(rcd->subctxt_rcvegrbuf);
vfree(rcd->subctxt_rcvhdr_base);
kfree(rcd->opstats);
kfree(rcd);
rcd->subctxt_uregbase = NULL;
rcd->subctxt_rcvegrbuf = NULL;
rcd->subctxt_rcvhdr_base = NULL;
rcd->opstats = NULL;
}
/*
@ -1362,7 +1427,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
tmp[ctxt] = NULL; /* debugging paranoia */
if (rcd) {
hfi1_clear_tids(rcd);
hfi1_free_ctxtdata(dd, rcd);
hfi1_rcd_put(rcd);
}
}
kfree(tmp);
@ -1532,6 +1597,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
destroy_workqueue(ppd->hfi1_wq);
ppd->hfi1_wq = NULL;
}
if (ppd->link_wq) {
destroy_workqueue(ppd->link_wq);
ppd->link_wq = NULL;
}
}
if (!j)
hfi1_device_remove(dd);

View File

@ -164,6 +164,7 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
ppd->linkup = 0;
/* clear HW details of the previous connection */
ppd->actual_vls_operational = 0;
reset_link_credits(dd);
/* freeze after a link down to guarantee a clean egress */
@ -196,7 +197,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd)
if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) {
wake_up_interruptible(&rcd->wait);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd->ctxt);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd);
} else if (test_and_clear_bit(HFI1_CTXT_WAITING_URG,
&rcd->event_flags)) {
rcd->urgent++;

View File

@ -106,7 +106,9 @@ struct iowait {
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *tx,
unsigned seq);
uint seq,
bool pkts_sent
);
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
seqlock_t *lock;
@ -118,6 +120,7 @@ struct iowait {
u32 count;
u32 tx_limit;
u32 tx_count;
u8 starved_cnt;
};
#define SDMA_AVAIL_REASON 0
@ -143,7 +146,8 @@ static inline void iowait_init(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *tx,
unsigned seq),
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait))
{
@ -305,4 +309,66 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
return tx;
}
/**
 * iowait_queue - Put the iowait on a wait queue
 * @pkts_sent: have some packets been sent before queuing?
 * @w: the iowait struct
 * @wait_head: the wait queue
 *
 * This function is called to insert an iowait struct into a
 * wait queue after a resource (e.g., sdma descriptor or pio
 * buffer) has run out.
 *
 * An iowait that has made progress is queued at the tail and its starve
 * count is reset.  One that has not is queued at the head and its starve
 * count is incremented.  The starve count saturates at its maximum value
 * instead of wrapping back to zero, so a waiter that has been starved for
 * a long time never looks like a freshly served one.
 */
static inline void iowait_queue(bool pkts_sent, struct iowait *w,
				struct list_head *wait_head)
{
	/*
	 * To play fair, insert the iowait at the tail of the wait queue if it
	 * has already sent some packets; otherwise, put it at the head.
	 */
	if (pkts_sent) {
		list_add_tail(&w->list, wait_head);
		w->starved_cnt = 0;
		return;
	}

	list_add(&w->list, wait_head);

	/*
	 * starved_cnt is a small unsigned counter.  If the increment wrapped
	 * it around to 0, step back to the maximum so the count saturates.
	 */
	if (++w->starved_cnt == 0)
		w->starved_cnt--;
}
/**
 * iowait_starve_clear - clear the wait queue's starve count
 * @pkts_sent: have some packets been sent?
 * @w: the iowait struct
 *
 * Resets the starve count of @w to zero when @pkts_sent is true.
 * When no packets have been sent the starve count is left untouched.
 */
static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w)
{
	if (!pkts_sent)
		return;

	w->starved_cnt = 0;
}
/**
 * iowait_starve_find_max - Find the maximum of the starve count
 * @w: the iowait struct
 * @max: a variable containing the max starve count
 * @idx: the index of the current iowait in an array
 * @max_idx: a variable containing the array index for the
 *           iowait entry that has the max starve count
 *
 * This function is called to compare the starve count of a
 * given iowait with the given max starve count.  The max starve
 * count and the index are updated only if the iowait's starve
 * count is strictly larger; on a tie both are left unchanged.
 */
static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
					  uint idx, uint *max_idx)
{
	if (w->starved_cnt <= *max)
		return;

	*max = w->starved_cnt;
	*max_idx = idx;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -115,7 +115,7 @@ struct opa_mad_notice_attr {
__be32 lid; /* LID where change occurred */
__be32 new_cap_mask; /* new capability mask */
__be16 reserved2;
__be16 cap_mask;
__be16 cap_mask3;
__be16 change_flags; /* low 4 bits only */
} __packed ntc_144;
@ -428,5 +428,6 @@ struct sc2vlnt {
COUNTER_MASK(1, 4))
void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port);
void hfi1_handle_trap_timer(unsigned long data);
#endif /* _HFI1_MAD_H */

View File

@ -217,21 +217,27 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
return node;
}
struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
unsigned long addr, unsigned long len)
bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
unsigned long addr, unsigned long len,
struct mmu_rb_node **rb_node)
{
struct mmu_rb_node *node;
unsigned long flags;
bool ret = false;
spin_lock_irqsave(&handler->lock, flags);
node = __mmu_rb_search(handler, addr, len);
if (node) {
if (node->addr == addr && node->len == len)
goto unlock;
__mmu_int_rb_remove(node, &handler->root);
list_del(&node->list); /* remove from LRU list */
ret = true;
}
unlock:
spin_unlock_irqrestore(&handler->lock, flags);
return node;
*rb_node = node;
return ret;
}
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)

View File

@ -81,7 +81,8 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg);
void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
struct mmu_rb_node *mnode);
struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
unsigned long addr, unsigned long len);
bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
unsigned long addr, unsigned long len,
struct mmu_rb_node **rb_node);
#endif /* _HFI1_MMU_RB_H */

View File

@ -1,5 +1,5 @@
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -68,7 +68,7 @@
/*
* Code to adjust PCIe capabilities.
*/
static void tune_pcie_caps(struct hfi1_devdata *);
static int tune_pcie_caps(struct hfi1_devdata *);
/*
* Do all the common PCIe setup and initialization.
@ -161,6 +161,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
{
unsigned long len;
resource_size_t addr;
int ret = 0;
dd->pcidev = pdev;
pci_set_drvdata(pdev, dd);
@ -179,47 +180,54 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
return -EINVAL;
}
dd->kregbase = ioremap_nocache(addr, TXE_PIO_SEND);
if (!dd->kregbase)
dd->kregbase1 = ioremap_nocache(addr, RCV_ARRAY);
if (!dd->kregbase1) {
dd_dev_err(dd, "UC mapping of kregbase1 failed\n");
return -ENOMEM;
}
dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY);
dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count);
dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8;
dd->kregbase2 = ioremap_nocache(
addr + dd->base2_start,
TXE_PIO_SEND - dd->base2_start);
if (!dd->kregbase2) {
dd_dev_err(dd, "UC mapping of kregbase2 failed\n");
goto nomem;
}
dd_dev_info(dd, "UC base2: %p for %x\n", dd->kregbase2,
TXE_PIO_SEND - dd->base2_start);
dd->piobase = ioremap_wc(addr + TXE_PIO_SEND, TXE_PIO_SIZE);
if (!dd->piobase) {
iounmap(dd->kregbase);
return -ENOMEM;
dd_dev_err(dd, "WC mapping of send buffers failed\n");
goto nomem;
}
dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE);
dd->flags |= HFI1_PRESENT; /* now register routines work */
dd->kregend = dd->kregbase + TXE_PIO_SEND;
dd->physaddr = addr; /* used for io_remap, etc. */
/*
* Re-map the chip's RcvArray as write-combining to allow us
* Map the chip's RcvArray as write-combining to allow us
* to write an entire cacheline worth of entries in one shot.
* If this re-map fails, just continue - the RcvArray programming
* function will handle both cases.
*/
dd->chip_rcv_array_count = read_csr(dd, RCV_ARRAY_CNT);
dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY,
dd->chip_rcv_array_count * 8);
dd_dev_info(dd, "WC Remapped RcvArray: %p\n", dd->rcvarray_wc);
/*
* Save BARs and command to rewrite after device reset.
*/
pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0);
pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1);
pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom);
pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command);
pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl);
pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &dd->pcie_lnkctl);
pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2,
&dd->pcie_devctl2);
pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0);
pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, &dd->pci_lnkctl3);
pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
if (!dd->rcvarray_wc) {
dd_dev_err(dd, "WC mapping of receive array failed\n");
goto nomem;
}
dd_dev_info(dd, "WC RcvArray: %p for %x\n",
dd->rcvarray_wc, dd->chip_rcv_array_count * 8);
dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */
return 0;
nomem:
ret = -ENOMEM;
hfi1_pcie_ddcleanup(dd);
return ret;
}
/*
@ -229,59 +237,19 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
*/
void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
{
u64 __iomem *base = (void __iomem *)dd->kregbase;
dd->flags &= ~HFI1_PRESENT;
dd->kregbase = NULL;
iounmap(base);
if (dd->kregbase1)
iounmap(dd->kregbase1);
dd->kregbase1 = NULL;
if (dd->kregbase2)
iounmap(dd->kregbase2);
dd->kregbase2 = NULL;
if (dd->rcvarray_wc)
iounmap(dd->rcvarray_wc);
dd->rcvarray_wc = NULL;
if (dd->piobase)
iounmap(dd->piobase);
}
static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt,
struct hfi1_msix_entry *hfi1_msix_entry)
{
int ret;
int nvec = *msixcnt;
struct msix_entry *msix_entry;
int i;
/*
* We can't pass hfi1_msix_entry array to msix_setup
* so use a dummy msix_entry array and copy the allocated
* irq back to the hfi1_msix_entry array.
*/
msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL);
if (!msix_entry) {
ret = -ENOMEM;
goto do_intx;
}
for (i = 0; i < nvec; i++)
msix_entry[i] = hfi1_msix_entry[i].msix;
ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec);
if (ret < 0)
goto free_msix_entry;
nvec = ret;
for (i = 0; i < nvec; i++)
hfi1_msix_entry[i].msix = msix_entry[i];
kfree(msix_entry);
*msixcnt = nvec;
return;
free_msix_entry:
kfree(msix_entry);
do_intx:
dd_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n",
nvec, ret);
*msixcnt = 0;
hfi1_enable_intx(dd->pcidev);
dd->piobase = NULL;
}
/* return the PCIe link speed from the given link status */
@ -314,8 +282,14 @@ static u32 extract_width(u16 linkstat)
static void update_lbus_info(struct hfi1_devdata *dd)
{
u16 linkstat;
int ret;
ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
return;
}
pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat);
dd->lbus_width = extract_width(linkstat);
dd->lbus_speed = extract_speed(linkstat);
snprintf(dd->lbus_info, sizeof(dd->lbus_info),
@ -330,6 +304,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
{
u32 linkcap;
struct pci_dev *parent = dd->pcidev->bus->self;
int ret;
if (!pci_is_pcie(dd->pcidev)) {
dd_dev_err(dd, "Can't find PCI Express capability!\n");
@ -339,7 +314,12 @@ int pcie_speeds(struct hfi1_devdata *dd)
/* find if our max speed is Gen3 and parent supports Gen3 speeds */
dd->link_gen3_capable = 1;
pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
return ret;
}
if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) {
dd_dev_info(dd,
"This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
@ -364,49 +344,150 @@ int pcie_speeds(struct hfi1_devdata *dd)
}
/*
* Returns in *nent:
* - actual number of interrupts allocated
* Returns:
* - actual number of interrupts allocated or
* - 0 if fell back to INTx.
* - error
*/
void request_msix(struct hfi1_devdata *dd, u32 *nent,
struct hfi1_msix_entry *entry)
int request_msix(struct hfi1_devdata *dd, u32 msireq)
{
int pos;
int nvec, ret;
pos = dd->pcidev->msix_cap;
if (*nent && pos) {
msix_setup(dd, pos, nent, entry);
/* did it, either MSI-X or INTx */
} else {
*nent = 0;
hfi1_enable_intx(dd->pcidev);
nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq,
PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
if (nvec < 0) {
dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
return nvec;
}
tune_pcie_caps(dd);
}
ret = tune_pcie_caps(dd);
if (ret) {
dd_dev_err(dd, "tune_pcie_caps() failed: %d\n", ret);
pci_free_irq_vectors(dd->pcidev);
return ret;
}
void hfi1_enable_intx(struct pci_dev *pdev)
{
/* first, turn on INTx */
pci_intx(pdev, 1);
/* then turn off MSI-X */
pci_disable_msix(pdev);
/* check for legacy IRQ */
if (nvec == 1 && !dd->pcidev->msix_enabled)
return 0;
return nvec;
}
/* restore command and BARs after a reset has wiped them out */
void restore_pci_variables(struct hfi1_devdata *dd)
int restore_pci_variables(struct hfi1_devdata *dd)
{
pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, dd->pcibar0);
pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, dd->pcibar1);
pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom);
pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, dd->pcie_devctl);
pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, dd->pcie_lnkctl);
pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2,
dd->pcie_devctl2);
pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0);
pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, dd->pci_lnkctl3);
pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
int ret = 0;
ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
if (ret)
goto error;
ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
dd->pcibar0);
if (ret)
goto error;
ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
dd->pcibar1);
if (ret)
goto error;
ret = pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom);
if (ret)
goto error;
ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL,
dd->pcie_devctl);
if (ret)
goto error;
ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL,
dd->pcie_lnkctl);
if (ret)
goto error;
ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2,
dd->pcie_devctl2);
if (ret)
goto error;
ret = pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0);
if (ret)
goto error;
ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
dd->pci_lnkctl3);
if (ret)
goto error;
ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
if (ret)
goto error;
return 0;
error:
dd_dev_err(dd, "Unable to write to PCI config\n");
return ret;
}
/*
 * save_pci_variables - save BARs and command to rewrite after device reset
 * @dd: the device data; the values read are stored in its pci* fields
 *
 * Reads, in order: BAR0, BAR1, the ROM address, the command word, the PCIe
 * DEVCTL, LNKCTL and DEVCTL2 capability words, and the MSIX0, SPCIE1 and
 * TPH2 config dwords.  The sequence stops at the first read that fails.
 *
 * Return: 0 on success; otherwise the non-zero value returned by the
 * failing config read, after logging an error.
 */
int save_pci_variables(struct hfi1_devdata *dd)
{
	struct pci_dev *pdev = dd->pcidev;
	int ret;

	/* Each read only runs if every read before it succeeded. */
	ret = pci_read_config_dword(pdev, PCI_BASE_ADDRESS_0, &dd->pcibar0);
	if (!ret)
		ret = pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1,
					    &dd->pcibar1);
	if (!ret)
		ret = pci_read_config_dword(pdev, PCI_ROM_ADDRESS,
					    &dd->pci_rom);
	if (!ret)
		ret = pci_read_config_word(pdev, PCI_COMMAND,
					   &dd->pci_command);
	if (!ret)
		ret = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL,
						&dd->pcie_devctl);
	if (!ret)
		ret = pcie_capability_read_word(pdev, PCI_EXP_LNKCTL,
						&dd->pcie_lnkctl);
	if (!ret)
		ret = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2,
						&dd->pcie_devctl2);
	if (!ret)
		ret = pci_read_config_dword(pdev, PCI_CFG_MSIX0,
					    &dd->pci_msix0);
	if (!ret)
		ret = pci_read_config_dword(pdev, PCIE_CFG_SPCIE1,
					    &dd->pci_lnkctl3);
	if (!ret)
		ret = pci_read_config_dword(pdev, PCIE_CFG_TPH2,
					    &dd->pci_tph2);

	if (ret)
		dd_dev_err(dd, "Unable to read from PCI config\n");

	return ret;
}
/*
@ -421,21 +502,33 @@ uint aspm_mode = ASPM_MODE_DISABLED;
module_param_named(aspm, aspm_mode, uint, S_IRUGO);
MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
static void tune_pcie_caps(struct hfi1_devdata *dd)
static int tune_pcie_caps(struct hfi1_devdata *dd)
{
struct pci_dev *parent;
u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
u16 rc_mrrs, ep_mrrs, max_mrrs, ectl;
int ret;
/*
* Turn on extended tags in DevCtl in case the BIOS has turned it off
* to improve WFR SDMA bandwidth
*/
pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
ret = pcie_capability_read_word(dd->pcidev,
PCI_EXP_DEVCTL, &ectl);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
return ret;
}
if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
dd_dev_info(dd, "Enabling PCIe extended tags\n");
ectl |= PCI_EXP_DEVCTL_EXT_TAG;
pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl);
ret = pcie_capability_write_word(dd->pcidev,
PCI_EXP_DEVCTL, ectl);
if (ret) {
dd_dev_err(dd, "Unable to write to PCI config\n");
return ret;
}
}
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
@ -444,14 +537,14 @@ static void tune_pcie_caps(struct hfi1_devdata *dd)
* access to the upstream component.
*/
if (!parent)
return;
return -EINVAL;
if (!pci_is_root_bus(parent->bus)) {
dd_dev_info(dd, "Parent not root\n");
return;
return -EINVAL;
}
if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
return;
return -EINVAL;
rc_mpss = parent->pcie_mpss;
rc_mps = ffs(pcie_get_mps(parent)) - 8;
/* Find out supported and configured values for endpoint (us) */
@ -497,6 +590,8 @@ static void tune_pcie_caps(struct hfi1_devdata *dd)
ep_mrrs = max_mrrs;
pcie_set_readrq(dd->pcidev, ep_mrrs);
}
return 0;
}
/* End of PCIe capability tuning */
@ -728,6 +823,7 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
u32 violation;
u32 i;
u8 c_minus1, c0, c_plus1;
int ret;
for (i = 0; i < 11; i++) {
/* set index */
@ -739,8 +835,14 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
pci_write_config_dword(pdev, PCIE_CFG_REG_PL102,
eq_value(c_minus1, c0, c_plus1));
/* check if these coefficients violate EQ rules */
pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL105,
&violation);
ret = pci_read_config_dword(dd->pcidev,
PCIE_CFG_REG_PL105, &violation);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
hit_error = 1;
break;
}
if (violation
& PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){
if (hit_error == 0) {
@ -1194,7 +1296,13 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
* that it is Gen3 capable earlier.
*/
dd_dev_info(dd, "%s: setting parent target link speed\n", __func__);
pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2);
ret = pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
return_error = 1;
goto done;
}
dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
(u32)lnkctl2);
/* only write to parent if target is not as high as ours */
@ -1203,20 +1311,37 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
lnkctl2 |= target_vector;
dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
(u32)lnkctl2);
pcie_capability_write_word(parent, PCI_EXP_LNKCTL2, lnkctl2);
ret = pcie_capability_write_word(parent,
PCI_EXP_LNKCTL2, lnkctl2);
if (ret) {
dd_dev_err(dd, "Unable to write to PCI config\n");
return_error = 1;
goto done;
}
} else {
dd_dev_info(dd, "%s: ..target speed is OK\n", __func__);
}
dd_dev_info(dd, "%s: setting target link speed\n", __func__);
pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
return_error = 1;
goto done;
}
dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
(u32)lnkctl2);
lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
lnkctl2 |= target_vector;
dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
(u32)lnkctl2);
pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
if (ret) {
dd_dev_err(dd, "Unable to write to PCI config\n");
return_error = 1;
goto done;
}
/* step 5h: arm gasket logic */
/* hold DC in reset across the SBR */
@ -1266,7 +1391,14 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
/* restore PCI space registers we know were reset */
dd_dev_info(dd, "%s: calling restore_pci_variables\n", __func__);
restore_pci_variables(dd);
ret = restore_pci_variables(dd);
if (ret) {
dd_dev_err(dd, "%s: Could not restore PCI variables\n",
__func__);
return_error = 1;
goto done;
}
/* restore firmware control */
write_csr(dd, MISC_CFG_FW_CTRL, fw_ctrl);
@ -1296,7 +1428,13 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
setextled(dd, 0);
/* check for any per-lane errors */
pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32);
ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
return_error = 1;
goto done;
}
dd_dev_info(dd, "%s: per-lane errors: 0x%x\n", __func__, reg32);
/* extract status, look for our HFI */

View File

@ -1012,7 +1012,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
"%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
__func__, sc->sw_index,
sc->hw_context, (u32)reg);
queue_work(dd->pport->hfi1_wq,
queue_work(dd->pport->link_wq,
&dd->pport->link_bounce_work);
break;
}
@ -1568,7 +1568,8 @@ static void sc_piobufavail(struct send_context *sc)
struct rvt_qp *qp;
struct hfi1_qp_priv *priv;
unsigned long flags;
unsigned i, n = 0;
uint i, n = 0, max_idx = 0;
u8 max_starved_cnt = 0;
if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
dd->send_contexts[sc->sw_index].type != SC_VL15)
@ -1591,6 +1592,7 @@ static void sc_piobufavail(struct send_context *sc)
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx);
/* refcount held until actual wake up */
qps[n++] = qp;
}
@ -1605,9 +1607,14 @@ static void sc_piobufavail(struct send_context *sc)
}
write_sequnlock_irqrestore(&dev->iowait_lock, flags);
for (i = 0; i < n; i++)
hfi1_qp_wakeup(qps[i],
/* Wake up the most starved one first */
if (n)
hfi1_qp_wakeup(qps[max_idx],
RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
for (i = 0; i < n; i++)
if (i != max_idx)
hfi1_qp_wakeup(qps[i],
RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
}
/* translate a send credit update to a bit code of reasons */

View File

@ -58,8 +58,13 @@ static int validate_scratch_checksum(struct hfi1_devdata *dd)
version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT;
/* Prevent power on default of all zeroes from passing checksum */
if (!version)
if (!version) {
dd_dev_err(dd, "%s: Config bitmap uninitialized\n", __func__);
dd_dev_err(dd,
"%s: Please update your BIOS to support active channels\n",
__func__);
return 0;
}
/*
* ASIC scratch 0 only contains the checksum and bitmap version as
@ -84,6 +89,8 @@ static int validate_scratch_checksum(struct hfi1_devdata *dd)
if (checksum + temp_scratch == 0xFFFF)
return 1;
dd_dev_err(dd, "%s: Configuration bitmap corrupted\n", __func__);
return 0;
}
@ -136,7 +143,6 @@ static void save_platform_config_fields(struct hfi1_devdata *dd)
void get_platform_config(struct hfi1_devdata *dd)
{
int ret = 0;
unsigned long size = 0;
u8 *temp_platform_config = NULL;
u32 esize;
@ -145,11 +151,6 @@ void get_platform_config(struct hfi1_devdata *dd)
save_platform_config_fields(dd);
return;
}
dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n",
__func__);
dd_dev_err(dd,
"%s: Please update your BIOS to support active channels\n",
__func__);
} else {
ret = eprom_read_platform_config(dd,
(void **)&temp_platform_config,
@ -160,15 +161,6 @@ void get_platform_config(struct hfi1_devdata *dd)
dd->platform_config.size = esize;
return;
}
/* fail, try EFI variable */
ret = read_hfi1_efi_var(dd, "configuration", &size,
(void **)&temp_platform_config);
if (!ret) {
dd->platform_config.data = temp_platform_config;
dd->platform_config.size = size;
return;
}
}
dd_dev_err(dd,
"%s: Failed to get platform config, falling back to sub-optimal default file\n",
@ -242,7 +234,7 @@ static int qual_power(struct hfi1_pportdata *ppd)
if (ppd->offline_disabled_reason ==
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) {
dd_dev_info(
dd_dev_err(
ppd->dd,
"%s: Port disabled due to system power restrictions\n",
__func__);
@ -268,7 +260,7 @@ static int qual_bitrate(struct hfi1_pportdata *ppd)
if (ppd->offline_disabled_reason ==
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) {
dd_dev_info(
dd_dev_err(
ppd->dd,
"%s: Cable failed bitrate check, disabling port\n",
__func__);
@ -709,15 +701,15 @@ static void apply_tunings(
ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
GENERAL_CONFIG, config_data);
if (ret != HCMD_SUCCESS)
dd_dev_info(ppd->dd,
"%s: Failed set ext device config params\n",
__func__);
dd_dev_err(ppd->dd,
"%s: Failed set ext device config params\n",
__func__);
}
if (tx_preset_index == OPA_INVALID_INDEX) {
if (ppd->port_type == PORT_TYPE_QSFP && limiting_active)
dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n",
__func__);
dd_dev_err(ppd->dd, "%s: Invalid Tx preset index\n",
__func__);
return;
}
@ -900,7 +892,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd,
case 0xD: /* fallthrough */
case 0xF:
default:
dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n",
dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n",
__func__);
break;
}
@ -942,7 +934,7 @@ void tune_serdes(struct hfi1_pportdata *ppd)
case PORT_TYPE_DISCONNECTED:
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED);
dd_dev_info(dd, "%s: Port disconnected, disabling port\n",
dd_dev_warn(dd, "%s: Port disconnected, disabling port\n",
__func__);
goto bail;
case PORT_TYPE_FIXED:
@ -1027,7 +1019,7 @@ void tune_serdes(struct hfi1_pportdata *ppd)
}
break;
default:
dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
dd_dev_warn(ppd->dd, "%s: Unknown port type\n", __func__);
ppd->port_type = PORT_TYPE_UNKNOWN;
tuning_method = OPA_UNKNOWN_TUNING;
total_atten = 0;

View File

@ -68,17 +68,12 @@ static int iowait_sleep(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *stx,
unsigned seq);
unsigned int seq,
bool pkts_sent);
static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);
static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map, unsigned off)
{
return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
}
const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
.length = sizeof(struct ib_rdma_wr),
@ -377,7 +372,8 @@ static int iowait_sleep(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *stx,
unsigned seq)
uint seq,
bool pkts_sent)
{
struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
struct rvt_qp *qp;
@ -408,7 +404,8 @@ static int iowait_sleep(
ibp->rvp.n_dmawait++;
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
list_add_tail(&priv->s_iowait.list, &sde->dmawait);
iowait_queue(pkts_sent, &priv->s_iowait,
&sde->dmawait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
rvt_get_qp(qp);
@ -607,7 +604,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
send_context = qp_to_send_context(qp, priv->s_sc);
seq_printf(s,
"N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
"N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
iter->n,
qp_idle(qp) ? "I" : "B",
qp->ibqp.qp_num,
@ -630,6 +627,10 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp->s_last, qp->s_acked, qp->s_cur,
qp->s_tail, qp->s_head, qp->s_size,
qp->s_avail,
/* ack_queue ring pointers, size */
qp->s_tail_ack_queue, qp->r_head_ack_queue,
HFI1_MAX_RDMA_ATOMIC,
/* remote QP info */
qp->remote_qpn,
rdma_ah_get_dlid(&qp->remote_ah_attr),
rdma_ah_get_sl(&qp->remote_ah_attr),

View File

@ -765,7 +765,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
ohdr->u.aeth = rvt_compute_aeth(qp);
sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr)
& 0xf) << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
@ -798,7 +798,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
goto queue_ack;
}
trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
&hdr, ib_is_sc5(sc5));
/* write the pbc and data */
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords);
@ -1009,7 +1010,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
return;
}
psn = be32_to_cpu(ohdr->bth[2]);
psn = ib_bth_get_psn(ohdr);
reset_sending_psn(qp, psn);
/*
@ -1915,17 +1916,16 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
void hfi1_rc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct ib_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 bth0;
u32 opcode = packet->opcode;
u32 hdrsize = packet->hlen;
u32 psn;
u32 pad;
u32 pad = packet->pad;
struct ib_wc wc;
u32 pmtu = qp->pmtu;
int diff;
@ -1937,14 +1937,13 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
u32 rkey;
lockdep_assert_held(&qp->r_lock);
bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
if (hfi1_ruc_check_hdr(ibp, packet))
return;
is_fecn = process_ecn(qp, packet, false);
psn = be32_to_cpu(ohdr->bth[2]);
opcode = ib_bth_get_opcode(ohdr);
psn = ib_bth_get_psn(ohdr);
/*
* Process responses (ACKs) before anything else. Note that the
@ -2074,8 +2073,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
wc.wc_flags = 0;
wc.ex.imm_data = 0;
send_last:
/* Get the number of bytes the message was padded by. */
pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@ -2368,28 +2365,19 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
struct ib_header *hdr,
u32 rcv_flags,
struct hfi1_packet *packet,
struct rvt_qp *qp)
{
int has_grh = rcv_flags & HFI1_HAS_GRH;
struct ib_other_headers *ohdr;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
int diff;
u32 opcode;
u32 psn, bth0;
u32 psn;
/* Check for GRH */
ohdr = &hdr->u.oth;
if (has_grh)
ohdr = &hdr->u.l.oth;
bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
if (hfi1_ruc_check_hdr(ibp, packet))
return;
psn = be32_to_cpu(ohdr->bth[2]);
opcode = ib_bth_get_opcode(ohdr);
psn = ib_bth_get_psn(packet->ohdr);
opcode = ib_bth_get_opcode(packet->ohdr);
/* Only deal with RDMA Writes for now */
if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {

View File

@ -74,8 +74,10 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
if (wqe->sg_list[i].length == 0)
continue;
/* Check LKEY */
if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
&wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
NULL, &wqe->sg_list[i],
IB_ACCESS_LOCAL_WRITE);
if (unlikely(ret <= 0))
goto bad_lkey;
qp->r_len += wqe->sg_list[i].length;
j++;
@ -214,100 +216,95 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
*
* The s_lock will be acquired around the hfi1_migrate_qp() call.
*/
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0)
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
{
__be64 guid;
unsigned long flags;
struct rvt_qp *qp = packet->qp;
u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
u32 dlid = packet->dlid;
u32 slid = packet->slid;
u32 sl = packet->sl;
int migrated;
u32 bth0, bth1;
if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
if (!has_grh) {
bth0 = be32_to_cpu(packet->ohdr->bth[0]);
bth1 = be32_to_cpu(packet->ohdr->bth[1]);
migrated = bth0 & IB_BTH_MIG_REQ;
if (qp->s_mig_state == IB_MIG_ARMED && migrated) {
if (!packet->grh) {
if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
IB_AH_GRH)
goto err;
return 1;
} else {
const struct ib_global_route *grh;
if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
IB_AH_GRH))
goto err;
return 1;
grh = rdma_ah_read_grh(&qp->alt_ah_attr);
guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
guid))
goto err;
return 1;
if (!gid_ok(
&hdr->u.l.grh.sgid,
&packet->grh->sgid,
grh->dgid.global.subnet_prefix,
grh->dgid.global.interface_id))
goto err;
return 1;
}
if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
ib_get_slid(hdr)))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
(u16)bth0,
ib_get_sl(hdr),
0, qp->ibqp.qp_num,
ib_get_slid(hdr),
ib_get_dlid(hdr));
goto err;
if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
sc5, slid))) {
hfi1_bad_pkey(ibp, (u16)bth0, sl,
0, qp->ibqp.qp_num, slid, dlid);
return 1;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
if (ib_get_slid(hdr) !=
rdma_ah_get_dlid(&qp->alt_ah_attr) ||
if (slid != rdma_ah_get_dlid(&qp->alt_ah_attr) ||
ppd_from_ibp(ibp)->port !=
rdma_ah_get_port_num(&qp->alt_ah_attr))
goto err;
return 1;
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_migrate_qp(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else {
if (!has_grh) {
if (!packet->grh) {
if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
IB_AH_GRH)
goto err;
return 1;
} else {
const struct ib_global_route *grh;
if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
IB_AH_GRH))
goto err;
return 1;
grh = rdma_ah_read_grh(&qp->remote_ah_attr);
guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
guid))
goto err;
return 1;
if (!gid_ok(
&hdr->u.l.grh.sgid,
&packet->grh->sgid,
grh->dgid.global.subnet_prefix,
grh->dgid.global.interface_id))
goto err;
return 1;
}
if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
ib_get_slid(hdr)))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
(u16)bth0,
ib_get_sl(hdr),
0, qp->ibqp.qp_num,
ib_get_slid(hdr),
ib_get_dlid(hdr));
goto err;
if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
sc5, slid))) {
hfi1_bad_pkey(ibp, (u16)bth0, sl,
0, qp->ibqp.qp_num, slid, dlid);
return 1;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
if (ib_get_slid(hdr) !=
rdma_ah_get_dlid(&qp->remote_ah_attr) ||
if ((slid != rdma_ah_get_dlid(&qp->remote_ah_attr)) ||
ppd_from_ibp(ibp)->port != qp->port_num)
goto err;
if (qp->s_mig_state == IB_MIG_REARM &&
!(bth0 & IB_BTH_MIG_REQ))
return 1;
if (qp->s_mig_state == IB_MIG_REARM && !migrated)
qp->s_mig_state = IB_MIG_ARMED;
}
return 0;
err:
return 1;
}
/**
@ -816,6 +813,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
static bool schedule_send_yield(struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
{
ps->pkts_sent = true;
if (unlikely(time_after(jiffies, ps->timeout))) {
if (!ps->in_thread ||
workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
@ -912,6 +911,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
ps.timeout = jiffies + ps.timeout_int;
ps.cpu = priv->s_sde ? priv->s_sde->cpu :
cpumask_first(cpumask_of_node(ps.ppd->dd->node));
ps.pkts_sent = false;
/* insure a pre-built packet is handled */
ps.s_txreq = get_waiting_verbs_txreq(qp);
@ -934,7 +934,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
spin_lock_irqsave(&qp->s_lock, ps.flags);
}
} while (make_req(qp, &ps));
iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}

View File

@ -246,7 +246,7 @@ static void __sdma_process_event(
enum sdma_events event);
static void dump_sdma_state(struct sdma_engine *sde);
static void sdma_make_progress(struct sdma_engine *sde, u64 status);
static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail);
static void sdma_desc_avail(struct sdma_engine *sde, uint avail);
static void sdma_flush_descq(struct sdma_engine *sde);
/**
@ -325,7 +325,7 @@ static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
/* timed out - bounce the link */
dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
__func__, sde->this_idx, (u32)reg);
queue_work(dd->pport->hfi1_wq,
queue_work(dd->pport->link_wq,
&dd->pport->link_bounce_work);
break;
}
@ -1340,10 +1340,8 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
* @dd: hfi1_devdata
* @port: port number (currently only zero)
*
* sdma_init initializes the specified number of engines.
*
* The code initializes each sde, its csrs. Interrupts
* are not required to be enabled.
* Initializes each sde and its csrs.
* Interrupts are not required to be enabled.
*
* Returns:
* 0 - success, -errno on failure
@ -1764,13 +1762,14 @@ static inline u16 sdma_gethead(struct sdma_engine *sde)
*
* This is called with head_lock held.
*/
static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
struct iowait *wait, *nw;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
unsigned i, n = 0, seq;
uint i, n = 0, seq, max_idx = 0;
struct sdma_txreq *stx;
struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
u8 max_starved_cnt = 0;
#ifdef CONFIG_SDMA_VERBOSITY
dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
@ -1805,6 +1804,9 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
if (num_desc > avail)
break;
avail -= num_desc;
/* Find the most starved wait member */
iowait_starve_find_max(wait, &max_starved_cnt,
n, &max_idx);
list_del_init(&wait->list);
waits[n++] = wait;
}
@ -1813,8 +1815,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
}
} while (read_seqretry(&dev->iowait_lock, seq));
/* Schedule the most starved one first */
if (n)
waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
for (i = 0; i < n; i++)
waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
if (i != max_idx)
waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}
/* head_lock must be held */
@ -2351,7 +2358,8 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
static int sdma_check_progress(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *tx)
struct sdma_txreq *tx,
bool pkts_sent)
{
int ret;
@ -2364,7 +2372,7 @@ static int sdma_check_progress(
seq = raw_seqcount_begin(
(const seqcount_t *)&sde->head_lock.seqcount);
ret = wait->sleep(sde, wait, tx, seq);
ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
if (ret == -EAGAIN)
sde->desc_avail = sdma_descq_freecnt(sde);
} else {
@ -2378,6 +2386,7 @@ static int sdma_check_progress(
* @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL)
* @tx: sdma_txreq to submit
* @pkts_sent: has any packet been sent yet?
*
* The call submits the tx into the ring. If a iowait structure is non-NULL
* the packet will be queued to the list in wait.
@ -2389,7 +2398,8 @@ static int sdma_check_progress(
*/
int sdma_send_txreq(struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *tx)
struct sdma_txreq *tx,
bool pkts_sent)
{
int ret = 0;
u16 tail;
@ -2431,7 +2441,7 @@ int sdma_send_txreq(struct sdma_engine *sde,
ret = -ECOMM;
goto unlock;
nodesc:
ret = sdma_check_progress(sde, wait, tx);
ret = sdma_check_progress(sde, wait, tx, pkts_sent);
if (ret == -EAGAIN) {
ret = 0;
goto retry;
@ -2500,8 +2510,10 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
}
update_tail:
total_count = submit_count + flush_count;
if (wait)
if (wait) {
iowait_sdma_add(wait, total_count);
iowait_starve_clear(submit_count > 0, wait);
}
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
spin_unlock_irqrestore(&sde->tail_lock, flags);
@ -2529,7 +2541,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
ret = -ECOMM;
goto update_tail;
nodesc:
ret = sdma_check_progress(sde, wait, tx);
ret = sdma_check_progress(sde, wait, tx, submit_count > 0);
if (ret == -EAGAIN) {
ret = 0;
goto retry;

View File

@ -852,7 +852,8 @@ struct iowait;
int sdma_send_txreq(struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *tx);
struct sdma_txreq *tx,
bool pkts_sent);
int sdma_send_txlist(struct sdma_engine *sde,
struct iowait *wait,
struct list_head *tx_list,

View File

@ -47,7 +47,7 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
u8 ibhdr_exhdr_len(struct ib_header *hdr)
u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
u8 opcode;
@ -61,13 +61,18 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr)
0 : hdr_len_by_opcode[opcode] - (12 + 8);
}
#define IMM_PRN "imm %d"
#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x"
#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
#define IETH_PRN "ieth rkey 0x%.8x"
#define ATOMICACKETH_PRN "origdata %llx"
#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx"
/*
 * Return the packet-type label for trace output.
 *
 * The label is always "IB"; @packet is accepted but not examined.
 */
const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet)
{
	static const char ib_label[] = "IB";

	return ib_label;
}
#define IMM_PRN "imm:%d"
#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x"
#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x"
#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x"
#define IETH_PRN "ieth rkey:0x%.8x"
#define ATOMICACKETH_PRN "origdata:%llx"
#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
@ -84,6 +89,43 @@ static const char *parse_syndrome(u8 syndrome)
return "";
}
/*
 * Split a base transport header into its individual fields for tracing.
 *
 * @ohdr is the header to decode.  Every other argument is an output: each
 * one receives the value returned by the matching ib_bth_get_*() accessor.
 * Nothing is returned and @ohdr is only handed to those accessors.
 *
 * NOTE(review): the stores are grouped the way the fields are usually laid
 * out across the BTH words; that layout lives in the ib_bth_get_*() helpers,
 * not here - confirm against them.  The outputs are assumed to be distinct
 * objects, so the order of the stores does not matter.
 */
void hfi1_trace_parse_bth(struct ib_other_headers *ohdr,
			  u8 *ack, u8 *becn, u8 *fecn, u8 *mig,
			  u8 *se, u8 *pad, u8 *opcode, u8 *tver,
			  u16 *pkey, u32 *psn, u32 *qpn)
{
	/* opcode, flags, pad count, transport version and partition key */
	*opcode = ib_bth_get_opcode(ohdr);
	*se     = ib_bth_get_se(ohdr);
	*mig    = ib_bth_get_migreq(ohdr);
	*pad    = ib_bth_get_pad(ohdr);
	*tver   = ib_bth_get_tver(ohdr);
	*pkey   = ib_bth_get_pkey(ohdr);

	/* congestion notification bits and destination QP number */
	*fecn   = ib_bth_get_fecn(ohdr);
	*becn   = ib_bth_get_becn(ohdr);
	*qpn    = ib_bth_get_qpn(ohdr);

	/* ack-request bit and packet sequence number */
	*ack    = ib_bth_get_ackreq(ohdr);
	*psn    = ib_bth_get_psn(ohdr);
}
/*
 * Decode the link header of a 9B packet for tracing.
 *
 * @hdr:  header to decode
 * @sc5:  caller-supplied flag that is OR-ed into *@sc as bit 4
 * @ohdr: output; set to the "other headers" that follow the LRH
 *
 * The remaining arguments are outputs filled from the ib_get_*() accessors.
 * *@ohdr points at hdr->u.oth when the link-next-header value equals
 * HFI1_LRH_BTH, and at hdr->u.l.oth for every other value.
 */
void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
			     struct ib_other_headers **ohdr,
			     u8 *lnh, u8 *lver, u8 *sl, u8 *sc,
			     u16 *len, u32 *dlid, u32 *slid)
{
	*lnh = ib_get_lnh(hdr);
	*lver = ib_get_lver(hdr);
	*sl = ib_get_sl(hdr);
	/* header SC value with the sc5 flag placed in bit 4 */
	*sc = (sc5 << 4) | ib_get_sc(hdr);
	*len = ib_get_len(hdr);
	*dlid = ib_get_dlid(hdr);
	*slid = ib_get_slid(hdr);

	/* pick the transport header location from the LNH value stored above */
	*ohdr = (*lnh == HFI1_LRH_BTH) ? &hdr->u.oth : &hdr->u.l.oth;
}
const char *parse_everbs_hdrs(
struct trace_seq *p,
u8 opcode,

View File

@ -55,8 +55,57 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_ibhdrs
u8 ibhdr_exhdr_len(struct ib_header *hdr);
/*
 * ib_opcode_name(X) builds one { value, "name" } table entry: the value is
 * the IB_OPCODE_X constant and the name is the stringified X.
 */
#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode }
/*
 * show_ib_opcode(opcode) hands the opcode and the table of entries below to
 * __print_symbolic().  The table lists RC, UC and UD opcodes plus CNP.
 */
#define show_ib_opcode(opcode) \
__print_symbolic(opcode, \
ib_opcode_name(RC_SEND_FIRST), \
ib_opcode_name(RC_SEND_MIDDLE), \
ib_opcode_name(RC_SEND_LAST), \
ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \
ib_opcode_name(RC_SEND_ONLY), \
ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \
ib_opcode_name(RC_RDMA_WRITE_FIRST), \
ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \
ib_opcode_name(RC_RDMA_WRITE_LAST), \
ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
ib_opcode_name(RC_RDMA_WRITE_ONLY), \
ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
ib_opcode_name(RC_RDMA_READ_REQUEST), \
ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \
ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \
ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \
ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \
ib_opcode_name(RC_ACKNOWLEDGE), \
ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
ib_opcode_name(RC_COMPARE_SWAP), \
ib_opcode_name(RC_FETCH_ADD), \
ib_opcode_name(UC_SEND_FIRST), \
ib_opcode_name(UC_SEND_MIDDLE), \
ib_opcode_name(UC_SEND_LAST), \
ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \
ib_opcode_name(UC_SEND_ONLY), \
ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \
ib_opcode_name(UC_RDMA_WRITE_FIRST), \
ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \
ib_opcode_name(UC_RDMA_WRITE_LAST), \
ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
ib_opcode_name(UC_RDMA_WRITE_ONLY), \
ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
ib_opcode_name(UD_SEND_ONLY), \
ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
ib_opcode_name(CNP))
const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr);
const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet);
void hfi1_trace_parse_bth(struct ib_other_headers *ohdr,
u8 *ack, u8 *becn, u8 *fecn, u8 *mig,
u8 *se, u8 *pad, u8 *opcode, u8 *tver,
u16 *pkey, u32 *psn, u32 *qpn);
void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
struct ib_other_headers **ohdr,
u8 *lnh, u8 *lver, u8 *sl, u8 *sc,
u16 *len, u32 *dlid, u32 *slid);
#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
@ -66,139 +115,198 @@ __print_symbolic(lrh, \
lrh_name(LRH_BTH), \
lrh_name(LRH_GRH))
#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x"
#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d "
#define BTH_PRN \
"op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
"f %d b %d qpn 0x%.6x a %d psn 0x%.8x"
#define EHDR_PRN "%s"
"op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \
"f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x"
#define EHDR_PRN "hlen:%d %s"
DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
TP_PROTO(struct hfi1_devdata *dd,
struct ib_header *hdr),
TP_ARGS(dd, hdr),
struct hfi1_packet *packet,
bool sc5),
TP_ARGS(dd, packet, sc5),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
/* LRH */
__field(u8, vl)
__field(u8, lnh)
__field(u8, lver)
__field(u8, sl)
__field(u8, lnh)
__field(u16, dlid)
__field(u16, len)
__field(u16, slid)
/* BTH */
__field(u32, dlid)
__field(u8, sc)
__field(u32, slid)
__field(u8, opcode)
__field(u8, se)
__field(u8, m)
__field(u8, mig)
__field(u8, pad)
__field(u8, tver)
__field(u16, pkey)
__field(u8, f)
__field(u8, b)
__field(u8, fecn)
__field(u8, becn)
__field(u32, qpn)
__field(u8, a)
__field(u8, ack)
__field(u32, psn)
/* extended headers */
__dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
__dynamic_array(u8, ehdrs,
hfi1_trace_ib_hdr_len(packet->hdr))
),
TP_fast_assign(
TP_fast_assign(
struct ib_other_headers *ohdr;
DD_DEV_ASSIGN(dd);
hfi1_trace_parse_9b_hdr(packet->hdr, sc5,
&ohdr,
&__entry->lnh,
&__entry->lver,
&__entry->sl,
&__entry->sc,
&__entry->len,
&__entry->dlid,
&__entry->slid);
hfi1_trace_parse_bth(ohdr, &__entry->ack,
&__entry->becn, &__entry->fecn,
&__entry->mig, &__entry->se,
&__entry->pad, &__entry->opcode,
&__entry->tver, &__entry->pkey,
&__entry->psn, &__entry->qpn);
/* extended headers */
memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
__get_dynamic_array_len(ehdrs));
),
TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " "
BTH_PRN " " EHDR_PRN,
__get_str(dev),
__entry->len,
__entry->sc,
__entry->dlid,
__entry->slid,
__entry->lnh, show_lnh(__entry->lnh),
__entry->lver,
__entry->sl,
/* BTH */
__entry->opcode, show_ib_opcode(__entry->opcode),
__entry->se,
__entry->mig,
__entry->pad,
__entry->tver,
__entry->pkey,
__entry->fecn,
__entry->becn,
__entry->qpn,
__entry->ack,
__entry->psn,
/* extended headers */
__get_dynamic_array_len(ehdrs),
__parse_ib_ehdrs(
__entry->opcode,
(void *)__get_dynamic_array(ehdrs))
)
);
/*
 * input_ibhdr: trace event instantiated from hfi1_input_ibhdr_template.
 * It takes the device data, the packet and the sc5 flag.
 */
DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr,
TP_PROTO(struct hfi1_devdata *dd,
struct hfi1_packet *packet, bool sc5),
TP_ARGS(dd, packet, sc5));
DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
TP_PROTO(struct hfi1_devdata *dd,
struct ib_header *hdr,
bool sc5),
TP_ARGS(dd, hdr, sc5),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
__field(u8, lnh)
__field(u8, lver)
__field(u8, sl)
__field(u16, len)
__field(u32, dlid)
__field(u8, sc)
__field(u32, slid)
__field(u8, opcode)
__field(u8, se)
__field(u8, mig)
__field(u8, pad)
__field(u8, tver)
__field(u16, pkey)
__field(u8, fecn)
__field(u8, becn)
__field(u32, qpn)
__field(u8, ack)
__field(u32, psn)
/* extended headers */
__dynamic_array(u8, ehdrs,
hfi1_trace_ib_hdr_len(hdr))
),
TP_fast_assign(
struct ib_other_headers *ohdr;
DD_DEV_ASSIGN(dd);
/* LRH */
__entry->vl =
(u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
__entry->lver =
(u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
__entry->sl =
(u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
__entry->lnh =
(u8)(be16_to_cpu(hdr->lrh[0]) & 3);
__entry->dlid =
be16_to_cpu(hdr->lrh[1]);
/* allow for larger len */
__entry->len =
be16_to_cpu(hdr->lrh[2]);
__entry->slid =
be16_to_cpu(hdr->lrh[3]);
/* BTH */
if (__entry->lnh == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
__entry->opcode =
(be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
__entry->se =
(be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
__entry->m =
(be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
__entry->pad =
(be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
__entry->tver =
(be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
__entry->pkey =
be32_to_cpu(ohdr->bth[0]) & 0xffff;
__entry->f =
(be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) &
IB_FECN_MASK;
__entry->b =
(be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) &
IB_BECN_MASK;
__entry->qpn =
be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
__entry->a =
(be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
/* allow for larger PSN */
__entry->psn =
be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
hfi1_trace_parse_9b_hdr(hdr, sc5,
&ohdr, &__entry->lnh,
&__entry->lver, &__entry->sl,
&__entry->sc, &__entry->len,
&__entry->dlid, &__entry->slid);
hfi1_trace_parse_bth(ohdr, &__entry->ack,
&__entry->becn, &__entry->fecn,
&__entry->mig, &__entry->se,
&__entry->pad, &__entry->opcode,
&__entry->tver, &__entry->pkey,
&__entry->psn, &__entry->qpn);
/* extended headers */
memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
ibhdr_exhdr_len(hdr));
),
TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
__get_str(dev),
/* LRH */
__entry->vl,
__entry->lver,
__entry->sl,
__entry->lnh, show_lnh(__entry->lnh),
__entry->dlid,
__entry->len,
__entry->slid,
/* BTH */
__entry->opcode, show_ib_opcode(__entry->opcode),
__entry->se,
__entry->m,
__entry->pad,
__entry->tver,
__entry->pkey,
__entry->f,
__entry->b,
__entry->qpn,
__entry->a,
__entry->psn,
/* extended headers */
__parse_ib_ehdrs(
__entry->opcode,
(void *)__get_dynamic_array(ehdrs))
)
memcpy(__get_dynamic_array(ehdrs),
&ohdr->u, __get_dynamic_array_len(ehdrs));
),
TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " "
BTH_PRN " " EHDR_PRN,
__get_str(dev),
__entry->len,
__entry->sc,
__entry->dlid,
__entry->slid,
__entry->lnh, show_lnh(__entry->lnh),
__entry->lver,
__entry->sl,
/* BTH */
__entry->opcode, show_ib_opcode(__entry->opcode),
__entry->se,
__entry->mig,
__entry->pad,
__entry->tver,
__entry->pkey,
__entry->fecn,
__entry->becn,
__entry->qpn,
__entry->ack,
__entry->psn,
/* extended headers */
__get_dynamic_array_len(ehdrs),
__parse_ib_ehdrs(
__entry->opcode,
(void *)__get_dynamic_array(ehdrs))
)
);
DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_output_ibhdr_template, pio_output_ibhdr,
TP_PROTO(struct hfi1_devdata *dd,
struct ib_header *hdr, bool sc5),
TP_ARGS(dd, hdr, sc5));
DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_output_ibhdr_template, ack_output_ibhdr,
TP_PROTO(struct hfi1_devdata *dd,
struct ib_header *hdr, bool sc5),
TP_ARGS(dd, hdr, sc5));
DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_output_ibhdr_template, sdma_output_ibhdr,
TP_PROTO(struct hfi1_devdata *dd,
struct ib_header *hdr, bool sc5),
TP_ARGS(dd, hdr, sc5));
DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
#endif /* __HFI1_TRACE_IBHDRS_H */

View File

@ -72,6 +72,26 @@ TRACE_EVENT(hfi1_interrupt,
__entry->src)
);
/*
 * hfi1_csr_template: event class that records an __iomem address together
 * with a 64-bit value.  Both are copied verbatim into the trace entry and
 * printed as "addr <pointer> value <hex>".
 */
DECLARE_EVENT_CLASS(
hfi1_csr_template,
TP_PROTO(void __iomem *addr, u64 value),
TP_ARGS(addr, value),
TP_STRUCT__entry(
__field(void __iomem *, addr)
__field(u64, value)
),
TP_fast_assign(
__entry->addr = addr;
__entry->value = value;
),
TP_printk("addr %p value %llx", __entry->addr, __entry->value)
);
/*
 * hfi1_write_rcvarray: event instantiated from hfi1_csr_template; it logs
 * the (addr, value) pair it is given.
 * NOTE(review): the name suggests it is emitted on RcvArray writes; the
 * call sites are not visible here -- confirm.
 */
DEFINE_EVENT(
hfi1_csr_template, hfi1_write_rcvarray,
TP_PROTO(void __iomem *addr, u64 value),
TP_ARGS(addr, value));
#ifdef CONFIG_FAULT_INJECTION
TRACE_EVENT(hfi1_fault_opcode,
TP_PROTO(struct rvt_qp *qp, u8 opcode),

View File

@ -52,9 +52,25 @@
#include "hfi.h"
/*
 * Symbolic names for TID types, for use in TP_printk() format arguments.
 *
 * tidtype_name(X) expands to the { value, "name" } pair { PT_X, "X" }.
 * show_tidtype(type) hands @type plus the EXPECTED, EAGER and INVALID pairs
 * to __print_symbolic().
 *
 * The final line of show_tidtype() deliberately has no trailing backslash:
 * a dangling continuation would splice whatever line follows the macro
 * (here the "#undef TRACE_SYSTEM" directive) into the macro body.
 */
#define tidtype_name(type) { PT_##type, #type }
#define show_tidtype(type)                   \
__print_symbolic(type,                       \
	tidtype_name(EXPECTED),              \
	tidtype_name(EAGER),                 \
	tidtype_name(INVALID))
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_rx
/*
 * Symbolic names for receive packet types.
 *
 * packettype_name(X) expands to the pair { RHF_RCV_TYPE_X, "X" }.
 * show_packettype(etype) passes @etype and the EXPECTED, EAGER, IB, ERROR
 * and BYPASS pairs to __print_symbolic().
 */
#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
#define show_packettype(etype) \
__print_symbolic(etype, \
packettype_name(EXPECTED), \
packettype_name(EAGER), \
packettype_name(IB), \
packettype_name(ERROR), \
packettype_name(BYPASS))
TRACE_EVENT(hfi1_rcvhdr,
TP_PROTO(struct hfi1_devdata *dd,
u32 ctxt,
@ -98,7 +114,7 @@ TRACE_EVENT(hfi1_rcvhdr,
);
TRACE_EVENT(hfi1_receive_interrupt,
TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt),
TP_ARGS(dd, ctxt),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(u32, ctxt)
@ -129,7 +145,8 @@ TRACE_EVENT(hfi1_receive_interrupt,
)
);
TRACE_EVENT(hfi1_exp_tid_reg,
DECLARE_EVENT_CLASS(
hfi1_exp_tid_reg_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr,
u32 npages, unsigned long va, unsigned long pa,
dma_addr_t dma),
@ -163,38 +180,45 @@ TRACE_EVENT(hfi1_exp_tid_reg,
)
);
TRACE_EVENT(hfi1_exp_tid_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
unsigned long va, unsigned long pa, dma_addr_t dma),
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
TP_STRUCT__entry(
__field(unsigned int, ctxt)
__field(u16, subctxt)
__field(u32, rarr)
__field(u32, npages)
__field(unsigned long, va)
__field(unsigned long, pa)
__field(dma_addr_t, dma)
),
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->rarr = rarr;
__entry->npages = npages;
__entry->va = va;
__entry->pa = pa;
__entry->dma = dma;
),
TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
__entry->ctxt,
__entry->subctxt,
__entry->rarr,
__entry->npages,
__entry->pa,
__entry->va,
__entry->dma
)
);
/*
 * hfi1_exp_tid_unreg: event instantiated from the hfi1_exp_tid_reg_unreg
 * class.  It takes the same seven arguments as hfi1_exp_tid_reg, so both
 * events share one record layout and print format.
 */
DEFINE_EVENT(
hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
unsigned long va, unsigned long pa, dma_addr_t dma),
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
/*
 * hfi1_exp_tid_reg: event instantiated from the hfi1_exp_tid_reg_unreg
 * class; arguments are forwarded to the class unchanged.
 */
DEFINE_EVENT(
hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
unsigned long va, unsigned long pa, dma_addr_t dma),
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
/*
 * hfi1_put_tid: trace event recording the arguments of a TID update.
 *
 * @dd:    device data; DD_DEV_ENTRY/DD_DEV_ASSIGN store the device name
 * @index: entry index
 * @type:  TID type, printed symbolically through show_tidtype()
 * @pa:    address value, printed in hex
 * @order: order value, printed as an unsigned integer
 *
 * The __field() entries carry no trailing semicolons, matching the other
 * events in this file; each __field() expansion is already a complete
 * construct in every expansion stage, so a stray ';' is at best an empty
 * declaration.
 */
TRACE_EVENT(
	hfi1_put_tid,
	TP_PROTO(struct hfi1_devdata *dd,
		 u32 index, u32 type, unsigned long pa, u16 order),
	TP_ARGS(dd, index, type, pa, order),
	TP_STRUCT__entry(
		DD_DEV_ENTRY(dd)
		__field(unsigned long, pa)
		__field(u32, index)
		__field(u32, type)
		__field(u16, order)
	),
	TP_fast_assign(
		DD_DEV_ASSIGN(dd);
		__entry->pa = pa;
		__entry->index = index;
		__entry->type = type;
		__entry->order = order;
	),
	TP_printk("[%s] type %s pa %lx index %u order %u",
		  __get_str(dev),
		  show_tidtype(__entry->type),
		  __entry->pa,
		  __entry->index,
		  __entry->order
	)
);
TRACE_EVENT(hfi1_exp_tid_inval,
TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,

View File

@ -297,31 +297,25 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct ib_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 opcode = packet->opcode;
u32 hdrsize = packet->hlen;
u32 psn;
u32 pad;
u32 pad = packet->pad;
struct ib_wc wc;
u32 pmtu = qp->pmtu;
struct ib_reth *reth;
int has_grh = rcv_flags & HFI1_HAS_GRH;
int ret;
bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
if (hfi1_ruc_check_hdr(ibp, packet))
return;
process_ecn(qp, packet, true);
psn = be32_to_cpu(ohdr->bth[2]);
opcode = ib_bth_get_opcode(ohdr);
psn = ib_bth_get_psn(ohdr);
/* Compare the PSN versus the expected PSN. */
if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
/*
@ -432,8 +426,6 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
wc.ex.imm_data = 0;
wc.wc_flags = 0;
send_last:
/* Get the number of bytes the message was padded by. */
pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@ -527,8 +519,6 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
rdma_last_imm:
wc.wc_flags = IB_WC_WITH_IMM;
/* Get the number of bytes the message was padded by. */
pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))

View File

@ -110,10 +110,10 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
((1 << ppd->lmc) - 1));
if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
qp->s_pkey_index, slid))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey,
rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
slid, rdma_ah_get_dlid(ah_attr));
hfi1_bad_pkey(ibp, pkey,
rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
slid, rdma_ah_get_dlid(ah_attr));
goto drop;
}
}
@ -128,18 +128,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
sqp->qkey : swqe->ud_wr.remote_qkey;
if (unlikely(qkey != qp->qkey)) {
u16 lid;
lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
lid,
rdma_ah_get_dlid(ah_attr));
goto drop;
}
if (unlikely(qkey != qp->qkey))
goto drop; /* silently drop per IBTA spec */
}
/*
@ -549,7 +539,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
hdr.lrh[3] = cpu_to_be16(slid);
plen = 2 /* PBC */ + hwords;
pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
vl = sc_to_vlt(ppd->dd, sc5);
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
@ -668,36 +658,31 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
struct ib_other_headers *ohdr = packet->ohdr;
int opcode;
u32 hdrsize = packet->hlen;
struct ib_wc wc;
u32 qkey;
u32 src_qp;
u16 dlid, pkey;
u16 pkey;
int mgmt_pkey_idx = -1;
struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
u32 bth1;
u8 sl_from_sc, sl;
u16 slid;
u8 extra_bytes;
u8 sl_from_sc;
u8 extra_bytes = packet->pad;
u8 opcode = packet->opcode;
u8 sl = packet->sl;
u32 dlid = packet->dlid;
u32 slid = packet->slid;
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
dlid = ib_get_dlid(hdr);
bth1 = be32_to_cpu(ohdr->bth[1]);
slid = ib_get_slid(hdr);
qkey = ib_get_qkey(ohdr);
src_qp = ib_get_sqpn(ohdr);
pkey = ib_bth_get_pkey(ohdr);
opcode = ib_bth_get_opcode(ohdr);
sl = ib_get_sl(hdr);
extra_bytes = ib_bth_get_pad(ohdr);
extra_bytes += (SIZE_OF_CRC << 2);
sl_from_sc = ibp->sc_to_sl[sc5];
@ -727,10 +712,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
* for invalid pkeys is optional according to
* IB spec (release 1.3, section 10.9.4)
*/
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
pkey, sl,
src_qp, qp->ibqp.qp_num,
slid, dlid);
hfi1_bad_pkey(ibp,
pkey, sl,
src_qp, qp->ibqp.qp_num,
slid, dlid);
return;
}
} else {
@ -739,12 +724,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (mgmt_pkey_idx < 0)
goto drop;
}
if (unlikely(qkey != qp->qkey)) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl,
src_qp, qp->ibqp.qp_num,
slid, dlid);
if (unlikely(qkey != qp->qkey)) /* Silent drop */
return;
}
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 &&
(tlen > 2048 || (sc5 == 0xF))))
@ -811,7 +793,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
qp->r_flags |= RVT_R_REUSE_SGE;
goto drop;
}
if (has_grh) {
if (packet->grh) {
hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;

View File

@ -51,14 +51,6 @@
#include "trace.h"
#include "mmu_rb.h"
struct tid_group {
struct list_head list;
u32 base;
u8 size;
u8 used;
u8 map;
};
struct tid_rb_node {
struct mmu_rb_node mmu;
unsigned long phys;
@ -75,8 +67,6 @@ struct tid_pageset {
u16 count;
};
#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
#define num_user_pages(vaddr, len) \
(1 + (((((unsigned long)(vaddr) + \
(unsigned long)(len) - 1) & PAGE_MASK) - \
@ -109,96 +99,14 @@ static struct mmu_rb_ops tid_rb_ops = {
.invalidate = tid_rb_invalidate
};
static inline u32 rcventry2tidinfo(u32 rcventry)
{
u32 pair = rcventry & ~0x1;
return EXP_TID_SET(IDX, pair >> 1) |
EXP_TID_SET(CTRL, 1 << (rcventry - pair));
}
static inline void exp_tid_group_init(struct exp_tid_set *set)
{
INIT_LIST_HEAD(&set->list);
set->count = 0;
}
static inline void tid_group_remove(struct tid_group *grp,
struct exp_tid_set *set)
{
list_del_init(&grp->list);
set->count--;
}
static inline void tid_group_add_tail(struct tid_group *grp,
struct exp_tid_set *set)
{
list_add_tail(&grp->list, &set->list);
set->count++;
}
static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
{
struct tid_group *grp =
list_first_entry(&set->list, struct tid_group, list);
list_del_init(&grp->list);
set->count--;
return grp;
}
static inline void tid_group_move(struct tid_group *group,
struct exp_tid_set *s1,
struct exp_tid_set *s2)
{
tid_group_remove(group, s1);
tid_group_add_tail(group, s2);
}
int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = fd->dd;
u32 tidbase;
u32 i;
struct tid_group *grp, *gptr;
exp_tid_group_init(&uctxt->tid_group_list);
exp_tid_group_init(&uctxt->tid_used_list);
exp_tid_group_init(&uctxt->tid_full_list);
tidbase = uctxt->expected_base;
for (i = 0; i < uctxt->expected_count /
dd->rcv_entries.group_size; i++) {
grp = kzalloc(sizeof(*grp), GFP_KERNEL);
if (!grp)
goto grp_failed;
grp->size = dd->rcv_entries.group_size;
grp->base = tidbase;
tid_group_add_tail(grp, &uctxt->tid_group_list);
tidbase += dd->rcv_entries.group_size;
}
return 0;
grp_failed:
list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
list) {
list_del_init(&grp->list);
kfree(grp);
}
return -ENOMEM;
}
/*
* Initialize context and file private data needed for Expected
* receive caching. This needs to be done after the context has
* been configured with the eager/expected RcvEntry counts.
*/
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd)
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
@ -266,18 +174,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd)
return ret;
}
void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt)
{
struct tid_group *grp, *gptr;
list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
list) {
list_del_init(&grp->list);
kfree(grp);
}
hfi1_clear_tids(uctxt);
}
void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
@ -302,23 +198,6 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
fd->entry_to_rb = NULL;
}
/*
* Write an "empty" RcvArray entry.
* This function exists so the TID registration code can use it
* to write to unused/unneeded entries and still take advantage
* of the WC performance improvements. The HFI will ignore this
* write to the RcvArray entry.
*/
static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
{
/*
* Doing the WC fill writes only makes sense if the device is
* present and the RcvArray has been mapped as WC memory.
*/
if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
writeq(0, dd->rcvarray_wc + (index * 8));
}
/*
* RcvArray entry allocation for Expected Receives is done by the
* following algorithm:
@ -935,12 +814,11 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
node->npages, node->mmu.addr, node->phys,
node->dma_addr);
hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
/*
* Make sure device has seen the write before we unpin the
* pages.
*/
flush_wc();
hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
PCI_DMA_FROMDEVICE);

View File

@ -49,30 +49,10 @@
#include "hfi.h"
#define EXP_TID_TIDLEN_MASK 0x7FFULL
#define EXP_TID_TIDLEN_SHIFT 0
#define EXP_TID_TIDCTRL_MASK 0x3ULL
#define EXP_TID_TIDCTRL_SHIFT 20
#define EXP_TID_TIDIDX_MASK 0x3FFULL
#define EXP_TID_TIDIDX_SHIFT 22
#define EXP_TID_GET(tid, field) \
(((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK)
#include "exp_rcv.h"
#define EXP_TID_SET(field, value) \
(((value) & EXP_TID_TID##field##_MASK) << \
EXP_TID_TID##field##_SHIFT)
#define EXP_TID_CLEAR(tid, field) ({ \
(tid) &= ~(EXP_TID_TID##field##_MASK << \
EXP_TID_TID##field##_SHIFT); \
})
#define EXP_TID_RESET(tid, field, value) do { \
EXP_TID_CLEAR(tid, field); \
(tid) |= EXP_TID_SET(field, (value)); \
} while (0)
void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt);
int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd);
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd);
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt);
void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd);
int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
struct hfi1_tid_info *tinfo);

View File

@ -94,43 +94,13 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
/* Number of BTH.PSN bits used for sequence number in expected rcvs */
#define BTH_SEQ_MASK 0x7ffull
/*
* Define fields in the KDETH header so we can update the header
* template.
*/
#define KDETH_OFFSET_SHIFT 0
#define KDETH_OFFSET_MASK 0x7fff
#define KDETH_OM_SHIFT 15
#define KDETH_OM_MASK 0x1
#define KDETH_TID_SHIFT 16
#define KDETH_TID_MASK 0x3ff
#define KDETH_TIDCTRL_SHIFT 26
#define KDETH_TIDCTRL_MASK 0x3
#define KDETH_INTR_SHIFT 28
#define KDETH_INTR_MASK 0x1
#define KDETH_SH_SHIFT 29
#define KDETH_SH_MASK 0x1
#define KDETH_HCRC_UPPER_SHIFT 16
#define KDETH_HCRC_UPPER_MASK 0xff
#define KDETH_HCRC_LOWER_SHIFT 24
#define KDETH_HCRC_LOWER_MASK 0xff
#define AHG_KDETH_INTR_SHIFT 12
#define AHG_KDETH_SH_SHIFT 13
#define AHG_KDETH_ARRAY_SIZE 9
#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
#define KDETH_GET(val, field) \
(((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
#define KDETH_SET(dw, field, val) do { \
u32 dwval = le32_to_cpu(dw); \
dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \
dwval |= (((val) & KDETH_##field##_MASK) << \
KDETH_##field##_SHIFT); \
dw = cpu_to_le32(dwval); \
} while (0)
#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \
do { \
if ((idx) < ARRAY_SIZE((arr))) \
@ -141,23 +111,10 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
return -ERANGE; \
} while (0)
/* KDETH OM multipliers and switch over point */
#define KDETH_OM_SMALL 4
#define KDETH_OM_SMALL_SHIFT 2
#define KDETH_OM_LARGE 64
#define KDETH_OM_LARGE_SHIFT 6
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
/* Tx request flag bits */
#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
/* SDMA request flag bits */
#define SDMA_REQ_FOR_THREAD 1
#define SDMA_REQ_SEND_DONE 2
#define SDMA_REQ_HAS_ERROR 3
#define SDMA_REQ_DONE_ERROR 4
#define SDMA_PKT_Q_INACTIVE BIT(0)
#define SDMA_PKT_Q_ACTIVE BIT(1)
#define SDMA_PKT_Q_DEFERRED BIT(2)
@ -204,25 +161,41 @@ struct evict_data {
};
struct user_sdma_request {
struct sdma_req_info info;
struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq;
/* This is the original header from user space */
struct hfi1_pkt_header hdr;
/* Read mostly fields */
struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp;
struct hfi1_user_sdma_comp_q *cq;
/*
* Pointer to the SDMA engine for this request.
* Since different request could be on different VLs,
* each request will need it's own engine pointer.
*/
struct sdma_engine *sde;
s8 ahg_idx;
u32 ahg[9];
struct sdma_req_info info;
/* TID array values copied from the tid_iov vector */
u32 *tids;
/* total length of the data in the request */
u32 data_len;
/* number of elements copied to the tids array */
u16 n_tids;
/*
* KDETH.Offset (Eager) field
* We need to remember the initial value so the headers
* can be updated properly.
* We copy the iovs for this request (based on
* info.iovcnt). These are only the data vectors
*/
u32 koffset;
u8 data_iovs;
s8 ahg_idx;
/* Writeable fields shared with interrupt */
u64 seqcomp ____cacheline_aligned_in_smp;
u64 seqsubmitted;
/* status of the last txreq completed */
int status;
/* Send side fields */
struct list_head txps ____cacheline_aligned_in_smp;
u64 seqnum;
/*
* KDETH.OFFSET (TID) field
* The offset can cover multiple packets, depending on the
@ -230,29 +203,21 @@ struct user_sdma_request {
*/
u32 tidoffset;
/*
* We copy the iovs for this request (based on
* info.iovcnt). These are only the data vectors
* KDETH.Offset (Eager) field
* We need to remember the initial value so the headers
* can be updated properly.
*/
unsigned data_iovs;
/* total length of the data in the request */
u32 data_len;
/* progress index moving along the iovs array */
unsigned iov_idx;
struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
/* number of elements copied to the tids array */
u16 n_tids;
/* TID array values copied from the tid_iov vector */
u32 *tids;
u16 tididx;
u32 koffset;
u32 sent;
u64 seqnum;
u64 seqcomp;
u64 seqsubmitted;
struct list_head txps;
unsigned long flags;
/* status of the last txreq completed */
int status;
};
/* TID index copied from the tid_iov vector */
u16 tididx;
/* progress index moving along the iovs array */
u8 iov_idx;
u8 done;
u8 has_error;
struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
} ____cacheline_aligned_in_smp;
/*
* A single txreq could span up to 3 physical pages when the MTU
@ -307,7 +272,8 @@ static int defer_packet_queue(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *txreq,
unsigned int seq);
uint seq,
bool pkts_sent);
static void activate_packet_queue(struct iowait *wait, int reason);
static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
unsigned long len);
@ -329,7 +295,8 @@ static int defer_packet_queue(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *txreq,
unsigned seq)
uint seq,
bool pkts_sent)
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
@ -349,7 +316,7 @@ static int defer_packet_queue(
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
write_seqlock(&dev->iowait_lock);
if (list_empty(&pq->busy.list))
list_add_tail(&pq->busy.list, &sde->dmawait);
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
write_sequnlock(&dev->iowait_lock);
return -EBUSY;
eagain:
@ -379,7 +346,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
struct hfi1_devdata *dd;
struct hfi1_user_sdma_comp_q *cq;
struct hfi1_user_sdma_pkt_q *pq;
unsigned long flags;
if (!uctxt || !fd)
return -EBADF;
@ -393,7 +359,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
if (!pq)
return -ENOMEM;
INIT_LIST_HEAD(&pq->list);
pq->dd = dd;
pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt;
@ -454,10 +419,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
fd->pq = pq;
fd->cq = cq;
spin_lock_irqsave(&uctxt->sdma_qlock, flags);
list_add(&pq->list, &uctxt->sdma_queues);
spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
return 0;
pq_mmu_fail:
@ -476,11 +437,10 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
return ret;
}
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_user_sdma_pkt_q *pq;
unsigned long flags;
hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
uctxt->ctxt, fd->subctxt);
@ -488,10 +448,6 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
if (pq) {
if (pq->handler)
hfi1_mmu_rb_unregister(pq->handler);
spin_lock_irqsave(&uctxt->sdma_qlock, flags);
if (!list_empty(&pq->list))
list_del_init(&pq->list);
spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
iowait_sdma_drain(&pq->busy);
/* Wait until all requests have been freed. */
wait_event_interruptible(
@ -607,12 +563,20 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit,
uctxt->ctxt, fd->subctxt, info.comp_idx);
req = pq->reqs + info.comp_idx;
memset(req, 0, sizeof(*req));
req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
req->data_len = 0;
req->pq = pq;
req->cq = cq;
req->status = -1;
req->ahg_idx = -1;
req->iov_idx = 0;
req->sent = 0;
req->seqnum = 0;
req->seqcomp = 0;
req->seqsubmitted = 0;
req->tids = NULL;
req->done = 0;
req->has_error = 0;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
@ -701,12 +665,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
/* Save all the IO vector structures */
for (i = 0; i < req->data_iovs; i++) {
req->iovs[i].offset = 0;
INIT_LIST_HEAD(&req->iovs[i].list);
memcpy(&req->iovs[i].iov,
iovec + idx++,
sizeof(req->iovs[i].iov));
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
req->data_iovs = i;
req->status = ret;
goto free_req;
}
@ -749,6 +715,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
}
req->tids = tmp;
req->n_tids = ntids;
req->tididx = 0;
idx++;
}
@ -791,12 +758,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
* request have been submitted to the SDMA engine. However, it
* will not wait for send completions.
*/
while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) {
if (ret != -EBUSY) {
req->status = ret;
set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
WRITE_ONCE(req->has_error, 1);
if (ACCESS_ONCE(req->seqcomp) ==
req->seqsubmitted - 1)
goto free_req;
@ -898,10 +865,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
pq = req->pq;
/* If tx completion has reported an error, we are done. */
if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
if (READ_ONCE(req->has_error))
return -EFAULT;
}
/*
* Check if we might have sent the entire request already
@ -924,10 +889,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
* with errors. If so, we are not going to process any
* more packets from this request.
*/
if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
if (READ_ONCE(req->has_error))
return -EFAULT;
}
tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
if (!tx)
@ -1024,11 +987,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
datalen);
if (changes < 0)
goto free_tx;
sdma_txinit_ahg(&tx->txreq,
SDMA_TXREQ_F_USE_AHG,
datalen, req->ahg_idx, changes,
req->ahg, sizeof(req->hdr),
user_sdma_txreq_cb);
}
} else {
ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@ -1105,7 +1063,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
req->seqsubmitted += count;
if (req->seqsubmitted == req->info.npkts) {
set_bit(SDMA_REQ_SEND_DONE, &req->flags);
WRITE_ONCE(req->done, 1);
/*
* The txreq has already been submitted to the HW queue
* so we can free the AHG entry now. Corruption will not
@ -1155,14 +1113,23 @@ static int pin_vector_pages(struct user_sdma_request *req,
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct sdma_mmu_node *node = NULL;
struct mmu_rb_node *rb_node;
bool extracted;
rb_node = hfi1_mmu_rb_extract(pq->handler,
(unsigned long)iovec->iov.iov_base,
iovec->iov.iov_len);
if (rb_node)
extracted =
hfi1_mmu_rb_remove_unless_exact(pq->handler,
(unsigned long)
iovec->iov.iov_base,
iovec->iov.iov_len, &rb_node);
if (rb_node) {
node = container_of(rb_node, struct sdma_mmu_node, rb);
else
rb_node = NULL;
if (!extracted) {
atomic_inc(&node->refcount);
iovec->pages = node->pages;
iovec->npages = node->npages;
iovec->node = node;
return 0;
}
}
if (!node) {
node = kzalloc(sizeof(*node), GFP_KERNEL);
@ -1423,21 +1390,22 @@ static int set_txreq_header(struct user_sdma_request *req,
}
static int set_txreq_header_ahg(struct user_sdma_request *req,
struct user_sdma_txreq *tx, u32 len)
struct user_sdma_txreq *tx, u32 datalen)
{
u32 ahg[AHG_KDETH_ARRAY_SIZE];
int diff = 0;
u8 omfactor; /* KDETH.OM */
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &req->hdr;
u16 pbclen = le16_to_cpu(hdr->pbc[0]);
u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len));
u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
if (PBC2LRH(pbclen) != lrhlen) {
/* PBC.PbcLengthDWs */
AHG_HEADER_SET(req->ahg, diff, 0, 0, 12,
AHG_HEADER_SET(ahg, diff, 0, 0, 12,
cpu_to_le16(LRH2PBC(lrhlen)));
/* LRH.PktLen (we need the full 16 bits due to byte swap) */
AHG_HEADER_SET(req->ahg, diff, 3, 0, 16,
AHG_HEADER_SET(ahg, diff, 3, 0, 16,
cpu_to_be16(lrhlen >> 2));
}
@ -1449,13 +1417,12 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
val32 |= 1UL << 31;
AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
/* KDETH.Offset */
AHG_HEADER_SET(req->ahg, diff, 15, 0, 16,
AHG_HEADER_SET(ahg, diff, 15, 0, 16,
cpu_to_le16(req->koffset & 0xffff));
AHG_HEADER_SET(req->ahg, diff, 15, 16, 16,
cpu_to_le16(req->koffset >> 16));
AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16));
if (req_opcode(req->info.ctrl) == EXPECTED) {
__le16 val;
@ -1473,9 +1440,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
* we have to check again.
*/
if (++req->tididx > req->n_tids - 1 ||
!req->tids[req->tididx]) {
!req->tids[req->tididx])
return -EINVAL;
}
tidval = req->tids[req->tididx];
}
omfactor = ((EXP_TID_GET(tidval, LEN) *
@ -1483,7 +1449,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
KDETH_OM_SMALL_SHIFT;
/* KDETH.OM and KDETH.OFFSET (TID) */
AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
AHG_HEADER_SET(ahg, diff, 7, 0, 16,
((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
((req->tidoffset >> omfactor)
& 0x7fff)));
@ -1503,12 +1469,20 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
AHG_KDETH_INTR_SHIFT));
}
AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
AHG_HEADER_SET(ahg, diff, 7, 16, 14, val);
}
if (diff < 0)
return diff;
trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
req->info.comp_idx, req->sde->this_idx,
req->ahg_idx, req->ahg, diff, tidval);
req->ahg_idx, ahg, diff, tidval);
sdma_txinit_ahg(&tx->txreq,
SDMA_TXREQ_F_USE_AHG,
datalen, req->ahg_idx, diff,
ahg, sizeof(req->hdr),
user_sdma_txreq_cb);
return diff;
}
@ -1537,7 +1511,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
if (status != SDMA_TXREQ_S_OK) {
SDMA_DBG(req, "SDMA completion with error %d",
status);
set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
WRITE_ONCE(req->has_error, 1);
}
req->seqcomp = tx->seqnum;
@ -1556,8 +1530,8 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
if (status != SDMA_TXREQ_S_OK)
req->status = status;
if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
(test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
(READ_ONCE(req->done) ||
READ_ONCE(req->has_error))) {
user_sdma_free_request(req, false);
pq_update(pq);
set_comp_state(pq, cq, idx, ERROR, req->status);

View File

@ -56,8 +56,7 @@
extern uint extended_psn;
struct hfi1_user_sdma_pkt_q {
struct list_head list;
unsigned ctxt;
u16 ctxt;
u16 subctxt;
u16 n_max_reqs;
atomic_t n_reqs;
@ -82,7 +81,8 @@ struct hfi1_user_sdma_comp_q {
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
struct hfi1_filedata *fd);
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd);
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt);
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
struct iovec *iovec, unsigned long dim,
unsigned long *count);

View File

@ -508,13 +508,14 @@ void hfi1_copy_sge(
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
static inline opcode_handler qp_ok(struct hfi1_packet *packet)
{
if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
return NULL;
if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
(opcode == IB_OPCODE_CNP))
return opcode_handler_tbl[opcode];
if (((packet->opcode & RVT_OPCODE_QP_MASK) ==
packet->qp->allowed_ops) ||
(packet->opcode == IB_OPCODE_CNP))
return opcode_handler_tbl[packet->opcode];
return NULL;
}
@ -548,69 +549,34 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
return pbc;
}
/**
* hfi1_ib_rcv - process an incoming packet
* @packet: data packet information
*
* This is called to process an incoming packet at interrupt level.
*
* Tlen is the length of the header + data + CRC in bytes.
*/
void hfi1_ib_rcv(struct hfi1_packet *packet)
static inline void hfi1_handle_packet(struct hfi1_packet *packet,
bool is_mcast)
{
u32 qp_num;
struct hfi1_ctxtdata *rcd = packet->rcd;
struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
opcode_handler packet_handler;
unsigned long flags;
u32 qp_num;
int lnh;
u8 opcode;
u16 lid;
/* Check for GRH */
lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
} else if (lnh == HFI1_LRH_GRH) {
u32 vtf;
inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]);
packet->ohdr = &hdr->u.l.oth;
if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
goto drop;
vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
packet->rcv_flags |= HFI1_HAS_GRH;
} else {
goto drop;
}
trace_input_ibhdr(rcd->dd, hdr);
opcode = ib_bth_get_opcode(packet->ohdr);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* Get the destination QP number. */
qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
lid = ib_get_dlid(hdr);
if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
if (unlikely(is_mcast)) {
struct rvt_mcast *mcast;
struct rvt_mcast_qp *p;
if (lnh != HFI1_LRH_GRH)
if (!packet->grh)
goto drop;
mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
mcast = rvt_mcast_find(&ibp->rvp,
&packet->grh->dgid,
packet->dlid);
if (!mcast)
goto drop;
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
packet->qp = p->qp;
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(opcode, packet);
packet_handler = qp_ok(packet);
if (likely(packet_handler))
packet_handler(packet);
else
@ -624,19 +590,21 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
if (atomic_dec_return(&mcast->refcount) <= 1)
wake_up(&mcast->wait);
} else {
/* Get the destination QP number. */
qp_num = ib_bth_get_qpn(packet->ohdr);
rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp) {
rcu_read_unlock();
goto drop;
}
if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode,
true))) {
rcu_read_unlock();
goto drop;
}
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(opcode, packet);
packet_handler = qp_ok(packet);
if (likely(packet_handler))
packet_handler(packet);
else
@ -645,11 +613,29 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
rcu_read_unlock();
}
return;
drop:
ibp->rvp.n_pkt_drops++;
}
/**
 * hfi1_ib_rcv - entry point for a received IB packet
 * @packet: data packet information
 *
 * This is called to process an incoming packet at interrupt level.
 * The packet's DLID decides whether it is handled as multicast; the
 * input-header trace event is emitted and the packet is then handed to
 * hfi1_handle_packet().
 */
void hfi1_ib_rcv(struct hfi1_packet *packet)
{
	bool is_mcast;

	/* A multicast DLID makes hfi1_handle_packet() take its mcast branch. */
	is_mcast = unlikely(hfi1_check_mcast(packet->dlid)) ? true : false;

	/* Third trace argument is 1 when the RHF DC-info bit is set, else 0. */
	trace_input_ibhdr(packet->rcd->dd, packet,
			  !!(packet->rhf & RHF_DC_INFO_SMASK));

	hfi1_handle_packet(packet, is_mcast);
}
/*
* This is called from a timer to check for QPs
* which need kernel memory in order to send a packet.
@ -863,7 +849,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
/* No vl15 here */
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
pbc = hfi1_fault_tx(qp, opcode, pbc);
@ -878,14 +864,15 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (unlikely(ret))
goto bail_build;
}
ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq);
ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq,
ps->pkts_sent);
if (unlikely(ret < 0)) {
if (ret == -ECOMM)
goto bail_ecomm;
return ret;
}
trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
&ps->s_txreq->phdr.hdr);
&ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
return ret;
bail_ecomm:
@ -935,7 +922,8 @@ static int pio_wait(struct rvt_qp *qp,
dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
list_add_tail(&priv->s_iowait.list, &sc->piowait);
iowait_queue(ps->pkts_sent, &priv->s_iowait,
&sc->piowait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
rvt_get_qp(qp);
@ -999,7 +987,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u8 opcode = get_opcode(&tx->phdr.hdr);
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
@ -1058,7 +1046,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
}
trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
&ps->s_txreq->phdr.hdr);
&ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
pio_bail:
if (qp->s_wqe) {
@ -1368,7 +1356,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
props->lmc = ppd->lmc;
/* OPA logical states match IB logical states */
props->state = driver_lstate(ppd);
props->phys_state = hfi1_ibphys_portstate(ppd);
props->phys_state = driver_pstate(ppd);
props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
/* see rate_show() in ib core/sysfs.c */
@ -1547,13 +1535,22 @@ static void init_ibport(struct hfi1_pportdata *ppd)
ibp->sc_to_sl[i] = i;
}
for (i = 0; i < RVT_MAX_TRAP_LISTS ; i++)
INIT_LIST_HEAD(&ibp->rvp.trap_lists[i].list);
setup_timer(&ibp->rvp.trap_timer, hfi1_handle_trap_timer,
(unsigned long)ibp);
spin_lock_init(&ibp->rvp.lock);
/* Set the prefix to the default value (see ch. 4.1.1) */
ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
ibp->rvp.sm_lid = 0;
/* Below should only set bits defined in OPA PortInfo.CapabilityMask */
/*
* Below should only set bits defined in OPA PortInfo.CapabilityMask
* and PortInfo.CapabilityMask3
*/
ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
IB_PORT_CAP_MASK_NOTICE_SUP;
ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported;
ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
@ -1564,14 +1561,13 @@ static void init_ibport(struct hfi1_pportdata *ppd)
RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
}
static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
size_t str_len)
static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
struct hfi1_ibdev *dev = dev_from_rdi(rdi);
u32 ver = dd_from_dev(dev)->dc8051_ver;
snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver),
snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u", dc8051_ver_maj(ver),
dc8051_ver_min(ver), dc8051_ver_patch(ver));
}

View File

@ -143,6 +143,7 @@ struct hfi1_pkt_state {
unsigned long timeout_int;
int cpu;
bool in_thread;
bool pkts_sent;
};
#define HFI1_PSN_CREDIT 16
@ -236,8 +237,8 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
/*
* This must be called with s_lock held.
*/
void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
u32 qp1, u32 qp2, u16 lid1, u16 lid2);
void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
u32 qp1, u32 qp2, u16 lid1, u16 lid2);
void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
@ -307,8 +308,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet);
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
struct ib_header *hdr,
u32 rcv_flags,
struct hfi1_packet *packet,
struct rvt_qp *qp);
u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
@ -346,8 +346,7 @@ static inline u8 get_opcode(struct ib_header *h)
return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
}
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
const struct ib_global_route *grh, u32 hwords, u32 nwords);

View File

@ -103,6 +103,7 @@ struct hfi1_vnic_sdma {
struct sdma_txreq stx;
unsigned int state;
u8 q_idx;
bool pkts_sent;
};
/**

View File

@ -95,7 +95,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
uctxt->is_vnic = true;
done:
@ -106,7 +106,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
struct hfi1_ctxtdata **vnic_ctxt)
{
struct hfi1_ctxtdata *uctxt;
unsigned int ctxt;
u16 ctxt;
int ret;
if (dd->flags & HFI1_FROZEN)
@ -156,11 +156,11 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
return ret;
bail:
/*
* hfi1_free_ctxtdata() also releases send_context
* structure if uctxt->sc is not null
* hfi1_rcd_put() will call hfi1_free_ctxtdata(), which will
* release send_context structure if uctxt->sc is not null
*/
dd->rcd[uctxt->ctxt] = NULL;
hfi1_free_ctxtdata(dd, uctxt);
hfi1_rcd_put(uctxt);
dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
return ret;
}
@ -186,7 +186,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
HFI1_RCVCTRL_INTRAVAIL_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
/*
* VNIC contexts are allocated from user context pool.
* Release them back to user context pool.
@ -208,7 +208,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
hfi1_clear_ctxt_pkey(dd, uctxt);
hfi1_stats.sps_ctxts--;
hfi1_free_ctxtdata(dd, uctxt);
hfi1_rcd_put(uctxt);
}
void hfi1_vnic_setup(struct hfi1_devdata *dd)
@ -751,6 +751,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
if (rc)
break;
hfi1_rcd_get(dd->vnic.ctxt[i]);
dd->vnic.ctxt[i]->vnic_q_idx = i;
}
@ -762,6 +763,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
*/
while (i-- > dd->vnic.num_ctxt) {
deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
hfi1_rcd_put(dd->vnic.ctxt[i]);
dd->vnic.ctxt[i] = NULL;
}
goto alloc_fail;
@ -791,6 +793,7 @@ static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
if (--dd->vnic.num_vports == 0) {
for (i = 0; i < dd->vnic.num_ctxt; i++) {
deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
hfi1_rcd_put(dd->vnic.ctxt[i]);
dd->vnic.ctxt[i] = NULL;
}
hfi1_deinit_vnic_rsm(dd);

View File

@ -198,11 +198,16 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
goto free_desc;
tx->retry_count = 0;
ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq);
ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq,
vnic_sdma->pkts_sent);
/* When -ECOMM, sdma callback will be called with ABORT status */
if (unlikely(ret && unlikely(ret != -ECOMM)))
goto free_desc;
if (!ret) {
vnic_sdma->pkts_sent = true;
iowait_starve_clear(vnic_sdma->pkts_sent, &vnic_sdma->wait);
}
return ret;
free_desc:
@ -211,6 +216,8 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
tx_err:
if (ret != -EBUSY)
dev_kfree_skb_any(skb);
else
vnic_sdma->pkts_sent = false;
return ret;
}
@ -225,7 +232,8 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *txreq,
unsigned int seq)
uint seq,
bool pkts_sent)
{
struct hfi1_vnic_sdma *vnic_sdma =
container_of(wait, struct hfi1_vnic_sdma, wait);
@ -239,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
write_seqlock(&dev->iowait_lock);
if (list_empty(&vnic_sdma->wait.list))
list_add_tail(&vnic_sdma->wait.list, &sde->dmawait);
iowait_queue(pkts_sent, wait, &sde->dmawait);
write_sequnlock(&dev->iowait_lock);
return -EBUSY;
}

View File

@ -1,7 +1,7 @@
config INFINIBAND_HNS
tristate "HNS RoCE Driver"
depends on NET_VENDOR_HISILICON
depends on ARM64 && HNS && HNS_DSAF && HNS_ENET
depends on (ARM64 || COMPILE_TEST) && HNS && HNS_DSAF && HNS_ENET
---help---
This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
is used in Hisilicon Hi1610 and more further ICT SoC.

View File

@ -32,6 +32,7 @@
*/
#include <linux/platform_device.h>
#include <linux/vmalloc.h>
#include "hns_roce_device.h"
int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj)

View File

@ -33,6 +33,7 @@
#ifndef _HNS_ROCE_DEVICE_H
#define _HNS_ROCE_DEVICE_H
#include <linux/io.h>
#include <rdma/ib_verbs.h>
#define DRV_NAME "hns_roce"

View File

@ -31,6 +31,7 @@
*/
#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_eq.h"
@ -292,7 +293,7 @@ static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
dev_warn(dev, "Unhandled event %d on EQ %d at index %u\n",
event_type, eq->eqn, eq->cons_index);
break;
};
}
eq->cons_index++;
aeqes_found = 1;

View File

@ -2023,7 +2023,6 @@ int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
u32 notification_flag;
u32 doorbell[2];
int ret = 0;
notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
@ -2043,7 +2042,7 @@ int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
hns_roce_write64_k(doorbell, hr_cq->cq_db_l);
return ret;
return 0;
}
static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,

View File

@ -32,6 +32,7 @@
*/
#include <linux/platform_device.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"

View File

@ -799,7 +799,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
cur = hr_wq->head - hr_wq->tail;
if (likely(cur + nreq < hr_wq->max_post))
return 0;
return false;
hr_cq = to_hr_cq(ib_cq);
spin_lock(&hr_cq->lock);

View File

@ -77,7 +77,6 @@ MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp 1 or 2");
MODULE_AUTHOR("Intel Corporation, <e1000-rdma@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Ethernet Connection X722 iWARP RDMA Driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
static struct i40e_client i40iw_client;
static char i40iw_client_name[I40E_CLIENT_STR_LENGTH] = "i40iw";

View File

@ -2584,13 +2584,12 @@ static const char * const i40iw_hw_stat_names[] = {
"iwRdmaInv"
};
static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str,
size_t str_len)
static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str)
{
u32 firmware_version = I40IW_FW_VERSION;
snprintf(str, str_len, "%u.%u", firmware_version,
(firmware_version & 0x000000ff));
snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u", firmware_version,
(firmware_version & 0x000000ff));
}
/**

View File

@ -218,6 +218,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
goto err_mtt;
uar = &to_mucontext(context)->uar;
cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS;
} else {
err = mlx4_db_alloc(dev->dev, &cq->db, 1);
if (err)
@ -233,6 +234,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
goto err_db;
uar = &dev->priv_uar;
cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
}
if (dev->eq_table)

View File

@ -70,7 +70,6 @@
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
int mlx4_ib_sm_guid_assign = 0;
module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
@ -81,6 +80,8 @@ static const char mlx4_ib_version[] =
DRV_VERSION "\n";
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
u8 port_num);
static struct workqueue_struct *wq;
@ -552,6 +553,16 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->timestamp_mask = 0xFFFFFFFFFFFFULL;
props->max_ah = INT_MAX;
if ((dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
(mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) {
props->rss_caps.max_rwq_indirection_tables = props->max_qp;
props->rss_caps.max_rwq_indirection_table_size =
dev->dev->caps.max_rss_tbl_sz;
props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
props->max_wq_type_rq = props->max_qp;
}
if (!mlx4_is_slave(dev->dev))
err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
@ -563,6 +574,13 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
}
}
if (uhw->outlen >= resp.response_length +
sizeof(resp.max_inl_recv_sz)) {
resp.response_length += sizeof(resp.max_inl_recv_sz);
resp.max_inl_recv_sz = dev->dev->caps.max_rq_sg *
sizeof(struct mlx4_wqe_data_seg);
}
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
if (err)
@ -1069,6 +1087,9 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
INIT_LIST_HEAD(&context->wqn_ranges_list);
mutex_init(&context->wqn_ranges_mutex);
if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
else
@ -2566,12 +2587,11 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
static void get_fw_ver_str(struct ib_device *device, char *str,
size_t str_len)
static void get_fw_ver_str(struct ib_device *device, char *str)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev);
snprintf(str, str_len, "%d.%d.%d",
snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d",
(int) (dev->dev->caps.fw_ver >> 32),
(int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
(int) dev->dev->caps.fw_ver & 0xffff);
@ -2713,6 +2733,26 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;
ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) ||
(mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
IB_LINK_LAYER_ETHERNET))) {
ibdev->ib_dev.create_wq = mlx4_ib_create_wq;
ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq;
ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq;
ibdev->ib_dev.create_rwq_ind_table =
mlx4_ib_create_rwq_ind_table;
ibdev->ib_dev.destroy_rwq_ind_table =
mlx4_ib_destroy_rwq_ind_table;
ibdev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
}
if (!mlx4_is_slave(ibdev->dev)) {
ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
@ -2772,7 +2812,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
allocated = 0;
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
IB_LINK_LAYER_ETHERNET) {
err = mlx4_counter_alloc(ibdev->dev, &counter_index);
err = mlx4_counter_alloc(ibdev->dev, &counter_index,
MLX4_RES_USAGE_DRIVER);
/* if failed to allocate a new counter, use default */
if (err)
counter_index =
@ -2827,7 +2868,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
MLX4_IB_UC_STEER_QPN_ALIGN,
&ibdev->steer_qpn_base, 0);
&ibdev->steer_qpn_base, 0,
MLX4_RES_USAGE_DRIVER);
if (err)
goto err_counter;

View File

@ -46,6 +46,7 @@
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
#include <linux/mlx4/qp.h>
#define MLX4_IB_DRV_NAME "mlx4_ib"
@ -88,6 +89,8 @@ struct mlx4_ib_ucontext {
struct list_head db_page_list;
struct mutex db_page_mutex;
struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
struct list_head wqn_ranges_list;
struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */
};
struct mlx4_ib_pd {
@ -289,8 +292,25 @@ struct mlx4_roce_smac_vlan_info {
int update_vid;
};
/*
 * Descriptor for a WQN range. struct mlx4_ib_qp carries a pointer to one of
 * these (wqn_range), and struct mlx4_ib_ucontext keeps a wqn_ranges_list
 * protected by wqn_ranges_mutex.
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; confirm against the code that allocates and releases ranges.
 */
struct mlx4_wqn_range {
int base_wqn; /* presumably the first WQN in the range - TODO confirm */
int size; /* presumably the number of WQNs in the range - TODO confirm */
int refcount; /* presumably the number of users of this range - TODO confirm */
bool dirty; /* meaning not visible here - TODO document from the user code */
struct list_head list; /* presumably links into the ucontext's wqn_ranges_list - confirm */
};
/*
 * RSS state attached to a QP: struct mlx4_ib_qp holds a pointer to this
 * structure in its rss_ctx member.
 * NOTE(review): field semantics are not visible here; the notes below are
 * assumptions to confirm against the code that fills this structure in.
 */
struct mlx4_ib_rss {
unsigned int base_qpn_tbl_sz; /* presumably base QPN combined with a table size - TODO confirm encoding */
u8 flags; /* flag bits; definitions not visible here */
u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; /* key buffer of MLX4_EN_RSS_KEY_SIZE bytes */
};
struct mlx4_ib_qp {
struct ib_qp ibqp;
union {
struct ib_qp ibqp;
struct ib_wq ibwq;
};
struct mlx4_qp mqp;
struct mlx4_buf buf;
@ -318,6 +338,7 @@ struct mlx4_ib_qp {
u8 sq_no_prefetch;
u8 state;
int mlx_type;
u32 inl_recv_sz;
struct list_head gid_list;
struct list_head steering_rules;
struct mlx4_ib_buf *sqp_proxy_rcv;
@ -328,6 +349,10 @@ struct mlx4_ib_qp {
struct list_head cq_recv_list;
struct list_head cq_send_list;
struct counter_index *counter_index;
struct mlx4_wqn_range *wqn_range;
/* Number of RSS QP parents that uses this WQ */
u32 rss_usecnt;
struct mlx4_ib_rss *rss_ctx;
};
struct mlx4_ib_srq {
@ -623,6 +648,8 @@ struct mlx4_uverbs_ex_query_device_resp {
__u32 comp_mask;
__u32 response_length;
__u64 hca_core_clock_offset;
__u32 max_inl_recv_sz;
__u32 reserved;
};
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
@ -890,4 +917,17 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port);
struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_destroy_wq(struct ib_wq *wq);
int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
struct ib_rwq_ind_table
*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
#endif /* MLX4_IB_H */

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o

View File

@ -57,3 +57,23 @@ int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
MLX5_SET(query_cong_statistics_in, in, clear, reset);
return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
}
/*
 * Execute the QUERY_CONG_PARAMS command.
 *
 * @dev:        device the command is executed on
 * @cong_point: value written to the cong_protocol field of the command
 * @out:        caller-supplied buffer that receives the command output
 * @out_size:   size of @out in bytes
 *
 * Returns whatever mlx5_cmd_exec() returns.
 */
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
			       void *out, int out_size)
{
	/* Zero-initialised command input; only the two fields below are set. */
	u32 query[MLX5_ST_SZ_DW(query_cong_params_in)] = { };

	MLX5_SET(query_cong_params_in, query, cong_protocol, cong_point);
	MLX5_SET(query_cong_params_in, query, opcode,
		 MLX5_CMD_OP_QUERY_CONG_PARAMS);

	return mlx5_cmd_exec(dev, query, sizeof(query), out, out_size);
}
/*
 * Execute a caller-built command input through mlx5_cmd_exec().
 *
 * @dev:     device the command is executed on
 * @in:      fully prepared command input
 * @in_size: size of @in in bytes
 *
 * The command output lands in a local, zero-initialised buffer sized for
 * modify_cong_params_out and is not handed back to the caller; only the
 * mlx5_cmd_exec() return value is propagated.
 */
int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,
				void *in, int in_size)
{
	u32 reply[MLX5_ST_SZ_DW(modify_cong_params_out)] = { };
	int err;

	err = mlx5_cmd_exec(dev, in, in_size, reply, sizeof(reply));

	return err;
}

View File

@ -39,4 +39,8 @@
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
bool reset, void *out, int out_size);
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out, int out_size);
int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
void *in, int in_size);
#endif /* MLX5_IB_CMD_H */

View File

@ -0,0 +1,421 @@
/*
* Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/debugfs.h>
#include "mlx5_ib.h"
#include "cmd.h"
/*
 * Congestion-control node type a parameter belongs to, as returned by
 * mlx5_ib_param_to_node(): RP for parameter offsets in the range
 * MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE..MLX5_IB_DBG_CC_RP_GD, NP for all others.
 * NOTE(review): RP/NP presumably mean reaction point / notification point,
 * and the numeric values presumably match what the device expects - confirm.
 */
enum mlx5_ib_cong_node_type {
MLX5_IB_RROCE_ECN_RP = 1,
MLX5_IB_RROCE_ECN_NP = 2,
};
/*
 * Names of the congestion-control parameters: 15 "rp_*" entries followed by
 * 3 "np_*" entries, listed in the same order as the MLX5_IB_DBG_CC_* cases
 * handled by mlx5_get_cc_param_val().
 * NOTE(review): presumably indexed by enum mlx5_ib_dbg_cc_types, so the order
 * here must stay in step with that enum - confirm against its declaration.
 */
static const char * const mlx5_ib_dbg_cc_name[] = {
"rp_clamp_tgt_rate",
"rp_clamp_tgt_rate_ati",
"rp_time_reset",
"rp_byte_reset",
"rp_threshold",
"rp_ai_rate",
"rp_hai_rate",
"rp_min_dec_fac",
"rp_min_rate",
"rp_rate_to_set_on_first_cnp",
"rp_dce_tcp_g",
"rp_dce_tcp_rtt",
"rp_rate_reduce_monitor_period",
"rp_initial_alpha_value",
"rp_gd",
"np_cnp_dscp",
"np_cnp_prio_mode",
"np_cnp_prio",
};
/*
 * Attribute-mask bits for the RP parameters. mlx5_ib_set_cc_param_mask_val()
 * ORs the matching bit into *attr_mask when it sets the corresponding field.
 * Bit positions 0 and 6 are not assigned to any RP attribute here.
 */
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1)
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2)
#define MLX5_IB_RP_TIME_RESET_ATTR BIT(3)
#define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4)
#define MLX5_IB_RP_THRESHOLD_ATTR BIT(5)
#define MLX5_IB_RP_AI_RATE_ATTR BIT(7)
#define MLX5_IB_RP_HAI_RATE_ATTR BIT(8)
#define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9)
#define MLX5_IB_RP_MIN_RATE_ATTR BIT(10)
#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11)
#define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12)
#define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13)
#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14)
#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15)
#define MLX5_IB_RP_GD_ATTR BIT(16)
/*
 * Attribute-mask bits for the NP parameters. These use bit positions 3 and 4,
 * which the RP TIME_RESET and BYTE_RESET bits also occupy.
 * NOTE(review): presumably RP and NP masks are never combined in one word -
 * confirm at the place the mask is consumed.
 */
#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3)
#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4)
static enum mlx5_ib_cong_node_type
mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset)
{
if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE &&
param_offset <= MLX5_IB_DBG_CC_RP_GD)
return MLX5_IB_RROCE_ECN_RP;
else
return MLX5_IB_RROCE_ECN_NP;
}
static u32 mlx5_get_cc_param_val(void *field, int offset)
{
switch (offset) {
case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
clamp_tgt_rate);
case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
clamp_tgt_rate_after_time_inc);
case MLX5_IB_DBG_CC_RP_TIME_RESET:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_time_reset);
case MLX5_IB_DBG_CC_RP_BYTE_RESET:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_byte_reset);
case MLX5_IB_DBG_CC_RP_THRESHOLD:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_threshold);
case MLX5_IB_DBG_CC_RP_AI_RATE:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_ai_rate);
case MLX5_IB_DBG_CC_RP_HAI_RATE:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_hai_rate);
case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_min_dec_fac);
case MLX5_IB_DBG_CC_RP_MIN_RATE:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_min_rate);
case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rate_to_set_on_first_cnp);
case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
dce_tcp_g);
case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
dce_tcp_rtt);
case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rate_reduce_monitor_period);
case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
initial_alpha_value);
case MLX5_IB_DBG_CC_RP_GD:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_gd);
case MLX5_IB_DBG_CC_NP_CNP_DSCP:
return MLX5_GET(cong_control_r_roce_ecn_np, field,
cnp_dscp);
case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
return MLX5_GET(cong_control_r_roce_ecn_np, field,
cnp_prio_mode);
case MLX5_IB_DBG_CC_NP_CNP_PRIO:
return MLX5_GET(cong_control_r_roce_ecn_np, field,
cnp_802p_prio);
default:
return 0;
}
}
/*
 * Store one congestion-control parameter into a parameter layout and flag it
 * in the caller's attribute mask.
 *
 * @field: start of the congestion parameter layout being filled
 * @offset: MLX5_IB_DBG_CC_* selector naming the parameter
 * @var: value to store
 * @attr_mask: OR-ed with the MLX5_IB_*_ATTR bit matching the parameter
 *
 * RP selectors are written through the cong_control_r_roce_ecn_rp layout and
 * NP selectors through cong_control_r_roce_ecn_np. An unrecognized @offset
 * leaves both @field and @attr_mask untouched.
 */
static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
u32 var, u32 *attr_mask)
{
switch (offset) {
case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
*attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
clamp_tgt_rate, var);
break;
case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
*attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
clamp_tgt_rate_after_time_inc, var);
break;
case MLX5_IB_DBG_CC_RP_TIME_RESET:
*attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
rpg_time_reset, var);
break;
case MLX5_IB_DBG_CC_RP_BYTE_RESET:
*attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
rpg_byte_reset, var);
break;
case MLX5_IB_DBG_CC_RP_THRESHOLD:
*attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
rpg_threshold, var);
break;
case MLX5_IB_DBG_CC_RP_AI_RATE:
*attr_mask |= MLX5_IB_RP_AI_RATE_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
rpg_ai_rate, var);
break;
case MLX5_IB_DBG_CC_RP_HAI_RATE:
*attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
rpg_hai_rate, var);
break;
case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
*attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
rpg_min_dec_fac, var);
break;
case MLX5_IB_DBG_CC_RP_MIN_RATE:
*attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
rpg_min_rate, var);
break;
case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
*attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
rate_to_set_on_first_cnp, var);
break;
case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
*attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
dce_tcp_g, var);
break;
case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
*attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
dce_tcp_rtt, var);
break;
case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
*attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
rate_reduce_monitor_period, var);
break;
case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
*attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
initial_alpha_value, var);
break;
case MLX5_IB_DBG_CC_RP_GD:
*attr_mask |= MLX5_IB_RP_GD_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
rpg_gd, var);
break;
case MLX5_IB_DBG_CC_NP_CNP_DSCP:
*attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
break;
case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
*attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
break;
case MLX5_IB_DBG_CC_NP_CNP_PRIO:
/*
 * Writing the priority also forces cnp_prio_mode to 0 and is flagged
 * with the PRIO_MODE attribute bit (no separate priority bit is used).
 * NOTE(review): presumably mode 0 selects the explicit 802.1p priority
 * written below - confirm against the device specification.
 */
*attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
break;
}
}
/*
 * Query the device for one congestion-control parameter.
 *
 * @dev: device to query
 * @offset: MLX5_IB_DBG_CC_* selector
 * @var: receives the parameter value; left untouched on failure
 *
 * Return: 0 on success, -ENOMEM if the reply buffer cannot be allocated, or
 * the error returned by mlx5_cmd_query_cong_params().
 */
static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var)
{
	const int reply_len = MLX5_ST_SZ_BYTES(query_cong_params_out);
	enum mlx5_ib_cong_node_type node_type;
	void *reply;
	int rc;

	reply = kvzalloc(reply_len, GFP_KERNEL);
	if (!reply)
		return -ENOMEM;

	node_type = mlx5_ib_param_to_node(offset);

	rc = mlx5_cmd_query_cong_params(dev->mdev, node_type, reply, reply_len);
	if (!rc) {
		void *params = MLX5_ADDR_OF(query_cong_params_out, reply,
					    congestion_parameters);

		*var = mlx5_get_cc_param_val(params, offset);
	}

	kvfree(reply);
	return rc;
}
/*
 * Program one congestion-control parameter into the device.
 *
 * @dev: device to modify
 * @offset: MLX5_IB_DBG_CC_* selector
 * @var: new value for the parameter
 *
 * Builds a MODIFY_CONG_PARAMS command holding the new value and a
 * field-select mask naming the parameter, then issues it.
 *
 * Return: 0 on success, -ENOMEM if the command buffer cannot be allocated,
 * or the error returned by mlx5_cmd_modify_cong_params().
 */
static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var)
{
	const int cmd_len = MLX5_ST_SZ_BYTES(modify_cong_params_in);
	enum mlx5_ib_cong_node_type node_type;
	u32 select_mask = 0;
	void *params;
	void *select;
	void *cmd;
	int rc;

	cmd = kvzalloc(cmd_len, GFP_KERNEL);
	if (!cmd)
		return -ENOMEM;

	MLX5_SET(modify_cong_params_in, cmd, opcode,
		 MLX5_CMD_OP_MODIFY_CONG_PARAMS);

	node_type = mlx5_ib_param_to_node(offset);
	MLX5_SET(modify_cong_params_in, cmd, cong_protocol, node_type);

	/* value first: the helper also computes the select mask */
	params = MLX5_ADDR_OF(modify_cong_params_in, cmd,
			      congestion_parameters);
	mlx5_ib_set_cc_param_mask_val(params, offset, var, &select_mask);

	select = MLX5_ADDR_OF(modify_cong_params_in, cmd, field_select);
	MLX5_SET(field_select_r_roce_rp, select, field_select_r_roce_rp,
		 select_mask);

	rc = mlx5_cmd_modify_cong_params(dev->mdev, cmd, cmd_len);

	kvfree(cmd);
	return rc;
}
/*
 * debugfs write handler: parse an unsigned 32-bit number from user space and
 * program it as the congestion parameter bound to this file.
 *
 * Return: @count on success, -EINVAL for input longer than the local buffer
 * or not parseable by kstrtou32(), -EFAULT if the user buffer cannot be
 * read, or the error from mlx5_ib_set_cc_params().
 */
static ssize_t set_param(struct file *filp, const char __user *buf,
			 size_t count, loff_t *pos)
{
	struct mlx5_ib_dbg_param *entry = filp->private_data;
	int selector = entry->offset;
	char text[11] = { };
	u32 value;
	int rc;

	if (count > sizeof(text))
		return -EINVAL;

	if (copy_from_user(text, buf, count))
		return -EFAULT;

	/* guarantee termination even when all 11 bytes were supplied */
	text[sizeof(text) - 1] = '\0';

	if (kstrtou32(text, 0, &value))
		return -EINVAL;

	rc = mlx5_ib_set_cc_params(entry->dev, selector, value);
	if (rc)
		return rc;

	return count;
}
/*
 * debugfs read handler: query the congestion parameter bound to this file
 * and return it to user space as a decimal number followed by a newline.
 *
 * The value is produced only for a read at offset 0; any later read returns
 * 0 (EOF). No more than @count bytes are copied to @buf.
 *
 * Return: number of bytes copied, 0 at EOF, -EFAULT if @buf cannot be
 * written, or the error from mlx5_ib_get_cc_params().
 */
static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_ib_dbg_param *param = filp->private_data;
	int offset = param->offset;
	u32 var = 0;
	int ret;
	/* widest u32 is "4294967295": 10 digits + '\n' + NUL */
	char lbuf[12];

	if (*pos)
		return 0;

	ret = mlx5_ib_get_cc_params(param->dev, offset, &var);
	if (ret)
		return ret;

	/*
	 * var is unsigned, so print it with %u. With a 12-byte buffer the
	 * output can never be truncated, hence ret is the real length.
	 */
	ret = snprintf(lbuf, sizeof(lbuf), "%u\n", var);
	if (ret < 0)
		return ret;

	/* bounds the copy by @count and advances *pos */
	return simple_read_from_buffer(buf, count, pos, lbuf, ret);
}
/*
 * File operations for the per-parameter congestion-control debugfs files:
 * reads go through get_param(), writes through set_param(). simple_open is
 * used so that filp->private_data carries the data pointer registered with
 * the file.
 */
static const struct file_operations dbg_cc_fops = {
.owner = THIS_MODULE,
.open = simple_open,
.write = set_param,
.read = get_param,
};
/*
 * Tear down the congestion-control debugfs directory and free its
 * bookkeeping.
 *
 * Safe to call when nothing was set up, and also after a partially failed
 * mlx5_ib_init_cong_debugfs(): the bookkeeping structure is freed even if
 * the debugfs directory itself was never created.
 */
void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root || !dev->dbg_cc_params)
		return;

	/*
	 * Do not bail out on a NULL root: that would leak dbg_cc_params when
	 * directory creation failed. debugfs_remove_recursive() ignores a
	 * NULL dentry.
	 */
	debugfs_remove_recursive(dev->dbg_cc_params->root);
	kfree(dev->dbg_cc_params);
	dev->dbg_cc_params = NULL;
}
int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev)
{
struct mlx5_ib_dbg_cc_params *dbg_cc_params;
int i;
if (!mlx5_debugfs_root)
goto out;
if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed) ||
!MLX5_CAP_GEN(dev->mdev, cc_modify_allowed))
goto out;
dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL);
if (!dbg_cc_params)
goto out;
dev->dbg_cc_params = dbg_cc_params;
dbg_cc_params->root = debugfs_create_dir("cc_params",
dev->mdev->priv.dbg_root);
if (!dbg_cc_params->root)
goto err;
for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
dbg_cc_params->params[i].offset = i;
dbg_cc_params->params[i].dev = dev;
dbg_cc_params->params[i].dentry =
debugfs_create_file(mlx5_ib_dbg_cc_name[i],
0600, dbg_cc_params->root,
&dbg_cc_params->params[i],
&dbg_cc_fops);
if (!dbg_cc_params->params[i].dentry)
goto err;
}
out: return 0;
err:
mlx5_ib_warn(dev, "cong debugfs failure\n");
mlx5_ib_cleanup_cong_debugfs(dev);
/*
* We don't want to fail driver if debugfs failed to initialize,
* so we are not forwarding error to the user.
*/
return 0;
}

View File

@ -96,6 +96,7 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@ -109,6 +110,8 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
}
in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
if (!err)
vfs_ctx[vf].policy = in->policy;
out:
kfree(in);
@ -151,6 +154,7 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@ -160,6 +164,8 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID;
in->node_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
if (!err)
vfs_ctx[vf].node_guid = guid;
kfree(in);
return err;
}
@ -169,6 +175,7 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@ -178,6 +185,8 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID;
in->port_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
if (!err)
vfs_ctx[vf].port_guid = guid;
kfree(in);
return err;
}

View File

@ -30,6 +30,7 @@
* SOFTWARE.
*/
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
@ -58,6 +59,7 @@
#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
#include "cmd.h"
#include <linux/mlx5/vport.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
@ -65,7 +67,6 @@
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);
static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
@ -97,6 +98,20 @@ mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
}
/*
 * Look up the current state of a port.
 *
 * @ibdev: device owning the port
 * @port_num: port to query
 * @state: receives the port state; left untouched on failure
 *
 * Return: 0 on success or the error from mlx5_ib_query_port().
 */
static int get_port_state(struct ib_device *ibdev,
			  u8 port_num,
			  enum ib_port_state *state)
{
	struct ib_port_attr port_attr;
	int rc;

	memset(&port_attr, 0, sizeof(port_attr));

	rc = mlx5_ib_query_port(ibdev, port_num, &port_attr);
	if (rc)
		return rc;

	*state = port_attr.state;
	return 0;
}
static int mlx5_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@ -114,6 +129,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
write_unlock(&ibdev->roce.netdev_lock);
break;
case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
@ -127,10 +143,23 @@ static int mlx5_netdev_event(struct notifier_block *this,
if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
&& ibdev->ib_active) {
struct ib_event ibev = { };
enum ib_port_state port_state;
if (get_port_state(&ibdev->ib_dev, 1, &port_state))
return NOTIFY_DONE;
if (ibdev->roce.last_port_state == port_state)
return NOTIFY_DONE;
ibdev->roce.last_port_state = port_state;
ibev.device = &ibdev->ib_dev;
ibev.event = (event == NETDEV_UP) ?
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
if (port_state == IB_PORT_DOWN)
ibev.event = IB_EVENT_PORT_ERR;
else if (port_state == IB_PORT_ACTIVE)
ibev.event = IB_EVENT_PORT_ACTIVE;
else
return NOTIFY_DONE;
ibev.element.port_num = 1;
ib_dispatch_event(&ibev);
}
@ -668,6 +697,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_TSO;
}
if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
MLX5_CAP_GEN(dev->mdev, general_notification_event))
props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap))
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
/* Legacy bit to support old userspace libraries */
@ -1138,7 +1175,7 @@ static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
return -EINVAL;
mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, alloated %d, using %d sys pages\n",
mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, using %d sys pages\n",
MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
lib_uar_4k ? "yes" : "no", ref_bfregs,
req->total_num_bfregs, *num_sys_pages);
@ -1187,6 +1224,45 @@ static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *con
return 0;
}
/*
 * Allocate a transport domain for a user context and keep the local
 * loopback setting in sync with the number of user domains.
 *
 * @dev: device to allocate on
 * @tdn: receives the transport domain number
 *
 * On Ethernet ports whose device has the disable_local_lb capability, the
 * user domain count is tracked under lb_mutex and local loopback is
 * switched on when the count reaches two.
 *
 * Return: 0 on success or a negative error. On failure no transport domain
 * is left allocated and the user domain count is unchanged.
 */
static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
{
	int err;

	err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
	if (err)
		return err;

	if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
	    !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
		return 0;

	mutex_lock(&dev->lb_mutex);
	dev->user_td++;

	if (dev->user_td == 2) {
		err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
		/* undo the accounting so a later attempt retries the enable */
		if (err)
			dev->user_td--;
	}

	mutex_unlock(&dev->lb_mutex);

	/*
	 * The caller treats any error as "nothing was allocated", so release
	 * the domain here rather than leaking it.
	 */
	if (err)
		mlx5_core_dealloc_transport_domain(dev->mdev, *tdn);

	return err;
}
/*
 * Release a user transport domain and update the loopback accounting.
 *
 * On Ethernet ports whose device has the disable_local_lb capability, the
 * user domain count is decremented under lb_mutex and local loopback is
 * switched off whenever fewer than two user domains remain.
 */
static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
{
	mlx5_core_dealloc_transport_domain(dev->mdev, tdn);

	if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
	    MLX5_CAP_GEN(dev->mdev, disable_local_lb)) {
		mutex_lock(&dev->lb_mutex);

		dev->user_td--;
		if (dev->user_td < 2)
			mlx5_nic_vport_update_local_lb(dev->mdev, false);

		mutex_unlock(&dev->lb_mutex);
	}
}
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
@ -1295,8 +1371,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
mutex_init(&context->upd_xlt_page_mutex);
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
err = mlx5_core_alloc_transport_domain(dev->mdev,
&context->tdn);
err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
if (err)
goto out_page;
}
@ -1362,7 +1437,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
out_td:
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
mlx5_ib_dealloc_transport_domain(dev, context->tdn);
out_page:
free_page(context->upd_xlt_page);
@ -1390,7 +1465,7 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
bfregi = &context->bfregi;
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
mlx5_ib_dealloc_transport_domain(dev, context->tdn);
free_page(context->upd_xlt_page);
deallocate_uars(dev, context);
@ -2030,21 +2105,32 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
*/
static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
{
struct ib_flow_spec_eth *eth_spec;
union ib_flow_spec *flow_spec;
if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
ib_attr->size < sizeof(struct ib_flow_attr) +
sizeof(struct ib_flow_spec_eth) ||
ib_attr->num_of_specs < 1)
return false;
eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
if (eth_spec->type != IB_FLOW_SPEC_ETH ||
eth_spec->size != sizeof(*eth_spec))
return false;
flow_spec = (union ib_flow_spec *)(ib_attr + 1);
if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
struct ib_flow_spec_ipv4 *ipv4_spec;
return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
is_multicast_ether_addr(eth_spec->val.dst_mac);
ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
return true;
return false;
}
if (flow_spec->type == IB_FLOW_SPEC_ETH) {
struct ib_flow_spec_eth *eth_spec;
eth_spec = (struct ib_flow_spec_eth *)flow_spec;
return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
is_multicast_ether_addr(eth_spec->val.dst_mac);
}
return false;
}
static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
@ -2522,8 +2608,14 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *mqp = to_mqp(ibqp);
int err;
if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
return -EOPNOTSUPP;
}
err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
if (err)
mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
@ -2685,6 +2777,26 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
/*
 * Work item run for a delay-drop timeout event: counts the event and
 * programs the configured timeout into the device again. If programming
 * fails, a warning is logged and delay drop is marked inactive.
 */
static void delay_drop_handler(struct work_struct *work)
{
	struct mlx5_ib_delay_drop *dd =
		container_of(work, struct mlx5_ib_delay_drop,
			     delay_drop_work);
	int rc;

	atomic_inc(&dd->events_cnt);

	mutex_lock(&dd->lock);

	rc = mlx5_core_set_delay_drop(dd->dev->mdev, dd->timeout);
	if (rc) {
		mlx5_ib_warn(dd->dev, "Failed to set delay drop, timeout=%u\n",
			     dd->timeout);
		dd->activate = false;
	}

	mutex_unlock(&dd->lock);
}
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
@ -2737,8 +2849,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
ibev.event = IB_EVENT_CLIENT_REREGISTER;
port = (u8)param;
break;
case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
schedule_work(&ibdev->delay_drop.delay_drop_work);
goto out;
default:
return;
goto out;
}
ibev.device = &ibdev->ib_dev;
@ -2746,7 +2861,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (port < 1 || port > ibdev->num_ports) {
mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
return;
goto out;
}
if (ibdev->ib_active)
@ -2754,6 +2869,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (fatal)
ibdev->ib_active = false;
out:
return;
}
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
@ -3167,13 +3285,13 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
static void get_dev_fw_str(struct ib_device *ibdev, char *str,
size_t str_len)
static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct mlx5_ib_dev *dev =
container_of(ibdev, struct mlx5_ib_dev, ib_dev);
snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev),
fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d",
fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev),
fw_rev_sub(dev->mdev));
}
static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
@ -3313,6 +3431,17 @@ static const struct mlx5_ib_counter cong_cnts[] = {
INIT_CONG_COUNTER(np_cnp_sent),
};
/*
 * Additional error Q counters. They are counted and exposed only when the
 * device reports the enhanced_error_q_counters capability.
 */
static const struct mlx5_ib_counter extended_err_cnts[] = {
INIT_Q_COUNTER(resp_local_length_error),
INIT_Q_COUNTER(resp_cqe_error),
INIT_Q_COUNTER(req_cqe_error),
INIT_Q_COUNTER(req_remote_invalid_request),
INIT_Q_COUNTER(req_remote_access_errors),
INIT_Q_COUNTER(resp_remote_access_errors),
INIT_Q_COUNTER(resp_cqe_flush_error),
INIT_Q_COUNTER(req_cqe_flush_error),
};
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
@ -3337,6 +3466,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
num_counters += ARRAY_SIZE(retrans_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
num_counters += ARRAY_SIZE(extended_err_cnts);
cnts->num_q_counters = num_counters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@ -3386,6 +3519,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
}
}
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
names[j] = extended_err_cnts[i].name;
offsets[j] = extended_err_cnts[i].offset;
}
}
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
names[j] = cong_cnts[i].name;
@ -3556,6 +3696,126 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
return netdev;
}
static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
if (!dev->delay_drop.dbg)
return;
debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs);
kfree(dev->delay_drop.dbg);
dev->delay_drop.dbg = NULL;
}
/*
 * Stop delay-drop handling on a device that advertises the delay-drop raw
 * packet capability: wait for any pending work item, then remove the
 * debugfs entries. Does nothing on devices without the capability.
 */
static void cancel_delay_drop(struct mlx5_ib_dev *dev)
{
	if (dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP) {
		cancel_work_sync(&dev->delay_drop.delay_drop_work);
		delay_drop_debugfs_cleanup(dev);
	}
}
/*
 * debugfs read handler: report the configured delay-drop timeout as a
 * decimal number followed by a newline.
 */
static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
				       size_t count, loff_t *pos)
{
	struct mlx5_ib_delay_drop *dd = filp->private_data;
	char text[20];
	int text_len = snprintf(text, sizeof(text), "%u\n", dd->timeout);

	return simple_read_from_buffer(buf, count, pos, text, text_len);
}
/*
 * debugfs write handler: set the delay-drop timeout.
 *
 * The value written is capped at MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000 and
 * rounded up to a multiple of 100. A debug message is emitted whenever the
 * stored value differs from what was written.
 *
 * Return: @count on success, or the error from kstrtouint_from_user()
 * (-EFAULT for an unreadable buffer, -EINVAL / -ERANGE for bad input).
 */
static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf,
					size_t count, loff_t *pos)
{
	struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
	u32 timeout;
	u32 var;
	int err;

	/* report the real cause instead of collapsing everything to -EFAULT */
	err = kstrtouint_from_user(buf, count, 0, &var);
	if (err)
		return err;

	/*
	 * Cap before rounding: roundup() on a value close to U32_MAX would
	 * wrap around to a tiny timeout. The cap is itself a multiple of
	 * 100, so for every non-wrapping input the result is unchanged.
	 */
	timeout = min_t(u32, var, MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000);
	timeout = roundup(timeout, 100);

	if (timeout != var)
		mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n",
			    timeout);

	delay_drop->timeout = timeout;

	return count;
}
/*
 * File operations for the delay-drop "timeout" debugfs file. simple_open is
 * used so that filp->private_data carries the data pointer registered with
 * the file.
 */
static const struct file_operations fops_delay_drop_timeout = {
.owner = THIS_MODULE,
.open = simple_open,
.write = delay_drop_timeout_write,
.read = delay_drop_timeout_read,
};
/*
 * Create the "delay_drop" debugfs directory with its event counter, RQ
 * counter and timeout files.
 *
 * The bookkeeping structure is recorded in dev->delay_drop.dbg before any
 * entry is created, so that delay_drop_debugfs_cleanup() can release it both
 * on a failure here and at device teardown.
 *
 * Return: 0 on success or when the mlx5 debugfs root is absent, -ENOMEM if
 * an allocation or any debugfs entry creation fails.
 */
static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_ib_dbg_delay_drop *dbg;

	if (!mlx5_debugfs_root)
		return 0;

	dbg = kzalloc(sizeof(*dbg), GFP_KERNEL);
	if (!dbg)
		return -ENOMEM;

	/* without this the cleanup helper never sees the allocation */
	dev->delay_drop.dbg = dbg;

	dbg->dir_debugfs =
		debugfs_create_dir("delay_drop",
				   dev->mdev->priv.dbg_root);
	if (!dbg->dir_debugfs)
		goto out_debugfs;

	dbg->events_cnt_debugfs =
		debugfs_create_atomic_t("num_timeout_events", 0400,
					dbg->dir_debugfs,
					&dev->delay_drop.events_cnt);
	if (!dbg->events_cnt_debugfs)
		goto out_debugfs;

	dbg->rqs_cnt_debugfs =
		debugfs_create_atomic_t("num_rqs", 0400,
					dbg->dir_debugfs,
					&dev->delay_drop.rqs_cnt);
	if (!dbg->rqs_cnt_debugfs)
		goto out_debugfs;

	dbg->timeout_debugfs =
		debugfs_create_file("timeout", 0600,
				    dbg->dir_debugfs,
				    &dev->delay_drop,
				    &fops_delay_drop_timeout);
	if (!dbg->timeout_debugfs)
		goto out_debugfs;

	return 0;

out_debugfs:
	/* removes whatever was created, frees dbg and clears the pointer */
	delay_drop_debugfs_cleanup(dev);
	return -ENOMEM;
}
static void init_delay_drop(struct mlx5_ib_dev *dev)
{
if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
return;
mutex_init(&dev->delay_drop.lock);
dev->delay_drop.dev = dev;
dev->delay_drop.activate = false;
dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
atomic_set(&dev->delay_drop.rqs_cnt, 0);
atomic_set(&dev->delay_drop.events_cnt, 0);
if (delay_drop_debugfs_init(dev))
mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
@ -3723,18 +3983,20 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) {
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.create_wq = mlx5_ib_create_wq;
dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
@ -3754,6 +4016,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
err = mlx5_enable_eth(dev);
if (err)
goto err_free_port;
dev->roce.last_port_state = IB_PORT_DOWN;
}
err = create_dev_resources(&dev->devr);
@ -3770,9 +4033,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
goto err_odp;
}
err = mlx5_ib_init_cong_debugfs(dev);
if (err)
goto err_cnt;
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
if (!dev->mdev->priv.uar)
goto err_cnt;
goto err_cong;
err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
if (err)
@ -3790,18 +4057,25 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (err)
goto err_dev;
init_delay_drop(dev);
for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
err = device_create_file(&dev->ib_dev.dev,
mlx5_class_attributes[i]);
if (err)
goto err_umrc;
goto err_delay_drop;
}
if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
MLX5_CAP_GEN(mdev, disable_local_lb))
mutex_init(&dev->lb_mutex);
dev->ib_active = true;
return dev;
err_umrc:
err_delay_drop:
cancel_delay_drop(dev);
destroy_umrc_res(dev);
err_dev:
@ -3817,6 +4091,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
err_cnt:
mlx5_ib_cleanup_cong_debugfs(dev);
err_cong:
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
@ -3846,11 +4122,13 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
struct mlx5_ib_dev *dev = context;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
cancel_delay_drop(dev);
mlx5_remove_netdev_notifier(dev);
ib_unregister_device(&dev->ib_dev);
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
mlx5_ib_cleanup_cong_debugfs(dev);
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
destroy_umrc_res(dev);

View File

@ -247,6 +247,10 @@ struct mlx5_ib_wq {
void *qend;
};
/* Bit flags stored in mlx5_ib_rwq.create_flags. */
enum mlx5_ib_wq_flags {
/* WQ is accounted in the device's delay-drop RQ counter */
MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1,
};
struct mlx5_ib_rwq {
struct ib_wq ibwq;
struct mlx5_core_qp core_qp;
@ -264,6 +268,7 @@ struct mlx5_ib_rwq {
u32 wqe_count;
u32 wqe_shift;
int wq_sig;
u32 create_flags; /* Use enum mlx5_ib_wq_flags */
};
enum {
@ -378,6 +383,7 @@ struct mlx5_ib_qp {
struct list_head cq_recv_list;
struct list_head cq_send_list;
u32 rate_limit;
u32 underlay_qpn;
};
struct mlx5_ib_cq_buf {
@ -399,6 +405,7 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
MLX5_IB_QP_RSS = 1 << 8,
MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9,
MLX5_IB_QP_UNDERLAY = 1 << 10,
};
struct mlx5_umr_wr {
@ -616,6 +623,63 @@ struct mlx5_roce {
struct net_device *netdev;
struct notifier_block nb;
atomic_t next_port;
enum ib_port_state last_port_state;
};
/* Per-file context for one congestion-control debugfs parameter. */
struct mlx5_ib_dbg_param {
/* MLX5_IB_DBG_CC_* selector of the parameter this file controls */
int offset;
/* device the parameter is queried from and written to */
struct mlx5_ib_dev *dev;
/* debugfs file created for this parameter */
struct dentry *dentry;
};
/*
 * Selectors for the congestion-control parameters exposed through debugfs.
 * The value doubles as the index into mlx5_ib_dbg_cc_params.params[].
 * RP selectors address fields of the cong_control_r_roce_ecn_rp layout, NP
 * selectors fields of cong_control_r_roce_ecn_np.
 */
enum mlx5_ib_dbg_cc_types {
MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE,
MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI,
MLX5_IB_DBG_CC_RP_TIME_RESET,
MLX5_IB_DBG_CC_RP_BYTE_RESET,
MLX5_IB_DBG_CC_RP_THRESHOLD,
MLX5_IB_DBG_CC_RP_AI_RATE,
MLX5_IB_DBG_CC_RP_HAI_RATE,
MLX5_IB_DBG_CC_RP_MIN_DEC_FAC,
MLX5_IB_DBG_CC_RP_MIN_RATE,
MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP,
MLX5_IB_DBG_CC_RP_DCE_TCP_G,
MLX5_IB_DBG_CC_RP_DCE_TCP_RTT,
MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD,
MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE,
MLX5_IB_DBG_CC_RP_GD,
MLX5_IB_DBG_CC_NP_CNP_DSCP,
MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE,
MLX5_IB_DBG_CC_NP_CNP_PRIO,
/* number of selectors; not a parameter itself */
MLX5_IB_DBG_CC_MAX,
};
/* Congestion-control debugfs state of one device. */
struct mlx5_ib_dbg_cc_params {
/* the "cc_params" debugfs directory */
struct dentry *root;
/* one entry per MLX5_IB_DBG_CC_* selector, indexed by selector */
struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX];
};
enum {
/*
 * Upper bound for the delay-drop timeout. Users multiply it by 1000
 * before storing it in mlx5_ib_delay_drop.timeout.
 */
MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100,
};
struct mlx5_ib_dbg_delay_drop {
struct dentry *dir_debugfs;
struct dentry *rqs_cnt_debugfs;
struct dentry *events_cnt_debugfs;
struct dentry *timeout_debugfs;
};
/* Delay-drop state of one device. */
struct mlx5_ib_delay_drop {
/* owning device */
struct mlx5_ib_dev *dev;
/* work item that re-programs the timeout; runs delay_drop_handler() */
struct work_struct delay_drop_work;
/* serialize setting of delay drop */
struct mutex lock;
/* timeout handed to mlx5_core_set_delay_drop(); presumably in usec - confirm */
u32 timeout;
/* cleared when programming the timeout into the device fails */
bool activate;
/* number of times the delay-drop work item has run */
atomic_t events_cnt;
/* exposed as "num_rqs"; decremented when a delay-drop WQ is destroyed */
atomic_t rqs_cnt;
/* debugfs entries, or NULL when none are recorded */
struct mlx5_ib_dbg_delay_drop *dbg;
};
struct mlx5_ib_dev {
@ -652,9 +716,15 @@ struct mlx5_ib_dev {
struct list_head qp_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
struct mlx5_sq_bfreg bfreg;
struct mlx5_sq_bfreg fp_bfreg;
u8 umr_fence;
struct mlx5_sq_bfreg bfreg;
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
struct mlx5_ib_dbg_cc_params *dbg_cc_params;
/* protect the user_td */
struct mutex lb_mutex;
u32 user_td;
u8 umr_fence;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@ -904,6 +974,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
int index, enum ib_gid_type *gid_type);
void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev);
int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev);
/* GSI QP helper functions */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr);

View File

@ -48,6 +48,7 @@ enum {
#define MLX5_UMR_ALIGN 2048
static int clean_mr(struct mlx5_ib_mr *mr);
static int max_umr_order(struct mlx5_ib_dev *dev);
static int use_umr(struct mlx5_ib_dev *dev, int order);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
@ -491,16 +492,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_ib_mr *mr = NULL;
struct mlx5_cache_ent *ent;
int last_umr_cache_entry;
int c;
int i;
c = order2idx(dev, order);
if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
last_umr_cache_entry = order2idx(dev, max_umr_order(dev));
if (c < 0 || c > last_umr_cache_entry) {
mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
return NULL;
}
for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
for (i = c; i <= last_umr_cache_entry; i++) {
ent = &cache->ent[i];
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
@ -816,11 +819,16 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
return (npages + 1) / 2;
}
static int use_umr(struct mlx5_ib_dev *dev, int order)
static int max_umr_order(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
return order <= MAX_UMR_CACHE_ENTRY + 2;
return order <= MLX5_MAX_UMR_SHIFT;
return MAX_UMR_CACHE_ENTRY + 2;
return MLX5_MAX_UMR_SHIFT;
}
static int use_umr(struct mlx5_ib_dev *dev, int order)
{
return order <= max_umr_order(dev);
}
static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,

View File

@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
if (qp->ibqp.qp_type != IB_QPT_RC) {
av = *wqe;
if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT))
if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
*wqe += sizeof(struct mlx5_av);
else
*wqe += sizeof(struct mlx5_base_av);

View File

@ -34,6 +34,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_user_verbs.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
/* not supported currently */
@ -453,7 +454,8 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
return -EINVAL;
}
if (attr->qp_type == IB_QPT_RAW_PACKET) {
if (attr->qp_type == IB_QPT_RAW_PACKET ||
qp->flags & MLX5_IB_QP_UNDERLAY) {
base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
} else {
@ -675,10 +677,14 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
return err;
}
static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq)
static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_rwq *rwq)
{
struct mlx5_ib_ucontext *context;
if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP)
atomic_dec(&dev->delay_drop.rqs_cnt);
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &rwq->db);
if (rwq->umem)
@ -1021,12 +1027,16 @@ static int is_connected(enum ib_qp_type qp_type)
}
/*
 * Create the TIS backing a raw packet send queue.
 *
 * @dev: device to create the TIS on
 * @qp: QP the send queue belongs to; consulted for the underlay settings
 * @sq: send queue whose tisn field receives the new TIS number
 * @tdn: transport domain to place the TIS in
 *
 * Return: result of mlx5_core_create_tis().
 */
static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
struct mlx5_ib_qp *qp,
struct mlx5_ib_sq *sq, u32 tdn)
{
u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
MLX5_SET(tisc, tisc, transport_domain, tdn);
/* underlay QPs additionally bind the TIS to their underlay QP number */
if (qp->flags & MLX5_IB_QP_UNDERLAY)
MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
}
@ -1229,7 +1239,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u32 tdn = mucontext->tdn;
if (qp->sq.wqe_cnt) {
err = create_raw_packet_qp_tis(dev, sq, tdn);
err = create_raw_packet_qp_tis(dev, qp, sq, tdn);
if (err)
return err;
@ -1502,10 +1512,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
u32 *in;
int err;
base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
@ -1587,10 +1593,28 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
if (init_attr->qp_type != IB_QPT_UD ||
(MLX5_CAP_GEN(dev->mdev, port_type) !=
MLX5_CAP_PORT_TYPE_IB) ||
!mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) {
mlx5_ib_dbg(dev, "Source QP option isn't supported\n");
return -EOPNOTSUPP;
}
qp->flags |= MLX5_IB_QP_UNDERLAY;
qp->underlay_qpn = init_attr->source_qpn;
}
} else {
qp->wq_sig = !!wq_signature;
}
base = (init_attr->qp_type == IB_QPT_RAW_PACKET ||
qp->flags & MLX5_IB_QP_UNDERLAY) ?
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
qp->has_rq = qp_has_rq(init_attr);
err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
qp, (pd && pd->uobject) ? &ucmd : NULL);
@ -1741,7 +1765,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->flags |= MLX5_IB_QP_LSO;
}
if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
qp->flags & MLX5_IB_QP_UNDERLAY) {
qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
err = create_raw_packet_qp(dev, qp, in, pd);
@ -1893,7 +1918,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_ib_qp_base *base;
unsigned long flags;
int err;
@ -1902,12 +1927,14 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
return;
}
base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
qp->flags & MLX5_IB_QP_UNDERLAY) ?
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
if (qp->state != IB_QPS_RESET) {
if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET &&
!(qp->flags & MLX5_IB_QP_UNDERLAY)) {
err = mlx5_core_qp_modify(dev->mdev,
MLX5_CMD_OP_2RST_QP, 0,
NULL, &base->mqp);
@ -1946,7 +1973,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
qp->flags & MLX5_IB_QP_UNDERLAY) {
destroy_raw_packet_qp(dev, qp);
} else {
err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
@ -2702,7 +2730,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (is_sqp(ibqp->qp_type)) {
context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
} else if (ibqp->qp_type == IB_QPT_UD ||
} else if ((ibqp->qp_type == IB_QPT_UD &&
!(qp->flags & MLX5_IB_QP_UNDERLAY)) ||
ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
} else if (attr_mask & IB_QP_PATH_MTU) {
@ -2799,6 +2828,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
qp->port) - 1;
/* Underlay port should be used - index 0 function per port */
if (qp->flags & MLX5_IB_QP_UNDERLAY)
port_num = 0;
mibport = &dev->port[port_num];
context->qp_counter_set_usr_page |=
cpu_to_be32((u32)(mibport->cnts.set_id) << 24);
@ -2824,7 +2858,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
qp->flags & MLX5_IB_QP_UNDERLAY) {
struct mlx5_modify_raw_qp_param raw_qp_param = {};
raw_qp_param.operation = op;
@ -2913,7 +2948,13 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
}
if (qp_type != MLX5_IB_QPT_REG_UMR &&
if (qp->flags & MLX5_IB_QP_UNDERLAY) {
if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) {
mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n",
attr_mask);
goto out;
}
} else if (qp_type != MLX5_IB_QPT_REG_UMR &&
!ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
cur_state, new_state, ibqp->qp_type, attr_mask);
@ -4477,9 +4518,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr);
/* Not all of output fields are applicable, make sure to zero them */
memset(qp_init_attr, 0, sizeof(*qp_init_attr));
memset(qp_attr, 0, sizeof(*qp_attr));
mutex_lock(&qp->mutex);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
qp->flags & MLX5_IB_QP_UNDERLAY) {
err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
if (err)
goto out;
@ -4597,6 +4643,27 @@ static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
}
}
/*
 * Make sure delay drop is activated on @dev and account for one more user.
 *
 * Under dev->delay_drop.lock: if delay drop is not yet marked active,
 * program it via mlx5_core_set_delay_drop() using the configured timeout
 * and, on success, mark it active. If it is already active nothing is
 * programmed.
 *
 * On success (including the already-active case) dev->delay_drop.rqs_cnt
 * is incremented after the lock has been released.
 *
 * Returns 0 on success, or the error from mlx5_core_set_delay_drop().
 */
static int set_delay_drop(struct mlx5_ib_dev *dev)
{
	int ret = 0;

	mutex_lock(&dev->delay_drop.lock);
	if (!dev->delay_drop.activate) {
		ret = mlx5_core_set_delay_drop(dev->mdev,
					       dev->delay_drop.timeout);
		if (!ret)
			dev->delay_drop.activate = true;
	}
	mutex_unlock(&dev->delay_drop.lock);

	if (ret)
		return ret;

	/* Count this RQ only once activation is known to be in place. */
	atomic_inc(&dev->delay_drop.rqs_cnt);
	return 0;
}
static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
struct ib_wq_init_attr *init_attr)
{
@ -4651,9 +4718,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
}
MLX5_SET(rqc, rqc, scatter_fcs, 1);
}
if (init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
if (!(dev->ib_dev.attrs.raw_packet_caps &
IB_RAW_PACKET_CAP_DELAY_DROP)) {
mlx5_ib_dbg(dev, "Delay drop is not supported\n");
err = -EOPNOTSUPP;
goto out;
}
MLX5_SET(rqc, rqc, delay_drop_en, 1);
}
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
err = set_delay_drop(dev);
if (err) {
mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n",
err);
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
} else {
rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP;
}
}
out:
kvfree(in);
return err;
@ -4787,7 +4873,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
err_copy:
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
err_user_rq:
destroy_user_rq(pd, rwq);
destroy_user_rq(dev, pd, rwq);
err:
kfree(rwq);
return ERR_PTR(err);
@ -4799,7 +4885,7 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq)
struct mlx5_ib_rwq *rwq = to_mrwq(wq);
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
destroy_user_rq(wq->pd, rwq);
destroy_user_rq(dev, wq->pd, rwq);
kfree(rwq);
return 0;

View File

@ -49,7 +49,6 @@
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG

View File

@ -1178,12 +1178,11 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
static void get_dev_fw_str(struct ib_device *device, char *str,
size_t str_len)
static void get_dev_fw_str(struct ib_device *device, char *str)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev);
snprintf(str, str_len, "%d.%d.%d",
snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d",
(int) (dev->fw_ver >> 32),
(int) (dev->fw_ver >> 16) & 0xffff,
(int) dev->fw_ver & 0xffff);

Some files were not shown because too many files have changed in this diff Show More