mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-19 01:36:13 +07:00
First merge window pull request
This has been a smaller cycle with many of the commits being smallish code fixes and improvements across the drivers. - Driver updates for bnxt_re, cxgb4, hfi1, hns, mlx5, nes, qedr, and rxe - Memory window support in hns - mlx5 user API 'flow mutate/steering' allows accessing the full packet mangling and matching machinery from user space - Support inter-working with verbs API calls in the 'devx' mlx5 user API, and provide options to use devx with less privilege - Modernize the use of sysfs and the device interface to use attribute groups and cdev properly for uverbs, and clean up some of the core code's device list management - More progress on net namespaces for RDMA devices - Consolidate driver BAR mmapping support into core code helpers and rework how RDMA holds pointers to mm_struct for get_user_pages cases - First pass to use 'dev_name' instead of ib_device->name - Device renaming for RDMA devices -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEfB7FMLh+8QxL+6i3OG33FX4gmxoFAlvR7dUACgkQOG33FX4g mxojiw//a9GU5kq4IZ3LNAEio/3Ql/NHRF0uie5tSzJgipRJA1Ln9zW0Cm1S/ms1 VCmaSJ3l3q3GC4i3tIlsZSIIkN5qtjv/FsT/i+TZwSJYx9BDpPbzWtG6Mp4PSDj0 v3xzklFCN5HMOmEcjkNmyZw3VjHOt2Iw2mKjqvGbI9imCPLOYnw+WQaZLmMWMH6p GL0HDbAopN5Lv8ireWd8pOhPLVbSb12cWM1crx+yHOS3q8YNWjIXGiZr/QkOPtPr cymSXB8yuITJ7gnjbs/GxZHg6rxU0knC/Ck8hE7FqqYYHgytTklOXDE2ef1J2lFe 1VmotD+nTsCir0mZWSdcRrszEk7tzaZT7n1oWggKvWySDB6qaH0II8vWumJchQnN pElIQn/WDgpekIqplamNqXJnKnDXZJpEVA01OHHDN4MNSc+Ad08hQy4FyFzpB6/G jv9TnDMfGC6ma9pr1ipOXyCgCa2pHYEUCaYxUqRA0O/4ATVl7/PplqT0rqtJ6hKg o/hmaVCawIFOUKD87/bo7Em2HBs3xNwE/c5ggbsQElLYeydrgPrZfrPfjkshv5K3 eIKDb+HPyis0is1aiF7m/bz1hSIYZp0bQhuKCdzLRjZobwCm5WDPhtuuAWb7vYVw GSLCJWyet+bLyZxynNOt67gKm9je9lt8YTr5nilz49KeDytspK0= =pacJ -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma Pull rdma updates from Jason Gunthorpe: "This has been a smaller cycle with many of the commits being smallish code fixes and improvements across the drivers. 
- Driver updates for bnxt_re, cxgb4, hfi1, hns, mlx5, nes, qedr, and rxe - Memory window support in hns - mlx5 user API 'flow mutate/steering' allows accessing the full packet mangling and matching machinery from user space - Support inter-working with verbs API calls in the 'devx' mlx5 user API, and provide options to use devx with less privilege - Modernize the use of sysfs and the device interface to use attribute groups and cdev properly for uverbs, and clean up some of the core code's device list management - More progress on net namespaces for RDMA devices - Consolidate driver BAR mmapping support into core code helpers and rework how RDMA holds pointers to mm_struct for get_user_pages cases - First pass to use 'dev_name' instead of ib_device->name - Device renaming for RDMA devices" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (242 commits) IB/mlx5: Add support for extended atomic operations RDMA/core: Fix comment for hw stats init for port == 0 RDMA/core: Refactor ib_register_device() function RDMA/core: Fix unwinding flow in case of error to register device ib_srp: Remove WARN_ON in srp_terminate_io() IB/mlx5: Allow scatter to CQE without global signaled WRs IB/mlx5: Verify that driver supports user flags IB/mlx5: Support scatter to CQE for DC transport type RDMA/drivers: Use core provided API for registering device attributes RDMA/core: Allow existing drivers to set one sysfs group per device IB/rxe: Remove unnecessary enum values RDMA/umad: Use kernel API to allocate umad indexes RDMA/uverbs: Use kernel API to allocate uverbs indexes RDMA/core: Increase total number of RDMA ports across all devices IB/mlx4: Add port and TID to MAD debug print IB/mlx4: Enable debug print of SMPs RDMA/core: Rename ports_parent to ports_kobj RDMA/core: Do not expose unsupported counters IB/mlx4: Refer to the device kobject instead of ports_parent RDMA/nldev: Allow IB device rename through RDMA netlink ...
This commit is contained in:
commit
da19a102ce
@ -91,6 +91,24 @@ Description:
|
||||
stacked (e.g: VLAN interfaces) but still have the same MAC
|
||||
address as their parent device.
|
||||
|
||||
What: /sys/class/net/<iface>/dev_port
|
||||
Date: February 2014
|
||||
KernelVersion: 3.15
|
||||
Contact: netdev@vger.kernel.org
|
||||
Description:
|
||||
Indicates the port number of this network device, formatted
|
||||
as a decimal value. Some NICs have multiple independent ports
|
||||
on the same PCI bus, device and function. This attribute allows
|
||||
userspace to distinguish the respective interfaces.
|
||||
|
||||
Note: some device drivers started to use 'dev_id' for this
|
||||
purpose since long before 3.15 and have not adopted the new
|
||||
attribute ever since. To query the port number, some tools look
|
||||
exclusively at 'dev_port', while others only consult 'dev_id'.
|
||||
If a network device has multiple client adapter ports as
|
||||
described in the previous paragraph and does not set this
|
||||
attribute to its port number, it's a kernel bug.
|
||||
|
||||
What: /sys/class/net/<iface>/dormant
|
||||
Date: March 2006
|
||||
KernelVersion: 2.6.17
|
||||
|
@ -26,6 +26,7 @@ config INFINIBAND_USER_MAD
|
||||
config INFINIBAND_USER_ACCESS
|
||||
tristate "InfiniBand userspace access (verbs and CM)"
|
||||
select ANON_INODES
|
||||
depends on MMU
|
||||
---help---
|
||||
Userspace InfiniBand access support. This enables the
|
||||
kernel side of userspace verbs and the userspace
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include <net/addrconf.h>
|
||||
#include <net/ip6_route.h>
|
||||
#include <rdma/ib_addr.h>
|
||||
#include <rdma/ib_sa.h>
|
||||
#include <rdma/ib.h>
|
||||
#include <rdma/rdma_netlink.h>
|
||||
#include <net/netlink.h>
|
||||
@ -61,6 +62,7 @@ struct addr_req {
|
||||
struct rdma_dev_addr *addr, void *context);
|
||||
unsigned long timeout;
|
||||
struct delayed_work work;
|
||||
bool resolve_by_gid_attr; /* Consider gid attr in resolve phase */
|
||||
int status;
|
||||
u32 seq;
|
||||
};
|
||||
@ -219,18 +221,54 @@ int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_addr_size_kss);
|
||||
|
||||
void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
|
||||
const struct net_device *dev,
|
||||
const unsigned char *dst_dev_addr)
|
||||
/**
|
||||
* rdma_copy_src_l2_addr - Copy netdevice source addresses
|
||||
* @dev_addr: Destination address pointer where to copy the addresses
|
||||
* @dev: Netdevice whose source addresses to copy
|
||||
*
|
||||
* rdma_copy_src_l2_addr() copies source addresses from the specified netdevice.
|
||||
* This includes unicast address, broadcast address, device type and
|
||||
* interface index.
|
||||
*/
|
||||
void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
|
||||
const struct net_device *dev)
|
||||
{
|
||||
dev_addr->dev_type = dev->type;
|
||||
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
|
||||
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
|
||||
if (dst_dev_addr)
|
||||
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
|
||||
dev_addr->bound_dev_if = dev->ifindex;
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_copy_addr);
|
||||
EXPORT_SYMBOL(rdma_copy_src_l2_addr);
|
||||
|
||||
static struct net_device *
|
||||
rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in)
|
||||
{
|
||||
struct net_device *dev = NULL;
|
||||
int ret = -EADDRNOTAVAIL;
|
||||
|
||||
switch (src_in->sa_family) {
|
||||
case AF_INET:
|
||||
dev = __ip_dev_find(net,
|
||||
((const struct sockaddr_in *)src_in)->sin_addr.s_addr,
|
||||
false);
|
||||
if (dev)
|
||||
ret = 0;
|
||||
break;
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
case AF_INET6:
|
||||
for_each_netdev_rcu(net, dev) {
|
||||
if (ipv6_chk_addr(net,
|
||||
&((const struct sockaddr_in6 *)src_in)->sin6_addr,
|
||||
dev, 1)) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
return ret ? ERR_PTR(ret) : dev;
|
||||
}
|
||||
|
||||
int rdma_translate_ip(const struct sockaddr *addr,
|
||||
struct rdma_dev_addr *dev_addr)
|
||||
@ -241,38 +279,17 @@ int rdma_translate_ip(const struct sockaddr *addr,
|
||||
dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
|
||||
if (!dev)
|
||||
return -ENODEV;
|
||||
rdma_copy_addr(dev_addr, dev, NULL);
|
||||
rdma_copy_src_l2_addr(dev_addr, dev);
|
||||
dev_put(dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (addr->sa_family) {
|
||||
case AF_INET:
|
||||
dev = ip_dev_find(dev_addr->net,
|
||||
((const struct sockaddr_in *)addr)->sin_addr.s_addr);
|
||||
|
||||
if (!dev)
|
||||
return -EADDRNOTAVAIL;
|
||||
|
||||
rdma_copy_addr(dev_addr, dev, NULL);
|
||||
dev_put(dev);
|
||||
break;
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
case AF_INET6:
|
||||
rcu_read_lock();
|
||||
for_each_netdev_rcu(dev_addr->net, dev) {
|
||||
if (ipv6_chk_addr(dev_addr->net,
|
||||
&((const struct sockaddr_in6 *)addr)->sin6_addr,
|
||||
dev, 1)) {
|
||||
rdma_copy_addr(dev_addr, dev, NULL);
|
||||
break;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
rcu_read_lock();
|
||||
dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr);
|
||||
if (!IS_ERR(dev))
|
||||
rdma_copy_src_l2_addr(dev_addr, dev);
|
||||
rcu_read_unlock();
|
||||
return PTR_ERR_OR_ZERO(dev);
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_translate_ip);
|
||||
|
||||
@ -295,15 +312,12 @@ static void queue_req(struct addr_req *req)
|
||||
spin_unlock_bh(&lock);
|
||||
}
|
||||
|
||||
static int ib_nl_fetch_ha(const struct dst_entry *dst,
|
||||
struct rdma_dev_addr *dev_addr,
|
||||
static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr,
|
||||
const void *daddr, u32 seq, u16 family)
|
||||
{
|
||||
if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
|
||||
if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
|
||||
return -EADDRNOTAVAIL;
|
||||
|
||||
/* We fill in what we can, the response will fill the rest */
|
||||
rdma_copy_addr(dev_addr, dst->dev, NULL);
|
||||
return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
|
||||
}
|
||||
|
||||
@ -322,7 +336,7 @@ static int dst_fetch_ha(const struct dst_entry *dst,
|
||||
neigh_event_send(n, NULL);
|
||||
ret = -ENODATA;
|
||||
} else {
|
||||
rdma_copy_addr(dev_addr, dst->dev, n->ha);
|
||||
memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN);
|
||||
}
|
||||
|
||||
neigh_release(n);
|
||||
@ -356,18 +370,22 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
|
||||
(const void *)&dst_in6->sin6_addr;
|
||||
sa_family_t family = dst_in->sa_family;
|
||||
|
||||
/* Gateway + ARPHRD_INFINIBAND -> IB router */
|
||||
if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
|
||||
return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
|
||||
/* If we have a gateway in IB mode then it must be an IB network */
|
||||
if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
|
||||
return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
|
||||
else
|
||||
return dst_fetch_ha(dst, dev_addr, daddr);
|
||||
}
|
||||
|
||||
static int addr4_resolve(struct sockaddr_in *src_in,
|
||||
const struct sockaddr_in *dst_in,
|
||||
static int addr4_resolve(struct sockaddr *src_sock,
|
||||
const struct sockaddr *dst_sock,
|
||||
struct rdma_dev_addr *addr,
|
||||
struct rtable **prt)
|
||||
{
|
||||
struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock;
|
||||
const struct sockaddr_in *dst_in =
|
||||
(const struct sockaddr_in *)dst_sock;
|
||||
|
||||
__be32 src_ip = src_in->sin_addr.s_addr;
|
||||
__be32 dst_ip = dst_in->sin_addr.s_addr;
|
||||
struct rtable *rt;
|
||||
@ -383,16 +401,8 @@ static int addr4_resolve(struct sockaddr_in *src_in,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
src_in->sin_family = AF_INET;
|
||||
src_in->sin_addr.s_addr = fl4.saddr;
|
||||
|
||||
/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
|
||||
* definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
|
||||
* type accordingly.
|
||||
*/
|
||||
if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
|
||||
addr->network = RDMA_NETWORK_IPV4;
|
||||
|
||||
addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
|
||||
|
||||
*prt = rt;
|
||||
@ -400,14 +410,16 @@ static int addr4_resolve(struct sockaddr_in *src_in,
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
static int addr6_resolve(struct sockaddr_in6 *src_in,
|
||||
const struct sockaddr_in6 *dst_in,
|
||||
static int addr6_resolve(struct sockaddr *src_sock,
|
||||
const struct sockaddr *dst_sock,
|
||||
struct rdma_dev_addr *addr,
|
||||
struct dst_entry **pdst)
|
||||
{
|
||||
struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock;
|
||||
const struct sockaddr_in6 *dst_in =
|
||||
(const struct sockaddr_in6 *)dst_sock;
|
||||
struct flowi6 fl6;
|
||||
struct dst_entry *dst;
|
||||
struct rt6_info *rt;
|
||||
int ret;
|
||||
|
||||
memset(&fl6, 0, sizeof fl6);
|
||||
@ -419,19 +431,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
rt = (struct rt6_info *)dst;
|
||||
if (ipv6_addr_any(&src_in->sin6_addr)) {
|
||||
src_in->sin6_family = AF_INET6;
|
||||
if (ipv6_addr_any(&src_in->sin6_addr))
|
||||
src_in->sin6_addr = fl6.saddr;
|
||||
}
|
||||
|
||||
/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
|
||||
* definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
|
||||
* type accordingly.
|
||||
*/
|
||||
if (rt->rt6i_flags & RTF_GATEWAY &&
|
||||
ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
|
||||
addr->network = RDMA_NETWORK_IPV6;
|
||||
|
||||
addr->hoplimit = ip6_dst_hoplimit(dst);
|
||||
|
||||
@ -439,8 +440,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
static int addr6_resolve(struct sockaddr_in6 *src_in,
|
||||
const struct sockaddr_in6 *dst_in,
|
||||
static int addr6_resolve(struct sockaddr *src_sock,
|
||||
const struct sockaddr *dst_sock,
|
||||
struct rdma_dev_addr *addr,
|
||||
struct dst_entry **pdst)
|
||||
{
|
||||
@ -451,36 +452,110 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
|
||||
static int addr_resolve_neigh(const struct dst_entry *dst,
|
||||
const struct sockaddr *dst_in,
|
||||
struct rdma_dev_addr *addr,
|
||||
unsigned int ndev_flags,
|
||||
u32 seq)
|
||||
{
|
||||
if (dst->dev->flags & IFF_LOOPBACK) {
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
ret = rdma_translate_ip(dst_in, addr);
|
||||
if (!ret)
|
||||
memcpy(addr->dst_dev_addr, addr->src_dev_addr,
|
||||
MAX_ADDR_LEN);
|
||||
if (ndev_flags & IFF_LOOPBACK) {
|
||||
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
|
||||
} else {
|
||||
if (!(ndev_flags & IFF_NOARP)) {
|
||||
/* If the device doesn't do ARP internally */
|
||||
ret = fetch_ha(dst, addr, dst_in, seq);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
|
||||
const struct sockaddr *dst_in,
|
||||
const struct dst_entry *dst,
|
||||
const struct net_device *ndev)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (dst->dev->flags & IFF_LOOPBACK)
|
||||
ret = rdma_translate_ip(dst_in, dev_addr);
|
||||
else
|
||||
rdma_copy_src_l2_addr(dev_addr, dst->dev);
|
||||
|
||||
/*
|
||||
* If there's a gateway and type of device not ARPHRD_INFINIBAND,
|
||||
* we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
|
||||
* network type accordingly.
|
||||
*/
|
||||
if (has_gateway(dst, dst_in->sa_family) &&
|
||||
ndev->type != ARPHRD_INFINIBAND)
|
||||
dev_addr->network = dst_in->sa_family == AF_INET ?
|
||||
RDMA_NETWORK_IPV4 :
|
||||
RDMA_NETWORK_IPV6;
|
||||
else
|
||||
dev_addr->network = RDMA_NETWORK_IB;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
|
||||
unsigned int *ndev_flags,
|
||||
const struct sockaddr *dst_in,
|
||||
const struct dst_entry *dst)
|
||||
{
|
||||
struct net_device *ndev = READ_ONCE(dst->dev);
|
||||
|
||||
*ndev_flags = ndev->flags;
|
||||
/* A physical device must be the RDMA device to use */
|
||||
if (ndev->flags & IFF_LOOPBACK) {
|
||||
/*
|
||||
* RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
|
||||
* loopback IP address. So if route is resolved to loopback
|
||||
* interface, translate that to a real ndev based on non
|
||||
* loopback IP address.
|
||||
*/
|
||||
ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in);
|
||||
if (IS_ERR(ndev))
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/* If the device doesn't do ARP internally */
|
||||
if (!(dst->dev->flags & IFF_NOARP))
|
||||
return fetch_ha(dst, addr, dst_in, seq);
|
||||
return copy_src_l2_addr(dev_addr, dst_in, dst, ndev);
|
||||
}
|
||||
|
||||
rdma_copy_addr(addr, dst->dev, NULL);
|
||||
static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr)
|
||||
{
|
||||
struct net_device *ndev;
|
||||
|
||||
ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr);
|
||||
if (IS_ERR(ndev))
|
||||
return PTR_ERR(ndev);
|
||||
|
||||
/*
|
||||
* Since we are holding the rcu, reading net and ifindex
|
||||
* are safe without any additional reference; because
|
||||
* change_net_namespace() in net/core/dev.c does rcu sync
|
||||
* after it changes the state to IFF_DOWN and before
|
||||
* updating netdev fields {net, ifindex}.
|
||||
*/
|
||||
addr->net = dev_net(ndev);
|
||||
addr->bound_dev_if = ndev->ifindex;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr)
|
||||
{
|
||||
addr->net = &init_net;
|
||||
addr->bound_dev_if = 0;
|
||||
}
|
||||
|
||||
static int addr_resolve(struct sockaddr *src_in,
|
||||
const struct sockaddr *dst_in,
|
||||
struct rdma_dev_addr *addr,
|
||||
bool resolve_neigh,
|
||||
bool resolve_by_gid_attr,
|
||||
u32 seq)
|
||||
{
|
||||
struct net_device *ndev;
|
||||
struct dst_entry *dst;
|
||||
struct dst_entry *dst = NULL;
|
||||
unsigned int ndev_flags = 0;
|
||||
struct rtable *rt = NULL;
|
||||
int ret;
|
||||
|
||||
if (!addr->net) {
|
||||
@ -488,58 +563,55 @@ static int addr_resolve(struct sockaddr *src_in,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
if (resolve_by_gid_attr) {
|
||||
if (!addr->sgid_attr) {
|
||||
rcu_read_unlock();
|
||||
pr_warn_ratelimited("%s: missing gid_attr\n", __func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
/*
|
||||
* If the request is for a specific gid attribute of the
|
||||
* rdma_dev_addr, derive net from the netdevice of the
|
||||
* GID attribute.
|
||||
*/
|
||||
ret = set_addr_netns_by_gid_rcu(addr);
|
||||
if (ret) {
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
if (src_in->sa_family == AF_INET) {
|
||||
struct rtable *rt = NULL;
|
||||
const struct sockaddr_in *dst_in4 =
|
||||
(const struct sockaddr_in *)dst_in;
|
||||
|
||||
ret = addr4_resolve((struct sockaddr_in *)src_in,
|
||||
dst_in4, addr, &rt);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (resolve_neigh)
|
||||
ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
|
||||
|
||||
if (addr->bound_dev_if) {
|
||||
ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
|
||||
} else {
|
||||
ndev = rt->dst.dev;
|
||||
dev_hold(ndev);
|
||||
}
|
||||
|
||||
ip_rt_put(rt);
|
||||
ret = addr4_resolve(src_in, dst_in, addr, &rt);
|
||||
dst = &rt->dst;
|
||||
} else {
|
||||
const struct sockaddr_in6 *dst_in6 =
|
||||
(const struct sockaddr_in6 *)dst_in;
|
||||
ret = addr6_resolve(src_in, dst_in, addr, &dst);
|
||||
}
|
||||
if (ret) {
|
||||
rcu_read_unlock();
|
||||
goto done;
|
||||
}
|
||||
ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst);
|
||||
rcu_read_unlock();
|
||||
|
||||
ret = addr6_resolve((struct sockaddr_in6 *)src_in,
|
||||
dst_in6, addr,
|
||||
&dst);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (resolve_neigh)
|
||||
ret = addr_resolve_neigh(dst, dst_in, addr, seq);
|
||||
|
||||
if (addr->bound_dev_if) {
|
||||
ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
|
||||
} else {
|
||||
ndev = dst->dev;
|
||||
dev_hold(ndev);
|
||||
}
|
||||
/*
|
||||
* Resolve neighbor destination address if requested and
|
||||
* only if src addr translation didn't fail.
|
||||
*/
|
||||
if (!ret && resolve_neigh)
|
||||
ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq);
|
||||
|
||||
if (src_in->sa_family == AF_INET)
|
||||
ip_rt_put(rt);
|
||||
else
|
||||
dst_release(dst);
|
||||
}
|
||||
|
||||
if (ndev) {
|
||||
if (ndev->flags & IFF_LOOPBACK)
|
||||
ret = rdma_translate_ip(dst_in, addr);
|
||||
else
|
||||
addr->bound_dev_if = ndev->ifindex;
|
||||
dev_put(ndev);
|
||||
}
|
||||
|
||||
done:
|
||||
/*
|
||||
* Clear the addr net to go back to its original state, only if it was
|
||||
* derived from GID attribute in this context.
|
||||
*/
|
||||
if (resolve_by_gid_attr)
|
||||
rdma_addr_set_net_defaults(addr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -554,7 +626,8 @@ static void process_one_req(struct work_struct *_work)
|
||||
src_in = (struct sockaddr *)&req->src_addr;
|
||||
dst_in = (struct sockaddr *)&req->dst_addr;
|
||||
req->status = addr_resolve(src_in, dst_in, req->addr,
|
||||
true, req->seq);
|
||||
true, req->resolve_by_gid_attr,
|
||||
req->seq);
|
||||
if (req->status && time_after_eq(jiffies, req->timeout)) {
|
||||
req->status = -ETIMEDOUT;
|
||||
} else if (req->status == -ENODATA) {
|
||||
@ -586,10 +659,10 @@ static void process_one_req(struct work_struct *_work)
|
||||
}
|
||||
|
||||
int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
|
||||
struct rdma_dev_addr *addr, int timeout_ms,
|
||||
struct rdma_dev_addr *addr, unsigned long timeout_ms,
|
||||
void (*callback)(int status, struct sockaddr *src_addr,
|
||||
struct rdma_dev_addr *addr, void *context),
|
||||
void *context)
|
||||
bool resolve_by_gid_attr, void *context)
|
||||
{
|
||||
struct sockaddr *src_in, *dst_in;
|
||||
struct addr_req *req;
|
||||
@ -617,10 +690,12 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
|
||||
req->addr = addr;
|
||||
req->callback = callback;
|
||||
req->context = context;
|
||||
req->resolve_by_gid_attr = resolve_by_gid_attr;
|
||||
INIT_DELAYED_WORK(&req->work, process_one_req);
|
||||
req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
|
||||
|
||||
req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
|
||||
req->status = addr_resolve(src_in, dst_in, addr, true,
|
||||
req->resolve_by_gid_attr, req->seq);
|
||||
switch (req->status) {
|
||||
case 0:
|
||||
req->timeout = jiffies;
|
||||
@ -641,25 +716,53 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_resolve_ip);
|
||||
|
||||
int rdma_resolve_ip_route(struct sockaddr *src_addr,
|
||||
const struct sockaddr *dst_addr,
|
||||
struct rdma_dev_addr *addr)
|
||||
int roce_resolve_route_from_path(struct sa_path_rec *rec,
|
||||
const struct ib_gid_attr *attr)
|
||||
{
|
||||
struct sockaddr_storage ssrc_addr = {};
|
||||
struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
|
||||
union {
|
||||
struct sockaddr _sockaddr;
|
||||
struct sockaddr_in _sockaddr_in;
|
||||
struct sockaddr_in6 _sockaddr_in6;
|
||||
} sgid, dgid;
|
||||
struct rdma_dev_addr dev_addr = {};
|
||||
int ret;
|
||||
|
||||
if (src_addr) {
|
||||
if (src_addr->sa_family != dst_addr->sa_family)
|
||||
return -EINVAL;
|
||||
if (rec->roce.route_resolved)
|
||||
return 0;
|
||||
|
||||
memcpy(src_in, src_addr, rdma_addr_size(src_addr));
|
||||
} else {
|
||||
src_in->sa_family = dst_addr->sa_family;
|
||||
}
|
||||
rdma_gid2ip(&sgid._sockaddr, &rec->sgid);
|
||||
rdma_gid2ip(&dgid._sockaddr, &rec->dgid);
|
||||
|
||||
return addr_resolve(src_in, dst_addr, addr, false, 0);
|
||||
if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family)
|
||||
return -EINVAL;
|
||||
|
||||
if (!attr || !attr->ndev)
|
||||
return -EINVAL;
|
||||
|
||||
dev_addr.net = &init_net;
|
||||
dev_addr.sgid_attr = attr;
|
||||
|
||||
ret = addr_resolve(&sgid._sockaddr, &dgid._sockaddr,
|
||||
&dev_addr, false, true, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
|
||||
dev_addr.network == RDMA_NETWORK_IPV6) &&
|
||||
rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
|
||||
return -EINVAL;
|
||||
|
||||
rec->roce.route_resolved = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* rdma_addr_cancel - Cancel resolve ip request
|
||||
* @addr: Pointer to address structure given previously
|
||||
* during rdma_resolve_ip().
|
||||
* rdma_addr_cancel() is synchronous function which cancels any pending
|
||||
* request if there is any.
|
||||
*/
|
||||
void rdma_addr_cancel(struct rdma_dev_addr *addr)
|
||||
{
|
||||
struct addr_req *req, *temp_req;
|
||||
@ -687,11 +790,6 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
|
||||
* guarentees no work is running and none will be started.
|
||||
*/
|
||||
cancel_delayed_work_sync(&found->work);
|
||||
|
||||
if (found->callback)
|
||||
found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr,
|
||||
found->addr, found->context);
|
||||
|
||||
kfree(found);
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_addr_cancel);
|
||||
@ -710,7 +808,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
|
||||
|
||||
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
|
||||
const union ib_gid *dgid,
|
||||
u8 *dmac, const struct net_device *ndev,
|
||||
u8 *dmac, const struct ib_gid_attr *sgid_attr,
|
||||
int *hoplimit)
|
||||
{
|
||||
struct rdma_dev_addr dev_addr;
|
||||
@ -726,12 +824,12 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
|
||||
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
|
||||
|
||||
memset(&dev_addr, 0, sizeof(dev_addr));
|
||||
dev_addr.bound_dev_if = ndev->ifindex;
|
||||
dev_addr.net = &init_net;
|
||||
dev_addr.sgid_attr = sgid_attr;
|
||||
|
||||
init_completion(&ctx.comp);
|
||||
ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr,
|
||||
&dev_addr, 1000, resolve_cb, &ctx);
|
||||
&dev_addr, 1000, resolve_cb, true, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -212,9 +212,8 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
|
||||
u8 port_num = entry->attr.port_num;
|
||||
struct ib_gid_table *table = rdma_gid_table(device, port_num);
|
||||
|
||||
pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
|
||||
device->name, port_num, entry->attr.index,
|
||||
entry->attr.gid.raw);
|
||||
dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
|
||||
port_num, entry->attr.index, entry->attr.gid.raw);
|
||||
|
||||
if (rdma_cap_roce_gid_table(device, port_num) &&
|
||||
entry->state != GID_TABLE_ENTRY_INVALID)
|
||||
@ -289,9 +288,9 @@ static void store_gid_entry(struct ib_gid_table *table,
|
||||
{
|
||||
entry->state = GID_TABLE_ENTRY_VALID;
|
||||
|
||||
pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
|
||||
entry->attr.device->name, entry->attr.port_num,
|
||||
entry->attr.index, entry->attr.gid.raw);
|
||||
dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
|
||||
__func__, entry->attr.port_num, entry->attr.index,
|
||||
entry->attr.gid.raw);
|
||||
|
||||
lockdep_assert_held(&table->lock);
|
||||
write_lock_irq(&table->rwlock);
|
||||
@ -320,17 +319,16 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
|
||||
int ret;
|
||||
|
||||
if (!attr->ndev) {
|
||||
pr_err("%s NULL netdev device=%s port=%d index=%d\n",
|
||||
__func__, attr->device->name, attr->port_num,
|
||||
attr->index);
|
||||
dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
|
||||
__func__, attr->port_num, attr->index);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
|
||||
ret = attr->device->add_gid(attr, &entry->context);
|
||||
if (ret) {
|
||||
pr_err("%s GID add failed device=%s port=%d index=%d\n",
|
||||
__func__, attr->device->name, attr->port_num,
|
||||
attr->index);
|
||||
dev_err(&attr->device->dev,
|
||||
"%s GID add failed port=%d index=%d\n",
|
||||
__func__, attr->port_num, attr->index);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -353,9 +351,8 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
|
||||
|
||||
lockdep_assert_held(&table->lock);
|
||||
|
||||
pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
|
||||
ib_dev->name, port, ix,
|
||||
table->data_vec[ix]->attr.gid.raw);
|
||||
dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
|
||||
ix, table->data_vec[ix]->attr.gid.raw);
|
||||
|
||||
write_lock_irq(&table->rwlock);
|
||||
entry = table->data_vec[ix];
|
||||
@ -782,9 +779,9 @@ static void release_gid_table(struct ib_device *device, u8 port,
|
||||
if (is_gid_entry_free(table->data_vec[i]))
|
||||
continue;
|
||||
if (kref_read(&table->data_vec[i]->kref) > 1) {
|
||||
pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
|
||||
device->name, i,
|
||||
kref_read(&table->data_vec[i]->kref));
|
||||
dev_err(&device->dev,
|
||||
"GID entry ref leak for index %d ref=%d\n", i,
|
||||
kref_read(&table->data_vec[i]->kref));
|
||||
leak = true;
|
||||
}
|
||||
}
|
||||
@ -1252,6 +1249,39 @@ void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_hold_gid_attr);
|
||||
|
||||
/**
|
||||
* rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
|
||||
* which must be in UP state.
|
||||
*
|
||||
* @attr:Pointer to the GID attribute
|
||||
*
|
||||
* Returns pointer to netdevice if the netdevice was attached to GID and
|
||||
* netdevice is in UP state. Caller must hold RCU lock as this API
|
||||
* reads the netdev flags which can change while netdevice migrates to
|
||||
* different net namespace. Returns ERR_PTR with error code otherwise.
|
||||
*
|
||||
*/
|
||||
struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
|
||||
{
|
||||
struct ib_gid_table_entry *entry =
|
||||
container_of(attr, struct ib_gid_table_entry, attr);
|
||||
struct ib_device *device = entry->attr.device;
|
||||
struct net_device *ndev = ERR_PTR(-ENODEV);
|
||||
u8 port_num = entry->attr.port_num;
|
||||
struct ib_gid_table *table;
|
||||
unsigned long flags;
|
||||
bool valid;
|
||||
|
||||
table = rdma_gid_table(device, port_num);
|
||||
|
||||
read_lock_irqsave(&table->rwlock, flags);
|
||||
valid = is_gid_entry_valid(table->data_vec[attr->index]);
|
||||
if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP))
|
||||
ndev = attr->ndev;
|
||||
read_unlock_irqrestore(&table->rwlock, flags);
|
||||
return ndev;
|
||||
}
|
||||
|
||||
static int config_non_roce_gid_cache(struct ib_device *device,
|
||||
u8 port, int gid_tbl_len)
|
||||
{
|
||||
@ -1270,8 +1300,9 @@ static int config_non_roce_gid_cache(struct ib_device *device,
|
||||
continue;
|
||||
ret = device->query_gid(device, port, i, &gid_attr.gid);
|
||||
if (ret) {
|
||||
pr_warn("query_gid failed (%d) for %s (index %d)\n",
|
||||
ret, device->name, i);
|
||||
dev_warn(&device->dev,
|
||||
"query_gid failed (%d) for index %d\n", ret,
|
||||
i);
|
||||
goto err;
|
||||
}
|
||||
gid_attr.index = i;
|
||||
@ -1300,8 +1331,7 @@ static void ib_cache_update(struct ib_device *device,
|
||||
|
||||
ret = ib_query_port(device, port, tprops);
|
||||
if (ret) {
|
||||
pr_warn("ib_query_port failed (%d) for %s\n",
|
||||
ret, device->name);
|
||||
dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -1323,8 +1353,9 @@ static void ib_cache_update(struct ib_device *device,
|
||||
for (i = 0; i < pkey_cache->table_len; ++i) {
|
||||
ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
|
||||
if (ret) {
|
||||
pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
|
||||
ret, device->name, i);
|
||||
dev_warn(&device->dev,
|
||||
"ib_query_pkey failed (%d) for index %d\n",
|
||||
ret, i);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
@ -3292,8 +3292,11 @@ static int cm_lap_handler(struct cm_work *work)
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
|
||||
cm_id_priv);
|
||||
ret = cm_init_av_by_path(param->alternate_path, NULL,
|
||||
&cm_id_priv->alt_av, cm_id_priv);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
|
||||
cm_id_priv->tid = lap_msg->hdr.tid;
|
||||
ret = atomic_inc_and_test(&cm_id_priv->work_count);
|
||||
@ -4367,7 +4370,7 @@ static void cm_add_one(struct ib_device *ib_device)
|
||||
cm_dev->going_down = 0;
|
||||
cm_dev->device = device_create(&cm_class, &ib_device->dev,
|
||||
MKDEV(0, 0), NULL,
|
||||
"%s", ib_device->name);
|
||||
"%s", dev_name(&ib_device->dev));
|
||||
if (IS_ERR(cm_dev->device)) {
|
||||
kfree(cm_dev);
|
||||
return;
|
||||
|
@ -639,13 +639,21 @@ static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
|
||||
id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
|
||||
}
|
||||
|
||||
static int cma_acquire_dev(struct rdma_id_private *id_priv,
|
||||
const struct rdma_id_private *listen_id_priv)
|
||||
/**
|
||||
* cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute
|
||||
* based on source ip address.
|
||||
* @id_priv: cm_id which should be bound to cma device
|
||||
*
|
||||
* cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute
|
||||
* based on source IP address. It returns 0 on success or error code otherwise.
|
||||
* It is applicable to active and passive side cm_id.
|
||||
*/
|
||||
static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
|
||||
{
|
||||
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
|
||||
const struct ib_gid_attr *sgid_attr;
|
||||
struct cma_device *cma_dev;
|
||||
union ib_gid gid, iboe_gid, *gidp;
|
||||
struct cma_device *cma_dev;
|
||||
enum ib_gid_type gid_type;
|
||||
int ret = -ENODEV;
|
||||
u8 port;
|
||||
@ -654,41 +662,125 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
|
||||
id_priv->id.ps == RDMA_PS_IPOIB)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&lock);
|
||||
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
|
||||
&iboe_gid);
|
||||
|
||||
memcpy(&gid, dev_addr->src_dev_addr +
|
||||
rdma_addr_gid_offset(dev_addr), sizeof gid);
|
||||
|
||||
if (listen_id_priv) {
|
||||
cma_dev = listen_id_priv->cma_dev;
|
||||
port = listen_id_priv->id.port_num;
|
||||
gidp = rdma_protocol_roce(cma_dev->device, port) ?
|
||||
&iboe_gid : &gid;
|
||||
gid_type = listen_id_priv->gid_type;
|
||||
sgid_attr = cma_validate_port(cma_dev->device, port,
|
||||
gid_type, gidp, id_priv);
|
||||
if (!IS_ERR(sgid_attr)) {
|
||||
id_priv->id.port_num = port;
|
||||
cma_bind_sgid_attr(id_priv, sgid_attr);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
rdma_addr_gid_offset(dev_addr), sizeof(gid));
|
||||
|
||||
mutex_lock(&lock);
|
||||
list_for_each_entry(cma_dev, &dev_list, list) {
|
||||
for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
|
||||
if (listen_id_priv &&
|
||||
listen_id_priv->cma_dev == cma_dev &&
|
||||
listen_id_priv->id.port_num == port)
|
||||
continue;
|
||||
|
||||
for (port = rdma_start_port(cma_dev->device);
|
||||
port <= rdma_end_port(cma_dev->device); port++) {
|
||||
gidp = rdma_protocol_roce(cma_dev->device, port) ?
|
||||
&iboe_gid : &gid;
|
||||
gid_type = cma_dev->default_gid_type[port - 1];
|
||||
sgid_attr = cma_validate_port(cma_dev->device, port,
|
||||
gid_type, gidp, id_priv);
|
||||
if (!IS_ERR(sgid_attr)) {
|
||||
id_priv->id.port_num = port;
|
||||
cma_bind_sgid_attr(id_priv, sgid_attr);
|
||||
cma_attach_to_dev(id_priv, cma_dev);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* cma_ib_acquire_dev - Acquire cma device, port and SGID attribute
|
||||
* @id_priv: cm id to bind to cma device
|
||||
* @listen_id_priv: listener cm id to match against
|
||||
* @req: Pointer to req structure containaining incoming
|
||||
* request information
|
||||
* cma_ib_acquire_dev() acquires cma device, port and SGID attribute when
|
||||
* rdma device matches for listen_id and incoming request. It also verifies
|
||||
* that a GID table entry is present for the source address.
|
||||
* Returns 0 on success, or returns error code otherwise.
|
||||
*/
|
||||
static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
|
||||
const struct rdma_id_private *listen_id_priv,
|
||||
struct cma_req_info *req)
|
||||
{
|
||||
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
|
||||
const struct ib_gid_attr *sgid_attr;
|
||||
enum ib_gid_type gid_type;
|
||||
union ib_gid gid;
|
||||
|
||||
if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
|
||||
id_priv->id.ps == RDMA_PS_IPOIB)
|
||||
return -EINVAL;
|
||||
|
||||
if (rdma_protocol_roce(req->device, req->port))
|
||||
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
|
||||
&gid);
|
||||
else
|
||||
memcpy(&gid, dev_addr->src_dev_addr +
|
||||
rdma_addr_gid_offset(dev_addr), sizeof(gid));
|
||||
|
||||
gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1];
|
||||
sgid_attr = cma_validate_port(req->device, req->port,
|
||||
gid_type, &gid, id_priv);
|
||||
if (IS_ERR(sgid_attr))
|
||||
return PTR_ERR(sgid_attr);
|
||||
|
||||
id_priv->id.port_num = req->port;
|
||||
cma_bind_sgid_attr(id_priv, sgid_attr);
|
||||
/* Need to acquire lock to protect against reader
|
||||
* of cma_dev->id_list such as cma_netdev_callback() and
|
||||
* cma_process_remove().
|
||||
*/
|
||||
mutex_lock(&lock);
|
||||
cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
|
||||
mutex_unlock(&lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
|
||||
const struct rdma_id_private *listen_id_priv)
|
||||
{
|
||||
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
|
||||
const struct ib_gid_attr *sgid_attr;
|
||||
struct cma_device *cma_dev;
|
||||
enum ib_gid_type gid_type;
|
||||
int ret = -ENODEV;
|
||||
union ib_gid gid;
|
||||
u8 port;
|
||||
|
||||
if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
|
||||
id_priv->id.ps == RDMA_PS_IPOIB)
|
||||
return -EINVAL;
|
||||
|
||||
memcpy(&gid, dev_addr->src_dev_addr +
|
||||
rdma_addr_gid_offset(dev_addr), sizeof(gid));
|
||||
|
||||
mutex_lock(&lock);
|
||||
|
||||
cma_dev = listen_id_priv->cma_dev;
|
||||
port = listen_id_priv->id.port_num;
|
||||
gid_type = listen_id_priv->gid_type;
|
||||
sgid_attr = cma_validate_port(cma_dev->device, port,
|
||||
gid_type, &gid, id_priv);
|
||||
if (!IS_ERR(sgid_attr)) {
|
||||
id_priv->id.port_num = port;
|
||||
cma_bind_sgid_attr(id_priv, sgid_attr);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
list_for_each_entry(cma_dev, &dev_list, list) {
|
||||
for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
|
||||
if (listen_id_priv->cma_dev == cma_dev &&
|
||||
listen_id_priv->id.port_num == port)
|
||||
continue;
|
||||
|
||||
gid_type = cma_dev->default_gid_type[port - 1];
|
||||
sgid_attr = cma_validate_port(cma_dev->device, port,
|
||||
gid_type, &gid, id_priv);
|
||||
if (!IS_ERR(sgid_attr)) {
|
||||
id_priv->id.port_num = port;
|
||||
cma_bind_sgid_attr(id_priv, sgid_attr);
|
||||
@ -785,10 +877,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
|
||||
if (!id_priv)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (caller)
|
||||
id_priv->res.kern_name = caller;
|
||||
else
|
||||
rdma_restrack_set_task(&id_priv->res, current);
|
||||
rdma_restrack_set_task(&id_priv->res, caller);
|
||||
id_priv->res.type = RDMA_RESTRACK_CM_ID;
|
||||
id_priv->state = RDMA_CM_IDLE;
|
||||
id_priv->id.context = context;
|
||||
@ -1462,17 +1551,34 @@ static bool cma_protocol_roce(const struct rdma_cm_id *id)
|
||||
return rdma_protocol_roce(device, port_num);
|
||||
}
|
||||
|
||||
static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
|
||||
{
|
||||
const struct sockaddr *daddr =
|
||||
(const struct sockaddr *)&req->listen_addr_storage;
|
||||
const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
|
||||
|
||||
/* Returns true if the req is for IPv6 link local */
|
||||
return (daddr->sa_family == AF_INET6 &&
|
||||
(ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL));
|
||||
}
|
||||
|
||||
static bool cma_match_net_dev(const struct rdma_cm_id *id,
|
||||
const struct net_device *net_dev,
|
||||
u8 port_num)
|
||||
const struct cma_req_info *req)
|
||||
{
|
||||
const struct rdma_addr *addr = &id->route.addr;
|
||||
|
||||
if (!net_dev)
|
||||
/* This request is an AF_IB request */
|
||||
return (!id->port_num || id->port_num == port_num) &&
|
||||
return (!id->port_num || id->port_num == req->port) &&
|
||||
(addr->src_addr.ss_family == AF_IB);
|
||||
|
||||
/*
|
||||
* If the request is not for IPv6 link local, allow matching
|
||||
* request to any netdevice of the one or multiport rdma device.
|
||||
*/
|
||||
if (!cma_is_req_ipv6_ll(req))
|
||||
return true;
|
||||
/*
|
||||
* Net namespaces must match, and if the listener is listening
|
||||
* on a specific netdevice then netdevice must match as well.
|
||||
@ -1500,13 +1606,14 @@ static struct rdma_id_private *cma_find_listener(
|
||||
hlist_for_each_entry(id_priv, &bind_list->owners, node) {
|
||||
if (cma_match_private_data(id_priv, ib_event->private_data)) {
|
||||
if (id_priv->id.device == cm_id->device &&
|
||||
cma_match_net_dev(&id_priv->id, net_dev, req->port))
|
||||
cma_match_net_dev(&id_priv->id, net_dev, req))
|
||||
return id_priv;
|
||||
list_for_each_entry(id_priv_dev,
|
||||
&id_priv->listen_list,
|
||||
listen_list) {
|
||||
if (id_priv_dev->id.device == cm_id->device &&
|
||||
cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
|
||||
cma_match_net_dev(&id_priv_dev->id,
|
||||
net_dev, req))
|
||||
return id_priv_dev;
|
||||
}
|
||||
}
|
||||
@ -1518,18 +1625,18 @@ static struct rdma_id_private *cma_find_listener(
|
||||
static struct rdma_id_private *
|
||||
cma_ib_id_from_event(struct ib_cm_id *cm_id,
|
||||
const struct ib_cm_event *ib_event,
|
||||
struct cma_req_info *req,
|
||||
struct net_device **net_dev)
|
||||
{
|
||||
struct cma_req_info req;
|
||||
struct rdma_bind_list *bind_list;
|
||||
struct rdma_id_private *id_priv;
|
||||
int err;
|
||||
|
||||
err = cma_save_req_info(ib_event, &req);
|
||||
err = cma_save_req_info(ib_event, req);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
*net_dev = cma_get_net_dev(ib_event, &req);
|
||||
*net_dev = cma_get_net_dev(ib_event, req);
|
||||
if (IS_ERR(*net_dev)) {
|
||||
if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
|
||||
/* Assuming the protocol is AF_IB */
|
||||
@ -1567,17 +1674,17 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
|
||||
}
|
||||
|
||||
if (!validate_net_dev(*net_dev,
|
||||
(struct sockaddr *)&req.listen_addr_storage,
|
||||
(struct sockaddr *)&req.src_addr_storage)) {
|
||||
(struct sockaddr *)&req->listen_addr_storage,
|
||||
(struct sockaddr *)&req->src_addr_storage)) {
|
||||
id_priv = ERR_PTR(-EHOSTUNREACH);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
|
||||
rdma_ps_from_service_id(req.service_id),
|
||||
cma_port_from_service_id(req.service_id));
|
||||
id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
|
||||
rdma_ps_from_service_id(req->service_id),
|
||||
cma_port_from_service_id(req->service_id));
|
||||
id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
|
||||
err:
|
||||
rcu_read_unlock();
|
||||
if (IS_ERR(id_priv) && *net_dev) {
|
||||
@ -1710,8 +1817,8 @@ void rdma_destroy_id(struct rdma_cm_id *id)
|
||||
mutex_lock(&id_priv->handler_mutex);
|
||||
mutex_unlock(&id_priv->handler_mutex);
|
||||
|
||||
rdma_restrack_del(&id_priv->res);
|
||||
if (id_priv->cma_dev) {
|
||||
rdma_restrack_del(&id_priv->res);
|
||||
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
|
||||
if (id_priv->cm_id.ib)
|
||||
ib_destroy_cm_id(id_priv->cm_id.ib);
|
||||
@ -1902,7 +2009,7 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
|
||||
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
|
||||
|
||||
if (net_dev) {
|
||||
rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
|
||||
rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev);
|
||||
} else {
|
||||
if (!cma_protocol_roce(listen_id) &&
|
||||
cma_any_addr(cma_src_addr(id_priv))) {
|
||||
@ -1952,7 +2059,7 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
|
||||
goto err;
|
||||
|
||||
if (net_dev) {
|
||||
rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
|
||||
rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev);
|
||||
} else {
|
||||
if (!cma_any_addr(cma_src_addr(id_priv))) {
|
||||
ret = cma_translate_addr(cma_src_addr(id_priv),
|
||||
@ -1999,11 +2106,12 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
|
||||
{
|
||||
struct rdma_id_private *listen_id, *conn_id = NULL;
|
||||
struct rdma_cm_event event = {};
|
||||
struct cma_req_info req = {};
|
||||
struct net_device *net_dev;
|
||||
u8 offset;
|
||||
int ret;
|
||||
|
||||
listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev);
|
||||
listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev);
|
||||
if (IS_ERR(listen_id))
|
||||
return PTR_ERR(listen_id);
|
||||
|
||||
@ -2036,7 +2144,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
|
||||
}
|
||||
|
||||
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
|
||||
ret = cma_acquire_dev(conn_id, listen_id);
|
||||
ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
|
||||
if (ret)
|
||||
goto err2;
|
||||
|
||||
@ -2232,7 +2340,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = cma_acquire_dev(conn_id, listen_id);
|
||||
ret = cma_iw_acquire_dev(conn_id, listen_id);
|
||||
if (ret) {
|
||||
mutex_unlock(&conn_id->handler_mutex);
|
||||
rdma_destroy_id(new_cm_id);
|
||||
@ -2354,8 +2462,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
|
||||
|
||||
ret = rdma_listen(id, id_priv->backlog);
|
||||
if (ret)
|
||||
pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
|
||||
ret, cma_dev->device->name);
|
||||
dev_warn(&cma_dev->device->dev,
|
||||
"RDMA CMA: cma_listen_on_dev, error %d\n", ret);
|
||||
}
|
||||
|
||||
static void cma_listen_on_all(struct rdma_id_private *id_priv)
|
||||
@ -2402,8 +2510,8 @@ static void cma_query_handler(int status, struct sa_path_rec *path_rec,
|
||||
queue_work(cma_wq, &work->work);
|
||||
}
|
||||
|
||||
static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
|
||||
struct cma_work *work)
|
||||
static int cma_query_ib_route(struct rdma_id_private *id_priv,
|
||||
unsigned long timeout_ms, struct cma_work *work)
|
||||
{
|
||||
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
|
||||
struct sa_path_rec path_rec;
|
||||
@ -2521,7 +2629,8 @@ static void cma_init_resolve_addr_work(struct cma_work *work,
|
||||
work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
|
||||
}
|
||||
|
||||
static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
|
||||
static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
|
||||
unsigned long timeout_ms)
|
||||
{
|
||||
struct rdma_route *route = &id_priv->id.route;
|
||||
struct cma_work *work;
|
||||
@ -2643,7 +2752,7 @@ int rdma_set_ib_path(struct rdma_cm_id *id,
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_set_ib_path);
|
||||
|
||||
static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
|
||||
static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
|
||||
{
|
||||
struct cma_work *work;
|
||||
|
||||
@ -2744,7 +2853,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
|
||||
int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
|
||||
{
|
||||
struct rdma_id_private *id_priv;
|
||||
int ret;
|
||||
@ -2759,7 +2868,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
|
||||
else if (rdma_protocol_roce(id->device, id->port_num))
|
||||
ret = cma_resolve_iboe_route(id_priv);
|
||||
else if (rdma_protocol_iwarp(id->device, id->port_num))
|
||||
ret = cma_resolve_iw_route(id_priv, timeout_ms);
|
||||
ret = cma_resolve_iw_route(id_priv);
|
||||
else
|
||||
ret = -ENOSYS;
|
||||
|
||||
@ -2862,7 +2971,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
|
||||
|
||||
memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
|
||||
if (!status && !id_priv->cma_dev) {
|
||||
status = cma_acquire_dev(id_priv, NULL);
|
||||
status = cma_acquire_dev_by_src_ip(id_priv);
|
||||
if (status)
|
||||
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
|
||||
status);
|
||||
@ -2882,13 +2991,11 @@ static void addr_handler(int status, struct sockaddr *src_addr,
|
||||
if (id_priv->id.event_handler(&id_priv->id, &event)) {
|
||||
cma_exch(id_priv, RDMA_CM_DESTROYING);
|
||||
mutex_unlock(&id_priv->handler_mutex);
|
||||
cma_deref_id(id_priv);
|
||||
rdma_destroy_id(&id_priv->id);
|
||||
return;
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&id_priv->handler_mutex);
|
||||
cma_deref_id(id_priv);
|
||||
}
|
||||
|
||||
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
|
||||
@ -2966,7 +3073,7 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
|
||||
}
|
||||
|
||||
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
|
||||
const struct sockaddr *dst_addr, int timeout_ms)
|
||||
const struct sockaddr *dst_addr, unsigned long timeout_ms)
|
||||
{
|
||||
struct rdma_id_private *id_priv;
|
||||
int ret;
|
||||
@ -2985,16 +3092,16 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
|
||||
return -EINVAL;
|
||||
|
||||
memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
|
||||
atomic_inc(&id_priv->refcount);
|
||||
if (cma_any_addr(dst_addr)) {
|
||||
ret = cma_resolve_loopback(id_priv);
|
||||
} else {
|
||||
if (dst_addr->sa_family == AF_IB) {
|
||||
ret = cma_resolve_ib_addr(id_priv);
|
||||
} else {
|
||||
ret = rdma_resolve_ip(cma_src_addr(id_priv),
|
||||
dst_addr, &id->route.addr.dev_addr,
|
||||
timeout_ms, addr_handler, id_priv);
|
||||
ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
|
||||
&id->route.addr.dev_addr,
|
||||
timeout_ms, addr_handler,
|
||||
false, id_priv);
|
||||
}
|
||||
}
|
||||
if (ret)
|
||||
@ -3003,7 +3110,6 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
|
||||
return 0;
|
||||
err:
|
||||
cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
|
||||
cma_deref_id(id_priv);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_resolve_addr);
|
||||
@ -3414,7 +3520,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
|
||||
if (ret)
|
||||
goto err1;
|
||||
|
||||
ret = cma_acquire_dev(id_priv, NULL);
|
||||
ret = cma_acquire_dev_by_src_ip(id_priv);
|
||||
if (ret)
|
||||
goto err1;
|
||||
}
|
||||
@ -3439,10 +3545,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
|
||||
|
||||
return 0;
|
||||
err2:
|
||||
if (id_priv->cma_dev) {
|
||||
rdma_restrack_del(&id_priv->res);
|
||||
rdma_restrack_del(&id_priv->res);
|
||||
if (id_priv->cma_dev)
|
||||
cma_release_dev(id_priv);
|
||||
}
|
||||
err1:
|
||||
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
|
||||
return ret;
|
||||
@ -3839,10 +3944,7 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
|
||||
|
||||
id_priv = container_of(id, struct rdma_id_private, id);
|
||||
|
||||
if (caller)
|
||||
id_priv->res.kern_name = caller;
|
||||
else
|
||||
rdma_restrack_set_task(&id_priv->res, current);
|
||||
rdma_restrack_set_task(&id_priv->res, caller);
|
||||
|
||||
if (!cma_comp(id_priv, RDMA_CM_CONNECT))
|
||||
return -EINVAL;
|
||||
@ -4087,9 +4189,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
|
||||
(!ib_sa_sendonly_fullmem_support(&sa_client,
|
||||
id_priv->id.device,
|
||||
id_priv->id.port_num))) {
|
||||
pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
|
||||
"RDMA CM: SM doesn't support Send Only Full Member option\n",
|
||||
id_priv->id.device->name, id_priv->id.port_num);
|
||||
dev_warn(
|
||||
&id_priv->id.device->dev,
|
||||
"RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
|
||||
id_priv->id.port_num);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
|
@ -65,7 +65,7 @@ static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
|
||||
|
||||
static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
|
||||
{
|
||||
return !strcmp(ib_dev->name, cookie);
|
||||
return !strcmp(dev_name(&ib_dev->dev), cookie);
|
||||
}
|
||||
|
||||
static int cma_configfs_params_get(struct config_item *item,
|
||||
|
@ -44,7 +44,7 @@
|
||||
#include "mad_priv.h"
|
||||
|
||||
/* Total number of ports combined across all struct ib_device's */
|
||||
#define RDMA_MAX_PORTS 1024
|
||||
#define RDMA_MAX_PORTS 8192
|
||||
|
||||
struct pkey_index_qp_list {
|
||||
struct list_head pkey_index_list;
|
||||
@ -87,6 +87,7 @@ int ib_device_register_sysfs(struct ib_device *device,
|
||||
int (*port_callback)(struct ib_device *,
|
||||
u8, struct kobject *));
|
||||
void ib_device_unregister_sysfs(struct ib_device *device);
|
||||
int ib_device_rename(struct ib_device *ibdev, const char *name);
|
||||
|
||||
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
|
||||
struct net_device *idev, void *cookie);
|
||||
@ -338,7 +339,14 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
|
||||
|
||||
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
|
||||
const union ib_gid *dgid,
|
||||
u8 *dmac, const struct net_device *ndev,
|
||||
u8 *dmac, const struct ib_gid_attr *sgid_attr,
|
||||
int *hoplimit);
|
||||
void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
|
||||
const struct net_device *dev);
|
||||
|
||||
struct sa_path_rec;
|
||||
int roce_resolve_route_from_path(struct sa_path_rec *rec,
|
||||
const struct ib_gid_attr *attr);
|
||||
|
||||
struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
|
||||
#endif /* _CORE_PRIV_H */
|
||||
|
@ -112,12 +112,12 @@ static void ib_cq_poll_work(struct work_struct *work)
|
||||
IB_POLL_BATCH);
|
||||
if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
|
||||
ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
|
||||
queue_work(ib_comp_wq, &cq->work);
|
||||
queue_work(cq->comp_wq, &cq->work);
|
||||
}
|
||||
|
||||
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
|
||||
{
|
||||
queue_work(ib_comp_wq, &cq->work);
|
||||
queue_work(cq->comp_wq, &cq->work);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -161,7 +161,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
|
||||
goto out_destroy_cq;
|
||||
|
||||
cq->res.type = RDMA_RESTRACK_CQ;
|
||||
cq->res.kern_name = caller;
|
||||
rdma_restrack_set_task(&cq->res, caller);
|
||||
rdma_restrack_add(&cq->res);
|
||||
|
||||
switch (cq->poll_ctx) {
|
||||
@ -175,9 +175,12 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
|
||||
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
|
||||
break;
|
||||
case IB_POLL_WORKQUEUE:
|
||||
case IB_POLL_UNBOUND_WORKQUEUE:
|
||||
cq->comp_handler = ib_cq_completion_workqueue;
|
||||
INIT_WORK(&cq->work, ib_cq_poll_work);
|
||||
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
|
||||
cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
|
||||
ib_comp_wq : ib_comp_unbound_wq;
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
@ -213,6 +216,7 @@ void ib_free_cq(struct ib_cq *cq)
|
||||
irq_poll_disable(&cq->iop);
|
||||
break;
|
||||
case IB_POLL_WORKQUEUE:
|
||||
case IB_POLL_UNBOUND_WORKQUEUE:
|
||||
cancel_work_sync(&cq->work);
|
||||
break;
|
||||
default:
|
||||
|
@ -61,6 +61,7 @@ struct ib_client_data {
|
||||
};
|
||||
|
||||
struct workqueue_struct *ib_comp_wq;
|
||||
struct workqueue_struct *ib_comp_unbound_wq;
|
||||
struct workqueue_struct *ib_wq;
|
||||
EXPORT_SYMBOL_GPL(ib_wq);
|
||||
|
||||
@ -122,8 +123,9 @@ static int ib_device_check_mandatory(struct ib_device *device)
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
|
||||
if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
|
||||
pr_warn("Device %s is missing mandatory function %s\n",
|
||||
device->name, mandatory_table[i].name);
|
||||
dev_warn(&device->dev,
|
||||
"Device is missing mandatory function %s\n",
|
||||
mandatory_table[i].name);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
@ -163,16 +165,40 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
|
||||
struct ib_device *device;
|
||||
|
||||
list_for_each_entry(device, &device_list, core_list)
|
||||
if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
|
||||
if (!strcmp(name, dev_name(&device->dev)))
|
||||
return device;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int alloc_name(char *name)
|
||||
int ib_device_rename(struct ib_device *ibdev, const char *name)
|
||||
{
|
||||
struct ib_device *device;
|
||||
int ret = 0;
|
||||
|
||||
if (!strcmp(name, dev_name(&ibdev->dev)))
|
||||
return ret;
|
||||
|
||||
mutex_lock(&device_mutex);
|
||||
list_for_each_entry(device, &device_list, core_list) {
|
||||
if (!strcmp(name, dev_name(&device->dev))) {
|
||||
ret = -EEXIST;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = device_rename(&ibdev->dev, name);
|
||||
if (ret)
|
||||
goto out;
|
||||
strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
|
||||
out:
|
||||
mutex_unlock(&device_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int alloc_name(struct ib_device *ibdev, const char *name)
|
||||
{
|
||||
unsigned long *inuse;
|
||||
char buf[IB_DEVICE_NAME_MAX];
|
||||
struct ib_device *device;
|
||||
int i;
|
||||
|
||||
@ -181,24 +207,21 @@ static int alloc_name(char *name)
|
||||
return -ENOMEM;
|
||||
|
||||
list_for_each_entry(device, &device_list, core_list) {
|
||||
if (!sscanf(device->name, name, &i))
|
||||
char buf[IB_DEVICE_NAME_MAX];
|
||||
|
||||
if (sscanf(dev_name(&device->dev), name, &i) != 1)
|
||||
continue;
|
||||
if (i < 0 || i >= PAGE_SIZE * 8)
|
||||
continue;
|
||||
snprintf(buf, sizeof buf, name, i);
|
||||
if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
|
||||
if (!strcmp(buf, dev_name(&device->dev)))
|
||||
set_bit(i, inuse);
|
||||
}
|
||||
|
||||
i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
|
||||
free_page((unsigned long) inuse);
|
||||
snprintf(buf, sizeof buf, name, i);
|
||||
|
||||
if (__ib_device_get_by_name(buf))
|
||||
return -ENFILE;
|
||||
|
||||
strlcpy(name, buf, IB_DEVICE_NAME_MAX);
|
||||
return 0;
|
||||
return dev_set_name(&ibdev->dev, name, i);
|
||||
}
|
||||
|
||||
static void ib_device_release(struct device *device)
|
||||
@ -221,9 +244,7 @@ static void ib_device_release(struct device *device)
|
||||
static int ib_device_uevent(struct device *device,
|
||||
struct kobj_uevent_env *env)
|
||||
{
|
||||
struct ib_device *dev = container_of(device, struct ib_device, dev);
|
||||
|
||||
if (add_uevent_var(env, "NAME=%s", dev->name))
|
||||
if (add_uevent_var(env, "NAME=%s", dev_name(device)))
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
@ -269,7 +290,7 @@ struct ib_device *ib_alloc_device(size_t size)
|
||||
|
||||
INIT_LIST_HEAD(&device->event_handler_list);
|
||||
spin_lock_init(&device->event_handler_lock);
|
||||
spin_lock_init(&device->client_data_lock);
|
||||
rwlock_init(&device->client_data_lock);
|
||||
INIT_LIST_HEAD(&device->client_data_list);
|
||||
INIT_LIST_HEAD(&device->port_list);
|
||||
|
||||
@ -285,6 +306,7 @@ EXPORT_SYMBOL(ib_alloc_device);
|
||||
*/
|
||||
void ib_dealloc_device(struct ib_device *device)
|
||||
{
|
||||
WARN_ON(!list_empty(&device->client_data_list));
|
||||
WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
|
||||
device->reg_state != IB_DEV_UNINITIALIZED);
|
||||
rdma_restrack_clean(&device->res);
|
||||
@ -295,9 +317,8 @@ EXPORT_SYMBOL(ib_dealloc_device);
|
||||
static int add_client_context(struct ib_device *device, struct ib_client *client)
|
||||
{
|
||||
struct ib_client_data *context;
|
||||
unsigned long flags;
|
||||
|
||||
context = kmalloc(sizeof *context, GFP_KERNEL);
|
||||
context = kmalloc(sizeof(*context), GFP_KERNEL);
|
||||
if (!context)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -306,9 +327,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
|
||||
context->going_down = false;
|
||||
|
||||
down_write(&lists_rwsem);
|
||||
spin_lock_irqsave(&device->client_data_lock, flags);
|
||||
write_lock_irq(&device->client_data_lock);
|
||||
list_add(&context->list, &device->client_data_list);
|
||||
spin_unlock_irqrestore(&device->client_data_lock, flags);
|
||||
write_unlock_irq(&device->client_data_lock);
|
||||
up_write(&lists_rwsem);
|
||||
|
||||
return 0;
|
||||
@ -444,22 +465,8 @@ static u32 __dev_new_index(void)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* ib_register_device - Register an IB device with IB core
|
||||
* @device:Device to register
|
||||
*
|
||||
* Low-level drivers use ib_register_device() to register their
|
||||
* devices with the IB core. All registered clients will receive a
|
||||
* callback for each device that is added. @device must be allocated
|
||||
* with ib_alloc_device().
|
||||
*/
|
||||
int ib_register_device(struct ib_device *device,
|
||||
int (*port_callback)(struct ib_device *,
|
||||
u8, struct kobject *))
|
||||
static void setup_dma_device(struct ib_device *device)
|
||||
{
|
||||
int ret;
|
||||
struct ib_client *client;
|
||||
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
|
||||
struct device *parent = device->dev.parent;
|
||||
|
||||
WARN_ON_ONCE(device->dma_device);
|
||||
@ -491,56 +498,113 @@ int ib_register_device(struct ib_device *device,
|
||||
WARN_ON_ONCE(!parent);
|
||||
device->dma_device = parent;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_lock(&device_mutex);
|
||||
/*
 * Tear down the per-device state of @device: clean up and release its
 * cache via ib_cache_cleanup_one() / ib_cache_release_one(), then free
 * the port_pkey_list and port_immutable arrays.
 * NOTE(review): presumably the counterpart of setup_device(); it is used
 * on the dev_cleanup error path of ib_register_device() - confirm.
 */
static void cleanup_device(struct ib_device *device)
|
||||
{
|
||||
ib_cache_cleanup_one(device);
|
||||
ib_cache_release_one(device);
|
||||
kfree(device->port_pkey_list);
|
||||
kfree(device->port_immutable);
|
||||
}
|
||||
|
||||
if (strchr(device->name, '%')) {
|
||||
ret = alloc_name(device->name);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
static int setup_device(struct ib_device *device)
|
||||
{
|
||||
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
|
||||
int ret;
|
||||
|
||||
if (ib_device_check_mandatory(device)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
ret = ib_device_check_mandatory(device);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = read_port_immutable(device);
|
||||
if (ret) {
|
||||
pr_warn("Couldn't create per port immutable data %s\n",
|
||||
device->name);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = setup_port_pkey_list(device);
|
||||
if (ret) {
|
||||
pr_warn("Couldn't create per port_pkey_list\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ib_cache_setup_one(device);
|
||||
if (ret) {
|
||||
pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
|
||||
goto port_cleanup;
|
||||
}
|
||||
|
||||
ret = ib_device_register_rdmacg(device);
|
||||
if (ret) {
|
||||
pr_warn("Couldn't register device with rdma cgroup\n");
|
||||
goto cache_cleanup;
|
||||
dev_warn(&device->dev,
|
||||
"Couldn't create per port immutable data\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
memset(&device->attrs, 0, sizeof(device->attrs));
|
||||
ret = device->query_device(device, &device->attrs, &uhw);
|
||||
if (ret) {
|
||||
pr_warn("Couldn't query the device attributes\n");
|
||||
goto cg_cleanup;
|
||||
dev_warn(&device->dev,
|
||||
"Couldn't query the device attributes\n");
|
||||
goto port_cleanup;
|
||||
}
|
||||
|
||||
ret = setup_port_pkey_list(device);
|
||||
if (ret) {
|
||||
dev_warn(&device->dev, "Couldn't create per port_pkey_list\n");
|
||||
goto port_cleanup;
|
||||
}
|
||||
|
||||
ret = ib_cache_setup_one(device);
|
||||
if (ret) {
|
||||
dev_warn(&device->dev,
|
||||
"Couldn't set up InfiniBand P_Key/GID cache\n");
|
||||
goto pkey_cleanup;
|
||||
}
|
||||
return 0;
|
||||
|
||||
pkey_cleanup:
|
||||
kfree(device->port_pkey_list);
|
||||
port_cleanup:
|
||||
kfree(device->port_immutable);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* ib_register_device - Register an IB device with IB core
|
||||
* @device:Device to register
|
||||
*
|
||||
* Low-level drivers use ib_register_device() to register their
|
||||
* devices with the IB core. All registered clients will receive a
|
||||
* callback for each device that is added. @device must be allocated
|
||||
* with ib_alloc_device().
|
||||
*/
|
||||
int ib_register_device(struct ib_device *device, const char *name,
|
||||
int (*port_callback)(struct ib_device *, u8,
|
||||
struct kobject *))
|
||||
{
|
||||
int ret;
|
||||
struct ib_client *client;
|
||||
|
||||
setup_dma_device(device);
|
||||
|
||||
mutex_lock(&device_mutex);
|
||||
|
||||
if (strchr(name, '%')) {
|
||||
ret = alloc_name(device, name);
|
||||
if (ret)
|
||||
goto out;
|
||||
} else {
|
||||
ret = dev_set_name(&device->dev, name);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
if (__ib_device_get_by_name(dev_name(&device->dev))) {
|
||||
ret = -ENFILE;
|
||||
goto out;
|
||||
}
|
||||
strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
|
||||
|
||||
ret = setup_device(device);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
device->index = __dev_new_index();
|
||||
|
||||
ret = ib_device_register_rdmacg(device);
|
||||
if (ret) {
|
||||
dev_warn(&device->dev,
|
||||
"Couldn't register device with rdma cgroup\n");
|
||||
goto dev_cleanup;
|
||||
}
|
||||
|
||||
ret = ib_device_register_sysfs(device, port_callback);
|
||||
if (ret) {
|
||||
pr_warn("Couldn't register device %s with driver model\n",
|
||||
device->name);
|
||||
dev_warn(&device->dev,
|
||||
"Couldn't register device with driver model\n");
|
||||
goto cg_cleanup;
|
||||
}
|
||||
|
||||
@ -550,7 +614,6 @@ int ib_register_device(struct ib_device *device,
|
||||
if (!add_client_context(device, client) && client->add)
|
||||
client->add(device);
|
||||
|
||||
device->index = __dev_new_index();
|
||||
down_write(&lists_rwsem);
|
||||
list_add_tail(&device->core_list, &device_list);
|
||||
up_write(&lists_rwsem);
|
||||
@ -559,11 +622,8 @@ int ib_register_device(struct ib_device *device,
|
||||
|
||||
cg_cleanup:
|
||||
ib_device_unregister_rdmacg(device);
|
||||
cache_cleanup:
|
||||
ib_cache_cleanup_one(device);
|
||||
ib_cache_release_one(device);
|
||||
port_cleanup:
|
||||
kfree(device->port_immutable);
|
||||
dev_cleanup:
|
||||
cleanup_device(device);
|
||||
out:
|
||||
mutex_unlock(&device_mutex);
|
||||
return ret;
|
||||
@ -585,21 +645,20 @@ void ib_unregister_device(struct ib_device *device)
|
||||
|
||||
down_write(&lists_rwsem);
|
||||
list_del(&device->core_list);
|
||||
spin_lock_irqsave(&device->client_data_lock, flags);
|
||||
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
|
||||
write_lock_irq(&device->client_data_lock);
|
||||
list_for_each_entry(context, &device->client_data_list, list)
|
||||
context->going_down = true;
|
||||
spin_unlock_irqrestore(&device->client_data_lock, flags);
|
||||
write_unlock_irq(&device->client_data_lock);
|
||||
downgrade_write(&lists_rwsem);
|
||||
|
||||
list_for_each_entry_safe(context, tmp, &device->client_data_list,
|
||||
list) {
|
||||
list_for_each_entry(context, &device->client_data_list, list) {
|
||||
if (context->client->remove)
|
||||
context->client->remove(device, context->data);
|
||||
}
|
||||
up_read(&lists_rwsem);
|
||||
|
||||
ib_device_unregister_rdmacg(device);
|
||||
ib_device_unregister_sysfs(device);
|
||||
ib_device_unregister_rdmacg(device);
|
||||
|
||||
mutex_unlock(&device_mutex);
|
||||
|
||||
@ -609,10 +668,13 @@ void ib_unregister_device(struct ib_device *device)
|
||||
kfree(device->port_pkey_list);
|
||||
|
||||
down_write(&lists_rwsem);
|
||||
spin_lock_irqsave(&device->client_data_lock, flags);
|
||||
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
|
||||
write_lock_irqsave(&device->client_data_lock, flags);
|
||||
list_for_each_entry_safe(context, tmp, &device->client_data_list,
|
||||
list) {
|
||||
list_del(&context->list);
|
||||
kfree(context);
|
||||
spin_unlock_irqrestore(&device->client_data_lock, flags);
|
||||
}
|
||||
write_unlock_irqrestore(&device->client_data_lock, flags);
|
||||
up_write(&lists_rwsem);
|
||||
|
||||
device->reg_state = IB_DEV_UNREGISTERED;
|
||||
@ -662,9 +724,8 @@ EXPORT_SYMBOL(ib_register_client);
|
||||
*/
|
||||
void ib_unregister_client(struct ib_client *client)
|
||||
{
|
||||
struct ib_client_data *context, *tmp;
|
||||
struct ib_client_data *context;
|
||||
struct ib_device *device;
|
||||
unsigned long flags;
|
||||
|
||||
mutex_lock(&device_mutex);
|
||||
|
||||
@ -676,14 +737,14 @@ void ib_unregister_client(struct ib_client *client)
|
||||
struct ib_client_data *found_context = NULL;
|
||||
|
||||
down_write(&lists_rwsem);
|
||||
spin_lock_irqsave(&device->client_data_lock, flags);
|
||||
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
|
||||
write_lock_irq(&device->client_data_lock);
|
||||
list_for_each_entry(context, &device->client_data_list, list)
|
||||
if (context->client == client) {
|
||||
context->going_down = true;
|
||||
found_context = context;
|
||||
break;
|
||||
}
|
||||
spin_unlock_irqrestore(&device->client_data_lock, flags);
|
||||
write_unlock_irq(&device->client_data_lock);
|
||||
up_write(&lists_rwsem);
|
||||
|
||||
if (client->remove)
|
||||
@ -691,17 +752,18 @@ void ib_unregister_client(struct ib_client *client)
|
||||
found_context->data : NULL);
|
||||
|
||||
if (!found_context) {
|
||||
pr_warn("No client context found for %s/%s\n",
|
||||
device->name, client->name);
|
||||
dev_warn(&device->dev,
|
||||
"No client context found for %s\n",
|
||||
client->name);
|
||||
continue;
|
||||
}
|
||||
|
||||
down_write(&lists_rwsem);
|
||||
spin_lock_irqsave(&device->client_data_lock, flags);
|
||||
write_lock_irq(&device->client_data_lock);
|
||||
list_del(&found_context->list);
|
||||
kfree(found_context);
|
||||
spin_unlock_irqrestore(&device->client_data_lock, flags);
|
||||
write_unlock_irq(&device->client_data_lock);
|
||||
up_write(&lists_rwsem);
|
||||
kfree(found_context);
|
||||
}
|
||||
|
||||
mutex_unlock(&device_mutex);
|
||||
@ -722,13 +784,13 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
|
||||
void *ret = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&device->client_data_lock, flags);
|
||||
read_lock_irqsave(&device->client_data_lock, flags);
|
||||
list_for_each_entry(context, &device->client_data_list, list)
|
||||
if (context->client == client) {
|
||||
ret = context->data;
|
||||
break;
|
||||
}
|
||||
spin_unlock_irqrestore(&device->client_data_lock, flags);
|
||||
read_unlock_irqrestore(&device->client_data_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -749,18 +811,18 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
|
||||
struct ib_client_data *context;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&device->client_data_lock, flags);
|
||||
write_lock_irqsave(&device->client_data_lock, flags);
|
||||
list_for_each_entry(context, &device->client_data_list, list)
|
||||
if (context->client == client) {
|
||||
context->data = data;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_warn("No client context found for %s/%s\n",
|
||||
device->name, client->name);
|
||||
dev_warn(&device->dev, "No client context found for %s\n",
|
||||
client->name);
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&device->client_data_lock, flags);
|
||||
write_unlock_irqrestore(&device->client_data_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(ib_set_client_data);
|
||||
|
||||
@ -1166,10 +1228,19 @@ static int __init ib_core_init(void)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ib_comp_unbound_wq =
|
||||
alloc_workqueue("ib-comp-unb-wq",
|
||||
WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
|
||||
WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
|
||||
if (!ib_comp_unbound_wq) {
|
||||
ret = -ENOMEM;
|
||||
goto err_comp;
|
||||
}
|
||||
|
||||
ret = class_register(&ib_class);
|
||||
if (ret) {
|
||||
pr_warn("Couldn't create InfiniBand device class\n");
|
||||
goto err_comp;
|
||||
goto err_comp_unbound;
|
||||
}
|
||||
|
||||
ret = rdma_nl_init();
|
||||
@ -1218,6 +1289,8 @@ static int __init ib_core_init(void)
|
||||
rdma_nl_exit();
|
||||
err_sysfs:
|
||||
class_unregister(&ib_class);
|
||||
err_comp_unbound:
|
||||
destroy_workqueue(ib_comp_unbound_wq);
|
||||
err_comp:
|
||||
destroy_workqueue(ib_comp_wq);
|
||||
err:
|
||||
@ -1236,6 +1309,7 @@ static void __exit ib_core_cleanup(void)
|
||||
addr_cleanup();
|
||||
rdma_nl_exit();
|
||||
class_unregister(&ib_class);
|
||||
destroy_workqueue(ib_comp_unbound_wq);
|
||||
destroy_workqueue(ib_comp_wq);
|
||||
/* Make sure that any pending umem accounting work is done. */
|
||||
destroy_workqueue(ib_wq);
|
||||
|
@ -213,7 +213,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
|
||||
device = pd->device;
|
||||
if (!device->alloc_fmr || !device->dealloc_fmr ||
|
||||
!device->map_phys_fmr || !device->unmap_fmr) {
|
||||
pr_info(PFX "Device %s does not support FMRs\n", device->name);
|
||||
dev_info(&device->dev, "Device does not support FMRs\n");
|
||||
return ERR_PTR(-ENOSYS);
|
||||
}
|
||||
|
||||
@ -257,7 +257,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
|
||||
atomic_set(&pool->flush_ser, 0);
|
||||
init_waitqueue_head(&pool->force_wait);
|
||||
|
||||
pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name);
|
||||
pool->worker =
|
||||
kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev));
|
||||
if (IS_ERR(pool->worker)) {
|
||||
pr_warn(PFX "couldn't start cleanup kthread worker\n");
|
||||
ret = PTR_ERR(pool->worker);
|
||||
|
@ -509,7 +509,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
|
||||
cm_id->m_local_addr = cm_id->local_addr;
|
||||
cm_id->m_remote_addr = cm_id->remote_addr;
|
||||
|
||||
memcpy(pm_reg_msg.dev_name, cm_id->device->name,
|
||||
memcpy(pm_reg_msg.dev_name, dev_name(&cm_id->device->dev),
|
||||
sizeof(pm_reg_msg.dev_name));
|
||||
memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname,
|
||||
sizeof(pm_reg_msg.if_name));
|
||||
|
@ -220,33 +220,37 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
|
||||
int ret2, qpn;
|
||||
u8 mgmt_class, vclass;
|
||||
|
||||
if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
|
||||
(qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
|
||||
return ERR_PTR(-EPROTONOSUPPORT);
|
||||
|
||||
/* Validate parameters */
|
||||
qpn = get_spl_qp_index(qp_type);
|
||||
if (qpn == -1) {
|
||||
dev_notice(&device->dev,
|
||||
"ib_register_mad_agent: invalid QP Type %d\n",
|
||||
qp_type);
|
||||
dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
|
||||
__func__, qp_type);
|
||||
goto error1;
|
||||
}
|
||||
|
||||
if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
|
||||
dev_notice(&device->dev,
|
||||
"ib_register_mad_agent: invalid RMPP Version %u\n",
|
||||
rmpp_version);
|
||||
dev_dbg_ratelimited(&device->dev,
|
||||
"%s: invalid RMPP Version %u\n",
|
||||
__func__, rmpp_version);
|
||||
goto error1;
|
||||
}
|
||||
|
||||
/* Validate MAD registration request if supplied */
|
||||
if (mad_reg_req) {
|
||||
if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
|
||||
dev_notice(&device->dev,
|
||||
"ib_register_mad_agent: invalid Class Version %u\n",
|
||||
mad_reg_req->mgmt_class_version);
|
||||
dev_dbg_ratelimited(&device->dev,
|
||||
"%s: invalid Class Version %u\n",
|
||||
__func__,
|
||||
mad_reg_req->mgmt_class_version);
|
||||
goto error1;
|
||||
}
|
||||
if (!recv_handler) {
|
||||
dev_notice(&device->dev,
|
||||
"ib_register_mad_agent: no recv_handler\n");
|
||||
dev_dbg_ratelimited(&device->dev,
|
||||
"%s: no recv_handler\n", __func__);
|
||||
goto error1;
|
||||
}
|
||||
if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
|
||||
@ -256,9 +260,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
|
||||
*/
|
||||
if (mad_reg_req->mgmt_class !=
|
||||
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
|
||||
dev_notice(&device->dev,
|
||||
"ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
|
||||
mad_reg_req->mgmt_class);
|
||||
dev_dbg_ratelimited(&device->dev,
|
||||
"%s: Invalid Mgmt Class 0x%x\n",
|
||||
__func__, mad_reg_req->mgmt_class);
|
||||
goto error1;
|
||||
}
|
||||
} else if (mad_reg_req->mgmt_class == 0) {
|
||||
@ -266,8 +270,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
|
||||
* Class 0 is reserved in IBA and is used for
|
||||
* aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
|
||||
*/
|
||||
dev_notice(&device->dev,
|
||||
"ib_register_mad_agent: Invalid Mgmt Class 0\n");
|
||||
dev_dbg_ratelimited(&device->dev,
|
||||
"%s: Invalid Mgmt Class 0\n",
|
||||
__func__);
|
||||
goto error1;
|
||||
} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
|
||||
/*
|
||||
@ -275,18 +280,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
|
||||
* ensure supplied OUI is not zero
|
||||
*/
|
||||
if (!is_vendor_oui(mad_reg_req->oui)) {
|
||||
dev_notice(&device->dev,
|
||||
"ib_register_mad_agent: No OUI specified for class 0x%x\n",
|
||||
mad_reg_req->mgmt_class);
|
||||
dev_dbg_ratelimited(&device->dev,
|
||||
"%s: No OUI specified for class 0x%x\n",
|
||||
__func__,
|
||||
mad_reg_req->mgmt_class);
|
||||
goto error1;
|
||||
}
|
||||
}
|
||||
/* Make sure class supplied is consistent with RMPP */
|
||||
if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
|
||||
if (rmpp_version) {
|
||||
dev_notice(&device->dev,
|
||||
"ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
|
||||
mad_reg_req->mgmt_class);
|
||||
dev_dbg_ratelimited(&device->dev,
|
||||
"%s: RMPP version for non-RMPP class 0x%x\n",
|
||||
__func__, mad_reg_req->mgmt_class);
|
||||
goto error1;
|
||||
}
|
||||
}
|
||||
@ -297,9 +303,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
|
||||
IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
|
||||
(mad_reg_req->mgmt_class !=
|
||||
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
|
||||
dev_notice(&device->dev,
|
||||
"ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
|
||||
mad_reg_req->mgmt_class);
|
||||
dev_dbg_ratelimited(&device->dev,
|
||||
"%s: Invalid SM QP type: class 0x%x\n",
|
||||
__func__, mad_reg_req->mgmt_class);
|
||||
goto error1;
|
||||
}
|
||||
} else {
|
||||
@ -307,9 +313,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
|
||||
IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
|
||||
(mad_reg_req->mgmt_class ==
|
||||
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
|
||||
dev_notice(&device->dev,
|
||||
"ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
|
||||
mad_reg_req->mgmt_class);
|
||||
dev_dbg_ratelimited(&device->dev,
|
||||
"%s: Invalid GS QP type: class 0x%x\n",
|
||||
__func__, mad_reg_req->mgmt_class);
|
||||
goto error1;
|
||||
}
|
||||
}
|
||||
@ -324,18 +330,18 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
|
||||
/* Validate device and port */
|
||||
port_priv = ib_get_mad_port(device, port_num);
|
||||
if (!port_priv) {
|
||||
dev_notice(&device->dev,
|
||||
"ib_register_mad_agent: Invalid port %d\n",
|
||||
port_num);
|
||||
dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n",
|
||||
__func__, port_num);
|
||||
ret = ERR_PTR(-ENODEV);
|
||||
goto error1;
|
||||
}
|
||||
|
||||
/* Verify the QP requested is supported. For example, Ethernet devices
|
||||
* will not have QP0 */
|
||||
/* Verify the QP requested is supported. For example, Ethernet devices
|
||||
* will not have QP0.
|
||||
*/
|
||||
if (!port_priv->qp_info[qpn].qp) {
|
||||
dev_notice(&device->dev,
|
||||
"ib_register_mad_agent: QP %d not supported\n", qpn);
|
||||
dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
|
||||
__func__, qpn);
|
||||
ret = ERR_PTR(-EPROTONOSUPPORT);
|
||||
goto error1;
|
||||
}
|
||||
@ -2408,7 +2414,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
|
||||
}
|
||||
|
||||
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
|
||||
int timeout_ms)
|
||||
unsigned long timeout_ms)
|
||||
{
|
||||
mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
|
||||
wait_for_response(mad_send_wr);
|
||||
@ -3183,7 +3189,7 @@ static int ib_mad_port_open(struct ib_device *device,
|
||||
cq_size *= 2;
|
||||
|
||||
port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
|
||||
IB_POLL_WORKQUEUE);
|
||||
IB_POLL_UNBOUND_WORKQUEUE);
|
||||
if (IS_ERR(port_priv->cq)) {
|
||||
dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
|
||||
ret = PTR_ERR(port_priv->cq);
|
||||
|
@ -221,6 +221,6 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
|
||||
void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
|
||||
|
||||
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
|
||||
int timeout_ms);
|
||||
unsigned long timeout_ms);
|
||||
|
||||
#endif /* __IB_MAD_PRIV_H__ */
|
||||
|
@ -47,9 +47,9 @@ static struct {
|
||||
const struct rdma_nl_cbs *cb_table;
|
||||
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
|
||||
|
||||
int rdma_nl_chk_listeners(unsigned int group)
|
||||
bool rdma_nl_chk_listeners(unsigned int group)
|
||||
{
|
||||
return (netlink_has_listeners(nls, group)) ? 0 : -1;
|
||||
return netlink_has_listeners(nls, group);
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_nl_chk_listeners);
|
||||
|
||||
|
@ -179,7 +179,8 @@ static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
|
||||
{
|
||||
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
|
||||
return -EMSGSIZE;
|
||||
if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
|
||||
if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
|
||||
dev_name(&device->dev)))
|
||||
return -EMSGSIZE;
|
||||
|
||||
return 0;
|
||||
@ -645,6 +646,36 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
||||
struct ib_device *device;
|
||||
u32 index;
|
||||
int err;
|
||||
|
||||
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
|
||||
extack);
|
||||
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
|
||||
return -EINVAL;
|
||||
|
||||
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
||||
device = ib_device_get_by_index(index);
|
||||
if (!device)
|
||||
return -EINVAL;
|
||||
|
||||
if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
|
||||
char name[IB_DEVICE_NAME_MAX] = {};
|
||||
|
||||
nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
|
||||
IB_DEVICE_NAME_MAX);
|
||||
err = ib_device_rename(device, name);
|
||||
}
|
||||
|
||||
put_device(&device->dev);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int _nldev_get_dumpit(struct ib_device *device,
|
||||
struct sk_buff *skb,
|
||||
struct netlink_callback *cb,
|
||||
@ -1077,6 +1108,10 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
|
||||
.doit = nldev_get_doit,
|
||||
.dump = nldev_get_dumpit,
|
||||
},
|
||||
[RDMA_NLDEV_CMD_SET] = {
|
||||
.doit = nldev_set_doit,
|
||||
.flags = RDMA_NL_ADMIN_PERM,
|
||||
},
|
||||
[RDMA_NLDEV_CMD_PORT_GET] = {
|
||||
.doit = nldev_port_get_doit,
|
||||
.dump = nldev_port_get_dumpit,
|
||||
|
@ -794,44 +794,6 @@ void uverbs_close_fd(struct file *f)
|
||||
uverbs_uobject_put(uobj);
|
||||
}
|
||||
|
||||
static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
|
||||
{
|
||||
struct ib_device *ib_dev = ibcontext->device;
|
||||
struct task_struct *owning_process = NULL;
|
||||
struct mm_struct *owning_mm = NULL;
|
||||
|
||||
owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
|
||||
if (!owning_process)
|
||||
return;
|
||||
|
||||
owning_mm = get_task_mm(owning_process);
|
||||
if (!owning_mm) {
|
||||
pr_info("no mm, disassociate ucontext is pending task termination\n");
|
||||
while (1) {
|
||||
put_task_struct(owning_process);
|
||||
usleep_range(1000, 2000);
|
||||
owning_process = get_pid_task(ibcontext->tgid,
|
||||
PIDTYPE_PID);
|
||||
if (!owning_process ||
|
||||
owning_process->state == TASK_DEAD) {
|
||||
pr_info("disassociate ucontext done, task was terminated\n");
|
||||
/* in case task was dead need to release the
|
||||
* task struct.
|
||||
*/
|
||||
if (owning_process)
|
||||
put_task_struct(owning_process);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
down_write(&owning_mm->mmap_sem);
|
||||
ib_dev->disassociate_ucontext(ibcontext);
|
||||
up_write(&owning_mm->mmap_sem);
|
||||
mmput(owning_mm);
|
||||
put_task_struct(owning_process);
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop the ucontext off the ufile and completely disconnect it from the
|
||||
* ib_device
|
||||
@ -840,20 +802,28 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
|
||||
enum rdma_remove_reason reason)
|
||||
{
|
||||
struct ib_ucontext *ucontext = ufile->ucontext;
|
||||
struct ib_device *ib_dev = ucontext->device;
|
||||
int ret;
|
||||
|
||||
if (reason == RDMA_REMOVE_DRIVER_REMOVE)
|
||||
ufile_disassociate_ucontext(ucontext);
|
||||
/*
|
||||
* If we are closing the FD then the user mmap VMAs must have
|
||||
* already been destroyed as they hold on to the filep, otherwise
|
||||
* they need to be zap'd.
|
||||
*/
|
||||
if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
|
||||
uverbs_user_mmap_disassociate(ufile);
|
||||
if (ib_dev->disassociate_ucontext)
|
||||
ib_dev->disassociate_ucontext(ucontext);
|
||||
}
|
||||
|
||||
put_pid(ucontext->tgid);
|
||||
ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
|
||||
ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
|
||||
RDMACG_RESOURCE_HCA_HANDLE);
|
||||
|
||||
/*
|
||||
* FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
|
||||
* the error return.
|
||||
*/
|
||||
ret = ucontext->device->dealloc_ucontext(ucontext);
|
||||
ret = ib_dev->dealloc_ucontext(ucontext);
|
||||
WARN_ON(ret);
|
||||
|
||||
ufile->ucontext = NULL;
|
||||
|
@ -160,5 +160,6 @@ void uverbs_disassociate_api(struct uverbs_api *uapi);
|
||||
void uverbs_destroy_api(struct uverbs_api *uapi);
|
||||
void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
|
||||
unsigned int num_attrs);
|
||||
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
|
||||
|
||||
#endif /* RDMA_CORE_H */
|
||||
|
@ -50,8 +50,7 @@ void rdma_restrack_clean(struct rdma_restrack_root *res)
|
||||
|
||||
dev = container_of(res, struct ib_device, res);
|
||||
pr_err("restrack: %s", CUT_HERE);
|
||||
pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n",
|
||||
dev->name);
|
||||
dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
|
||||
hash_for_each(res->hash, bkt, e, node) {
|
||||
if (rdma_is_kernel_res(e)) {
|
||||
owner = e->kern_name;
|
||||
@ -156,6 +155,21 @@ static bool res_is_user(struct rdma_restrack_entry *res)
|
||||
}
|
||||
}
|
||||
|
||||
void rdma_restrack_set_task(struct rdma_restrack_entry *res,
|
||||
const char *caller)
|
||||
{
|
||||
if (caller) {
|
||||
res->kern_name = caller;
|
||||
return;
|
||||
}
|
||||
|
||||
if (res->task)
|
||||
put_task_struct(res->task);
|
||||
get_task_struct(current);
|
||||
res->task = current;
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_restrack_set_task);
|
||||
|
||||
void rdma_restrack_add(struct rdma_restrack_entry *res)
|
||||
{
|
||||
struct ib_device *dev = res_to_dev(res);
|
||||
@ -168,7 +182,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
|
||||
|
||||
if (res_is_user(res)) {
|
||||
if (!res->task)
|
||||
rdma_restrack_set_task(res, current);
|
||||
rdma_restrack_set_task(res, NULL);
|
||||
res->kern_name = NULL;
|
||||
} else {
|
||||
set_kern_name(res);
|
||||
@ -209,7 +223,7 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
|
||||
struct ib_device *dev;
|
||||
|
||||
if (!res->valid)
|
||||
return;
|
||||
goto out;
|
||||
|
||||
dev = res_to_dev(res);
|
||||
if (!dev)
|
||||
@ -222,8 +236,12 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
|
||||
down_write(&dev->res.rwsem);
|
||||
hash_del(&res->node);
|
||||
res->valid = false;
|
||||
if (res->task)
|
||||
put_task_struct(res->task);
|
||||
up_write(&dev->res.rwsem);
|
||||
|
||||
out:
|
||||
if (res->task) {
|
||||
put_task_struct(res->task);
|
||||
res->task = NULL;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_restrack_del);
|
||||
|
@ -49,16 +49,14 @@ static inline void ib_sa_client_put(struct ib_sa_client *client)
|
||||
}
|
||||
|
||||
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
|
||||
struct ib_device *device, u8 port_num,
|
||||
u8 method,
|
||||
struct ib_device *device, u8 port_num, u8 method,
|
||||
struct ib_sa_mcmember_rec *rec,
|
||||
ib_sa_comp_mask comp_mask,
|
||||
int timeout_ms, gfp_t gfp_mask,
|
||||
unsigned long timeout_ms, gfp_t gfp_mask,
|
||||
void (*callback)(int status,
|
||||
struct ib_sa_mcmember_rec *resp,
|
||||
void *context),
|
||||
void *context,
|
||||
struct ib_sa_query **sa_query);
|
||||
void *context, struct ib_sa_query **sa_query);
|
||||
|
||||
int mcast_init(void);
|
||||
void mcast_cleanup(void);
|
||||
|
@ -761,7 +761,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
|
||||
|
||||
/* Construct the family header first */
|
||||
header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
|
||||
memcpy(header->device_name, query->port->agent->device->name,
|
||||
memcpy(header->device_name, dev_name(&query->port->agent->device->dev),
|
||||
LS_DEVICE_NAME_MAX);
|
||||
header->port_num = query->port->port_num;
|
||||
|
||||
@ -835,7 +835,6 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
|
||||
struct sk_buff *skb = NULL;
|
||||
struct nlmsghdr *nlh;
|
||||
void *data;
|
||||
int ret = 0;
|
||||
struct ib_sa_mad *mad;
|
||||
int len;
|
||||
|
||||
@ -862,13 +861,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
|
||||
/* Repair the nlmsg header length */
|
||||
nlmsg_end(skb, nlh);
|
||||
|
||||
ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
|
||||
if (!ret)
|
||||
ret = len;
|
||||
else
|
||||
ret = 0;
|
||||
|
||||
return ret;
|
||||
return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
|
||||
}
|
||||
|
||||
static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
|
||||
@ -891,14 +884,12 @@ static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
|
||||
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
|
||||
|
||||
ret = ib_nl_send_msg(query, gfp_mask);
|
||||
if (ret <= 0) {
|
||||
if (ret) {
|
||||
ret = -EIO;
|
||||
/* Remove the request */
|
||||
spin_lock_irqsave(&ib_nl_request_lock, flags);
|
||||
list_del(&query->list);
|
||||
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -1227,46 +1218,6 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
|
||||
return src_path_mask;
|
||||
}
|
||||
|
||||
static int roce_resolve_route_from_path(struct sa_path_rec *rec,
|
||||
const struct ib_gid_attr *attr)
|
||||
{
|
||||
struct rdma_dev_addr dev_addr = {};
|
||||
union {
|
||||
struct sockaddr _sockaddr;
|
||||
struct sockaddr_in _sockaddr_in;
|
||||
struct sockaddr_in6 _sockaddr_in6;
|
||||
} sgid_addr, dgid_addr;
|
||||
int ret;
|
||||
|
||||
if (rec->roce.route_resolved)
|
||||
return 0;
|
||||
if (!attr || !attr->ndev)
|
||||
return -EINVAL;
|
||||
|
||||
dev_addr.bound_dev_if = attr->ndev->ifindex;
|
||||
/* TODO: Use net from the ib_gid_attr once it is added to it,
|
||||
* until than, limit itself to init_net.
|
||||
*/
|
||||
dev_addr.net = &init_net;
|
||||
|
||||
rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
|
||||
rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
|
||||
|
||||
/* validate the route */
|
||||
ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
|
||||
&dgid_addr._sockaddr, &dev_addr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
|
||||
dev_addr.network == RDMA_NETWORK_IPV6) &&
|
||||
rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
|
||||
return -EINVAL;
|
||||
|
||||
rec->roce.route_resolved = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
|
||||
struct sa_path_rec *rec,
|
||||
struct rdma_ah_attr *ah_attr,
|
||||
@ -1409,7 +1360,8 @@ static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
|
||||
spin_unlock_irqrestore(&tid_lock, flags);
|
||||
}
|
||||
|
||||
static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
|
||||
static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
bool preload = gfpflags_allow_blocking(gfp_mask);
|
||||
unsigned long flags;
|
||||
@ -1433,7 +1385,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
|
||||
|
||||
if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
|
||||
(!(query->flags & IB_SA_QUERY_OPA))) {
|
||||
if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
|
||||
if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
|
||||
if (!ib_nl_make_request(query, gfp_mask))
|
||||
return id;
|
||||
}
|
||||
@ -1599,7 +1551,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
|
||||
struct ib_device *device, u8 port_num,
|
||||
struct sa_path_rec *rec,
|
||||
ib_sa_comp_mask comp_mask,
|
||||
int timeout_ms, gfp_t gfp_mask,
|
||||
unsigned long timeout_ms, gfp_t gfp_mask,
|
||||
void (*callback)(int status,
|
||||
struct sa_path_rec *resp,
|
||||
void *context),
|
||||
@ -1753,7 +1705,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
|
||||
struct ib_device *device, u8 port_num, u8 method,
|
||||
struct ib_sa_service_rec *rec,
|
||||
ib_sa_comp_mask comp_mask,
|
||||
int timeout_ms, gfp_t gfp_mask,
|
||||
unsigned long timeout_ms, gfp_t gfp_mask,
|
||||
void (*callback)(int status,
|
||||
struct ib_sa_service_rec *resp,
|
||||
void *context),
|
||||
@ -1850,7 +1802,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
|
||||
u8 method,
|
||||
struct ib_sa_mcmember_rec *rec,
|
||||
ib_sa_comp_mask comp_mask,
|
||||
int timeout_ms, gfp_t gfp_mask,
|
||||
unsigned long timeout_ms, gfp_t gfp_mask,
|
||||
void (*callback)(int status,
|
||||
struct ib_sa_mcmember_rec *resp,
|
||||
void *context),
|
||||
@ -1941,7 +1893,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
|
||||
struct ib_device *device, u8 port_num,
|
||||
struct ib_sa_guidinfo_rec *rec,
|
||||
ib_sa_comp_mask comp_mask, u8 method,
|
||||
int timeout_ms, gfp_t gfp_mask,
|
||||
unsigned long timeout_ms, gfp_t gfp_mask,
|
||||
void (*callback)(int status,
|
||||
struct ib_sa_guidinfo_rec *resp,
|
||||
void *context),
|
||||
@ -2108,7 +2060,7 @@ static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
|
||||
}
|
||||
|
||||
static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
|
||||
int timeout_ms,
|
||||
unsigned long timeout_ms,
|
||||
void (*callback)(void *context),
|
||||
void *context,
|
||||
struct ib_sa_query **sa_query)
|
||||
|
@ -685,9 +685,8 @@ static int ib_mad_agent_security_change(struct notifier_block *nb,
|
||||
if (event != LSM_POLICY_CHANGE)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
ag->smp_allowed = !security_ib_endport_manage_subnet(ag->security,
|
||||
ag->device->name,
|
||||
ag->port_num);
|
||||
ag->smp_allowed = !security_ib_endport_manage_subnet(
|
||||
ag->security, dev_name(&ag->device->dev), ag->port_num);
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
@ -708,7 +707,7 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
|
||||
return 0;
|
||||
|
||||
ret = security_ib_endport_manage_subnet(agent->security,
|
||||
agent->device->name,
|
||||
dev_name(&agent->device->dev),
|
||||
agent->port_num);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -512,7 +512,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
|
||||
ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
|
||||
40 + offset / 8, sizeof(data));
|
||||
if (ret < 0)
|
||||
return sprintf(buf, "N/A (no PMA)\n");
|
||||
return ret;
|
||||
|
||||
switch (width) {
|
||||
case 4:
|
||||
@ -1036,7 +1036,7 @@ static int add_port(struct ib_device *device, int port_num,
|
||||
p->port_num = port_num;
|
||||
|
||||
ret = kobject_init_and_add(&p->kobj, &port_type,
|
||||
device->ports_parent,
|
||||
device->ports_kobj,
|
||||
"%d", port_num);
|
||||
if (ret) {
|
||||
kfree(p);
|
||||
@ -1057,10 +1057,12 @@ static int add_port(struct ib_device *device, int port_num,
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
p->pma_table = get_counter_table(device, port_num);
|
||||
ret = sysfs_create_group(&p->kobj, p->pma_table);
|
||||
if (ret)
|
||||
goto err_put_gid_attrs;
|
||||
if (device->process_mad) {
|
||||
p->pma_table = get_counter_table(device, port_num);
|
||||
ret = sysfs_create_group(&p->kobj, p->pma_table);
|
||||
if (ret)
|
||||
goto err_put_gid_attrs;
|
||||
}
|
||||
|
||||
p->gid_group.name = "gids";
|
||||
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
|
||||
@ -1118,9 +1120,9 @@ static int add_port(struct ib_device *device, int port_num,
|
||||
}
|
||||
|
||||
/*
|
||||
* If port == 0, it means we have only one port and the parent
|
||||
* device, not this port device, should be the holder of the
|
||||
* hw_counters
|
||||
* If port == 0, it means hw_counters are per device and not per
|
||||
* port, so holder should be device. Therefore skip per port counter
|
||||
* initialization.
|
||||
*/
|
||||
if (device->alloc_hw_stats && port_num)
|
||||
setup_hw_stats(device, p, port_num);
|
||||
@ -1173,7 +1175,8 @@ static int add_port(struct ib_device *device, int port_num,
|
||||
p->gid_group.attrs = NULL;
|
||||
|
||||
err_remove_pma:
|
||||
sysfs_remove_group(&p->kobj, p->pma_table);
|
||||
if (p->pma_table)
|
||||
sysfs_remove_group(&p->kobj, p->pma_table);
|
||||
|
||||
err_put_gid_attrs:
|
||||
kobject_put(&p->gid_attr_group->kobj);
|
||||
@ -1183,7 +1186,7 @@ static int add_port(struct ib_device *device, int port_num,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t show_node_type(struct device *device,
|
||||
static ssize_t node_type_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct ib_device *dev = container_of(device, struct ib_device, dev);
|
||||
@ -1198,8 +1201,9 @@ static ssize_t show_node_type(struct device *device,
|
||||
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
|
||||
}
|
||||
}
|
||||
static DEVICE_ATTR_RO(node_type);
|
||||
|
||||
static ssize_t show_sys_image_guid(struct device *device,
|
||||
static ssize_t sys_image_guid_show(struct device *device,
|
||||
struct device_attribute *dev_attr, char *buf)
|
||||
{
|
||||
struct ib_device *dev = container_of(device, struct ib_device, dev);
|
||||
@ -1210,8 +1214,9 @@ static ssize_t show_sys_image_guid(struct device *device,
|
||||
be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
|
||||
be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
|
||||
}
|
||||
static DEVICE_ATTR_RO(sys_image_guid);
|
||||
|
||||
static ssize_t show_node_guid(struct device *device,
|
||||
static ssize_t node_guid_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct ib_device *dev = container_of(device, struct ib_device, dev);
|
||||
@ -1222,8 +1227,9 @@ static ssize_t show_node_guid(struct device *device,
|
||||
be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
|
||||
be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
|
||||
}
|
||||
static DEVICE_ATTR_RO(node_guid);
|
||||
|
||||
static ssize_t show_node_desc(struct device *device,
|
||||
static ssize_t node_desc_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct ib_device *dev = container_of(device, struct ib_device, dev);
|
||||
@ -1231,9 +1237,9 @@ static ssize_t show_node_desc(struct device *device,
|
||||
return sprintf(buf, "%.64s\n", dev->node_desc);
|
||||
}
|
||||
|
||||
static ssize_t set_node_desc(struct device *device,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
static ssize_t node_desc_store(struct device *device,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct ib_device *dev = container_of(device, struct ib_device, dev);
|
||||
struct ib_device_modify desc = {};
|
||||
@ -1249,8 +1255,9 @@ static ssize_t set_node_desc(struct device *device,
|
||||
|
||||
return count;
|
||||
}
|
||||
static DEVICE_ATTR_RW(node_desc);
|
||||
|
||||
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
|
||||
static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct ib_device *dev = container_of(device, struct ib_device, dev);
|
||||
@ -1259,19 +1266,19 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
|
||||
strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
|
||||
return strlen(buf);
|
||||
}
|
||||
static DEVICE_ATTR_RO(fw_ver);
|
||||
|
||||
static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
|
||||
static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
|
||||
static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
|
||||
static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
|
||||
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
|
||||
static struct attribute *ib_dev_attrs[] = {
|
||||
&dev_attr_node_type.attr,
|
||||
&dev_attr_node_guid.attr,
|
||||
&dev_attr_sys_image_guid.attr,
|
||||
&dev_attr_fw_ver.attr,
|
||||
&dev_attr_node_desc.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct device_attribute *ib_class_attributes[] = {
|
||||
&dev_attr_node_type,
|
||||
&dev_attr_sys_image_guid,
|
||||
&dev_attr_node_guid,
|
||||
&dev_attr_node_desc,
|
||||
&dev_attr_fw_ver,
|
||||
static const struct attribute_group dev_attr_group = {
|
||||
.attrs = ib_dev_attrs,
|
||||
};
|
||||
|
||||
static void free_port_list_attributes(struct ib_device *device)
|
||||
@ -1285,7 +1292,9 @@ static void free_port_list_attributes(struct ib_device *device)
|
||||
kfree(port->hw_stats);
|
||||
free_hsag(&port->kobj, port->hw_stats_ag);
|
||||
}
|
||||
sysfs_remove_group(p, port->pma_table);
|
||||
|
||||
if (port->pma_table)
|
||||
sysfs_remove_group(p, port->pma_table);
|
||||
sysfs_remove_group(p, &port->pkey_group);
|
||||
sysfs_remove_group(p, &port->gid_group);
|
||||
sysfs_remove_group(&port->gid_attr_group->kobj,
|
||||
@ -1296,7 +1305,7 @@ static void free_port_list_attributes(struct ib_device *device)
|
||||
kobject_put(p);
|
||||
}
|
||||
|
||||
kobject_put(device->ports_parent);
|
||||
kobject_put(device->ports_kobj);
|
||||
}
|
||||
|
||||
int ib_device_register_sysfs(struct ib_device *device,
|
||||
@ -1307,23 +1316,15 @@ int ib_device_register_sysfs(struct ib_device *device,
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
ret = dev_set_name(class_dev, "%s", device->name);
|
||||
if (ret)
|
||||
return ret;
|
||||
device->groups[0] = &dev_attr_group;
|
||||
class_dev->groups = device->groups;
|
||||
|
||||
ret = device_add(class_dev);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
|
||||
ret = device_create_file(class_dev, ib_class_attributes[i]);
|
||||
if (ret)
|
||||
goto err_unregister;
|
||||
}
|
||||
|
||||
device->ports_parent = kobject_create_and_add("ports",
|
||||
&class_dev->kobj);
|
||||
if (!device->ports_parent) {
|
||||
device->ports_kobj = kobject_create_and_add("ports", &class_dev->kobj);
|
||||
if (!device->ports_kobj) {
|
||||
ret = -ENOMEM;
|
||||
goto err_put;
|
||||
}
|
||||
@ -1347,20 +1348,15 @@ int ib_device_register_sysfs(struct ib_device *device,
|
||||
|
||||
err_put:
|
||||
free_port_list_attributes(device);
|
||||
|
||||
err_unregister:
|
||||
device_del(class_dev);
|
||||
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void ib_device_unregister_sysfs(struct ib_device *device)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Hold kobject until ib_dealloc_device() */
|
||||
kobject_get(&device->dev.kobj);
|
||||
/* Hold device until ib_dealloc_device() */
|
||||
get_device(&device->dev);
|
||||
|
||||
free_port_list_attributes(device);
|
||||
|
||||
@ -1369,8 +1365,5 @@ void ib_device_unregister_sysfs(struct ib_device *device)
|
||||
free_hsag(&device->dev.kobj, device->hw_stats_ag);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
|
||||
device_remove_file(&device->dev, ib_class_attributes[i]);
|
||||
|
||||
device_unregister(&device->dev);
|
||||
}
|
||||
|
@ -85,7 +85,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
|
||||
struct page **page_list;
|
||||
struct vm_area_struct **vma_list;
|
||||
unsigned long lock_limit;
|
||||
unsigned long new_pinned;
|
||||
unsigned long cur_base;
|
||||
struct mm_struct *mm;
|
||||
unsigned long npages;
|
||||
int ret;
|
||||
int i;
|
||||
@ -107,25 +109,32 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
|
||||
if (!can_do_mlock())
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
umem = kzalloc(sizeof *umem, GFP_KERNEL);
|
||||
if (!umem)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
if (access & IB_ACCESS_ON_DEMAND) {
|
||||
umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
|
||||
if (!umem)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
umem->is_odp = 1;
|
||||
} else {
|
||||
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
|
||||
if (!umem)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
umem->context = context;
|
||||
umem->length = size;
|
||||
umem->address = addr;
|
||||
umem->page_shift = PAGE_SHIFT;
|
||||
umem->writable = ib_access_writable(access);
|
||||
umem->owning_mm = mm = current->mm;
|
||||
mmgrab(mm);
|
||||
|
||||
if (access & IB_ACCESS_ON_DEMAND) {
|
||||
ret = ib_umem_odp_get(context, umem, access);
|
||||
ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
|
||||
if (ret)
|
||||
goto umem_kfree;
|
||||
return umem;
|
||||
}
|
||||
|
||||
umem->odp_data = NULL;
|
||||
|
||||
/* We assume the memory is from hugetlb until proved otherwise */
|
||||
umem->hugetlb = 1;
|
||||
|
||||
@ -144,25 +153,25 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
|
||||
umem->hugetlb = 0;
|
||||
|
||||
npages = ib_umem_num_pages(umem);
|
||||
if (npages == 0 || npages > UINT_MAX) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
||||
|
||||
down_write(&current->mm->mmap_sem);
|
||||
current->mm->pinned_vm += npages;
|
||||
if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) {
|
||||
up_write(&current->mm->mmap_sem);
|
||||
down_write(&mm->mmap_sem);
|
||||
if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
|
||||
(new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
|
||||
up_write(&mm->mmap_sem);
|
||||
ret = -ENOMEM;
|
||||
goto vma;
|
||||
goto out;
|
||||
}
|
||||
up_write(&current->mm->mmap_sem);
|
||||
mm->pinned_vm = new_pinned;
|
||||
up_write(&mm->mmap_sem);
|
||||
|
||||
cur_base = addr & PAGE_MASK;
|
||||
|
||||
if (npages == 0 || npages > UINT_MAX) {
|
||||
ret = -EINVAL;
|
||||
goto vma;
|
||||
}
|
||||
|
||||
ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
|
||||
if (ret)
|
||||
goto vma;
|
||||
@ -172,14 +181,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
|
||||
|
||||
sg_list_start = umem->sg_head.sgl;
|
||||
|
||||
down_read(&current->mm->mmap_sem);
|
||||
while (npages) {
|
||||
down_read(&mm->mmap_sem);
|
||||
ret = get_user_pages_longterm(cur_base,
|
||||
min_t(unsigned long, npages,
|
||||
PAGE_SIZE / sizeof (struct page *)),
|
||||
gup_flags, page_list, vma_list);
|
||||
if (ret < 0) {
|
||||
up_read(&current->mm->mmap_sem);
|
||||
up_read(&mm->mmap_sem);
|
||||
goto umem_release;
|
||||
}
|
||||
|
||||
@ -187,17 +196,20 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
|
||||
cur_base += ret * PAGE_SIZE;
|
||||
npages -= ret;
|
||||
|
||||
/* Continue to hold the mmap_sem as vma_list access
|
||||
* needs to be protected.
|
||||
*/
|
||||
for_each_sg(sg_list_start, sg, ret, i) {
|
||||
if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
|
||||
umem->hugetlb = 0;
|
||||
|
||||
sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
|
||||
}
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
/* preparing for next loop */
|
||||
sg_list_start = sg;
|
||||
}
|
||||
up_read(&current->mm->mmap_sem);
|
||||
|
||||
umem->nmap = ib_dma_map_sg_attrs(context->device,
|
||||
umem->sg_head.sgl,
|
||||
@ -216,29 +228,40 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
|
||||
umem_release:
|
||||
__ib_umem_release(context->device, umem, 0);
|
||||
vma:
|
||||
down_write(&current->mm->mmap_sem);
|
||||
current->mm->pinned_vm -= ib_umem_num_pages(umem);
|
||||
up_write(&current->mm->mmap_sem);
|
||||
down_write(&mm->mmap_sem);
|
||||
mm->pinned_vm -= ib_umem_num_pages(umem);
|
||||
up_write(&mm->mmap_sem);
|
||||
out:
|
||||
if (vma_list)
|
||||
free_page((unsigned long) vma_list);
|
||||
free_page((unsigned long) page_list);
|
||||
umem_kfree:
|
||||
if (ret)
|
||||
if (ret) {
|
||||
mmdrop(umem->owning_mm);
|
||||
kfree(umem);
|
||||
}
|
||||
return ret ? ERR_PTR(ret) : umem;
|
||||
}
|
||||
EXPORT_SYMBOL(ib_umem_get);
|
||||
|
||||
static void ib_umem_account(struct work_struct *work)
|
||||
static void __ib_umem_release_tail(struct ib_umem *umem)
|
||||
{
|
||||
mmdrop(umem->owning_mm);
|
||||
if (umem->is_odp)
|
||||
kfree(to_ib_umem_odp(umem));
|
||||
else
|
||||
kfree(umem);
|
||||
}
|
||||
|
||||
static void ib_umem_release_defer(struct work_struct *work)
|
||||
{
|
||||
struct ib_umem *umem = container_of(work, struct ib_umem, work);
|
||||
|
||||
down_write(&umem->mm->mmap_sem);
|
||||
umem->mm->pinned_vm -= umem->diff;
|
||||
up_write(&umem->mm->mmap_sem);
|
||||
mmput(umem->mm);
|
||||
kfree(umem);
|
||||
down_write(&umem->owning_mm->mmap_sem);
|
||||
umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
|
||||
up_write(&umem->owning_mm->mmap_sem);
|
||||
|
||||
__ib_umem_release_tail(umem);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -248,52 +271,36 @@ static void ib_umem_account(struct work_struct *work)
|
||||
void ib_umem_release(struct ib_umem *umem)
|
||||
{
|
||||
struct ib_ucontext *context = umem->context;
|
||||
struct mm_struct *mm;
|
||||
struct task_struct *task;
|
||||
unsigned long diff;
|
||||
|
||||
if (umem->odp_data) {
|
||||
ib_umem_odp_release(umem);
|
||||
if (umem->is_odp) {
|
||||
ib_umem_odp_release(to_ib_umem_odp(umem));
|
||||
__ib_umem_release_tail(umem);
|
||||
return;
|
||||
}
|
||||
|
||||
__ib_umem_release(umem->context->device, umem, 1);
|
||||
|
||||
task = get_pid_task(umem->context->tgid, PIDTYPE_PID);
|
||||
if (!task)
|
||||
goto out;
|
||||
mm = get_task_mm(task);
|
||||
put_task_struct(task);
|
||||
if (!mm)
|
||||
goto out;
|
||||
|
||||
diff = ib_umem_num_pages(umem);
|
||||
|
||||
/*
|
||||
* We may be called with the mm's mmap_sem already held. This
|
||||
* can happen when a userspace munmap() is the call that drops
|
||||
* the last reference to our file and calls our release
|
||||
* method. If there are memory regions to destroy, we'll end
|
||||
* up here and not be able to take the mmap_sem. In that case
|
||||
* we defer the vm_locked accounting to the system workqueue.
|
||||
* we defer the vm_locked accounting to a workqueue.
|
||||
*/
|
||||
if (context->closing) {
|
||||
if (!down_write_trylock(&mm->mmap_sem)) {
|
||||
INIT_WORK(&umem->work, ib_umem_account);
|
||||
umem->mm = mm;
|
||||
umem->diff = diff;
|
||||
|
||||
if (!down_write_trylock(&umem->owning_mm->mmap_sem)) {
|
||||
INIT_WORK(&umem->work, ib_umem_release_defer);
|
||||
queue_work(ib_wq, &umem->work);
|
||||
return;
|
||||
}
|
||||
} else
|
||||
down_write(&mm->mmap_sem);
|
||||
} else {
|
||||
down_write(&umem->owning_mm->mmap_sem);
|
||||
}
|
||||
umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
|
||||
up_write(&umem->owning_mm->mmap_sem);
|
||||
|
||||
mm->pinned_vm -= diff;
|
||||
up_write(&mm->mmap_sem);
|
||||
mmput(mm);
|
||||
out:
|
||||
kfree(umem);
|
||||
__ib_umem_release_tail(umem);
|
||||
}
|
||||
EXPORT_SYMBOL(ib_umem_release);
|
||||
|
||||
@ -303,7 +310,7 @@ int ib_umem_page_count(struct ib_umem *umem)
|
||||
int n;
|
||||
struct scatterlist *sg;
|
||||
|
||||
if (umem->odp_data)
|
||||
if (umem->is_odp)
|
||||
return ib_umem_num_pages(umem);
|
||||
|
||||
n = 0;
|
||||
|
@ -58,7 +58,7 @@ static u64 node_start(struct umem_odp_node *n)
|
||||
struct ib_umem_odp *umem_odp =
|
||||
container_of(n, struct ib_umem_odp, interval_tree);
|
||||
|
||||
return ib_umem_start(umem_odp->umem);
|
||||
return ib_umem_start(&umem_odp->umem);
|
||||
}
|
||||
|
||||
/* Note that the representation of the intervals in the interval tree
|
||||
@ -71,140 +71,86 @@ static u64 node_last(struct umem_odp_node *n)
|
||||
struct ib_umem_odp *umem_odp =
|
||||
container_of(n, struct ib_umem_odp, interval_tree);
|
||||
|
||||
return ib_umem_end(umem_odp->umem) - 1;
|
||||
return ib_umem_end(&umem_odp->umem) - 1;
|
||||
}
|
||||
|
||||
INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
|
||||
node_start, node_last, static, rbt_ib_umem)
|
||||
|
||||
static void ib_umem_notifier_start_account(struct ib_umem *item)
|
||||
static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
|
||||
{
|
||||
mutex_lock(&item->odp_data->umem_mutex);
|
||||
|
||||
/* Only update private counters for this umem if it has them.
|
||||
* Otherwise skip it. All page faults will be delayed for this umem. */
|
||||
if (item->odp_data->mn_counters_active) {
|
||||
int notifiers_count = item->odp_data->notifiers_count++;
|
||||
|
||||
if (notifiers_count == 0)
|
||||
/* Initialize the completion object for waiting on
|
||||
* notifiers. Since notifier_count is zero, no one
|
||||
* should be waiting right now. */
|
||||
reinit_completion(&item->odp_data->notifier_completion);
|
||||
}
|
||||
mutex_unlock(&item->odp_data->umem_mutex);
|
||||
}
|
||||
|
||||
static void ib_umem_notifier_end_account(struct ib_umem *item)
|
||||
{
|
||||
mutex_lock(&item->odp_data->umem_mutex);
|
||||
|
||||
/* Only update private counters for this umem if it has them.
|
||||
* Otherwise skip it. All page faults will be delayed for this umem. */
|
||||
if (item->odp_data->mn_counters_active) {
|
||||
mutex_lock(&umem_odp->umem_mutex);
|
||||
if (umem_odp->notifiers_count++ == 0)
|
||||
/*
|
||||
* This sequence increase will notify the QP page fault that
|
||||
* the page that is going to be mapped in the spte could have
|
||||
* been freed.
|
||||
* Initialize the completion object for waiting on
|
||||
* notifiers. Since notifier_count is zero, no one should be
|
||||
* waiting right now.
|
||||
*/
|
||||
++item->odp_data->notifiers_seq;
|
||||
if (--item->odp_data->notifiers_count == 0)
|
||||
complete_all(&item->odp_data->notifier_completion);
|
||||
}
|
||||
mutex_unlock(&item->odp_data->umem_mutex);
|
||||
reinit_completion(&umem_odp->notifier_completion);
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
}
|
||||
|
||||
/* Account for a new mmu notifier in an ib_ucontext. */
|
||||
static void ib_ucontext_notifier_start_account(struct ib_ucontext *context)
|
||||
static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
|
||||
{
|
||||
atomic_inc(&context->notifier_count);
|
||||
mutex_lock(&umem_odp->umem_mutex);
|
||||
/*
|
||||
* This sequence increase will notify the QP page fault that the page
|
||||
* that is going to be mapped in the spte could have been freed.
|
||||
*/
|
||||
++umem_odp->notifiers_seq;
|
||||
if (--umem_odp->notifiers_count == 0)
|
||||
complete_all(&umem_odp->notifier_completion);
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
}
|
||||
|
||||
/* Account for a terminating mmu notifier in an ib_ucontext.
|
||||
*
|
||||
* Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since
|
||||
* the function takes the semaphore itself. */
|
||||
static void ib_ucontext_notifier_end_account(struct ib_ucontext *context)
|
||||
static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
|
||||
u64 start, u64 end, void *cookie)
|
||||
{
|
||||
int zero_notifiers = atomic_dec_and_test(&context->notifier_count);
|
||||
struct ib_umem *umem = &umem_odp->umem;
|
||||
|
||||
if (zero_notifiers &&
|
||||
!list_empty(&context->no_private_counters)) {
|
||||
/* No currently running mmu notifiers. Now is the chance to
|
||||
* add private accounting to all previously added umems. */
|
||||
struct ib_umem_odp *odp_data, *next;
|
||||
|
||||
/* Prevent concurrent mmu notifiers from working on the
|
||||
* no_private_counters list. */
|
||||
down_write(&context->umem_rwsem);
|
||||
|
||||
/* Read the notifier_count again, with the umem_rwsem
|
||||
* semaphore taken for write. */
|
||||
if (!atomic_read(&context->notifier_count)) {
|
||||
list_for_each_entry_safe(odp_data, next,
|
||||
&context->no_private_counters,
|
||||
no_private_counters) {
|
||||
mutex_lock(&odp_data->umem_mutex);
|
||||
odp_data->mn_counters_active = true;
|
||||
list_del(&odp_data->no_private_counters);
|
||||
complete_all(&odp_data->notifier_completion);
|
||||
mutex_unlock(&odp_data->umem_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
up_write(&context->umem_rwsem);
|
||||
}
|
||||
}
|
||||
|
||||
static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start,
|
||||
u64 end, void *cookie) {
|
||||
/*
|
||||
* Increase the number of notifiers running, to
|
||||
* prevent any further fault handling on this MR.
|
||||
*/
|
||||
ib_umem_notifier_start_account(item);
|
||||
item->odp_data->dying = 1;
|
||||
ib_umem_notifier_start_account(umem_odp);
|
||||
umem_odp->dying = 1;
|
||||
/* Make sure that the fact the umem is dying is out before we release
|
||||
* all pending page faults. */
|
||||
smp_wmb();
|
||||
complete_all(&item->odp_data->notifier_completion);
|
||||
item->context->invalidate_range(item, ib_umem_start(item),
|
||||
ib_umem_end(item));
|
||||
complete_all(&umem_odp->notifier_completion);
|
||||
umem->context->invalidate_range(umem_odp, ib_umem_start(umem),
|
||||
ib_umem_end(umem));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ib_umem_notifier_release(struct mmu_notifier *mn,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
|
||||
struct ib_ucontext_per_mm *per_mm =
|
||||
container_of(mn, struct ib_ucontext_per_mm, mn);
|
||||
|
||||
if (!context->invalidate_range)
|
||||
return;
|
||||
|
||||
ib_ucontext_notifier_start_account(context);
|
||||
down_read(&context->umem_rwsem);
|
||||
rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
|
||||
ULLONG_MAX,
|
||||
ib_umem_notifier_release_trampoline,
|
||||
true,
|
||||
NULL);
|
||||
up_read(&context->umem_rwsem);
|
||||
down_read(&per_mm->umem_rwsem);
|
||||
if (per_mm->active)
|
||||
rbt_ib_umem_for_each_in_range(
|
||||
&per_mm->umem_tree, 0, ULLONG_MAX,
|
||||
ib_umem_notifier_release_trampoline, true, NULL);
|
||||
up_read(&per_mm->umem_rwsem);
|
||||
}
|
||||
|
||||
static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
|
||||
static int invalidate_page_trampoline(struct ib_umem_odp *item, u64 start,
|
||||
u64 end, void *cookie)
|
||||
{
|
||||
ib_umem_notifier_start_account(item);
|
||||
item->context->invalidate_range(item, start, start + PAGE_SIZE);
|
||||
item->umem.context->invalidate_range(item, start, start + PAGE_SIZE);
|
||||
ib_umem_notifier_end_account(item);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
|
||||
u64 end, void *cookie)
|
||||
static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
|
||||
u64 start, u64 end, void *cookie)
|
||||
{
|
||||
ib_umem_notifier_start_account(item);
|
||||
item->context->invalidate_range(item, start, end);
|
||||
item->umem.context->invalidate_range(item, start, end);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -214,28 +160,30 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
|
||||
unsigned long end,
|
||||
bool blockable)
|
||||
{
|
||||
struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
|
||||
int ret;
|
||||
|
||||
if (!context->invalidate_range)
|
||||
return 0;
|
||||
struct ib_ucontext_per_mm *per_mm =
|
||||
container_of(mn, struct ib_ucontext_per_mm, mn);
|
||||
|
||||
if (blockable)
|
||||
down_read(&context->umem_rwsem);
|
||||
else if (!down_read_trylock(&context->umem_rwsem))
|
||||
down_read(&per_mm->umem_rwsem);
|
||||
else if (!down_read_trylock(&per_mm->umem_rwsem))
|
||||
return -EAGAIN;
|
||||
|
||||
ib_ucontext_notifier_start_account(context);
|
||||
ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
|
||||
end,
|
||||
invalidate_range_start_trampoline,
|
||||
blockable, NULL);
|
||||
up_read(&context->umem_rwsem);
|
||||
if (!per_mm->active) {
|
||||
up_read(&per_mm->umem_rwsem);
|
||||
/*
|
||||
* At this point active is permanently set and visible to this
|
||||
* CPU without a lock, that fact is relied on to skip the unlock
|
||||
* in range_end.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, end,
|
||||
invalidate_range_start_trampoline,
|
||||
blockable, NULL);
|
||||
}
|
||||
|
||||
static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
|
||||
static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
|
||||
u64 end, void *cookie)
|
||||
{
|
||||
ib_umem_notifier_end_account(item);
|
||||
@ -247,22 +195,16 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
|
||||
unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
|
||||
struct ib_ucontext_per_mm *per_mm =
|
||||
container_of(mn, struct ib_ucontext_per_mm, mn);
|
||||
|
||||
if (!context->invalidate_range)
|
||||
if (unlikely(!per_mm->active))
|
||||
return;
|
||||
|
||||
/*
|
||||
* TODO: we currently bail out if there is any sleepable work to be done
|
||||
* in ib_umem_notifier_invalidate_range_start so we shouldn't really block
|
||||
* here. But this is ugly and fragile.
|
||||
*/
|
||||
down_read(&context->umem_rwsem);
|
||||
rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
|
||||
rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start,
|
||||
end,
|
||||
invalidate_range_end_trampoline, true, NULL);
|
||||
up_read(&context->umem_rwsem);
|
||||
ib_ucontext_notifier_end_account(context);
|
||||
up_read(&per_mm->umem_rwsem);
|
||||
}
|
||||
|
||||
static const struct mmu_notifier_ops ib_umem_notifiers = {
|
||||
@ -271,31 +213,158 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
|
||||
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
|
||||
};
|
||||
|
||||
struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
|
||||
unsigned long addr,
|
||||
size_t size)
|
||||
static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
|
||||
{
|
||||
struct ib_umem *umem;
|
||||
struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
|
||||
struct ib_umem *umem = &umem_odp->umem;
|
||||
|
||||
down_write(&per_mm->umem_rwsem);
|
||||
if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
|
||||
rbt_ib_umem_insert(&umem_odp->interval_tree,
|
||||
&per_mm->umem_tree);
|
||||
up_write(&per_mm->umem_rwsem);
|
||||
}
|
||||
|
||||
static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
|
||||
{
|
||||
struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
|
||||
struct ib_umem *umem = &umem_odp->umem;
|
||||
|
||||
down_write(&per_mm->umem_rwsem);
|
||||
if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
|
||||
rbt_ib_umem_remove(&umem_odp->interval_tree,
|
||||
&per_mm->umem_tree);
|
||||
complete_all(&umem_odp->notifier_completion);
|
||||
|
||||
up_write(&per_mm->umem_rwsem);
|
||||
}
|
||||
|
||||
static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
struct ib_ucontext_per_mm *per_mm;
|
||||
int ret;
|
||||
|
||||
per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL);
|
||||
if (!per_mm)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
per_mm->context = ctx;
|
||||
per_mm->mm = mm;
|
||||
per_mm->umem_tree = RB_ROOT_CACHED;
|
||||
init_rwsem(&per_mm->umem_rwsem);
|
||||
per_mm->active = ctx->invalidate_range;
|
||||
|
||||
rcu_read_lock();
|
||||
per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
|
||||
rcu_read_unlock();
|
||||
|
||||
WARN_ON(mm != current->mm);
|
||||
|
||||
per_mm->mn.ops = &ib_umem_notifiers;
|
||||
ret = mmu_notifier_register(&per_mm->mn, per_mm->mm);
|
||||
if (ret) {
|
||||
dev_err(&ctx->device->dev,
|
||||
"Failed to register mmu_notifier %d\n", ret);
|
||||
goto out_pid;
|
||||
}
|
||||
|
||||
list_add(&per_mm->ucontext_list, &ctx->per_mm_list);
|
||||
return per_mm;
|
||||
|
||||
out_pid:
|
||||
put_pid(per_mm->tgid);
|
||||
kfree(per_mm);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static int get_per_mm(struct ib_umem_odp *umem_odp)
|
||||
{
|
||||
struct ib_ucontext *ctx = umem_odp->umem.context;
|
||||
struct ib_ucontext_per_mm *per_mm;
|
||||
|
||||
/*
|
||||
* Generally speaking we expect only one or two per_mm in this list,
|
||||
* so no reason to optimize this search today.
|
||||
*/
|
||||
mutex_lock(&ctx->per_mm_list_lock);
|
||||
list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) {
|
||||
if (per_mm->mm == umem_odp->umem.owning_mm)
|
||||
goto found;
|
||||
}
|
||||
|
||||
per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm);
|
||||
if (IS_ERR(per_mm)) {
|
||||
mutex_unlock(&ctx->per_mm_list_lock);
|
||||
return PTR_ERR(per_mm);
|
||||
}
|
||||
|
||||
found:
|
||||
umem_odp->per_mm = per_mm;
|
||||
per_mm->odp_mrs_count++;
|
||||
mutex_unlock(&ctx->per_mm_list_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_per_mm(struct rcu_head *rcu)
|
||||
{
|
||||
kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu));
|
||||
}
|
||||
|
||||
void put_per_mm(struct ib_umem_odp *umem_odp)
|
||||
{
|
||||
struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
|
||||
struct ib_ucontext *ctx = umem_odp->umem.context;
|
||||
bool need_free;
|
||||
|
||||
mutex_lock(&ctx->per_mm_list_lock);
|
||||
umem_odp->per_mm = NULL;
|
||||
per_mm->odp_mrs_count--;
|
||||
need_free = per_mm->odp_mrs_count == 0;
|
||||
if (need_free)
|
||||
list_del(&per_mm->ucontext_list);
|
||||
mutex_unlock(&ctx->per_mm_list_lock);
|
||||
|
||||
if (!need_free)
|
||||
return;
|
||||
|
||||
/*
|
||||
* NOTE! mmu_notifier_unregister() can happen between a start/end
|
||||
* callback, resulting in a start without a matching end, and thus an unbalanced
|
||||
* lock. This doesn't really matter to us since we are about to kfree
|
||||
* the memory that holds the lock, however LOCKDEP doesn't like this.
|
||||
*/
|
||||
down_write(&per_mm->umem_rwsem);
|
||||
per_mm->active = false;
|
||||
up_write(&per_mm->umem_rwsem);
|
||||
|
||||
WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root));
|
||||
mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm);
|
||||
put_pid(per_mm->tgid);
|
||||
mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm);
|
||||
}
|
||||
|
||||
struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
|
||||
unsigned long addr, size_t size)
|
||||
{
|
||||
struct ib_ucontext *ctx = per_mm->context;
|
||||
struct ib_umem_odp *odp_data;
|
||||
struct ib_umem *umem;
|
||||
int pages = size >> PAGE_SHIFT;
|
||||
int ret;
|
||||
|
||||
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
|
||||
if (!umem)
|
||||
odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
|
||||
if (!odp_data)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
umem->context = context;
|
||||
umem = &odp_data->umem;
|
||||
umem->context = ctx;
|
||||
umem->length = size;
|
||||
umem->address = addr;
|
||||
umem->page_shift = PAGE_SHIFT;
|
||||
umem->writable = 1;
|
||||
|
||||
odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
|
||||
if (!odp_data) {
|
||||
ret = -ENOMEM;
|
||||
goto out_umem;
|
||||
}
|
||||
odp_data->umem = umem;
|
||||
umem->is_odp = 1;
|
||||
odp_data->per_mm = per_mm;
|
||||
|
||||
mutex_init(&odp_data->umem_mutex);
|
||||
init_completion(&odp_data->notifier_completion);
|
||||
@ -314,39 +383,34 @@ struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
|
||||
goto out_page_list;
|
||||
}
|
||||
|
||||
down_write(&context->umem_rwsem);
|
||||
context->odp_mrs_count++;
|
||||
rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
|
||||
if (likely(!atomic_read(&context->notifier_count)))
|
||||
odp_data->mn_counters_active = true;
|
||||
else
|
||||
list_add(&odp_data->no_private_counters,
|
||||
&context->no_private_counters);
|
||||
up_write(&context->umem_rwsem);
|
||||
/*
|
||||
* Caller must ensure that the umem_odp that the per_mm came from
|
||||
* cannot be freed during the call to ib_alloc_odp_umem.
|
||||
*/
|
||||
mutex_lock(&ctx->per_mm_list_lock);
|
||||
per_mm->odp_mrs_count++;
|
||||
mutex_unlock(&ctx->per_mm_list_lock);
|
||||
add_umem_to_per_mm(odp_data);
|
||||
|
||||
umem->odp_data = odp_data;
|
||||
|
||||
return umem;
|
||||
return odp_data;
|
||||
|
||||
out_page_list:
|
||||
vfree(odp_data->page_list);
|
||||
out_odp_data:
|
||||
kfree(odp_data);
|
||||
out_umem:
|
||||
kfree(umem);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
EXPORT_SYMBOL(ib_alloc_odp_umem);
|
||||
|
||||
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
|
||||
int access)
|
||||
int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
|
||||
{
|
||||
struct ib_umem *umem = &umem_odp->umem;
|
||||
/*
|
||||
* NOTE: This must be called in a process context where umem->owning_mm
|
||||
* == current->mm
|
||||
*/
|
||||
struct mm_struct *mm = umem->owning_mm;
|
||||
int ret_val;
|
||||
struct pid *our_pid;
|
||||
struct mm_struct *mm = get_task_mm(current);
|
||||
|
||||
if (!mm)
|
||||
return -EINVAL;
|
||||
|
||||
if (access & IB_ACCESS_HUGETLB) {
|
||||
struct vm_area_struct *vma;
|
||||
@ -366,111 +430,43 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
|
||||
umem->hugetlb = 0;
|
||||
}
|
||||
|
||||
/* Prevent creating ODP MRs in child processes */
|
||||
rcu_read_lock();
|
||||
our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
|
||||
rcu_read_unlock();
|
||||
put_pid(our_pid);
|
||||
if (context->tgid != our_pid) {
|
||||
ret_val = -EINVAL;
|
||||
goto out_mm;
|
||||
}
|
||||
mutex_init(&umem_odp->umem_mutex);
|
||||
|
||||
umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
|
||||
if (!umem->odp_data) {
|
||||
ret_val = -ENOMEM;
|
||||
goto out_mm;
|
||||
}
|
||||
umem->odp_data->umem = umem;
|
||||
|
||||
mutex_init(&umem->odp_data->umem_mutex);
|
||||
|
||||
init_completion(&umem->odp_data->notifier_completion);
|
||||
init_completion(&umem_odp->notifier_completion);
|
||||
|
||||
if (ib_umem_num_pages(umem)) {
|
||||
umem->odp_data->page_list =
|
||||
vzalloc(array_size(sizeof(*umem->odp_data->page_list),
|
||||
umem_odp->page_list =
|
||||
vzalloc(array_size(sizeof(*umem_odp->page_list),
|
||||
ib_umem_num_pages(umem)));
|
||||
if (!umem->odp_data->page_list) {
|
||||
ret_val = -ENOMEM;
|
||||
goto out_odp_data;
|
||||
}
|
||||
if (!umem_odp->page_list)
|
||||
return -ENOMEM;
|
||||
|
||||
umem->odp_data->dma_list =
|
||||
vzalloc(array_size(sizeof(*umem->odp_data->dma_list),
|
||||
umem_odp->dma_list =
|
||||
vzalloc(array_size(sizeof(*umem_odp->dma_list),
|
||||
ib_umem_num_pages(umem)));
|
||||
if (!umem->odp_data->dma_list) {
|
||||
if (!umem_odp->dma_list) {
|
||||
ret_val = -ENOMEM;
|
||||
goto out_page_list;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* When using MMU notifiers, we will get a
|
||||
* notification before the "current" task (and MM) is
|
||||
* destroyed. We use the umem_rwsem semaphore to synchronize.
|
||||
*/
|
||||
down_write(&context->umem_rwsem);
|
||||
context->odp_mrs_count++;
|
||||
if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
|
||||
rbt_ib_umem_insert(&umem->odp_data->interval_tree,
|
||||
&context->umem_tree);
|
||||
if (likely(!atomic_read(&context->notifier_count)) ||
|
||||
context->odp_mrs_count == 1)
|
||||
umem->odp_data->mn_counters_active = true;
|
||||
else
|
||||
list_add(&umem->odp_data->no_private_counters,
|
||||
&context->no_private_counters);
|
||||
downgrade_write(&context->umem_rwsem);
|
||||
ret_val = get_per_mm(umem_odp);
|
||||
if (ret_val)
|
||||
goto out_dma_list;
|
||||
add_umem_to_per_mm(umem_odp);
|
||||
|
||||
if (context->odp_mrs_count == 1) {
|
||||
/*
|
||||
* Note that at this point, no MMU notifier is running
|
||||
* for this context!
|
||||
*/
|
||||
atomic_set(&context->notifier_count, 0);
|
||||
INIT_HLIST_NODE(&context->mn.hlist);
|
||||
context->mn.ops = &ib_umem_notifiers;
|
||||
/*
|
||||
* Lock-dep detects a false positive for mmap_sem vs.
|
||||
* umem_rwsem, due to not grasping downgrade_write correctly.
|
||||
*/
|
||||
lockdep_off();
|
||||
ret_val = mmu_notifier_register(&context->mn, mm);
|
||||
lockdep_on();
|
||||
if (ret_val) {
|
||||
pr_err("Failed to register mmu_notifier %d\n", ret_val);
|
||||
ret_val = -EBUSY;
|
||||
goto out_mutex;
|
||||
}
|
||||
}
|
||||
|
||||
up_read(&context->umem_rwsem);
|
||||
|
||||
/*
|
||||
* Note that doing an mmput can cause a notifier for the relevant mm.
|
||||
* If the notifier is called while we hold the umem_rwsem, this will
|
||||
* cause a deadlock. Therefore, we release the reference only after we
|
||||
* released the semaphore.
|
||||
*/
|
||||
mmput(mm);
|
||||
return 0;
|
||||
|
||||
out_mutex:
|
||||
up_read(&context->umem_rwsem);
|
||||
vfree(umem->odp_data->dma_list);
|
||||
out_dma_list:
|
||||
vfree(umem_odp->dma_list);
|
||||
out_page_list:
|
||||
vfree(umem->odp_data->page_list);
|
||||
out_odp_data:
|
||||
kfree(umem->odp_data);
|
||||
out_mm:
|
||||
mmput(mm);
|
||||
vfree(umem_odp->page_list);
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
void ib_umem_odp_release(struct ib_umem *umem)
|
||||
void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
|
||||
{
|
||||
struct ib_ucontext *context = umem->context;
|
||||
struct ib_umem *umem = &umem_odp->umem;
|
||||
|
||||
/*
|
||||
* Ensure that no more pages are mapped in the umem.
|
||||
@ -478,61 +474,13 @@ void ib_umem_odp_release(struct ib_umem *umem)
|
||||
* It is the driver's responsibility to ensure, before calling us,
|
||||
* that the hardware will not attempt to access the MR any more.
|
||||
*/
|
||||
ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem),
|
||||
ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
|
||||
ib_umem_end(umem));
|
||||
|
||||
down_write(&context->umem_rwsem);
|
||||
if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
|
||||
rbt_ib_umem_remove(&umem->odp_data->interval_tree,
|
||||
&context->umem_tree);
|
||||
context->odp_mrs_count--;
|
||||
if (!umem->odp_data->mn_counters_active) {
|
||||
list_del(&umem->odp_data->no_private_counters);
|
||||
complete_all(&umem->odp_data->notifier_completion);
|
||||
}
|
||||
|
||||
/*
|
||||
* Downgrade the lock to a read lock. This ensures that the notifiers
|
||||
* (who lock the mutex for reading) will be able to finish, and we
|
||||
* will be able to eventually obtain the mmu notifiers SRCU. Note
|
||||
* that since we are doing it atomically, no other user could register
|
||||
* and unregister while we do the check.
|
||||
*/
|
||||
downgrade_write(&context->umem_rwsem);
|
||||
if (!context->odp_mrs_count) {
|
||||
struct task_struct *owning_process = NULL;
|
||||
struct mm_struct *owning_mm = NULL;
|
||||
|
||||
owning_process = get_pid_task(context->tgid,
|
||||
PIDTYPE_PID);
|
||||
if (owning_process == NULL)
|
||||
/*
|
||||
* The process is already dead, notifier were removed
|
||||
* already.
|
||||
*/
|
||||
goto out;
|
||||
|
||||
owning_mm = get_task_mm(owning_process);
|
||||
if (owning_mm == NULL)
|
||||
/*
|
||||
* The process' mm is already dead, notifier were
|
||||
* removed already.
|
||||
*/
|
||||
goto out_put_task;
|
||||
mmu_notifier_unregister(&context->mn, owning_mm);
|
||||
|
||||
mmput(owning_mm);
|
||||
|
||||
out_put_task:
|
||||
put_task_struct(owning_process);
|
||||
}
|
||||
out:
|
||||
up_read(&context->umem_rwsem);
|
||||
|
||||
vfree(umem->odp_data->dma_list);
|
||||
vfree(umem->odp_data->page_list);
|
||||
kfree(umem->odp_data);
|
||||
kfree(umem);
|
||||
remove_umem_from_per_mm(umem_odp);
|
||||
put_per_mm(umem_odp);
|
||||
vfree(umem_odp->dma_list);
|
||||
vfree(umem_odp->page_list);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -544,7 +492,7 @@ void ib_umem_odp_release(struct ib_umem *umem)
|
||||
* @access_mask: access permissions needed for this page.
|
||||
* @current_seq: sequence number for synchronization with invalidations.
|
||||
* the sequence number is taken from
|
||||
* umem->odp_data->notifiers_seq.
|
||||
* umem_odp->notifiers_seq.
|
||||
*
|
||||
* The function returns -EFAULT if the DMA mapping operation fails. It returns
|
||||
* -EAGAIN if a concurrent invalidation prevents us from updating the page.
|
||||
@ -554,12 +502,13 @@ void ib_umem_odp_release(struct ib_umem *umem)
|
||||
* umem.
|
||||
*/
|
||||
static int ib_umem_odp_map_dma_single_page(
|
||||
struct ib_umem *umem,
|
||||
struct ib_umem_odp *umem_odp,
|
||||
int page_index,
|
||||
struct page *page,
|
||||
u64 access_mask,
|
||||
unsigned long current_seq)
|
||||
{
|
||||
struct ib_umem *umem = &umem_odp->umem;
|
||||
struct ib_device *dev = umem->context->device;
|
||||
dma_addr_t dma_addr;
|
||||
int stored_page = 0;
|
||||
@ -571,11 +520,11 @@ static int ib_umem_odp_map_dma_single_page(
|
||||
* handle case of a racing notifier. This check also allows us to bail
|
||||
* early if we have a notifier running in parallel with us.
|
||||
*/
|
||||
if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
|
||||
if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) {
|
||||
ret = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
if (!(umem->odp_data->dma_list[page_index])) {
|
||||
if (!(umem_odp->dma_list[page_index])) {
|
||||
dma_addr = ib_dma_map_page(dev,
|
||||
page,
|
||||
0, BIT(umem->page_shift),
|
||||
@ -584,15 +533,15 @@ static int ib_umem_odp_map_dma_single_page(
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
|
||||
umem->odp_data->page_list[page_index] = page;
|
||||
umem_odp->dma_list[page_index] = dma_addr | access_mask;
|
||||
umem_odp->page_list[page_index] = page;
|
||||
umem->npages++;
|
||||
stored_page = 1;
|
||||
} else if (umem->odp_data->page_list[page_index] == page) {
|
||||
umem->odp_data->dma_list[page_index] |= access_mask;
|
||||
} else if (umem_odp->page_list[page_index] == page) {
|
||||
umem_odp->dma_list[page_index] |= access_mask;
|
||||
} else {
|
||||
pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
|
||||
umem->odp_data->page_list[page_index], page);
|
||||
umem_odp->page_list[page_index], page);
|
||||
/* Better remove the mapping now, to prevent any further
|
||||
* damage. */
|
||||
remove_existing_mapping = 1;
|
||||
@ -605,7 +554,7 @@ static int ib_umem_odp_map_dma_single_page(
|
||||
|
||||
if (remove_existing_mapping && umem->context->invalidate_range) {
|
||||
invalidate_page_trampoline(
|
||||
umem,
|
||||
umem_odp,
|
||||
ib_umem_start(umem) + (page_index >> umem->page_shift),
|
||||
ib_umem_start(umem) + ((page_index + 1) >>
|
||||
umem->page_shift),
|
||||
@ -621,7 +570,7 @@ static int ib_umem_odp_map_dma_single_page(
|
||||
*
|
||||
* Pins the range of pages passed in the argument, and maps them to
|
||||
* DMA addresses. The DMA addresses of the mapped pages is updated in
|
||||
* umem->odp_data->dma_list.
|
||||
* umem_odp->dma_list.
|
||||
*
|
||||
* Returns the number of pages mapped in success, negative error code
|
||||
* for failure.
|
||||
@ -629,7 +578,7 @@ static int ib_umem_odp_map_dma_single_page(
|
||||
* the function from completing its task.
|
||||
* An -ENOENT error code indicates that userspace process is being terminated
|
||||
* and mm was already destroyed.
|
||||
* @umem: the umem to map and pin
|
||||
* @umem_odp: the umem to map and pin
|
||||
* @user_virt: the address from which we need to map.
|
||||
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
|
||||
* bigger due to alignment, and may also be smaller in case of an error
|
||||
@ -639,13 +588,15 @@ static int ib_umem_odp_map_dma_single_page(
|
||||
* range.
|
||||
* @current_seq: the MMU notifiers sequence value for synchronization with
|
||||
* invalidations. the sequence number is read from
|
||||
* umem->odp_data->notifiers_seq before calling this function
|
||||
* umem_odp->notifiers_seq before calling this function
|
||||
*/
|
||||
int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
|
||||
u64 access_mask, unsigned long current_seq)
|
||||
int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
|
||||
u64 bcnt, u64 access_mask,
|
||||
unsigned long current_seq)
|
||||
{
|
||||
struct ib_umem *umem = &umem_odp->umem;
|
||||
struct task_struct *owning_process = NULL;
|
||||
struct mm_struct *owning_mm = NULL;
|
||||
struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
|
||||
struct page **local_page_list = NULL;
|
||||
u64 page_mask, off;
|
||||
int j, k, ret = 0, start_idx, npages = 0, page_shift;
|
||||
@ -669,15 +620,14 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
|
||||
user_virt = user_virt & page_mask;
|
||||
bcnt += off; /* Charge for the first page offset as well. */
|
||||
|
||||
owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
|
||||
if (owning_process == NULL) {
|
||||
/*
|
||||
* owning_process is allowed to be NULL, this means somehow the mm is
|
||||
* existing beyond the lifetime of the originating process. Presumably
|
||||
* mmget_not_zero will fail in this case.
|
||||
*/
|
||||
owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID);
|
||||
if (WARN_ON(!mmget_not_zero(umem_odp->umem.owning_mm))) {
|
||||
ret = -EINVAL;
|
||||
goto out_no_task;
|
||||
}
|
||||
|
||||
owning_mm = get_task_mm(owning_process);
|
||||
if (owning_mm == NULL) {
|
||||
ret = -ENOENT;
|
||||
goto out_put_task;
|
||||
}
|
||||
|
||||
@ -709,7 +659,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
|
||||
break;
|
||||
|
||||
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
|
||||
mutex_lock(&umem->odp_data->umem_mutex);
|
||||
mutex_lock(&umem_odp->umem_mutex);
|
||||
for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
|
||||
if (user_virt & ~page_mask) {
|
||||
p += PAGE_SIZE;
|
||||
@ -722,7 +672,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
|
||||
}
|
||||
|
||||
ret = ib_umem_odp_map_dma_single_page(
|
||||
umem, k, local_page_list[j],
|
||||
umem_odp, k, local_page_list[j],
|
||||
access_mask, current_seq);
|
||||
if (ret < 0)
|
||||
break;
|
||||
@ -730,7 +680,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
|
||||
p = page_to_phys(local_page_list[j]);
|
||||
k++;
|
||||
}
|
||||
mutex_unlock(&umem->odp_data->umem_mutex);
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
|
||||
if (ret < 0) {
|
||||
/* Release left over pages when handling errors. */
|
||||
@ -749,16 +699,17 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
|
||||
|
||||
mmput(owning_mm);
|
||||
out_put_task:
|
||||
put_task_struct(owning_process);
|
||||
out_no_task:
|
||||
if (owning_process)
|
||||
put_task_struct(owning_process);
|
||||
free_page((unsigned long)local_page_list);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
|
||||
|
||||
void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
|
||||
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
|
||||
u64 bound)
|
||||
{
|
||||
struct ib_umem *umem = &umem_odp->umem;
|
||||
int idx;
|
||||
u64 addr;
|
||||
struct ib_device *dev = umem->context->device;
|
||||
@ -770,12 +721,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
|
||||
* faults from completion. We might be racing with other
|
||||
* invalidations, so we must make sure we free each page only
|
||||
* once. */
|
||||
mutex_lock(&umem->odp_data->umem_mutex);
|
||||
mutex_lock(&umem_odp->umem_mutex);
|
||||
for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
|
||||
idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
|
||||
if (umem->odp_data->page_list[idx]) {
|
||||
struct page *page = umem->odp_data->page_list[idx];
|
||||
dma_addr_t dma = umem->odp_data->dma_list[idx];
|
||||
if (umem_odp->page_list[idx]) {
|
||||
struct page *page = umem_odp->page_list[idx];
|
||||
dma_addr_t dma = umem_odp->dma_list[idx];
|
||||
dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
|
||||
|
||||
WARN_ON(!dma_addr);
|
||||
@ -798,12 +749,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
|
||||
/* on demand pinning support */
|
||||
if (!umem->context->invalidate_range)
|
||||
put_page(page);
|
||||
umem->odp_data->page_list[idx] = NULL;
|
||||
umem->odp_data->dma_list[idx] = 0;
|
||||
umem_odp->page_list[idx] = NULL;
|
||||
umem_odp->dma_list[idx] = 0;
|
||||
umem->npages--;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&umem->odp_data->umem_mutex);
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
|
||||
|
||||
@ -830,7 +781,7 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
|
||||
return -EAGAIN;
|
||||
next = rbt_ib_umem_iter_next(node, start, last - 1);
|
||||
umem = container_of(node, struct ib_umem_odp, interval_tree);
|
||||
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
|
||||
ret_val = cb(umem, start, last, cookie) || ret_val;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
|
@ -138,7 +138,7 @@ static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
|
||||
static dev_t dynamic_umad_dev;
|
||||
static dev_t dynamic_issm_dev;
|
||||
|
||||
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
|
||||
static DEFINE_IDA(umad_ida);
|
||||
|
||||
static void ib_umad_add_one(struct ib_device *device);
|
||||
static void ib_umad_remove_one(struct ib_device *device, void *client_data);
|
||||
@ -1132,7 +1132,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
|
||||
if (!port)
|
||||
return -ENODEV;
|
||||
|
||||
return sprintf(buf, "%s\n", port->ib_dev->name);
|
||||
return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev));
|
||||
}
|
||||
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
|
||||
|
||||
@ -1159,11 +1159,10 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
|
||||
dev_t base_umad;
|
||||
dev_t base_issm;
|
||||
|
||||
devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
|
||||
if (devnum >= IB_UMAD_MAX_PORTS)
|
||||
devnum = ida_alloc_max(&umad_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL);
|
||||
if (devnum < 0)
|
||||
return -1;
|
||||
port->dev_num = devnum;
|
||||
set_bit(devnum, dev_map);
|
||||
if (devnum >= IB_UMAD_NUM_FIXED_MINOR) {
|
||||
base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
|
||||
base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
|
||||
@ -1227,7 +1226,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
|
||||
|
||||
err_cdev:
|
||||
cdev_del(&port->cdev);
|
||||
clear_bit(devnum, dev_map);
|
||||
ida_free(&umad_ida, devnum);
|
||||
|
||||
return -1;
|
||||
}
|
||||
@ -1261,7 +1260,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
|
||||
}
|
||||
|
||||
mutex_unlock(&port->file_mutex);
|
||||
clear_bit(port->dev_num, dev_map);
|
||||
ida_free(&umad_ida, port->dev_num);
|
||||
}
|
||||
|
||||
static void ib_umad_add_one(struct ib_device *device)
|
||||
|
@ -100,13 +100,14 @@ struct ib_uverbs_device {
|
||||
atomic_t refcount;
|
||||
int num_comp_vectors;
|
||||
struct completion comp;
|
||||
struct device *dev;
|
||||
struct device dev;
|
||||
/* First group for device attributes, NULL terminated array */
|
||||
const struct attribute_group *groups[2];
|
||||
struct ib_device __rcu *ib_dev;
|
||||
int devnum;
|
||||
struct cdev cdev;
|
||||
struct rb_root xrcd_tree;
|
||||
struct mutex xrcd_tree_mutex;
|
||||
struct kobject kobj;
|
||||
struct srcu_struct disassociate_srcu;
|
||||
struct mutex lists_mutex; /* protect lists */
|
||||
struct list_head uverbs_file_list;
|
||||
@ -146,7 +147,6 @@ struct ib_uverbs_file {
|
||||
struct ib_event_handler event_handler;
|
||||
struct ib_uverbs_async_event_file *async_file;
|
||||
struct list_head list;
|
||||
int is_closed;
|
||||
|
||||
/*
|
||||
* To access the uobjects list hw_destroy_rwsem must be held for write
|
||||
@ -158,6 +158,9 @@ struct ib_uverbs_file {
|
||||
spinlock_t uobjects_lock;
|
||||
struct list_head uobjects;
|
||||
|
||||
struct mutex umap_lock;
|
||||
struct list_head umaps;
|
||||
|
||||
u64 uverbs_cmd_mask;
|
||||
u64 uverbs_ex_cmd_mask;
|
||||
|
||||
@ -218,12 +221,6 @@ struct ib_ucq_object {
|
||||
u32 async_events_reported;
|
||||
};
|
||||
|
||||
struct ib_uflow_resources;
|
||||
struct ib_uflow_object {
|
||||
struct ib_uobject uobject;
|
||||
struct ib_uflow_resources *resources;
|
||||
};
|
||||
|
||||
extern const struct file_operations uverbs_event_fops;
|
||||
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
|
||||
struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
|
||||
|
@ -117,18 +117,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
|
||||
/* ufile is required when some objects are released */
|
||||
ucontext->ufile = file;
|
||||
|
||||
rcu_read_lock();
|
||||
ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
|
||||
rcu_read_unlock();
|
||||
ucontext->closing = 0;
|
||||
ucontext->closing = false;
|
||||
ucontext->cleanup_retryable = false;
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
ucontext->umem_tree = RB_ROOT_CACHED;
|
||||
init_rwsem(&ucontext->umem_rwsem);
|
||||
ucontext->odp_mrs_count = 0;
|
||||
INIT_LIST_HEAD(&ucontext->no_private_counters);
|
||||
|
||||
mutex_init(&ucontext->per_mm_list_lock);
|
||||
INIT_LIST_HEAD(&ucontext->per_mm_list);
|
||||
if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
|
||||
ucontext->invalidate_range = NULL;
|
||||
|
||||
@ -172,7 +166,6 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
|
||||
put_unused_fd(resp.async_fd);
|
||||
|
||||
err_free:
|
||||
put_pid(ucontext->tgid);
|
||||
ib_dev->dealloc_ucontext(ucontext);
|
||||
|
||||
err_alloc:
|
||||
@ -2769,16 +2762,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
|
||||
return ret ? ret : in_len;
|
||||
}
|
||||
|
||||
struct ib_uflow_resources {
|
||||
size_t max;
|
||||
size_t num;
|
||||
size_t collection_num;
|
||||
size_t counters_num;
|
||||
struct ib_counters **counters;
|
||||
struct ib_flow_action **collection;
|
||||
};
|
||||
|
||||
static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
|
||||
struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
|
||||
{
|
||||
struct ib_uflow_resources *resources;
|
||||
|
||||
@ -2808,6 +2792,7 @@ static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
|
||||
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(flow_resources_alloc);
|
||||
|
||||
void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
|
||||
{
|
||||
@ -2826,10 +2811,11 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
|
||||
kfree(uflow_res->counters);
|
||||
kfree(uflow_res);
|
||||
}
|
||||
EXPORT_SYMBOL(ib_uverbs_flow_resources_free);
|
||||
|
||||
static void flow_resources_add(struct ib_uflow_resources *uflow_res,
|
||||
enum ib_flow_spec_type type,
|
||||
void *ibobj)
|
||||
void flow_resources_add(struct ib_uflow_resources *uflow_res,
|
||||
enum ib_flow_spec_type type,
|
||||
void *ibobj)
|
||||
{
|
||||
WARN_ON(uflow_res->num >= uflow_res->max);
|
||||
|
||||
@ -2850,6 +2836,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res,
|
||||
|
||||
uflow_res->num++;
|
||||
}
|
||||
EXPORT_SYMBOL(flow_resources_add);
|
||||
|
||||
static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
|
||||
struct ib_uverbs_flow_spec *kern_spec,
|
||||
@ -3484,7 +3471,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
|
||||
struct ib_uverbs_create_flow cmd;
|
||||
struct ib_uverbs_create_flow_resp resp;
|
||||
struct ib_uobject *uobj;
|
||||
struct ib_uflow_object *uflow;
|
||||
struct ib_flow *flow_id;
|
||||
struct ib_uverbs_flow_attr *kern_flow_attr;
|
||||
struct ib_flow_attr *flow_attr;
|
||||
@ -3623,13 +3609,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
|
||||
err = PTR_ERR(flow_id);
|
||||
goto err_free;
|
||||
}
|
||||
atomic_inc(&qp->usecnt);
|
||||
flow_id->qp = qp;
|
||||
flow_id->device = qp->device;
|
||||
flow_id->uobject = uobj;
|
||||
uobj->object = flow_id;
|
||||
uflow = container_of(uobj, typeof(*uflow), uobject);
|
||||
uflow->resources = uflow_res;
|
||||
|
||||
ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res);
|
||||
|
||||
memset(&resp, 0, sizeof(resp));
|
||||
resp.flow_handle = uobj->id;
|
||||
|
@ -57,6 +57,7 @@ struct bundle_priv {
|
||||
struct ib_uverbs_attr *uattrs;
|
||||
|
||||
DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
|
||||
DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN);
|
||||
|
||||
/*
|
||||
* Must be last. bundle ends in a flex array which overlaps
|
||||
@ -143,6 +144,86 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
|
||||
0, uattr->len - len);
|
||||
}
|
||||
|
||||
static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
|
||||
const struct uverbs_api_attr *attr_uapi,
|
||||
struct uverbs_objs_arr_attr *attr,
|
||||
struct ib_uverbs_attr *uattr,
|
||||
u32 attr_bkey)
|
||||
{
|
||||
const struct uverbs_attr_spec *spec = &attr_uapi->spec;
|
||||
size_t array_len;
|
||||
u32 *idr_vals;
|
||||
int ret = 0;
|
||||
size_t i;
|
||||
|
||||
if (uattr->attr_data.reserved)
|
||||
return -EINVAL;
|
||||
|
||||
if (uattr->len % sizeof(u32))
|
||||
return -EINVAL;
|
||||
|
||||
array_len = uattr->len / sizeof(u32);
|
||||
if (array_len < spec->u2.objs_arr.min_len ||
|
||||
array_len > spec->u2.objs_arr.max_len)
|
||||
return -EINVAL;
|
||||
|
||||
attr->uobjects =
|
||||
uverbs_alloc(&pbundle->bundle,
|
||||
array_size(array_len, sizeof(*attr->uobjects)));
|
||||
if (IS_ERR(attr->uobjects))
|
||||
return PTR_ERR(attr->uobjects);
|
||||
|
||||
/*
|
||||
* Since idr is 4B and *uobjects is >= 4B, we can use attr->uobjects
|
||||
* to store idrs array and avoid additional memory allocation. The
|
||||
* idrs array is offset to the end of the uobjects array so we will be
|
||||
* able to read idr and replace with a pointer.
|
||||
*/
|
||||
idr_vals = (u32 *)(attr->uobjects + array_len) - array_len;
|
||||
|
||||
if (uattr->len > sizeof(uattr->data)) {
|
||||
ret = copy_from_user(idr_vals, u64_to_user_ptr(uattr->data),
|
||||
uattr->len);
|
||||
if (ret)
|
||||
return -EFAULT;
|
||||
} else {
|
||||
memcpy(idr_vals, &uattr->data, uattr->len);
|
||||
}
|
||||
|
||||
for (i = 0; i != array_len; i++) {
|
||||
attr->uobjects[i] = uverbs_get_uobject_from_file(
|
||||
spec->u2.objs_arr.obj_type, pbundle->bundle.ufile,
|
||||
spec->u2.objs_arr.access, idr_vals[i]);
|
||||
if (IS_ERR(attr->uobjects[i])) {
|
||||
ret = PTR_ERR(attr->uobjects[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
attr->len = i;
|
||||
__set_bit(attr_bkey, pbundle->spec_finalize);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
|
||||
struct uverbs_objs_arr_attr *attr,
|
||||
bool commit)
|
||||
{
|
||||
const struct uverbs_attr_spec *spec = &attr_uapi->spec;
|
||||
int current_ret;
|
||||
int ret = 0;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i != attr->len; i++) {
|
||||
current_ret = uverbs_finalize_object(
|
||||
attr->uobjects[i], spec->u2.objs_arr.access, commit);
|
||||
if (!ret)
|
||||
ret = current_ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int uverbs_process_attr(struct bundle_priv *pbundle,
|
||||
const struct uverbs_api_attr *attr_uapi,
|
||||
struct ib_uverbs_attr *uattr, u32 attr_bkey)
|
||||
@ -246,6 +327,11 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case UVERBS_ATTR_TYPE_IDRS_ARRAY:
|
||||
return uverbs_process_idrs_array(pbundle, attr_uapi,
|
||||
&e->objs_arr_attr, uattr,
|
||||
attr_bkey);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
@ -300,8 +386,7 @@ static int uverbs_set_attr(struct bundle_priv *pbundle,
|
||||
return -EPROTONOSUPPORT;
|
||||
return 0;
|
||||
}
|
||||
attr = srcu_dereference(
|
||||
*slot, &pbundle->bundle.ufile->device->disassociate_srcu);
|
||||
attr = rcu_dereference_protected(*slot, true);
|
||||
|
||||
/* Reject duplicate attributes from user-space */
|
||||
if (test_bit(attr_bkey, pbundle->bundle.attr_present))
|
||||
@ -384,6 +469,7 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
|
||||
unsigned int i;
|
||||
int ret = 0;
|
||||
|
||||
/* fast path for simple uobjects */
|
||||
i = -1;
|
||||
while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
|
||||
i + 1)) < key_bitmap_len) {
|
||||
@ -397,6 +483,30 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
|
||||
ret = current_ret;
|
||||
}
|
||||
|
||||
i = -1;
|
||||
while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len,
|
||||
i + 1)) < key_bitmap_len) {
|
||||
struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
|
||||
const struct uverbs_api_attr *attr_uapi;
|
||||
void __rcu **slot;
|
||||
int current_ret;
|
||||
|
||||
slot = uapi_get_attr_for_method(
|
||||
pbundle,
|
||||
pbundle->method_key | uapi_bkey_to_key_attr(i));
|
||||
if (WARN_ON(!slot))
|
||||
continue;
|
||||
|
||||
attr_uapi = rcu_dereference_protected(*slot, true);
|
||||
|
||||
if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
|
||||
current_ret = uverbs_free_idrs_array(
|
||||
attr_uapi, &attr->objs_arr_attr, commit);
|
||||
if (!ret)
|
||||
ret = current_ret;
|
||||
}
|
||||
}
|
||||
|
||||
for (memblock = pbundle->allocated_mem; memblock;) {
|
||||
struct bundle_alloc_head *tmp = memblock;
|
||||
|
||||
@ -429,7 +539,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
|
||||
uapi_key_ioctl_method(hdr->method_id));
|
||||
if (unlikely(!slot))
|
||||
return -EPROTONOSUPPORT;
|
||||
method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu);
|
||||
method_elm = rcu_dereference_protected(*slot, true);
|
||||
|
||||
if (!method_elm->use_stack) {
|
||||
pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
|
||||
@ -461,6 +571,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
|
||||
memset(pbundle->bundle.attr_present, 0,
|
||||
sizeof(pbundle->bundle.attr_present));
|
||||
memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
|
||||
memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
|
||||
|
||||
ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
|
||||
destroy_ret = bundle_destroy(pbundle, ret == 0);
|
||||
@ -611,3 +722,26 @@ int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(uverbs_copy_to);
|
||||
|
||||
int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
|
||||
size_t idx, s64 lower_bound, u64 upper_bound,
|
||||
s64 *def_val)
|
||||
{
|
||||
const struct uverbs_attr *attr;
|
||||
|
||||
attr = uverbs_attr_get(attrs_bundle, idx);
|
||||
if (IS_ERR(attr)) {
|
||||
if ((PTR_ERR(attr) != -ENOENT) || !def_val)
|
||||
return PTR_ERR(attr);
|
||||
|
||||
*to = *def_val;
|
||||
} else {
|
||||
*to = attr->ptr_attr.data;
|
||||
}
|
||||
|
||||
if (*to < lower_bound || (*to > 0 && (u64)*to > upper_bound))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(_uverbs_get_const);
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include <linux/cdev.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
@ -72,7 +73,7 @@ enum {
|
||||
static dev_t dynamic_uverbs_dev;
|
||||
static struct class *uverbs_class;
|
||||
|
||||
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
|
||||
static DEFINE_IDA(uverbs_ida);
|
||||
|
||||
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
|
||||
const char __user *buf, int in_len,
|
||||
@ -169,20 +170,16 @@ int uverbs_dealloc_mw(struct ib_mw *mw)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ib_uverbs_release_dev(struct kobject *kobj)
|
||||
static void ib_uverbs_release_dev(struct device *device)
|
||||
{
|
||||
struct ib_uverbs_device *dev =
|
||||
container_of(kobj, struct ib_uverbs_device, kobj);
|
||||
container_of(device, struct ib_uverbs_device, dev);
|
||||
|
||||
uverbs_destroy_api(dev->uapi);
|
||||
cleanup_srcu_struct(&dev->disassociate_srcu);
|
||||
kfree(dev);
|
||||
}
|
||||
|
||||
static struct kobj_type ib_uverbs_dev_ktype = {
|
||||
.release = ib_uverbs_release_dev,
|
||||
};
|
||||
|
||||
static void ib_uverbs_release_async_event_file(struct kref *ref)
|
||||
{
|
||||
struct ib_uverbs_async_event_file *file =
|
||||
@ -265,7 +262,7 @@ void ib_uverbs_release_file(struct kref *ref)
|
||||
if (atomic_dec_and_test(&file->device->refcount))
|
||||
ib_uverbs_comp_dev(file->device);
|
||||
|
||||
kobject_put(&file->device->kobj);
|
||||
put_device(&file->device->dev);
|
||||
kfree(file);
|
||||
}
|
||||
|
||||
@ -816,6 +813,226 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Each time we map IO memory into user space this keeps track of the mapping.
|
||||
* When the device is hot-unplugged we 'zap' the mmaps in user space to point
|
||||
* to the zero page and allow the hot unplug to proceed.
|
||||
*
|
||||
* This is necessary for cases like PCI physical hot unplug as the actual BAR
|
||||
* memory may vanish after this and access to it from userspace could MCE.
|
||||
*
|
||||
* RDMA drivers supporting disassociation must have their user space designed
|
||||
* to cope in some way with their IO pages going to the zero page.
|
||||
*/
|
||||
struct rdma_umap_priv {
|
||||
struct vm_area_struct *vma;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
static const struct vm_operations_struct rdma_umap_ops;
|
||||
|
||||
static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
struct ib_uverbs_file *ufile = vma->vm_file->private_data;
|
||||
|
||||
priv->vma = vma;
|
||||
vma->vm_private_data = priv;
|
||||
vma->vm_ops = &rdma_umap_ops;
|
||||
|
||||
mutex_lock(&ufile->umap_lock);
|
||||
list_add(&priv->list, &ufile->umaps);
|
||||
mutex_unlock(&ufile->umap_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* The VMA has been dup'd, initialize the vm_private_data with a new tracking
|
||||
* struct
|
||||
*/
|
||||
static void rdma_umap_open(struct vm_area_struct *vma)
|
||||
{
|
||||
struct ib_uverbs_file *ufile = vma->vm_file->private_data;
|
||||
struct rdma_umap_priv *opriv = vma->vm_private_data;
|
||||
struct rdma_umap_priv *priv;
|
||||
|
||||
if (!opriv)
|
||||
return;
|
||||
|
||||
/* We are racing with disassociation */
|
||||
if (!down_read_trylock(&ufile->hw_destroy_rwsem))
|
||||
goto out_zap;
|
||||
/*
|
||||
* Disassociation already completed, the VMA should already be zapped.
|
||||
*/
|
||||
if (!ufile->ucontext)
|
||||
goto out_unlock;
|
||||
|
||||
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
|
||||
if (!priv)
|
||||
goto out_unlock;
|
||||
rdma_umap_priv_init(priv, vma);
|
||||
|
||||
up_read(&ufile->hw_destroy_rwsem);
|
||||
return;
|
||||
|
||||
out_unlock:
|
||||
up_read(&ufile->hw_destroy_rwsem);
|
||||
out_zap:
|
||||
/*
|
||||
* We can't allow the VMA to be created with the actual IO pages, that
|
||||
* would break our API contract, and it can't be stopped at this
|
||||
* point, so zap it.
|
||||
*/
|
||||
vma->vm_private_data = NULL;
|
||||
zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
|
||||
}
|
||||
|
||||
static void rdma_umap_close(struct vm_area_struct *vma)
|
||||
{
|
||||
struct ib_uverbs_file *ufile = vma->vm_file->private_data;
|
||||
struct rdma_umap_priv *priv = vma->vm_private_data;
|
||||
|
||||
if (!priv)
|
||||
return;
|
||||
|
||||
/*
|
||||
* The vma holds a reference on the struct file that created it, which
|
||||
* in turn means that the ib_uverbs_file is guaranteed to exist at
|
||||
* this point.
|
||||
*/
|
||||
mutex_lock(&ufile->umap_lock);
|
||||
list_del(&priv->list);
|
||||
mutex_unlock(&ufile->umap_lock);
|
||||
kfree(priv);
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct rdma_umap_ops = {
|
||||
.open = rdma_umap_open,
|
||||
.close = rdma_umap_close,
|
||||
};
|
||||
|
||||
static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long size)
|
||||
{
|
||||
struct ib_uverbs_file *ufile = ucontext->ufile;
|
||||
struct rdma_umap_priv *priv;
|
||||
|
||||
if (vma->vm_end - vma->vm_start != size)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
/* Driver is using this wrong, must be called by ib_uverbs_mmap */
|
||||
if (WARN_ON(!vma->vm_file ||
|
||||
vma->vm_file->private_data != ufile))
|
||||
return ERR_PTR(-EINVAL);
|
||||
lockdep_assert_held(&ufile->device->disassociate_srcu);
|
||||
|
||||
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
|
||||
if (!priv)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
return priv;
|
||||
}
|
||||
|
||||
/*
|
||||
* Map IO memory into a process. This is to be called by drivers as part of
|
||||
* their mmap() functions if they wish to send something like PCI-E BAR memory
|
||||
* to userspace.
|
||||
*/
|
||||
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
|
||||
unsigned long pfn, unsigned long size, pgprot_t prot)
|
||||
{
|
||||
struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
|
||||
|
||||
if (IS_ERR(priv))
|
||||
return PTR_ERR(priv);
|
||||
|
||||
vma->vm_page_prot = prot;
|
||||
if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
|
||||
kfree(priv);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
rdma_umap_priv_init(priv, vma);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_user_mmap_io);
|
||||
|
||||
/*
|
||||
* The page case is here for a slightly different reason, the driver expects
|
||||
* to be able to free the page it is sharing to user space when it destroys
|
||||
* its ucontext, which means we need to zap the user space references.
|
||||
*
|
||||
* We could handle this differently by providing an API to allocate a shared
|
||||
* page and then only freeing the shared page when the last ufile is
|
||||
* destroyed.
|
||||
*/
|
||||
int rdma_user_mmap_page(struct ib_ucontext *ucontext,
|
||||
struct vm_area_struct *vma, struct page *page,
|
||||
unsigned long size)
|
||||
{
|
||||
struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
|
||||
|
||||
if (IS_ERR(priv))
|
||||
return PTR_ERR(priv);
|
||||
|
||||
if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size,
|
||||
vma->vm_page_prot)) {
|
||||
kfree(priv);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
rdma_umap_priv_init(priv, vma);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_user_mmap_page);
|
||||
|
||||
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
|
||||
{
|
||||
struct rdma_umap_priv *priv, *next_priv;
|
||||
|
||||
lockdep_assert_held(&ufile->hw_destroy_rwsem);
|
||||
|
||||
while (1) {
|
||||
struct mm_struct *mm = NULL;
|
||||
|
||||
/* Get an arbitrary mm pointer that hasn't been cleaned yet */
|
||||
mutex_lock(&ufile->umap_lock);
|
||||
if (!list_empty(&ufile->umaps)) {
|
||||
mm = list_first_entry(&ufile->umaps,
|
||||
struct rdma_umap_priv, list)
|
||||
->vma->vm_mm;
|
||||
mmget(mm);
|
||||
}
|
||||
mutex_unlock(&ufile->umap_lock);
|
||||
if (!mm)
|
||||
return;
|
||||
|
||||
/*
|
||||
* The umap_lock is nested under mmap_sem since it used within
|
||||
* the vma_ops callbacks, so we have to clean the list one mm
|
||||
* at a time to get the lock ordering right. Typically there
|
||||
* will only be one mm, so no big deal.
|
||||
*/
|
||||
down_write(&mm->mmap_sem);
|
||||
mutex_lock(&ufile->umap_lock);
|
||||
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
|
||||
list) {
|
||||
struct vm_area_struct *vma = priv->vma;
|
||||
|
||||
if (vma->vm_mm != mm)
|
||||
continue;
|
||||
list_del_init(&priv->list);
|
||||
|
||||
zap_vma_ptes(vma, vma->vm_start,
|
||||
vma->vm_end - vma->vm_start);
|
||||
vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
|
||||
}
|
||||
mutex_unlock(&ufile->umap_lock);
|
||||
up_write(&mm->mmap_sem);
|
||||
mmput(mm);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* ib_uverbs_open() does not need the BKL:
|
||||
*
|
||||
@ -839,6 +1056,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
|
||||
if (!atomic_inc_not_zero(&dev->refcount))
|
||||
return -ENXIO;
|
||||
|
||||
get_device(&dev->dev);
|
||||
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
|
||||
mutex_lock(&dev->lists_mutex);
|
||||
ib_dev = srcu_dereference(dev->ib_dev,
|
||||
@ -876,9 +1094,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
|
||||
spin_lock_init(&file->uobjects_lock);
|
||||
INIT_LIST_HEAD(&file->uobjects);
|
||||
init_rwsem(&file->hw_destroy_rwsem);
|
||||
mutex_init(&file->umap_lock);
|
||||
INIT_LIST_HEAD(&file->umaps);
|
||||
|
||||
filp->private_data = file;
|
||||
kobject_get(&dev->kobj);
|
||||
list_add_tail(&file->list, &dev->uverbs_file_list);
|
||||
mutex_unlock(&dev->lists_mutex);
|
||||
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
|
||||
@ -899,6 +1118,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
|
||||
if (atomic_dec_and_test(&dev->refcount))
|
||||
ib_uverbs_comp_dev(dev);
|
||||
|
||||
put_device(&dev->dev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -909,10 +1129,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
|
||||
uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
|
||||
|
||||
mutex_lock(&file->device->lists_mutex);
|
||||
if (!file->is_closed) {
|
||||
list_del(&file->list);
|
||||
file->is_closed = 1;
|
||||
}
|
||||
list_del_init(&file->list);
|
||||
mutex_unlock(&file->device->lists_mutex);
|
||||
|
||||
if (file->async_file)
|
||||
@ -951,37 +1168,34 @@ static struct ib_client uverbs_client = {
|
||||
.remove = ib_uverbs_remove_one
|
||||
};
|
||||
|
||||
static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
|
||||
static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct ib_uverbs_device *dev =
|
||||
container_of(device, struct ib_uverbs_device, dev);
|
||||
int ret = -ENODEV;
|
||||
int srcu_key;
|
||||
struct ib_uverbs_device *dev = dev_get_drvdata(device);
|
||||
struct ib_device *ib_dev;
|
||||
|
||||
if (!dev)
|
||||
return -ENODEV;
|
||||
|
||||
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
|
||||
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
|
||||
if (ib_dev)
|
||||
ret = sprintf(buf, "%s\n", ib_dev->name);
|
||||
ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
|
||||
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
|
||||
|
||||
return ret;
|
||||
}
|
||||
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
|
||||
static DEVICE_ATTR_RO(ibdev);
|
||||
|
||||
static ssize_t show_dev_abi_version(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
static ssize_t abi_version_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct ib_uverbs_device *dev = dev_get_drvdata(device);
|
||||
struct ib_uverbs_device *dev =
|
||||
container_of(device, struct ib_uverbs_device, dev);
|
||||
int ret = -ENODEV;
|
||||
int srcu_key;
|
||||
struct ib_device *ib_dev;
|
||||
|
||||
if (!dev)
|
||||
return -ENODEV;
|
||||
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
|
||||
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
|
||||
if (ib_dev)
|
||||
@ -990,7 +1204,17 @@ static ssize_t show_dev_abi_version(struct device *device,
|
||||
|
||||
return ret;
|
||||
}
|
||||
static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
|
||||
static DEVICE_ATTR_RO(abi_version);
|
||||
|
||||
static struct attribute *ib_dev_attrs[] = {
|
||||
&dev_attr_abi_version.attr,
|
||||
&dev_attr_ibdev.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group dev_attr_group = {
|
||||
.attrs = ib_dev_attrs,
|
||||
};
|
||||
|
||||
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
|
||||
__stringify(IB_USER_VERBS_ABI_VERSION));
|
||||
@ -1028,65 +1252,56 @@ static void ib_uverbs_add_one(struct ib_device *device)
|
||||
return;
|
||||
}
|
||||
|
||||
device_initialize(&uverbs_dev->dev);
|
||||
uverbs_dev->dev.class = uverbs_class;
|
||||
uverbs_dev->dev.parent = device->dev.parent;
|
||||
uverbs_dev->dev.release = ib_uverbs_release_dev;
|
||||
uverbs_dev->groups[0] = &dev_attr_group;
|
||||
uverbs_dev->dev.groups = uverbs_dev->groups;
|
||||
atomic_set(&uverbs_dev->refcount, 1);
|
||||
init_completion(&uverbs_dev->comp);
|
||||
uverbs_dev->xrcd_tree = RB_ROOT;
|
||||
mutex_init(&uverbs_dev->xrcd_tree_mutex);
|
||||
kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
|
||||
mutex_init(&uverbs_dev->lists_mutex);
|
||||
INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
|
||||
INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
|
||||
rcu_assign_pointer(uverbs_dev->ib_dev, device);
|
||||
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
|
||||
|
||||
devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
|
||||
if (devnum >= IB_UVERBS_MAX_DEVICES)
|
||||
devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
|
||||
GFP_KERNEL);
|
||||
if (devnum < 0)
|
||||
goto err;
|
||||
uverbs_dev->devnum = devnum;
|
||||
set_bit(devnum, dev_map);
|
||||
if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
|
||||
base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
|
||||
else
|
||||
base = IB_UVERBS_BASE_DEV + devnum;
|
||||
|
||||
rcu_assign_pointer(uverbs_dev->ib_dev, device);
|
||||
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
|
||||
|
||||
if (ib_uverbs_create_uapi(device, uverbs_dev))
|
||||
goto err_uapi;
|
||||
|
||||
cdev_init(&uverbs_dev->cdev, NULL);
|
||||
uverbs_dev->dev.devt = base;
|
||||
dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);
|
||||
|
||||
cdev_init(&uverbs_dev->cdev,
|
||||
device->mmap ? &uverbs_mmap_fops : &uverbs_fops);
|
||||
uverbs_dev->cdev.owner = THIS_MODULE;
|
||||
uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
|
||||
cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj);
|
||||
kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
|
||||
if (cdev_add(&uverbs_dev->cdev, base, 1))
|
||||
goto err_cdev;
|
||||
|
||||
uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
|
||||
uverbs_dev->cdev.dev, uverbs_dev,
|
||||
"uverbs%d", uverbs_dev->devnum);
|
||||
if (IS_ERR(uverbs_dev->dev))
|
||||
goto err_cdev;
|
||||
|
||||
if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
|
||||
goto err_class;
|
||||
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
|
||||
goto err_class;
|
||||
ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
|
||||
if (ret)
|
||||
goto err_uapi;
|
||||
|
||||
ib_set_client_data(device, &uverbs_client, uverbs_dev);
|
||||
|
||||
return;
|
||||
|
||||
err_class:
|
||||
device_destroy(uverbs_class, uverbs_dev->cdev.dev);
|
||||
err_cdev:
|
||||
cdev_del(&uverbs_dev->cdev);
|
||||
err_uapi:
|
||||
clear_bit(devnum, dev_map);
|
||||
ida_free(&uverbs_ida, devnum);
|
||||
err:
|
||||
if (atomic_dec_and_test(&uverbs_dev->refcount))
|
||||
ib_uverbs_comp_dev(uverbs_dev);
|
||||
wait_for_completion(&uverbs_dev->comp);
|
||||
kobject_put(&uverbs_dev->kobj);
|
||||
put_device(&uverbs_dev->dev);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1107,8 +1322,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
|
||||
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
|
||||
file = list_first_entry(&uverbs_dev->uverbs_file_list,
|
||||
struct ib_uverbs_file, list);
|
||||
file->is_closed = 1;
|
||||
list_del(&file->list);
|
||||
list_del_init(&file->list);
|
||||
kref_get(&file->ref);
|
||||
|
||||
/* We must release the mutex before going ahead and calling
|
||||
@ -1156,10 +1370,8 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
|
||||
if (!uverbs_dev)
|
||||
return;
|
||||
|
||||
dev_set_drvdata(uverbs_dev->dev, NULL);
|
||||
device_destroy(uverbs_class, uverbs_dev->cdev.dev);
|
||||
cdev_del(&uverbs_dev->cdev);
|
||||
clear_bit(uverbs_dev->devnum, dev_map);
|
||||
cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
|
||||
ida_free(&uverbs_ida, uverbs_dev->devnum);
|
||||
|
||||
if (device->disassociate_ucontext) {
|
||||
/* We disassociate HW resources and immediately return.
|
||||
@ -1182,7 +1394,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
|
||||
if (wait_clients)
|
||||
wait_for_completion(&uverbs_dev->comp);
|
||||
|
||||
kobject_put(&uverbs_dev->kobj);
|
||||
put_device(&uverbs_dev->dev);
|
||||
}
|
||||
|
||||
static char *uverbs_devnode(struct device *dev, umode_t *mode)
|
||||
|
@ -326,11 +326,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
|
||||
if (IS_ERR(action))
|
||||
return PTR_ERR(action);
|
||||
|
||||
atomic_set(&action->usecnt, 0);
|
||||
action->device = ib_dev;
|
||||
action->type = IB_FLOW_ACTION_ESP;
|
||||
action->uobject = uobj;
|
||||
uobj->object = action;
|
||||
uverbs_flow_action_fill_action(action, uobj, ib_dev,
|
||||
IB_FLOW_ACTION_ESP);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -73,6 +73,18 @@ static int uapi_merge_method(struct uverbs_api *uapi,
|
||||
if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN)
|
||||
method_elm->driver_method |= is_driver;
|
||||
|
||||
/*
|
||||
* Like other uobject based things we only support a single
|
||||
* uobject being NEW'd or DESTROY'd
|
||||
*/
|
||||
if (attr->attr.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
|
||||
u8 access = attr->attr.u2.objs_arr.access;
|
||||
|
||||
if (WARN_ON(access == UVERBS_ACCESS_NEW ||
|
||||
access == UVERBS_ACCESS_DESTROY))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
attr_slot =
|
||||
uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id),
|
||||
sizeof(*attr_slot));
|
||||
|
@ -264,7 +264,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
|
||||
}
|
||||
|
||||
pd->res.type = RDMA_RESTRACK_PD;
|
||||
pd->res.kern_name = caller;
|
||||
rdma_restrack_set_task(&pd->res, caller);
|
||||
rdma_restrack_add(&pd->res);
|
||||
|
||||
if (mr_access_flags) {
|
||||
@ -710,7 +710,7 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
|
||||
|
||||
ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
|
||||
ah_attr->roce.dmac,
|
||||
sgid_attr->ndev, &hop_limit);
|
||||
sgid_attr, &hop_limit);
|
||||
|
||||
grh->hop_limit = hop_limit;
|
||||
return ret;
|
||||
@ -1509,8 +1509,7 @@ static const struct {
|
||||
};
|
||||
|
||||
bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
|
||||
enum ib_qp_type type, enum ib_qp_attr_mask mask,
|
||||
enum rdma_link_layer ll)
|
||||
enum ib_qp_type type, enum ib_qp_attr_mask mask)
|
||||
{
|
||||
enum ib_qp_attr_mask req_param, opt_param;
|
||||
|
||||
@ -1629,14 +1628,16 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
|
||||
|
||||
if (rdma_ib_or_roce(qp->device, port)) {
|
||||
if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
|
||||
pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
|
||||
__func__, qp->device->name);
|
||||
dev_warn(&qp->device->dev,
|
||||
"%s rq_psn overflow, masking to 24 bits\n",
|
||||
__func__);
|
||||
attr->rq_psn &= 0xffffff;
|
||||
}
|
||||
|
||||
if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
|
||||
pr_warn("%s: %s sq_psn overflow, masking to 24 bits\n",
|
||||
__func__, qp->device->name);
|
||||
dev_warn(&qp->device->dev,
|
||||
" %s sq_psn overflow, masking to 24 bits\n",
|
||||
__func__);
|
||||
attr->sq_psn &= 0xffffff;
|
||||
}
|
||||
}
|
||||
@ -1888,7 +1889,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
|
||||
cq->cq_context = cq_context;
|
||||
atomic_set(&cq->usecnt, 0);
|
||||
cq->res.type = RDMA_RESTRACK_CQ;
|
||||
cq->res.kern_name = caller;
|
||||
rdma_restrack_set_task(&cq->res, caller);
|
||||
rdma_restrack_add(&cq->res);
|
||||
}
|
||||
|
||||
|
@ -40,7 +40,6 @@
|
||||
#ifndef __BNXT_RE_H__
|
||||
#define __BNXT_RE_H__
|
||||
#define ROCE_DRV_MODULE_NAME "bnxt_re"
|
||||
#define ROCE_DRV_MODULE_VERSION "1.0.0"
|
||||
|
||||
#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
|
||||
#define BNXT_RE_PAGE_SHIFT_4K (12)
|
||||
@ -120,6 +119,8 @@ struct bnxt_re_dev {
|
||||
#define BNXT_RE_FLAG_HAVE_L2_REF 3
|
||||
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
|
||||
#define BNXT_RE_FLAG_QOS_WORK_REG 5
|
||||
#define BNXT_RE_FLAG_RESOURCES_ALLOCATED 7
|
||||
#define BNXT_RE_FLAG_RESOURCES_INITIALIZED 8
|
||||
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
|
||||
struct net_device *netdev;
|
||||
unsigned int version, major, minor;
|
||||
|
@ -68,6 +68,8 @@ static const char * const bnxt_re_stat_name[] = {
|
||||
[BNXT_RE_TX_PKTS] = "tx_pkts",
|
||||
[BNXT_RE_TX_BYTES] = "tx_bytes",
|
||||
[BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors",
|
||||
[BNXT_RE_RX_DROPS] = "rx_roce_drops",
|
||||
[BNXT_RE_RX_DISCARDS] = "rx_roce_discards",
|
||||
[BNXT_RE_TO_RETRANSMITS] = "to_retransmits",
|
||||
[BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
|
||||
[BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
|
||||
@ -106,7 +108,8 @@ static const char * const bnxt_re_stat_name[] = {
|
||||
[BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err",
|
||||
[BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err",
|
||||
[BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err",
|
||||
[BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err"
|
||||
[BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err",
|
||||
[BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count"
|
||||
};
|
||||
|
||||
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
|
||||
@ -128,6 +131,10 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
|
||||
if (bnxt_re_stats) {
|
||||
stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
|
||||
le64_to_cpu(bnxt_re_stats->tx_bcast_pkts);
|
||||
stats->value[BNXT_RE_RX_DROPS] =
|
||||
le64_to_cpu(bnxt_re_stats->rx_drop_pkts);
|
||||
stats->value[BNXT_RE_RX_DISCARDS] =
|
||||
le64_to_cpu(bnxt_re_stats->rx_discard_pkts);
|
||||
stats->value[BNXT_RE_RX_PKTS] =
|
||||
le64_to_cpu(bnxt_re_stats->rx_ucast_pkts);
|
||||
stats->value[BNXT_RE_RX_BYTES] =
|
||||
@ -220,6 +227,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
|
||||
rdev->stats.res_tx_pci_err;
|
||||
stats->value[BNXT_RE_RES_RX_PCI_ERR] =
|
||||
rdev->stats.res_rx_pci_err;
|
||||
stats->value[BNXT_RE_OUT_OF_SEQ_ERR] =
|
||||
rdev->stats.res_oos_drop_count;
|
||||
}
|
||||
|
||||
return ARRAY_SIZE(bnxt_re_stat_name);
|
||||
|
@ -51,6 +51,8 @@ enum bnxt_re_hw_stats {
|
||||
BNXT_RE_TX_PKTS,
|
||||
BNXT_RE_TX_BYTES,
|
||||
BNXT_RE_RECOVERABLE_ERRORS,
|
||||
BNXT_RE_RX_DROPS,
|
||||
BNXT_RE_RX_DISCARDS,
|
||||
BNXT_RE_TO_RETRANSMITS,
|
||||
BNXT_RE_SEQ_ERR_NAKS_RCVD,
|
||||
BNXT_RE_MAX_RETRY_EXCEEDED,
|
||||
@ -90,6 +92,7 @@ enum bnxt_re_hw_stats {
|
||||
BNXT_RE_RES_SRQ_LOAD_ERR,
|
||||
BNXT_RE_RES_TX_PCI_ERR,
|
||||
BNXT_RE_RES_RX_PCI_ERR,
|
||||
BNXT_RE_OUT_OF_SEQ_ERR,
|
||||
BNXT_RE_NUM_COUNTERS
|
||||
};
|
||||
|
||||
|
@ -1598,8 +1598,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
|
||||
curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
|
||||
new_qp_state = qp_attr->qp_state;
|
||||
if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
|
||||
ib_qp->qp_type, qp_attr_mask,
|
||||
IB_LINK_LAYER_ETHERNET)) {
|
||||
ib_qp->qp_type, qp_attr_mask)) {
|
||||
dev_err(rdev_to_dev(rdev),
|
||||
"Invalid attribute mask: %#x specified ",
|
||||
qp_attr_mask);
|
||||
@ -2664,6 +2663,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
|
||||
nq->budget++;
|
||||
|
||||
atomic_inc(&rdev->cq_count);
|
||||
spin_lock_init(&cq->cq_lock);
|
||||
|
||||
if (context) {
|
||||
struct bnxt_re_cq_resp resp;
|
||||
|
@ -67,7 +67,7 @@
|
||||
#include "hw_counters.h"
|
||||
|
||||
static char version[] =
|
||||
BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
|
||||
BNXT_RE_DESC "\n";
|
||||
|
||||
MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
|
||||
MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
|
||||
@ -535,6 +535,34 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
|
||||
return en_dev;
|
||||
}
|
||||
|
||||
static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
|
||||
}
|
||||
static DEVICE_ATTR_RO(hw_rev);
|
||||
|
||||
static ssize_t hca_type_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
|
||||
}
|
||||
static DEVICE_ATTR_RO(hca_type);
|
||||
|
||||
static struct attribute *bnxt_re_attributes[] = {
|
||||
&dev_attr_hw_rev.attr,
|
||||
&dev_attr_hca_type.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static const struct attribute_group bnxt_re_dev_attr_group = {
|
||||
.attrs = bnxt_re_attributes,
|
||||
};
|
||||
|
||||
static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
|
||||
{
|
||||
ib_unregister_device(&rdev->ibdev);
|
||||
@ -547,7 +575,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
|
||||
/* ib device init */
|
||||
ibdev->owner = THIS_MODULE;
|
||||
ibdev->node_type = RDMA_NODE_IB_CA;
|
||||
strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
|
||||
strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
|
||||
strlen(BNXT_RE_DESC) + 5);
|
||||
ibdev->phys_port_cnt = 1;
|
||||
@ -639,34 +666,11 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
|
||||
ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats;
|
||||
ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats;
|
||||
|
||||
rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
|
||||
ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
|
||||
return ib_register_device(ibdev, NULL);
|
||||
return ib_register_device(ibdev, "bnxt_re%d", NULL);
|
||||
}
|
||||
|
||||
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
|
||||
}
|
||||
|
||||
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
|
||||
static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
|
||||
|
||||
static struct device_attribute *bnxt_re_attributes[] = {
|
||||
&dev_attr_hw_rev,
|
||||
&dev_attr_hca_type
|
||||
};
|
||||
|
||||
static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
|
||||
{
|
||||
dev_put(rdev->netdev);
|
||||
@ -864,10 +868,8 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (rdev->nq[0].hwq.max_elements) {
|
||||
for (i = 1; i < rdev->num_msix; i++)
|
||||
bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
|
||||
}
|
||||
for (i = 1; i < rdev->num_msix; i++)
|
||||
bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
|
||||
|
||||
if (rdev->qplib_res.rcfw)
|
||||
bnxt_qplib_cleanup_res(&rdev->qplib_res);
|
||||
@ -876,6 +878,7 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
|
||||
static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
|
||||
{
|
||||
int rc = 0, i;
|
||||
int num_vec_enabled = 0;
|
||||
|
||||
bnxt_qplib_init_res(&rdev->qplib_res);
|
||||
|
||||
@ -891,9 +894,13 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
|
||||
"Failed to enable NQ with rc = 0x%x", rc);
|
||||
goto fail;
|
||||
}
|
||||
num_vec_enabled++;
|
||||
}
|
||||
return 0;
|
||||
fail:
|
||||
for (i = num_vec_enabled; i >= 0; i--)
|
||||
bnxt_qplib_disable_nq(&rdev->nq[i]);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -925,6 +932,7 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
|
||||
static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
|
||||
{
|
||||
int rc = 0, i;
|
||||
int num_vec_created = 0;
|
||||
|
||||
/* Configure and allocate resources for qplib */
|
||||
rdev->qplib_res.rcfw = &rdev->rcfw;
|
||||
@ -951,7 +959,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
|
||||
if (rc) {
|
||||
dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x",
|
||||
i, rc);
|
||||
goto dealloc_dpi;
|
||||
goto free_nq;
|
||||
}
|
||||
rc = bnxt_re_net_ring_alloc
|
||||
(rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr,
|
||||
@ -964,14 +972,17 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
|
||||
dev_err(rdev_to_dev(rdev),
|
||||
"Failed to allocate NQ fw id with rc = 0x%x",
|
||||
rc);
|
||||
bnxt_qplib_free_nq(&rdev->nq[i]);
|
||||
goto free_nq;
|
||||
}
|
||||
num_vec_created++;
|
||||
}
|
||||
return 0;
|
||||
free_nq:
|
||||
for (i = 0; i < rdev->num_msix - 1; i++)
|
||||
for (i = num_vec_created; i >= 0; i--) {
|
||||
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
|
||||
bnxt_qplib_free_nq(&rdev->nq[i]);
|
||||
dealloc_dpi:
|
||||
}
|
||||
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
|
||||
&rdev->qplib_res.dpi_tbl,
|
||||
&rdev->dpi_privileged);
|
||||
@ -989,12 +1000,17 @@ static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
|
||||
struct ib_event ib_event;
|
||||
|
||||
ib_event.device = ibdev;
|
||||
if (qp)
|
||||
if (qp) {
|
||||
ib_event.element.qp = qp;
|
||||
else
|
||||
ib_event.event = event;
|
||||
if (qp->event_handler)
|
||||
qp->event_handler(&ib_event, qp->qp_context);
|
||||
|
||||
} else {
|
||||
ib_event.element.port_num = port_num;
|
||||
ib_event.event = event;
|
||||
ib_dispatch_event(&ib_event);
|
||||
ib_event.event = event;
|
||||
ib_dispatch_event(&ib_event);
|
||||
}
|
||||
}
|
||||
|
||||
#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02
|
||||
@ -1189,20 +1205,20 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
|
||||
|
||||
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
|
||||
{
|
||||
int i, rc;
|
||||
int rc;
|
||||
|
||||
if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
|
||||
for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
|
||||
device_remove_file(&rdev->ibdev.dev,
|
||||
bnxt_re_attributes[i]);
|
||||
/* Cleanup ib dev */
|
||||
bnxt_re_unregister_ib(rdev);
|
||||
}
|
||||
if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
|
||||
cancel_delayed_work(&rdev->worker);
|
||||
cancel_delayed_work_sync(&rdev->worker);
|
||||
|
||||
bnxt_re_cleanup_res(rdev);
|
||||
bnxt_re_free_res(rdev);
|
||||
if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
|
||||
&rdev->flags))
|
||||
bnxt_re_cleanup_res(rdev);
|
||||
if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags))
|
||||
bnxt_re_free_res(rdev);
|
||||
|
||||
if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
|
||||
rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
|
||||
@ -1241,7 +1257,7 @@ static void bnxt_re_worker(struct work_struct *work)
|
||||
|
||||
static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
|
||||
{
|
||||
int i, j, rc;
|
||||
int rc;
|
||||
|
||||
bool locked;
|
||||
|
||||
@ -1331,12 +1347,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
|
||||
pr_err("Failed to allocate resources: %#x\n", rc);
|
||||
goto fail;
|
||||
}
|
||||
set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
|
||||
rc = bnxt_re_init_res(rdev);
|
||||
if (rc) {
|
||||
pr_err("Failed to initialize resources: %#x\n", rc);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
|
||||
|
||||
if (!rdev->is_virtfn) {
|
||||
rc = bnxt_re_setup_qos(rdev);
|
||||
if (rc)
|
||||
@ -1358,20 +1377,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
|
||||
}
|
||||
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
|
||||
dev_info(rdev_to_dev(rdev), "Device registered successfully");
|
||||
for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
|
||||
rc = device_create_file(&rdev->ibdev.dev,
|
||||
bnxt_re_attributes[i]);
|
||||
if (rc) {
|
||||
dev_err(rdev_to_dev(rdev),
|
||||
"Failed to create IB sysfs: %#x", rc);
|
||||
/* Must clean up all created device files */
|
||||
for (j = 0; j < i; j++)
|
||||
device_remove_file(&rdev->ibdev.dev,
|
||||
bnxt_re_attributes[j]);
|
||||
bnxt_re_unregister_ib(rdev);
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
|
||||
&rdev->active_width);
|
||||
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
|
||||
|
@ -36,6 +36,8 @@
|
||||
* Description: Fast Path Operators
|
||||
*/
|
||||
|
||||
#define dev_fmt(fmt) "QPLIB: " fmt
|
||||
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/sched.h>
|
||||
@ -71,8 +73,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
|
||||
|
||||
if (!qp->sq.flushed) {
|
||||
dev_dbg(&scq->hwq.pdev->dev,
|
||||
"QPLIB: FP: Adding to SQ Flush list = %p",
|
||||
qp);
|
||||
"FP: Adding to SQ Flush list = %p\n", qp);
|
||||
bnxt_qplib_cancel_phantom_processing(qp);
|
||||
list_add_tail(&qp->sq_flush, &scq->sqf_head);
|
||||
qp->sq.flushed = true;
|
||||
@ -80,8 +81,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
|
||||
if (!qp->srq) {
|
||||
if (!qp->rq.flushed) {
|
||||
dev_dbg(&rcq->hwq.pdev->dev,
|
||||
"QPLIB: FP: Adding to RQ Flush list = %p",
|
||||
qp);
|
||||
"FP: Adding to RQ Flush list = %p\n", qp);
|
||||
list_add_tail(&qp->rq_flush, &rcq->rqf_head);
|
||||
qp->rq.flushed = true;
|
||||
}
|
||||
@ -207,7 +207,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
|
||||
if (!qp->sq_hdr_buf) {
|
||||
rc = -ENOMEM;
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: Failed to create sq_hdr_buf");
|
||||
"Failed to create sq_hdr_buf\n");
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
@ -221,7 +221,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
|
||||
if (!qp->rq_hdr_buf) {
|
||||
rc = -ENOMEM;
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: Failed to create rq_hdr_buf");
|
||||
"Failed to create rq_hdr_buf\n");
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
@ -277,8 +277,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
|
||||
num_cqne_processed++;
|
||||
else
|
||||
dev_warn(&nq->pdev->dev,
|
||||
"QPLIB: cqn - type 0x%x not handled",
|
||||
type);
|
||||
"cqn - type 0x%x not handled\n", type);
|
||||
spin_unlock_bh(&cq->compl_lock);
|
||||
break;
|
||||
}
|
||||
@ -298,7 +297,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
|
||||
num_srqne_processed++;
|
||||
else
|
||||
dev_warn(&nq->pdev->dev,
|
||||
"QPLIB: SRQ event 0x%x not handled",
|
||||
"SRQ event 0x%x not handled\n",
|
||||
nqsrqe->event);
|
||||
break;
|
||||
}
|
||||
@ -306,8 +305,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
|
||||
break;
|
||||
default:
|
||||
dev_warn(&nq->pdev->dev,
|
||||
"QPLIB: nqe with type = 0x%x not handled",
|
||||
type);
|
||||
"nqe with type = 0x%x not handled\n", type);
|
||||
break;
|
||||
}
|
||||
raw_cons++;
|
||||
@ -360,7 +358,8 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
|
||||
}
|
||||
|
||||
/* Make sure the HW is stopped! */
|
||||
bnxt_qplib_nq_stop_irq(nq, true);
|
||||
if (nq->requested)
|
||||
bnxt_qplib_nq_stop_irq(nq, true);
|
||||
|
||||
if (nq->bar_reg_iomem)
|
||||
iounmap(nq->bar_reg_iomem);
|
||||
@ -396,7 +395,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
|
||||
rc = irq_set_affinity_hint(nq->vector, &nq->mask);
|
||||
if (rc) {
|
||||
dev_warn(&nq->pdev->dev,
|
||||
"QPLIB: set affinity failed; vector: %d nq_idx: %d\n",
|
||||
"set affinity failed; vector: %d nq_idx: %d\n",
|
||||
nq->vector, nq_indx);
|
||||
}
|
||||
nq->requested = true;
|
||||
@ -443,7 +442,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
|
||||
rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true);
|
||||
if (rc) {
|
||||
dev_err(&nq->pdev->dev,
|
||||
"QPLIB: Failed to request irq for nq-idx %d", nq_idx);
|
||||
"Failed to request irq for nq-idx %d\n", nq_idx);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
@ -662,8 +661,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
|
||||
|
||||
spin_lock(&srq_hwq->lock);
|
||||
if (srq->start_idx == srq->last_idx) {
|
||||
dev_err(&srq_hwq->pdev->dev, "QPLIB: FP: SRQ (0x%x) is full!",
|
||||
srq->id);
|
||||
dev_err(&srq_hwq->pdev->dev,
|
||||
"FP: SRQ (0x%x) is full!\n", srq->id);
|
||||
rc = -EINVAL;
|
||||
spin_unlock(&srq_hwq->lock);
|
||||
goto done;
|
||||
@ -1324,7 +1323,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
|
||||
}
|
||||
}
|
||||
if (i == res->sgid_tbl.max)
|
||||
dev_warn(&res->pdev->dev, "QPLIB: SGID not found??");
|
||||
dev_warn(&res->pdev->dev, "SGID not found??\n");
|
||||
|
||||
qp->ah.hop_limit = sb->hop_limit;
|
||||
qp->ah.traffic_class = sb->traffic_class;
|
||||
@ -1536,7 +1535,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
|
||||
|
||||
if (bnxt_qplib_queue_full(sq)) {
|
||||
dev_err(&sq->hwq.pdev->dev,
|
||||
"QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x",
|
||||
"prod = %#x cons = %#x qdepth = %#x delta = %#x\n",
|
||||
sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
|
||||
sq->q_full_delta);
|
||||
rc = -ENOMEM;
|
||||
@ -1561,7 +1560,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
|
||||
/* Copy the inline data */
|
||||
if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
|
||||
dev_warn(&sq->hwq.pdev->dev,
|
||||
"QPLIB: Inline data length > 96 detected");
|
||||
"Inline data length > 96 detected\n");
|
||||
data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
|
||||
} else {
|
||||
data_len = wqe->inline_len;
|
||||
@ -1776,7 +1775,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
|
||||
queue_work(qp->scq->nq->cqn_wq, &nq_work->work);
|
||||
} else {
|
||||
dev_err(&sq->hwq.pdev->dev,
|
||||
"QPLIB: FP: Failed to allocate SQ nq_work!");
|
||||
"FP: Failed to allocate SQ nq_work!\n");
|
||||
rc = -ENOMEM;
|
||||
}
|
||||
}
|
||||
@ -1815,13 +1814,12 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
|
||||
if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
|
||||
sch_handler = true;
|
||||
dev_dbg(&rq->hwq.pdev->dev,
|
||||
"%s Error QP. Scheduling for poll_cq\n",
|
||||
__func__);
|
||||
"%s: Error QP. Scheduling for poll_cq\n", __func__);
|
||||
goto queue_err;
|
||||
}
|
||||
if (bnxt_qplib_queue_full(rq)) {
|
||||
dev_err(&rq->hwq.pdev->dev,
|
||||
"QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
|
||||
"FP: QP (0x%x) RQ is full!\n", qp->id);
|
||||
rc = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
@ -1870,7 +1868,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
|
||||
queue_work(qp->rcq->nq->cqn_wq, &nq_work->work);
|
||||
} else {
|
||||
dev_err(&rq->hwq.pdev->dev,
|
||||
"QPLIB: FP: Failed to allocate RQ nq_work!");
|
||||
"FP: Failed to allocate RQ nq_work!\n");
|
||||
rc = -ENOMEM;
|
||||
}
|
||||
}
|
||||
@ -1932,7 +1930,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
|
||||
|
||||
if (!cq->dpi) {
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: FP: CREATE_CQ failed due to NULL DPI");
|
||||
"FP: CREATE_CQ failed due to NULL DPI\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
req.dpi = cpu_to_le32(cq->dpi->dpi);
|
||||
@ -1969,6 +1967,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
|
||||
INIT_LIST_HEAD(&cq->sqf_head);
|
||||
INIT_LIST_HEAD(&cq->rqf_head);
|
||||
spin_lock_init(&cq->compl_lock);
|
||||
spin_lock_init(&cq->flush_lock);
|
||||
|
||||
bnxt_qplib_arm_cq_enable(cq);
|
||||
return 0;
|
||||
@ -2172,7 +2171,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
|
||||
* comes back
|
||||
*/
|
||||
dev_dbg(&cq->hwq.pdev->dev,
|
||||
"FP:Got Phantom CQE");
|
||||
"FP: Got Phantom CQE\n");
|
||||
sq->condition = false;
|
||||
sq->single = true;
|
||||
rc = 0;
|
||||
@ -2189,7 +2188,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
|
||||
peek_raw_cq_cons++;
|
||||
}
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x",
|
||||
"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
|
||||
cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
|
||||
rc = -EINVAL;
|
||||
}
|
||||
@ -2213,7 +2212,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
|
||||
le64_to_cpu(hwcqe->qp_handle));
|
||||
if (!qp) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: Process Req qp is NULL");
|
||||
"FP: Process Req qp is NULL\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
sq = &qp->sq;
|
||||
@ -2221,16 +2220,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
|
||||
cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
|
||||
if (cqe_sq_cons > sq->hwq.max_elements) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Process req reported ");
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
|
||||
"FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
|
||||
cqe_sq_cons, sq->hwq.max_elements);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (qp->sq.flushed) {
|
||||
dev_dbg(&cq->hwq.pdev->dev,
|
||||
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
|
||||
"%s: QP in Flush QP = %p\n", __func__, qp);
|
||||
goto done;
|
||||
}
|
||||
/* Require to walk the sq's swq to fabricate CQEs for all previously
|
||||
@ -2262,9 +2259,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
|
||||
hwcqe->status != CQ_REQ_STATUS_OK) {
|
||||
cqe->status = hwcqe->status;
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Processed Req ");
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
|
||||
"FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n",
|
||||
sw_sq_cons, cqe->wr_id, cqe->status);
|
||||
cqe++;
|
||||
(*budget)--;
|
||||
@ -2330,12 +2325,12 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
|
||||
qp = (struct bnxt_qplib_qp *)((unsigned long)
|
||||
le64_to_cpu(hwcqe->qp_handle));
|
||||
if (!qp) {
|
||||
dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL");
|
||||
dev_err(&cq->hwq.pdev->dev, "process_cq RC qp is NULL\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (qp->rq.flushed) {
|
||||
dev_dbg(&cq->hwq.pdev->dev,
|
||||
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
|
||||
"%s: QP in Flush QP = %p\n", __func__, qp);
|
||||
goto done;
|
||||
}
|
||||
|
||||
@ -2356,9 +2351,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
|
||||
return -EINVAL;
|
||||
if (wr_id_idx >= srq->hwq.max_elements) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Process RC ");
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
|
||||
"FP: CQ Process RC wr_id idx 0x%x exceeded SRQ max 0x%x\n",
|
||||
wr_id_idx, srq->hwq.max_elements);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2371,9 +2364,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
|
||||
rq = &qp->rq;
|
||||
if (wr_id_idx >= rq->hwq.max_elements) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Process RC ");
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
|
||||
"FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n",
|
||||
wr_id_idx, rq->hwq.max_elements);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2409,12 +2400,12 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
|
||||
qp = (struct bnxt_qplib_qp *)((unsigned long)
|
||||
le64_to_cpu(hwcqe->qp_handle));
|
||||
if (!qp) {
|
||||
dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL");
|
||||
dev_err(&cq->hwq.pdev->dev, "process_cq UD qp is NULL\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (qp->rq.flushed) {
|
||||
dev_dbg(&cq->hwq.pdev->dev,
|
||||
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
|
||||
"%s: QP in Flush QP = %p\n", __func__, qp);
|
||||
goto done;
|
||||
}
|
||||
cqe = *pcqe;
|
||||
@ -2439,9 +2430,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
|
||||
|
||||
if (wr_id_idx >= srq->hwq.max_elements) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Process UD ");
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
|
||||
"FP: CQ Process UD wr_id idx 0x%x exceeded SRQ max 0x%x\n",
|
||||
wr_id_idx, srq->hwq.max_elements);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2454,9 +2443,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
|
||||
rq = &qp->rq;
|
||||
if (wr_id_idx >= rq->hwq.max_elements) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Process UD ");
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
|
||||
"FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n",
|
||||
wr_id_idx, rq->hwq.max_elements);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2508,13 +2495,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
|
||||
qp = (struct bnxt_qplib_qp *)((unsigned long)
|
||||
le64_to_cpu(hwcqe->qp_handle));
|
||||
if (!qp) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: process_cq Raw/QP1 qp is NULL");
|
||||
dev_err(&cq->hwq.pdev->dev, "process_cq Raw/QP1 qp is NULL\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (qp->rq.flushed) {
|
||||
dev_dbg(&cq->hwq.pdev->dev,
|
||||
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
|
||||
"%s: QP in Flush QP = %p\n", __func__, qp);
|
||||
goto done;
|
||||
}
|
||||
cqe = *pcqe;
|
||||
@ -2543,14 +2529,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
|
||||
srq = qp->srq;
|
||||
if (!srq) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: SRQ used but not defined??");
|
||||
"FP: SRQ used but not defined??\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (wr_id_idx >= srq->hwq.max_elements) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Process Raw/QP1 ");
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
|
||||
"FP: CQ Process Raw/QP1 wr_id idx 0x%x exceeded SRQ max 0x%x\n",
|
||||
wr_id_idx, srq->hwq.max_elements);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2563,9 +2547,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
|
||||
rq = &qp->rq;
|
||||
if (wr_id_idx >= rq->hwq.max_elements) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: ix 0x%x exceeded RQ max 0x%x",
|
||||
"FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n",
|
||||
wr_id_idx, rq->hwq.max_elements);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2600,14 +2582,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
|
||||
/* Check the Status */
|
||||
if (hwcqe->status != CQ_TERMINAL_STATUS_OK)
|
||||
dev_warn(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Process Terminal Error status = 0x%x",
|
||||
"FP: CQ Process Terminal Error status = 0x%x\n",
|
||||
hwcqe->status);
|
||||
|
||||
qp = (struct bnxt_qplib_qp *)((unsigned long)
|
||||
le64_to_cpu(hwcqe->qp_handle));
|
||||
if (!qp) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Process terminal qp is NULL");
|
||||
"FP: CQ Process terminal qp is NULL\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -2623,16 +2605,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
|
||||
|
||||
if (cqe_cons > sq->hwq.max_elements) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Process terminal reported ");
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
|
||||
"FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
|
||||
cqe_cons, sq->hwq.max_elements);
|
||||
goto do_rq;
|
||||
}
|
||||
|
||||
if (qp->sq.flushed) {
|
||||
dev_dbg(&cq->hwq.pdev->dev,
|
||||
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
|
||||
"%s: QP in Flush QP = %p\n", __func__, qp);
|
||||
goto sq_done;
|
||||
}
|
||||
|
||||
@ -2673,16 +2653,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
|
||||
goto done;
|
||||
} else if (cqe_cons > rq->hwq.max_elements) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Processed terminal ");
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: reported rq_cons_idx 0x%x exceeds max 0x%x",
|
||||
"FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n",
|
||||
cqe_cons, rq->hwq.max_elements);
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (qp->rq.flushed) {
|
||||
dev_dbg(&cq->hwq.pdev->dev,
|
||||
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
|
||||
"%s: QP in Flush QP = %p\n", __func__, qp);
|
||||
rc = 0;
|
||||
goto done;
|
||||
}
|
||||
@ -2704,7 +2682,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
|
||||
/* Check the Status */
|
||||
if (hwcqe->status != CQ_CUTOFF_STATUS_OK) {
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: CQ Process Cutoff Error status = 0x%x",
|
||||
"FP: CQ Process Cutoff Error status = 0x%x\n",
|
||||
hwcqe->status);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2724,16 +2702,12 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
|
||||
|
||||
spin_lock_irqsave(&cq->flush_lock, flags);
|
||||
list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
|
||||
dev_dbg(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: Flushing SQ QP= %p",
|
||||
qp);
|
||||
dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing SQ QP= %p\n", qp);
|
||||
__flush_sq(&qp->sq, qp, &cqe, &budget);
|
||||
}
|
||||
|
||||
list_for_each_entry(qp, &cq->rqf_head, rq_flush) {
|
||||
dev_dbg(&cq->hwq.pdev->dev,
|
||||
"QPLIB: FP: Flushing RQ QP= %p",
|
||||
qp);
|
||||
dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing RQ QP= %p\n", qp);
|
||||
__flush_rq(&qp->rq, qp, &cqe, &budget);
|
||||
}
|
||||
spin_unlock_irqrestore(&cq->flush_lock, flags);
|
||||
@ -2801,7 +2775,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
|
||||
goto exit;
|
||||
default:
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: process_cq unknown type 0x%lx",
|
||||
"process_cq unknown type 0x%lx\n",
|
||||
hw_cqe->cqe_type_toggle &
|
||||
CQ_BASE_CQE_TYPE_MASK);
|
||||
rc = -EINVAL;
|
||||
@ -2814,7 +2788,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
|
||||
* next one
|
||||
*/
|
||||
dev_err(&cq->hwq.pdev->dev,
|
||||
"QPLIB: process_cqe error rc = 0x%x", rc);
|
||||
"process_cqe error rc = 0x%x\n", rc);
|
||||
}
|
||||
raw_cons++;
|
||||
}
|
||||
|
@ -35,6 +35,9 @@
|
||||
*
|
||||
* Description: RDMA Controller HW interface
|
||||
*/
|
||||
|
||||
#define dev_fmt(fmt) "QPLIB: " fmt
|
||||
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/pci.h>
|
||||
@ -96,14 +99,13 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
|
||||
opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
|
||||
opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: RCFW not initialized, reject opcode 0x%x",
|
||||
opcode);
|
||||
"RCFW not initialized, reject opcode 0x%x\n", opcode);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
|
||||
opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
|
||||
dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
|
||||
dev_err(&rcfw->pdev->dev, "RCFW already initialized!\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -115,7 +117,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
|
||||
*/
|
||||
spin_lock_irqsave(&cmdq->lock, flags);
|
||||
if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
|
||||
dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
|
||||
dev_err(&rcfw->pdev->dev, "RCFW: CMDQ is full!\n");
|
||||
spin_unlock_irqrestore(&cmdq->lock, flags);
|
||||
return -EAGAIN;
|
||||
}
|
||||
@ -154,7 +156,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
|
||||
cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
|
||||
if (!cmdqe) {
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: RCFW request failed with no cmdqe!");
|
||||
"RCFW request failed with no cmdqe!\n");
|
||||
goto done;
|
||||
}
|
||||
/* Copy a segment of the req cmd to the cmdq */
|
||||
@ -210,7 +212,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
|
||||
|
||||
if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
|
||||
/* send failed */
|
||||
dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed",
|
||||
dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x send failed\n",
|
||||
cookie, opcode);
|
||||
return rc;
|
||||
}
|
||||
@ -224,7 +226,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
|
||||
rc = __wait_for_resp(rcfw, cookie);
|
||||
if (rc) {
|
||||
/* timed out */
|
||||
dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
|
||||
dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n",
|
||||
cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
|
||||
set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags);
|
||||
return rc;
|
||||
@ -232,7 +234,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
|
||||
|
||||
if (evnt->status) {
|
||||
/* failed with status */
|
||||
dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x",
|
||||
dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
|
||||
cookie, opcode, evnt->status);
|
||||
rc = -EFAULT;
|
||||
}
|
||||
@ -298,9 +300,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
|
||||
qp_id = le32_to_cpu(err_event->xid);
|
||||
qp = rcfw->qp_tbl[qp_id].qp_handle;
|
||||
dev_dbg(&rcfw->pdev->dev,
|
||||
"QPLIB: Received QP error notification");
|
||||
"Received QP error notification\n");
|
||||
dev_dbg(&rcfw->pdev->dev,
|
||||
"QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
|
||||
"qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
|
||||
qp_id, err_event->req_err_state_reason,
|
||||
err_event->res_err_state_reason);
|
||||
if (!qp)
|
||||
@ -309,8 +311,17 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
|
||||
rcfw->aeq_handler(rcfw, qp_event, qp);
|
||||
break;
|
||||
default:
|
||||
/* Command Response */
|
||||
spin_lock_irqsave(&cmdq->lock, flags);
|
||||
/*
|
||||
* Command Response
|
||||
* cmdq->lock needs to be acquired to synchronize
|
||||
* the command send and completion reaping. This function
|
||||
* is always called with creq->lock held. Using
|
||||
* the nested variant of spin_lock.
|
||||
*
|
||||
*/
|
||||
|
||||
spin_lock_irqsave_nested(&cmdq->lock, flags,
|
||||
SINGLE_DEPTH_NESTING);
|
||||
cookie = le16_to_cpu(qp_event->cookie);
|
||||
mcookie = qp_event->cookie;
|
||||
blocked = cookie & RCFW_CMD_IS_BLOCKING;
|
||||
@ -322,14 +333,16 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
|
||||
memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
|
||||
crsqe->resp = NULL;
|
||||
} else {
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x",
|
||||
crsqe->resp ? "mismatch" : "collision",
|
||||
crsqe->resp ? crsqe->resp->cookie : 0, mcookie);
|
||||
if (crsqe->resp && crsqe->resp->cookie)
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"CMD %s cookie sent=%#x, recd=%#x\n",
|
||||
crsqe->resp ? "mismatch" : "collision",
|
||||
crsqe->resp ? crsqe->resp->cookie : 0,
|
||||
mcookie);
|
||||
}
|
||||
if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
|
||||
dev_warn(&rcfw->pdev->dev,
|
||||
"QPLIB: CMD bit %d was not requested", cbit);
|
||||
"CMD bit %d was not requested\n", cbit);
|
||||
cmdq->cons += crsqe->req_size;
|
||||
crsqe->req_size = 0;
|
||||
|
||||
@ -376,14 +389,14 @@ static void bnxt_qplib_service_creq(unsigned long data)
|
||||
(rcfw, (struct creq_func_event *)creqe))
|
||||
rcfw->creq_func_event_processed++;
|
||||
else
|
||||
dev_warn
|
||||
(&rcfw->pdev->dev, "QPLIB:aeqe:%#x Not handled",
|
||||
type);
|
||||
dev_warn(&rcfw->pdev->dev,
|
||||
"aeqe:%#x Not handled\n", type);
|
||||
break;
|
||||
default:
|
||||
dev_warn(&rcfw->pdev->dev, "QPLIB: creqe with ");
|
||||
dev_warn(&rcfw->pdev->dev,
|
||||
"QPLIB: op_event = 0x%x not handled", type);
|
||||
if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT)
|
||||
dev_warn(&rcfw->pdev->dev,
|
||||
"creqe with event 0x%x not handled\n",
|
||||
type);
|
||||
break;
|
||||
}
|
||||
raw_cons++;
|
||||
@ -551,7 +564,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
|
||||
BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE,
|
||||
HWQ_TYPE_L2_CMPL)) {
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: HW channel CREQ allocation failed");
|
||||
"HW channel CREQ allocation failed\n");
|
||||
goto fail;
|
||||
}
|
||||
rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
|
||||
@ -560,7 +573,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
|
||||
BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
|
||||
HWQ_TYPE_CTX)) {
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: HW channel CMDQ allocation failed");
|
||||
"HW channel CMDQ allocation failed\n");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
@ -605,21 +618,18 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
|
||||
|
||||
bnxt_qplib_rcfw_stop_irq(rcfw, true);
|
||||
|
||||
if (rcfw->cmdq_bar_reg_iomem)
|
||||
iounmap(rcfw->cmdq_bar_reg_iomem);
|
||||
rcfw->cmdq_bar_reg_iomem = NULL;
|
||||
|
||||
if (rcfw->creq_bar_reg_iomem)
|
||||
iounmap(rcfw->creq_bar_reg_iomem);
|
||||
rcfw->creq_bar_reg_iomem = NULL;
|
||||
iounmap(rcfw->cmdq_bar_reg_iomem);
|
||||
iounmap(rcfw->creq_bar_reg_iomem);
|
||||
|
||||
indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size);
|
||||
if (indx != rcfw->bmap_size)
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: disabling RCFW with pending cmd-bit %lx", indx);
|
||||
"disabling RCFW with pending cmd-bit %lx\n", indx);
|
||||
kfree(rcfw->cmdq_bitmap);
|
||||
rcfw->bmap_size = 0;
|
||||
|
||||
rcfw->cmdq_bar_reg_iomem = NULL;
|
||||
rcfw->creq_bar_reg_iomem = NULL;
|
||||
rcfw->aeq_handler = NULL;
|
||||
rcfw->vector = 0;
|
||||
}
|
||||
@ -681,8 +691,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
|
||||
RCFW_COMM_BASE_OFFSET,
|
||||
RCFW_COMM_SIZE);
|
||||
if (!rcfw->cmdq_bar_reg_iomem) {
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: CMDQ BAR region %d mapping failed",
|
||||
dev_err(&rcfw->pdev->dev, "CMDQ BAR region %d mapping failed\n",
|
||||
rcfw->cmdq_bar_reg);
|
||||
return -ENOMEM;
|
||||
}
|
||||
@ -697,14 +706,15 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
|
||||
res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
|
||||
if (!res_base)
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: CREQ BAR region %d resc start is 0!",
|
||||
"CREQ BAR region %d resc start is 0!\n",
|
||||
rcfw->creq_bar_reg);
|
||||
rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
|
||||
4);
|
||||
if (!rcfw->creq_bar_reg_iomem) {
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: CREQ BAR region %d mapping failed",
|
||||
dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n",
|
||||
rcfw->creq_bar_reg);
|
||||
iounmap(rcfw->cmdq_bar_reg_iomem);
|
||||
rcfw->cmdq_bar_reg_iomem = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
rcfw->creq_qp_event_processed = 0;
|
||||
@ -717,7 +727,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
|
||||
rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true);
|
||||
if (rc) {
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc);
|
||||
"Failed to request IRQ for CREQ rc = 0x%x\n", rc);
|
||||
bnxt_qplib_disable_rcfw_channel(rcfw);
|
||||
return rc;
|
||||
}
|
||||
|
@ -154,6 +154,8 @@ struct bnxt_qplib_qp_node {
|
||||
void *qp_handle; /* ptr to qplib_qp */
|
||||
};
|
||||
|
||||
#define BNXT_QPLIB_OOS_COUNT_MASK 0xFFFFFFFF
|
||||
|
||||
/* RCFW Communication Channels */
|
||||
struct bnxt_qplib_rcfw {
|
||||
struct pci_dev *pdev;
|
||||
@ -190,6 +192,8 @@ struct bnxt_qplib_rcfw {
|
||||
struct bnxt_qplib_crsq *crsqe_tbl;
|
||||
int qp_tbl_size;
|
||||
struct bnxt_qplib_qp_node *qp_tbl;
|
||||
u64 oos_prev;
|
||||
u32 init_oos_stats;
|
||||
};
|
||||
|
||||
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
|
||||
|
@ -36,6 +36,8 @@
|
||||
* Description: QPLib resource manager
|
||||
*/
|
||||
|
||||
#define dev_fmt(fmt) "QPLIB: " fmt
|
||||
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/interrupt.h>
|
||||
@ -68,8 +70,7 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
|
||||
pbl->pg_map_arr[i]);
|
||||
else
|
||||
dev_warn(&pdev->dev,
|
||||
"QPLIB: PBL free pg_arr[%d] empty?!",
|
||||
i);
|
||||
"PBL free pg_arr[%d] empty?!\n", i);
|
||||
pbl->pg_arr[i] = NULL;
|
||||
}
|
||||
}
|
||||
@ -537,7 +538,7 @@ static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res,
|
||||
struct bnxt_qplib_pkey_tbl *pkey_tbl)
|
||||
{
|
||||
if (!pkey_tbl->tbl)
|
||||
dev_dbg(&res->pdev->dev, "QPLIB: PKEY tbl not present");
|
||||
dev_dbg(&res->pdev->dev, "PKEY tbl not present\n");
|
||||
else
|
||||
kfree(pkey_tbl->tbl);
|
||||
|
||||
@ -578,7 +579,7 @@ int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
|
||||
struct bnxt_qplib_pd *pd)
|
||||
{
|
||||
if (test_and_set_bit(pd->id, pdt->tbl)) {
|
||||
dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d",
|
||||
dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d\n",
|
||||
pd->id);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -639,11 +640,11 @@ int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
|
||||
struct bnxt_qplib_dpi *dpi)
|
||||
{
|
||||
if (dpi->dpi >= dpit->max) {
|
||||
dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d", dpi->dpi);
|
||||
dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d\n", dpi->dpi);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (test_and_set_bit(dpi->dpi, dpit->tbl)) {
|
||||
dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d",
|
||||
dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d\n",
|
||||
dpi->dpi);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -673,22 +674,21 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
|
||||
u32 dbr_len, bytes;
|
||||
|
||||
if (dpit->dbr_bar_reg_iomem) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: DBR BAR region %d already mapped", dbr_bar_reg);
|
||||
dev_err(&res->pdev->dev, "DBR BAR region %d already mapped\n",
|
||||
dbr_bar_reg);
|
||||
return -EALREADY;
|
||||
}
|
||||
|
||||
bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg);
|
||||
if (!bar_reg_base) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: BAR region %d resc start failed", dbr_bar_reg);
|
||||
dev_err(&res->pdev->dev, "BAR region %d resc start failed\n",
|
||||
dbr_bar_reg);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset;
|
||||
if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) {
|
||||
dev_err(&res->pdev->dev, "QPLIB: Invalid DBR length %d",
|
||||
dbr_len);
|
||||
dev_err(&res->pdev->dev, "Invalid DBR length %d\n", dbr_len);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -696,8 +696,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
|
||||
dbr_len);
|
||||
if (!dpit->dbr_bar_reg_iomem) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: FP: DBR BAR region %d mapping failed",
|
||||
dbr_bar_reg);
|
||||
"FP: DBR BAR region %d mapping failed\n", dbr_bar_reg);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -767,7 +766,7 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
|
||||
stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
|
||||
&stats->dma_map, GFP_KERNEL);
|
||||
if (!stats->dma) {
|
||||
dev_err(&pdev->dev, "QPLIB: Stats DMA allocation failed");
|
||||
dev_err(&pdev->dev, "Stats DMA allocation failed\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
return 0;
|
||||
|
@ -36,6 +36,8 @@
|
||||
* Description: Slow Path Operators
|
||||
*/
|
||||
|
||||
#define dev_fmt(fmt) "QPLIB: " fmt
|
||||
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/sched.h>
|
||||
@ -89,7 +91,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
|
||||
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
|
||||
if (!sbuf) {
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: SP: QUERY_FUNC alloc side buffer failed");
|
||||
"SP: QUERY_FUNC alloc side buffer failed\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -135,8 +137,16 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
|
||||
attr->max_srq = le16_to_cpu(sb->max_srq);
|
||||
attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
|
||||
attr->max_srq_sges = sb->max_srq_sge;
|
||||
/* Bono only reports 1 PKEY for now, but it can support > 1 */
|
||||
attr->max_pkey = le32_to_cpu(sb->max_pkeys);
|
||||
/*
|
||||
* Some versions of FW report more than 0xFFFF.
|
||||
* Restrict it for now to 0xFFFF to avoid
|
||||
* reporting truncated value
|
||||
*/
|
||||
if (attr->max_pkey > 0xFFFF) {
|
||||
/* ib_port_attr::pkey_tbl_len is u16 */
|
||||
attr->max_pkey = 0xFFFF;
|
||||
}
|
||||
|
||||
attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
|
||||
attr->l2_db_size = (sb->l2_db_space_size + 1) *
|
||||
@ -186,8 +196,7 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
|
||||
(void *)&resp,
|
||||
NULL, 0);
|
||||
if (rc) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: Failed to set function resources");
|
||||
dev_err(&res->pdev->dev, "Failed to set function resources\n");
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
@ -199,7 +208,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
|
||||
{
|
||||
if (index >= sgid_tbl->max) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: Index %d exceeded SGID table max (%d)",
|
||||
"Index %d exceeded SGID table max (%d)\n",
|
||||
index, sgid_tbl->max);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -217,13 +226,12 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
|
||||
int index;
|
||||
|
||||
if (!sgid_tbl) {
|
||||
dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
|
||||
dev_err(&res->pdev->dev, "SGID table not allocated\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
/* Do we need a sgid_lock here? */
|
||||
if (!sgid_tbl->active) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: SGID table has no active entries");
|
||||
dev_err(&res->pdev->dev, "SGID table has no active entries\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
for (index = 0; index < sgid_tbl->max; index++) {
|
||||
@ -231,7 +239,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
|
||||
break;
|
||||
}
|
||||
if (index == sgid_tbl->max) {
|
||||
dev_warn(&res->pdev->dev, "GID not found in the SGID table");
|
||||
dev_warn(&res->pdev->dev, "GID not found in the SGID table\n");
|
||||
return 0;
|
||||
}
|
||||
/* Remove GID from the SGID table */
|
||||
@ -244,7 +252,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
|
||||
RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
|
||||
if (sgid_tbl->hw_id[index] == 0xFFFF) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: GID entry contains an invalid HW id");
|
||||
"GID entry contains an invalid HW id\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
|
||||
@ -258,7 +266,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
|
||||
sgid_tbl->vlan[index] = 0;
|
||||
sgid_tbl->active--;
|
||||
dev_dbg(&res->pdev->dev,
|
||||
"QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x",
|
||||
"SGID deleted hw_id[0x%x] = 0x%x active = 0x%x\n",
|
||||
index, sgid_tbl->hw_id[index], sgid_tbl->active);
|
||||
sgid_tbl->hw_id[index] = (u16)-1;
|
||||
|
||||
@ -277,20 +285,19 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
|
||||
int i, free_idx;
|
||||
|
||||
if (!sgid_tbl) {
|
||||
dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
|
||||
dev_err(&res->pdev->dev, "SGID table not allocated\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
/* Do we need a sgid_lock here? */
|
||||
if (sgid_tbl->active == sgid_tbl->max) {
|
||||
dev_err(&res->pdev->dev, "QPLIB: SGID table is full");
|
||||
dev_err(&res->pdev->dev, "SGID table is full\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
free_idx = sgid_tbl->max;
|
||||
for (i = 0; i < sgid_tbl->max; i++) {
|
||||
if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
|
||||
dev_dbg(&res->pdev->dev,
|
||||
"QPLIB: SGID entry already exist in entry %d!",
|
||||
i);
|
||||
"SGID entry already exist in entry %d!\n", i);
|
||||
*index = i;
|
||||
return -EALREADY;
|
||||
} else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
|
||||
@ -301,7 +308,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
|
||||
}
|
||||
if (free_idx == sgid_tbl->max) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: SGID table is FULL but count is not MAX??");
|
||||
"SGID table is FULL but count is not MAX??\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (update) {
|
||||
@ -348,7 +355,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
|
||||
sgid_tbl->vlan[free_idx] = 1;
|
||||
|
||||
dev_dbg(&res->pdev->dev,
|
||||
"QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x",
|
||||
"SGID added hw_id[0x%x] = 0x%x active = 0x%x\n",
|
||||
free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active);
|
||||
|
||||
*index = free_idx;
|
||||
@ -404,7 +411,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
|
||||
}
|
||||
if (index >= pkey_tbl->max) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: Index %d exceeded PKEY table max (%d)",
|
||||
"Index %d exceeded PKEY table max (%d)\n",
|
||||
index, pkey_tbl->max);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -419,14 +426,13 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
|
||||
int i, rc = 0;
|
||||
|
||||
if (!pkey_tbl) {
|
||||
dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
|
||||
dev_err(&res->pdev->dev, "PKEY table not allocated\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Do we need a pkey_lock here? */
|
||||
if (!pkey_tbl->active) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: PKEY table has no active entries");
|
||||
dev_err(&res->pdev->dev, "PKEY table has no active entries\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
for (i = 0; i < pkey_tbl->max; i++) {
|
||||
@ -435,8 +441,7 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
|
||||
}
|
||||
if (i == pkey_tbl->max) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: PKEY 0x%04x not found in the pkey table",
|
||||
*pkey);
|
||||
"PKEY 0x%04x not found in the pkey table\n", *pkey);
|
||||
return -ENOMEM;
|
||||
}
|
||||
memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey));
|
||||
@ -453,13 +458,13 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
|
||||
int i, free_idx, rc = 0;
|
||||
|
||||
if (!pkey_tbl) {
|
||||
dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
|
||||
dev_err(&res->pdev->dev, "PKEY table not allocated\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Do we need a pkey_lock here? */
|
||||
if (pkey_tbl->active == pkey_tbl->max) {
|
||||
dev_err(&res->pdev->dev, "QPLIB: PKEY table is full");
|
||||
dev_err(&res->pdev->dev, "PKEY table is full\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
free_idx = pkey_tbl->max;
|
||||
@ -471,7 +476,7 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
|
||||
}
|
||||
if (free_idx == pkey_tbl->max) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"QPLIB: PKEY table is FULL but count is not MAX??");
|
||||
"PKEY table is FULL but count is not MAX??\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
/* Add PKEY to the pkey_tbl */
|
||||
@ -555,8 +560,7 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
|
||||
int rc;
|
||||
|
||||
if (mrw->lkey == 0xFFFFFFFF) {
|
||||
dev_info(&res->pdev->dev,
|
||||
"QPLIB: SP: Free a reserved lkey MRW");
|
||||
dev_info(&res->pdev->dev, "SP: Free a reserved lkey MRW\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -666,9 +670,8 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
|
||||
pages++;
|
||||
|
||||
if (pages > MAX_PBL_LVL_1_PGS) {
|
||||
dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages ");
|
||||
dev_err(&res->pdev->dev,
|
||||
"requested (0x%x) exceeded max (0x%x)",
|
||||
"SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n",
|
||||
pages, MAX_PBL_LVL_1_PGS);
|
||||
return -ENOMEM;
|
||||
}
|
||||
@ -684,7 +687,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
|
||||
HWQ_TYPE_CTX);
|
||||
if (rc) {
|
||||
dev_err(&res->pdev->dev,
|
||||
"SP: Reg MR memory allocation failed");
|
||||
"SP: Reg MR memory allocation failed\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
/* Write to the hwq */
|
||||
@ -795,7 +798,7 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
|
||||
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
|
||||
if (!sbuf) {
|
||||
dev_err(&rcfw->pdev->dev,
|
||||
"QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed");
|
||||
"SP: QUERY_ROCE_STATS alloc side buffer failed\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -845,6 +848,16 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
|
||||
stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err);
|
||||
stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err);
|
||||
stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err);
|
||||
if (!rcfw->init_oos_stats) {
|
||||
rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
|
||||
rcfw->init_oos_stats = 1;
|
||||
} else {
|
||||
stats->res_oos_drop_count +=
|
||||
(le64_to_cpu(sb->res_oos_drop_count) -
|
||||
rcfw->oos_prev) & BNXT_QPLIB_OOS_COUNT_MASK;
|
||||
rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
|
||||
}
|
||||
|
||||
bail:
|
||||
bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
|
||||
return rc;
|
||||
|
@ -205,6 +205,16 @@ struct bnxt_qplib_roce_stats {
|
||||
/* res_tx_pci_err is 64 b */
|
||||
u64 res_rx_pci_err;
|
||||
/* res_rx_pci_err is 64 b */
|
||||
u64 res_oos_drop_count;
|
||||
/* res_oos_drop_count */
|
||||
u64 active_qp_count_p0;
|
||||
/* port 0 active qps */
|
||||
u64 active_qp_count_p1;
|
||||
/* port 1 active qps */
|
||||
u64 active_qp_count_p2;
|
||||
/* port 2 active qps */
|
||||
u64 active_qp_count_p3;
|
||||
/* port 3 active qps */
|
||||
};
|
||||
|
||||
int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
|
||||
|
@ -2929,6 +2929,11 @@ struct creq_query_roce_stats_resp_sb {
|
||||
__le64 res_srq_load_err;
|
||||
__le64 res_tx_pci_err;
|
||||
__le64 res_rx_pci_err;
|
||||
__le64 res_oos_drop_count;
|
||||
__le64 active_qp_count_p0;
|
||||
__le64 active_qp_count_p1;
|
||||
__le64 active_qp_count_p2;
|
||||
__le64 active_qp_count_p3;
|
||||
};
|
||||
|
||||
/* QP error notification event (16 bytes) */
|
||||
|
@ -1127,17 +1127,18 @@ static int iwch_query_port(struct ib_device *ibdev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
static ssize_t hw_rev_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
|
||||
ibdev.dev);
|
||||
pr_debug("%s dev 0x%p\n", __func__, dev);
|
||||
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
|
||||
}
|
||||
static DEVICE_ATTR_RO(hw_rev);
|
||||
|
||||
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
static ssize_t hca_type_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
|
||||
ibdev.dev);
|
||||
@ -1148,9 +1149,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
|
||||
lldev->ethtool_ops->get_drvinfo(lldev, &info);
|
||||
return sprintf(buf, "%s\n", info.driver);
|
||||
}
|
||||
static DEVICE_ATTR_RO(hca_type);
|
||||
|
||||
static ssize_t show_board(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
static ssize_t board_id_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
|
||||
ibdev.dev);
|
||||
@ -1158,6 +1160,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
|
||||
return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
|
||||
iwch_dev->rdev.rnic_info.pdev->device);
|
||||
}
|
||||
static DEVICE_ATTR_RO(board_id);
|
||||
|
||||
enum counters {
|
||||
IPINRECEIVES,
|
||||
@ -1274,14 +1277,15 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
|
||||
return stats->num_counters;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
|
||||
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
|
||||
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
|
||||
static struct attribute *iwch_class_attributes[] = {
|
||||
&dev_attr_hw_rev.attr,
|
||||
&dev_attr_hca_type.attr,
|
||||
&dev_attr_board_id.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct device_attribute *iwch_class_attributes[] = {
|
||||
&dev_attr_hw_rev,
|
||||
&dev_attr_hca_type,
|
||||
&dev_attr_board_id,
|
||||
static const struct attribute_group iwch_attr_group = {
|
||||
.attrs = iwch_class_attributes,
|
||||
};
|
||||
|
||||
static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
|
||||
@ -1316,10 +1320,8 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
|
||||
int iwch_register_device(struct iwch_dev *dev)
|
||||
{
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
pr_debug("%s iwch_dev %p\n", __func__, dev);
|
||||
strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
|
||||
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
|
||||
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
|
||||
dev->ibdev.owner = THIS_MODULE;
|
||||
@ -1402,33 +1404,16 @@ int iwch_register_device(struct iwch_dev *dev)
|
||||
sizeof(dev->ibdev.iwcm->ifname));
|
||||
|
||||
dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
|
||||
ret = ib_register_device(&dev->ibdev, NULL);
|
||||
rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
|
||||
ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL);
|
||||
if (ret)
|
||||
goto bail1;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
|
||||
ret = device_create_file(&dev->ibdev.dev,
|
||||
iwch_class_attributes[i]);
|
||||
if (ret) {
|
||||
goto bail2;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
bail2:
|
||||
ib_unregister_device(&dev->ibdev);
|
||||
bail1:
|
||||
kfree(dev->ibdev.iwcm);
|
||||
kfree(dev->ibdev.iwcm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void iwch_unregister_device(struct iwch_dev *dev)
|
||||
{
|
||||
int i;
|
||||
|
||||
pr_debug("%s iwch_dev %p\n", __func__, dev);
|
||||
for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
|
||||
device_remove_file(&dev->ibdev.dev,
|
||||
iwch_class_attributes[i]);
|
||||
ib_unregister_device(&dev->ibdev);
|
||||
kfree(dev->ibdev.iwcm);
|
||||
return;
|
||||
|
@ -403,8 +403,7 @@ void _c4iw_free_ep(struct kref *kref)
|
||||
ep->com.local_addr.ss_family);
|
||||
dst_release(ep->dst);
|
||||
cxgb4_l2t_release(ep->l2t);
|
||||
if (ep->mpa_skb)
|
||||
kfree_skb(ep->mpa_skb);
|
||||
kfree_skb(ep->mpa_skb);
|
||||
}
|
||||
if (!skb_queue_empty(&ep->com.ep_skb_list))
|
||||
skb_queue_purge(&ep->com.ep_skb_list);
|
||||
|
@ -161,7 +161,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
|
||||
cq->gts = rdev->lldi.gts_reg;
|
||||
cq->rdev = rdev;
|
||||
|
||||
cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
|
||||
cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, CXGB4_BAR2_QTYPE_INGRESS,
|
||||
&cq->bar2_qid,
|
||||
user ? &cq->bar2_pa : NULL);
|
||||
if (user && !cq->bar2_pa) {
|
||||
|
@ -373,8 +373,8 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
static ssize_t hw_rev_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
|
||||
ibdev.dev);
|
||||
@ -382,9 +382,10 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
|
||||
return sprintf(buf, "%d\n",
|
||||
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
|
||||
}
|
||||
static DEVICE_ATTR_RO(hw_rev);
|
||||
|
||||
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
static ssize_t hca_type_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
|
||||
ibdev.dev);
|
||||
@ -395,9 +396,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
|
||||
lldev->ethtool_ops->get_drvinfo(lldev, &info);
|
||||
return sprintf(buf, "%s\n", info.driver);
|
||||
}
|
||||
static DEVICE_ATTR_RO(hca_type);
|
||||
|
||||
static ssize_t show_board(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
|
||||
ibdev.dev);
|
||||
@ -405,6 +407,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
|
||||
return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
|
||||
c4iw_dev->rdev.lldi.pdev->device);
|
||||
}
|
||||
static DEVICE_ATTR_RO(board_id);
|
||||
|
||||
enum counters {
|
||||
IP4INSEGS,
|
||||
@ -461,14 +464,15 @@ static int c4iw_get_mib(struct ib_device *ibdev,
|
||||
return stats->num_counters;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
|
||||
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
|
||||
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
|
||||
static struct attribute *c4iw_class_attributes[] = {
|
||||
&dev_attr_hw_rev.attr,
|
||||
&dev_attr_hca_type.attr,
|
||||
&dev_attr_board_id.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct device_attribute *c4iw_class_attributes[] = {
|
||||
&dev_attr_hw_rev,
|
||||
&dev_attr_hca_type,
|
||||
&dev_attr_board_id,
|
||||
static const struct attribute_group c4iw_attr_group = {
|
||||
.attrs = c4iw_class_attributes,
|
||||
};
|
||||
|
||||
static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
|
||||
@ -530,12 +534,10 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
|
||||
void c4iw_register_device(struct work_struct *work)
|
||||
{
|
||||
int ret;
|
||||
int i;
|
||||
struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work);
|
||||
struct c4iw_dev *dev = ctx->dev;
|
||||
|
||||
pr_debug("c4iw_dev %p\n", dev);
|
||||
strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
|
||||
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
|
||||
memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
|
||||
dev->ibdev.owner = THIS_MODULE;
|
||||
@ -626,20 +628,13 @@ void c4iw_register_device(struct work_struct *work)
|
||||
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
|
||||
sizeof(dev->ibdev.iwcm->ifname));
|
||||
|
||||
rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
|
||||
dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
|
||||
ret = ib_register_device(&dev->ibdev, NULL);
|
||||
ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
|
||||
if (ret)
|
||||
goto err_kfree_iwcm;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) {
|
||||
ret = device_create_file(&dev->ibdev.dev,
|
||||
c4iw_class_attributes[i]);
|
||||
if (ret)
|
||||
goto err_unregister_device;
|
||||
}
|
||||
return;
|
||||
err_unregister_device:
|
||||
ib_unregister_device(&dev->ibdev);
|
||||
|
||||
err_kfree_iwcm:
|
||||
kfree(dev->ibdev.iwcm);
|
||||
err_dealloc_ctx:
|
||||
@ -651,12 +646,7 @@ void c4iw_register_device(struct work_struct *work)
|
||||
|
||||
void c4iw_unregister_device(struct c4iw_dev *dev)
|
||||
{
|
||||
int i;
|
||||
|
||||
pr_debug("c4iw_dev %p\n", dev);
|
||||
for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
|
||||
device_remove_file(&dev->ibdev.dev,
|
||||
c4iw_class_attributes[i]);
|
||||
ib_unregister_device(&dev->ibdev);
|
||||
kfree(dev->ibdev.iwcm);
|
||||
return;
|
||||
|
@ -279,12 +279,13 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
|
||||
|
||||
wq->db = rdev->lldi.db_reg;
|
||||
|
||||
wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
|
||||
wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid,
|
||||
CXGB4_BAR2_QTYPE_EGRESS,
|
||||
&wq->sq.bar2_qid,
|
||||
user ? &wq->sq.bar2_pa : NULL);
|
||||
if (need_rq)
|
||||
wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
|
||||
T4_BAR2_QTYPE_EGRESS,
|
||||
CXGB4_BAR2_QTYPE_EGRESS,
|
||||
&wq->rq.bar2_qid,
|
||||
user ? &wq->rq.bar2_pa : NULL);
|
||||
|
||||
@ -2572,7 +2573,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
|
||||
memset(wq->queue, 0, wq->memsize);
|
||||
dma_unmap_addr_set(wq, mapping, wq->dma_addr);
|
||||
|
||||
wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
|
||||
wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
|
||||
&wq->bar2_qid,
|
||||
user ? &wq->bar2_pa : NULL);
|
||||
|
||||
@ -2813,8 +2814,7 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
|
||||
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
|
||||
srq->wr_waitp);
|
||||
err_free_skb:
|
||||
if (srq->destroy_skb)
|
||||
kfree_skb(srq->destroy_skb);
|
||||
kfree_skb(srq->destroy_skb);
|
||||
err_free_srq_idx:
|
||||
c4iw_free_srq_idx(&rhp->rdev, srq->idx);
|
||||
err_free_wr_wait:
|
||||
|
@ -8,12 +8,42 @@
|
||||
#
|
||||
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
|
||||
|
||||
hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
|
||||
eprom.o exp_rcv.o file_ops.o firmware.o \
|
||||
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
|
||||
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
|
||||
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
|
||||
verbs_txreq.o vnic_main.o vnic_sdma.o
|
||||
hfi1-y := \
|
||||
affinity.o \
|
||||
chip.o \
|
||||
device.o \
|
||||
driver.o \
|
||||
efivar.o \
|
||||
eprom.o \
|
||||
exp_rcv.o \
|
||||
file_ops.o \
|
||||
firmware.o \
|
||||
init.o \
|
||||
intr.o \
|
||||
iowait.o \
|
||||
mad.o \
|
||||
mmu_rb.o \
|
||||
msix.o \
|
||||
pcie.o \
|
||||
pio.o \
|
||||
pio_copy.o \
|
||||
platform.o \
|
||||
qp.o \
|
||||
qsfp.o \
|
||||
rc.o \
|
||||
ruc.o \
|
||||
sdma.o \
|
||||
sysfs.o \
|
||||
trace.o \
|
||||
uc.o \
|
||||
ud.o \
|
||||
user_exp_rcv.o \
|
||||
user_pages.o \
|
||||
user_sdma.o \
|
||||
verbs.o \
|
||||
verbs_txreq.o \
|
||||
vnic_main.o \
|
||||
vnic_sdma.o
|
||||
|
||||
ifdef CONFIG_DEBUG_FS
|
||||
hfi1-y += debugfs.o
|
||||
|
@ -817,10 +817,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
|
||||
set = &entry->def_intr;
|
||||
cpumask_set_cpu(cpu, &set->mask);
|
||||
cpumask_set_cpu(cpu, &set->used);
|
||||
for (i = 0; i < dd->num_msix_entries; i++) {
|
||||
for (i = 0; i < dd->msix_info.max_requested; i++) {
|
||||
struct hfi1_msix_entry *other_msix;
|
||||
|
||||
other_msix = &dd->msix_entries[i];
|
||||
other_msix = &dd->msix_info.msix_entries[i];
|
||||
if (other_msix->type != IRQ_SDMA || other_msix == msix)
|
||||
continue;
|
||||
|
||||
|
@ -67,8 +67,6 @@
|
||||
#include "debugfs.h"
|
||||
#include "fault.h"
|
||||
|
||||
#define NUM_IB_PORTS 1
|
||||
|
||||
uint kdeth_qp;
|
||||
module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
|
||||
MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
|
||||
@ -1100,9 +1098,9 @@ struct err_reg_info {
|
||||
const char *desc;
|
||||
};
|
||||
|
||||
#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
|
||||
#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
|
||||
#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
|
||||
#define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START)
|
||||
#define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START)
|
||||
#define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START)
|
||||
|
||||
/*
|
||||
* Helpers for building HFI and DC error interrupt table entries. Different
|
||||
@ -8181,7 +8179,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
|
||||
/**
|
||||
* is_rcv_urgent_int() - User receive context urgent IRQ handler
|
||||
* @dd: valid dd
|
||||
* @source: logical IRQ source (ofse from IS_RCVURGENT_START)
|
||||
* @source: logical IRQ source (offset from IS_RCVURGENT_START)
|
||||
*
|
||||
* RX block receive urgent interrupt. Source is < 160.
|
||||
*
|
||||
@ -8231,7 +8229,7 @@ static const struct is_table is_table[] = {
|
||||
is_sdma_eng_err_name, is_sdma_eng_err_int },
|
||||
{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
|
||||
is_sendctxt_err_name, is_sendctxt_err_int },
|
||||
{ IS_SDMA_START, IS_SDMA_END,
|
||||
{ IS_SDMA_START, IS_SDMA_IDLE_END,
|
||||
is_sdma_eng_name, is_sdma_eng_int },
|
||||
{ IS_VARIOUS_START, IS_VARIOUS_END,
|
||||
is_various_name, is_various_int },
|
||||
@ -8257,7 +8255,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
|
||||
|
||||
/* avoids a double compare by walking the table in-order */
|
||||
for (entry = &is_table[0]; entry->is_name; entry++) {
|
||||
if (source < entry->end) {
|
||||
if (source <= entry->end) {
|
||||
trace_hfi1_interrupt(dd, entry, source);
|
||||
entry->is_int(dd, source - entry->start);
|
||||
return;
|
||||
@ -8276,7 +8274,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
|
||||
* context DATA IRQs are threaded and are not supported by this handler.
|
||||
*
|
||||
*/
|
||||
static irqreturn_t general_interrupt(int irq, void *data)
|
||||
irqreturn_t general_interrupt(int irq, void *data)
|
||||
{
|
||||
struct hfi1_devdata *dd = data;
|
||||
u64 regs[CCE_NUM_INT_CSRS];
|
||||
@ -8309,7 +8307,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
|
||||
return handled;
|
||||
}
|
||||
|
||||
static irqreturn_t sdma_interrupt(int irq, void *data)
|
||||
irqreturn_t sdma_interrupt(int irq, void *data)
|
||||
{
|
||||
struct sdma_engine *sde = data;
|
||||
struct hfi1_devdata *dd = sde->dd;
|
||||
@ -8401,7 +8399,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
|
||||
* invoked) is finished. The intent is to avoid extra interrupts while we
|
||||
* are processing packets anyway.
|
||||
*/
|
||||
static irqreturn_t receive_context_interrupt(int irq, void *data)
|
||||
irqreturn_t receive_context_interrupt(int irq, void *data)
|
||||
{
|
||||
struct hfi1_ctxtdata *rcd = data;
|
||||
struct hfi1_devdata *dd = rcd->dd;
|
||||
@ -8441,7 +8439,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
|
||||
* Receive packet thread handler. This expects to be invoked with the
|
||||
* receive interrupt still blocked.
|
||||
*/
|
||||
static irqreturn_t receive_context_thread(int irq, void *data)
|
||||
irqreturn_t receive_context_thread(int irq, void *data)
|
||||
{
|
||||
struct hfi1_ctxtdata *rcd = data;
|
||||
int present;
|
||||
@ -9651,30 +9649,10 @@ void qsfp_event(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
static void init_qsfp_int(struct hfi1_devdata *dd)
|
||||
void init_qsfp_int(struct hfi1_devdata *dd)
|
||||
{
|
||||
struct hfi1_pportdata *ppd = dd->pport;
|
||||
u64 qsfp_mask, cce_int_mask;
|
||||
const int qsfp1_int_smask = QSFP1_INT % 64;
|
||||
const int qsfp2_int_smask = QSFP2_INT % 64;
|
||||
|
||||
/*
|
||||
* disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
|
||||
* Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
|
||||
* therefore just one of QSFP1_INT/QSFP2_INT can be used to find
|
||||
* the index of the appropriate CSR in the CCEIntMask CSR array
|
||||
*/
|
||||
cce_int_mask = read_csr(dd, CCE_INT_MASK +
|
||||
(8 * (QSFP1_INT / 64)));
|
||||
if (dd->hfi1_id) {
|
||||
cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
|
||||
write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
|
||||
cce_int_mask);
|
||||
} else {
|
||||
cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
|
||||
write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
|
||||
cce_int_mask);
|
||||
}
|
||||
u64 qsfp_mask;
|
||||
|
||||
qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
|
||||
/* Clear current status to avoid spurious interrupts */
|
||||
@ -9691,6 +9669,12 @@ static void init_qsfp_int(struct hfi1_devdata *dd)
|
||||
write_csr(dd,
|
||||
dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
|
||||
qsfp_mask);
|
||||
|
||||
/* Enable the appropriate QSFP IRQ source */
|
||||
if (!dd->hfi1_id)
|
||||
set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true);
|
||||
else
|
||||
set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -10577,12 +10561,29 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify if BCT for data VLs is non-zero.
|
||||
/**
|
||||
* data_vls_operational() - Verify if data VL BCT credits and MTU
|
||||
* are both set.
|
||||
* @ppd: pointer to hfi1_pportdata structure
|
||||
*
|
||||
* Return: true - Ok, false - otherwise.
|
||||
*/
|
||||
static inline bool data_vls_operational(struct hfi1_pportdata *ppd)
|
||||
{
|
||||
return !!ppd->actual_vls_operational;
|
||||
int i;
|
||||
u64 reg;
|
||||
|
||||
if (!ppd->actual_vls_operational)
|
||||
return false;
|
||||
|
||||
for (i = 0; i < ppd->vls_supported; i++) {
|
||||
reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i));
|
||||
if ((reg && !ppd->dd->vld[i].mtu) ||
|
||||
(!reg && ppd->dd->vld[i].mtu))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -10695,7 +10696,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
|
||||
|
||||
if (!data_vls_operational(ppd)) {
|
||||
dd_dev_err(dd,
|
||||
"%s: data VLs not operational\n", __func__);
|
||||
"%s: Invalid data VL credits or mtu\n",
|
||||
__func__);
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
@ -11932,10 +11934,16 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
|
||||
|
||||
rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
|
||||
}
|
||||
if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
|
||||
if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) {
|
||||
set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
|
||||
IS_RCVAVAIL_START + rcd->ctxt, true);
|
||||
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
|
||||
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
|
||||
}
|
||||
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) {
|
||||
set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
|
||||
IS_RCVAVAIL_START + rcd->ctxt, false);
|
||||
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
|
||||
}
|
||||
if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
|
||||
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
|
||||
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
|
||||
@ -11965,6 +11973,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
|
||||
rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
|
||||
if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
|
||||
rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
|
||||
if (op & HFI1_RCVCTRL_URGENT_ENB)
|
||||
set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
|
||||
IS_RCVURGENT_START + rcd->ctxt, true);
|
||||
if (op & HFI1_RCVCTRL_URGENT_DIS)
|
||||
set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
|
||||
IS_RCVURGENT_START + rcd->ctxt, false);
|
||||
|
||||
hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
|
||||
write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
|
||||
|
||||
@ -12963,63 +12978,71 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* get_int_mask - get 64 bit int mask
|
||||
* @dd - the devdata
|
||||
* @i - the csr (relative to CCE_INT_MASK)
|
||||
*
|
||||
* Returns the mask with the urgent interrupt mask
|
||||
* bit clear for kernel receive contexts.
|
||||
*/
|
||||
static u64 get_int_mask(struct hfi1_devdata *dd, u32 i)
|
||||
{
|
||||
u64 mask = U64_MAX; /* default to no change */
|
||||
|
||||
if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) {
|
||||
int j = (i - (IS_RCVURGENT_START / 64)) * 64;
|
||||
int k = !j ? IS_RCVURGENT_START % 64 : 0;
|
||||
|
||||
if (j)
|
||||
j -= IS_RCVURGENT_START % 64;
|
||||
/* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */
|
||||
for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++)
|
||||
/* convert to bit in mask and clear */
|
||||
mask &= ~BIT_ULL(k);
|
||||
}
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ========================================================================= */
|
||||
|
||||
/*
|
||||
* Enable/disable chip from delivering interrupts.
|
||||
/**
|
||||
* read_mod_write() - Calculate the IRQ register index and set/clear the bits
|
||||
* @dd: valid devdata
|
||||
* @src: IRQ source to determine register index from
|
||||
* @bits: the bits to set or clear
|
||||
* @set: true == set the bits, false == clear the bits
|
||||
*
|
||||
*/
|
||||
void set_intr_state(struct hfi1_devdata *dd, u32 enable)
|
||||
static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
|
||||
bool set)
|
||||
{
|
||||
int i;
|
||||
u64 reg;
|
||||
u16 idx = src / BITS_PER_REGISTER;
|
||||
|
||||
/*
|
||||
* In HFI, the mask needs to be 1 to allow interrupts.
|
||||
*/
|
||||
if (enable) {
|
||||
/* enable all interrupts but urgent on kernel contexts */
|
||||
for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
|
||||
u64 mask = get_int_mask(dd, i);
|
||||
spin_lock(&dd->irq_src_lock);
|
||||
reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
|
||||
if (set)
|
||||
reg |= bits;
|
||||
else
|
||||
reg &= ~bits;
|
||||
write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
|
||||
spin_unlock(&dd->irq_src_lock);
|
||||
}
|
||||
|
||||
write_csr(dd, CCE_INT_MASK + (8 * i), mask);
|
||||
/**
|
||||
* set_intr_bits() - Enable/disable a range (one or more) IRQ sources
|
||||
* @dd: valid devdata
|
||||
* @first: first IRQ source to set/clear
|
||||
* @last: last IRQ source (inclusive) to set/clear
|
||||
* @set: true == set the bits, false == clear the bits
|
||||
*
|
||||
* If first == last, set the exact source.
|
||||
*/
|
||||
int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
|
||||
{
|
||||
u64 bits = 0;
|
||||
u64 bit;
|
||||
u16 src;
|
||||
|
||||
if (first > NUM_INTERRUPT_SOURCES || last > NUM_INTERRUPT_SOURCES)
|
||||
return -EINVAL;
|
||||
|
||||
if (last < first)
|
||||
return -ERANGE;
|
||||
|
||||
for (src = first; src <= last; src++) {
|
||||
bit = src % BITS_PER_REGISTER;
|
||||
/* wrapped to next register? */
|
||||
if (!bit && bits) {
|
||||
read_mod_write(dd, src - 1, bits, set);
|
||||
bits = 0;
|
||||
}
|
||||
|
||||
init_qsfp_int(dd);
|
||||
} else {
|
||||
for (i = 0; i < CCE_NUM_INT_CSRS; i++)
|
||||
write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
|
||||
bits |= BIT_ULL(bit);
|
||||
}
|
||||
read_mod_write(dd, last, bits, set);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear all interrupt sources on the chip.
|
||||
*/
|
||||
static void clear_all_interrupts(struct hfi1_devdata *dd)
|
||||
void clear_all_interrupts(struct hfi1_devdata *dd)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -13043,38 +13066,11 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
|
||||
write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
|
||||
}
|
||||
|
||||
/**
|
||||
* hfi1_clean_up_interrupts() - Free all IRQ resources
|
||||
* @dd: valid device data data structure
|
||||
*
|
||||
* Free the MSIx and assoicated PCI resources, if they have been allocated.
|
||||
*/
|
||||
void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
|
||||
{
|
||||
int i;
|
||||
struct hfi1_msix_entry *me = dd->msix_entries;
|
||||
|
||||
/* remove irqs - must happen before disabling/turning off */
|
||||
for (i = 0; i < dd->num_msix_entries; i++, me++) {
|
||||
if (!me->arg) /* => no irq, no affinity */
|
||||
continue;
|
||||
hfi1_put_irq_affinity(dd, me);
|
||||
pci_free_irq(dd->pcidev, i, me->arg);
|
||||
}
|
||||
|
||||
/* clean structures */
|
||||
kfree(dd->msix_entries);
|
||||
dd->msix_entries = NULL;
|
||||
dd->num_msix_entries = 0;
|
||||
|
||||
pci_free_irq_vectors(dd->pcidev);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remap the interrupt source from the general handler to the given MSI-X
|
||||
* interrupt.
|
||||
*/
|
||||
static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
|
||||
void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
|
||||
{
|
||||
u64 reg;
|
||||
int m, n;
|
||||
@ -13098,8 +13094,7 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
|
||||
write_csr(dd, CCE_INT_MAP + (8 * m), reg);
|
||||
}
|
||||
|
||||
static void remap_sdma_interrupts(struct hfi1_devdata *dd,
|
||||
int engine, int msix_intr)
|
||||
void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr)
|
||||
{
|
||||
/*
|
||||
* SDMA engine interrupt sources grouped by type, rather than
|
||||
@ -13108,204 +13103,16 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
|
||||
* SDMAProgress
|
||||
* SDMAIdle
|
||||
*/
|
||||
remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
|
||||
msix_intr);
|
||||
remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
|
||||
msix_intr);
|
||||
remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
|
||||
msix_intr);
|
||||
}
|
||||
|
||||
static int request_msix_irqs(struct hfi1_devdata *dd)
|
||||
{
|
||||
int first_general, last_general;
|
||||
int first_sdma, last_sdma;
|
||||
int first_rx, last_rx;
|
||||
int i, ret = 0;
|
||||
|
||||
/* calculate the ranges we are going to use */
|
||||
first_general = 0;
|
||||
last_general = first_general + 1;
|
||||
first_sdma = last_general;
|
||||
last_sdma = first_sdma + dd->num_sdma;
|
||||
first_rx = last_sdma;
|
||||
last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts;
|
||||
|
||||
/* VNIC MSIx interrupts get mapped when VNIC contexts are created */
|
||||
dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
|
||||
|
||||
/*
|
||||
* Sanity check - the code expects all SDMA chip source
|
||||
* interrupts to be in the same CSR, starting at bit 0. Verify
|
||||
* that this is true by checking the bit location of the start.
|
||||
*/
|
||||
BUILD_BUG_ON(IS_SDMA_START % 64);
|
||||
|
||||
for (i = 0; i < dd->num_msix_entries; i++) {
|
||||
struct hfi1_msix_entry *me = &dd->msix_entries[i];
|
||||
const char *err_info;
|
||||
irq_handler_t handler;
|
||||
irq_handler_t thread = NULL;
|
||||
void *arg = NULL;
|
||||
int idx;
|
||||
struct hfi1_ctxtdata *rcd = NULL;
|
||||
struct sdma_engine *sde = NULL;
|
||||
char name[MAX_NAME_SIZE];
|
||||
|
||||
/* obtain the arguments to pci_request_irq */
|
||||
if (first_general <= i && i < last_general) {
|
||||
idx = i - first_general;
|
||||
handler = general_interrupt;
|
||||
arg = dd;
|
||||
snprintf(name, sizeof(name),
|
||||
DRIVER_NAME "_%d", dd->unit);
|
||||
err_info = "general";
|
||||
me->type = IRQ_GENERAL;
|
||||
} else if (first_sdma <= i && i < last_sdma) {
|
||||
idx = i - first_sdma;
|
||||
sde = &dd->per_sdma[idx];
|
||||
handler = sdma_interrupt;
|
||||
arg = sde;
|
||||
snprintf(name, sizeof(name),
|
||||
DRIVER_NAME "_%d sdma%d", dd->unit, idx);
|
||||
err_info = "sdma";
|
||||
remap_sdma_interrupts(dd, idx, i);
|
||||
me->type = IRQ_SDMA;
|
||||
} else if (first_rx <= i && i < last_rx) {
|
||||
idx = i - first_rx;
|
||||
rcd = hfi1_rcd_get_by_index_safe(dd, idx);
|
||||
if (rcd) {
|
||||
/*
|
||||
* Set the interrupt register and mask for this
|
||||
* context's interrupt.
|
||||
*/
|
||||
rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
|
||||
rcd->imask = ((u64)1) <<
|
||||
((IS_RCVAVAIL_START + idx) % 64);
|
||||
handler = receive_context_interrupt;
|
||||
thread = receive_context_thread;
|
||||
arg = rcd;
|
||||
snprintf(name, sizeof(name),
|
||||
DRIVER_NAME "_%d kctxt%d",
|
||||
dd->unit, idx);
|
||||
err_info = "receive context";
|
||||
remap_intr(dd, IS_RCVAVAIL_START + idx, i);
|
||||
me->type = IRQ_RCVCTXT;
|
||||
rcd->msix_intr = i;
|
||||
hfi1_rcd_put(rcd);
|
||||
}
|
||||
} else {
|
||||
/* not in our expected range - complain, then
|
||||
* ignore it
|
||||
*/
|
||||
dd_dev_err(dd,
|
||||
"Unexpected extra MSI-X interrupt %d\n", i);
|
||||
continue;
|
||||
}
|
||||
/* no argument, no interrupt */
|
||||
if (!arg)
|
||||
continue;
|
||||
/* make sure the name is terminated */
|
||||
name[sizeof(name) - 1] = 0;
|
||||
me->irq = pci_irq_vector(dd->pcidev, i);
|
||||
ret = pci_request_irq(dd->pcidev, i, handler, thread, arg,
|
||||
name);
|
||||
if (ret) {
|
||||
dd_dev_err(dd,
|
||||
"unable to allocate %s interrupt, irq %d, index %d, err %d\n",
|
||||
err_info, me->irq, idx, ret);
|
||||
return ret;
|
||||
}
|
||||
/*
|
||||
* assign arg after pci_request_irq call, so it will be
|
||||
* cleaned up
|
||||
*/
|
||||
me->arg = arg;
|
||||
|
||||
ret = hfi1_get_irq_affinity(dd, me);
|
||||
if (ret)
|
||||
dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < dd->vnic.num_ctxt; i++) {
|
||||
struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
|
||||
struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
|
||||
|
||||
synchronize_irq(me->irq);
|
||||
}
|
||||
}
|
||||
|
||||
void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
|
||||
{
|
||||
struct hfi1_devdata *dd = rcd->dd;
|
||||
struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
|
||||
|
||||
if (!me->arg) /* => no irq, no affinity */
|
||||
return;
|
||||
|
||||
hfi1_put_irq_affinity(dd, me);
|
||||
pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
|
||||
|
||||
me->arg = NULL;
|
||||
}
|
||||
|
||||
void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
|
||||
{
|
||||
struct hfi1_devdata *dd = rcd->dd;
|
||||
struct hfi1_msix_entry *me;
|
||||
int idx = rcd->ctxt;
|
||||
void *arg = rcd;
|
||||
int ret;
|
||||
|
||||
rcd->msix_intr = dd->vnic.msix_idx++;
|
||||
me = &dd->msix_entries[rcd->msix_intr];
|
||||
|
||||
/*
|
||||
* Set the interrupt register and mask for this
|
||||
* context's interrupt.
|
||||
*/
|
||||
rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
|
||||
rcd->imask = ((u64)1) <<
|
||||
((IS_RCVAVAIL_START + idx) % 64);
|
||||
me->type = IRQ_RCVCTXT;
|
||||
me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
|
||||
remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
|
||||
|
||||
ret = pci_request_irq(dd->pcidev, rcd->msix_intr,
|
||||
receive_context_interrupt,
|
||||
receive_context_thread, arg,
|
||||
DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
|
||||
if (ret) {
|
||||
dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
|
||||
me->irq, idx, ret);
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* assign arg after pci_request_irq call, so it will be
|
||||
* cleaned up
|
||||
*/
|
||||
me->arg = arg;
|
||||
|
||||
ret = hfi1_get_irq_affinity(dd, me);
|
||||
if (ret) {
|
||||
dd_dev_err(dd,
|
||||
"unable to pin IRQ %d\n", ret);
|
||||
pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
|
||||
}
|
||||
remap_intr(dd, IS_SDMA_START + engine, msix_intr);
|
||||
remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr);
|
||||
remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the general handler to accept all interrupts, remap all
|
||||
* chip interrupts back to MSI-X 0.
|
||||
*/
|
||||
static void reset_interrupts(struct hfi1_devdata *dd)
|
||||
void reset_interrupts(struct hfi1_devdata *dd)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -13318,54 +13125,33 @@ static void reset_interrupts(struct hfi1_devdata *dd)
|
||||
write_csr(dd, CCE_INT_MAP + (8 * i), 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* set_up_interrupts() - Initialize the IRQ resources and state
|
||||
* @dd: valid devdata
|
||||
*
|
||||
*/
|
||||
static int set_up_interrupts(struct hfi1_devdata *dd)
|
||||
{
|
||||
u32 total;
|
||||
int ret, request;
|
||||
|
||||
/*
|
||||
* Interrupt count:
|
||||
* 1 general, "slow path" interrupt (includes the SDMA engines
|
||||
* slow source, SDMACleanupDone)
|
||||
* N interrupts - one per used SDMA engine
|
||||
* M interrupt - one per kernel receive context
|
||||
* V interrupt - one for each VNIC context
|
||||
*/
|
||||
total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
|
||||
|
||||
/* ask for MSI-X interrupts */
|
||||
request = request_msix(dd, total);
|
||||
if (request < 0) {
|
||||
ret = request;
|
||||
goto fail;
|
||||
} else {
|
||||
dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
|
||||
GFP_KERNEL);
|
||||
if (!dd->msix_entries) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
/* using MSI-X */
|
||||
dd->num_msix_entries = total;
|
||||
dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
|
||||
}
|
||||
int ret;
|
||||
|
||||
/* mask all interrupts */
|
||||
set_intr_state(dd, 0);
|
||||
set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
|
||||
|
||||
/* clear all pending interrupts */
|
||||
clear_all_interrupts(dd);
|
||||
|
||||
/* reset general handler mask, chip MSI-X mappings */
|
||||
reset_interrupts(dd);
|
||||
|
||||
ret = request_msix_irqs(dd);
|
||||
/* ask for MSI-X interrupts */
|
||||
ret = msix_initialize(dd);
|
||||
if (ret)
|
||||
goto fail;
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
ret = msix_request_irqs(dd);
|
||||
if (ret)
|
||||
msix_clean_up_interrupts(dd);
|
||||
|
||||
fail:
|
||||
hfi1_clean_up_interrupts(dd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -14918,20 +14704,16 @@ static int check_int_registers(struct hfi1_devdata *dd)
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate and initialize the device structure for the hfi.
|
||||
* hfi1_init_dd() - Initialize most of the dd structure.
|
||||
* @dev: the pci_dev for hfi1_ib device
|
||||
* @ent: pci_device_id struct for this dev
|
||||
*
|
||||
* Also allocates, initializes, and returns the devdata struct for this
|
||||
* device instance
|
||||
*
|
||||
* This is global, and is called directly at init to set up the
|
||||
* chip-specific function pointers for later use.
|
||||
*/
|
||||
struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
|
||||
const struct pci_device_id *ent)
|
||||
int hfi1_init_dd(struct hfi1_devdata *dd)
|
||||
{
|
||||
struct hfi1_devdata *dd;
|
||||
struct pci_dev *pdev = dd->pcidev;
|
||||
struct hfi1_pportdata *ppd;
|
||||
u64 reg;
|
||||
int i, ret;
|
||||
@ -14942,13 +14724,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
|
||||
"Functional simulator"
|
||||
};
|
||||
struct pci_dev *parent = pdev->bus->self;
|
||||
u32 sdma_engines;
|
||||
u32 sdma_engines = chip_sdma_engines(dd);
|
||||
|
||||
dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
|
||||
sizeof(struct hfi1_pportdata));
|
||||
if (IS_ERR(dd))
|
||||
goto bail;
|
||||
sdma_engines = chip_sdma_engines(dd);
|
||||
ppd = dd->pport;
|
||||
for (i = 0; i < dd->num_pports; i++, ppd++) {
|
||||
int vl;
|
||||
@ -15127,6 +14904,12 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
|
||||
if (ret)
|
||||
goto bail_cleanup;
|
||||
|
||||
/*
|
||||
* This should probably occur in hfi1_pcie_init(), but historically
|
||||
* occurs after the do_pcie_gen3_transition() code.
|
||||
*/
|
||||
tune_pcie_caps(dd);
|
||||
|
||||
/* start setting dd values and adjusting CSRs */
|
||||
init_early_variables(dd);
|
||||
|
||||
@ -15239,14 +15022,13 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
|
||||
free_cntrs(dd);
|
||||
bail_clear_intr:
|
||||
hfi1_comp_vectors_clean_up(dd);
|
||||
hfi1_clean_up_interrupts(dd);
|
||||
msix_clean_up_interrupts(dd);
|
||||
bail_cleanup:
|
||||
hfi1_pcie_ddcleanup(dd);
|
||||
bail_free:
|
||||
hfi1_free_devdata(dd);
|
||||
dd = ERR_PTR(ret);
|
||||
bail:
|
||||
return dd;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
|
||||
|
@ -52,9 +52,7 @@
|
||||
*/
|
||||
|
||||
/* sizes */
|
||||
#define CCE_NUM_MSIX_VECTORS 256
|
||||
#define CCE_NUM_INT_CSRS 12
|
||||
#define CCE_NUM_INT_MAP_CSRS 96
|
||||
#define BITS_PER_REGISTER (BITS_PER_BYTE * sizeof(u64))
|
||||
#define NUM_INTERRUPT_SOURCES 768
|
||||
#define RXE_NUM_CONTEXTS 160
|
||||
#define RXE_PER_CONTEXT_SIZE 0x1000 /* 4k */
|
||||
@ -161,34 +159,49 @@
|
||||
(CR_CREDIT_RETURN_DUE_TO_FORCE_MASK << \
|
||||
CR_CREDIT_RETURN_DUE_TO_FORCE_SHIFT)
|
||||
|
||||
/* interrupt source numbers */
|
||||
#define IS_GENERAL_ERR_START 0
|
||||
#define IS_SDMAENG_ERR_START 16
|
||||
#define IS_SENDCTXT_ERR_START 32
|
||||
#define IS_SDMA_START 192 /* includes SDmaProgress,SDmaIdle */
|
||||
/* Specific IRQ sources */
|
||||
#define CCE_ERR_INT 0
|
||||
#define RXE_ERR_INT 1
|
||||
#define MISC_ERR_INT 2
|
||||
#define PIO_ERR_INT 4
|
||||
#define SDMA_ERR_INT 5
|
||||
#define EGRESS_ERR_INT 6
|
||||
#define TXE_ERR_INT 7
|
||||
#define PBC_INT 240
|
||||
#define GPIO_ASSERT_INT 241
|
||||
#define QSFP1_INT 242
|
||||
#define QSFP2_INT 243
|
||||
#define TCRIT_INT 244
|
||||
|
||||
/* interrupt source ranges */
|
||||
#define IS_FIRST_SOURCE CCE_ERR_INT
|
||||
#define IS_GENERAL_ERR_START 0
|
||||
#define IS_SDMAENG_ERR_START 16
|
||||
#define IS_SENDCTXT_ERR_START 32
|
||||
#define IS_SDMA_START 192
|
||||
#define IS_SDMA_PROGRESS_START 208
|
||||
#define IS_SDMA_IDLE_START 224
|
||||
#define IS_VARIOUS_START 240
|
||||
#define IS_DC_START 248
|
||||
#define IS_RCVAVAIL_START 256
|
||||
#define IS_RCVURGENT_START 416
|
||||
#define IS_SENDCREDIT_START 576
|
||||
#define IS_RESERVED_START 736
|
||||
#define IS_MAX_SOURCES 768
|
||||
#define IS_LAST_SOURCE 767
|
||||
|
||||
/* derived interrupt source values */
|
||||
#define IS_GENERAL_ERR_END IS_SDMAENG_ERR_START
|
||||
#define IS_SDMAENG_ERR_END IS_SENDCTXT_ERR_START
|
||||
#define IS_SENDCTXT_ERR_END IS_SDMA_START
|
||||
#define IS_SDMA_END IS_VARIOUS_START
|
||||
#define IS_VARIOUS_END IS_DC_START
|
||||
#define IS_DC_END IS_RCVAVAIL_START
|
||||
#define IS_RCVAVAIL_END IS_RCVURGENT_START
|
||||
#define IS_RCVURGENT_END IS_SENDCREDIT_START
|
||||
#define IS_SENDCREDIT_END IS_RESERVED_START
|
||||
#define IS_RESERVED_END IS_MAX_SOURCES
|
||||
|
||||
/* absolute interrupt numbers for QSFP1Int and QSFP2Int */
|
||||
#define QSFP1_INT 242
|
||||
#define QSFP2_INT 243
|
||||
#define IS_GENERAL_ERR_END 7
|
||||
#define IS_SDMAENG_ERR_END 31
|
||||
#define IS_SENDCTXT_ERR_END 191
|
||||
#define IS_SDMA_END 207
|
||||
#define IS_SDMA_PROGRESS_END 223
|
||||
#define IS_SDMA_IDLE_END 239
|
||||
#define IS_VARIOUS_END 244
|
||||
#define IS_DC_END 255
|
||||
#define IS_RCVAVAIL_END 415
|
||||
#define IS_RCVURGENT_END 575
|
||||
#define IS_SENDCREDIT_END 735
|
||||
#define IS_RESERVED_END IS_LAST_SOURCE
|
||||
|
||||
/* DCC_CFG_PORT_CONFIG logical link states */
|
||||
#define LSTATE_DOWN 0x1
|
||||
@ -1416,6 +1429,18 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
|
||||
void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
|
||||
void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
|
||||
|
||||
irqreturn_t general_interrupt(int irq, void *data);
|
||||
irqreturn_t sdma_interrupt(int irq, void *data);
|
||||
irqreturn_t receive_context_interrupt(int irq, void *data);
|
||||
irqreturn_t receive_context_thread(int irq, void *data);
|
||||
|
||||
int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
|
||||
void init_qsfp_int(struct hfi1_devdata *dd);
|
||||
void clear_all_interrupts(struct hfi1_devdata *dd);
|
||||
void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
|
||||
void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
|
||||
void reset_interrupts(struct hfi1_devdata *dd);
|
||||
|
||||
/*
|
||||
* Interrupt source table.
|
||||
*
|
||||
|
@ -878,6 +878,10 @@
|
||||
#define SEND_CTRL (TXE + 0x000000000000)
|
||||
#define SEND_CTRL_CM_RESET_SMASK 0x4ull
|
||||
#define SEND_CTRL_SEND_ENABLE_SMASK 0x1ull
|
||||
#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
|
||||
#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xFFull
|
||||
#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
|
||||
<< SEND_CTRL_UNSUPPORTED_VL_SHIFT)
|
||||
#define SEND_CTRL_VL_ARBITER_ENABLE_SMASK 0x2ull
|
||||
#define SEND_CTXT_CHECK_ENABLE (TXE + 0x000000100080)
|
||||
#define SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK 0x80ull
|
||||
|
@ -681,7 +681,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
|
||||
HFI1_RCVCTRL_TAILUPD_DIS |
|
||||
HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
|
||||
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
|
||||
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
|
||||
HFI1_RCVCTRL_NO_EGR_DROP_DIS |
|
||||
HFI1_RCVCTRL_URGENT_DIS, uctxt);
|
||||
/* Clear the context's J_KEY */
|
||||
hfi1_clear_ctxt_jkey(dd, uctxt);
|
||||
/*
|
||||
@ -1096,6 +1097,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
|
||||
hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);
|
||||
|
||||
rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
|
||||
rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB;
|
||||
if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
|
||||
rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
|
||||
/*
|
||||
|
@ -80,6 +80,7 @@
|
||||
#include "qsfp.h"
|
||||
#include "platform.h"
|
||||
#include "affinity.h"
|
||||
#include "msix.h"
|
||||
|
||||
/* bumped 1 from s/w major version of TrueScale */
|
||||
#define HFI1_CHIP_VERS_MAJ 3U
|
||||
@ -620,6 +621,8 @@ struct rvt_sge_state;
|
||||
#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000
|
||||
#define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000
|
||||
#define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000
|
||||
#define HFI1_RCVCTRL_URGENT_ENB 0x40000
|
||||
#define HFI1_RCVCTRL_URGENT_DIS 0x80000
|
||||
|
||||
/* partition enforcement flags */
|
||||
#define HFI1_PART_ENFORCE_IN 0x1
|
||||
@ -667,6 +670,14 @@ struct hfi1_msix_entry {
|
||||
struct irq_affinity_notify notify;
|
||||
};
|
||||
|
||||
struct hfi1_msix_info {
|
||||
/* lock to synchronize in_use_msix access */
|
||||
spinlock_t msix_lock;
|
||||
DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS);
|
||||
struct hfi1_msix_entry *msix_entries;
|
||||
u16 max_requested;
|
||||
};
|
||||
|
||||
/* per-SL CCA information */
|
||||
struct cca_timer {
|
||||
struct hrtimer hrtimer;
|
||||
@ -992,7 +1003,6 @@ struct hfi1_vnic_data {
|
||||
struct idr vesw_idr;
|
||||
u8 rmt_start;
|
||||
u8 num_ctxt;
|
||||
u32 msix_idx;
|
||||
};
|
||||
|
||||
struct hfi1_vnic_vport_info;
|
||||
@ -1205,11 +1215,6 @@ struct hfi1_devdata {
|
||||
|
||||
struct diag_client *diag_client;
|
||||
|
||||
/* MSI-X information */
|
||||
struct hfi1_msix_entry *msix_entries;
|
||||
u32 num_msix_entries;
|
||||
u32 first_dyn_msix_idx;
|
||||
|
||||
/* general interrupt: mask of handled interrupts */
|
||||
u64 gi_mask[CCE_NUM_INT_CSRS];
|
||||
|
||||
@ -1223,6 +1228,9 @@ struct hfi1_devdata {
|
||||
*/
|
||||
struct timer_list synth_stats_timer;
|
||||
|
||||
/* MSI-X information */
|
||||
struct hfi1_msix_info msix_info;
|
||||
|
||||
/*
|
||||
* device counters
|
||||
*/
|
||||
@ -1349,6 +1357,8 @@ struct hfi1_devdata {
|
||||
|
||||
/* vnic data */
|
||||
struct hfi1_vnic_data vnic;
|
||||
/* Lock to protect IRQ SRC register access */
|
||||
spinlock_t irq_src_lock;
|
||||
};
|
||||
|
||||
static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
|
||||
@ -1431,9 +1441,6 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
|
||||
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
|
||||
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
|
||||
void set_all_slowpath(struct hfi1_devdata *dd);
|
||||
void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
|
||||
void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
|
||||
void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
|
||||
|
||||
extern const struct pci_device_id hfi1_pci_tbl[];
|
||||
void hfi1_make_ud_req_9B(struct rvt_qp *qp,
|
||||
@ -1887,10 +1894,8 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
|
||||
#define HFI1_CTXT_WAITING_URG 4
|
||||
|
||||
/* free up any allocated data at closes */
|
||||
struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
|
||||
const struct pci_device_id *ent);
|
||||
int hfi1_init_dd(struct hfi1_devdata *dd);
|
||||
void hfi1_free_devdata(struct hfi1_devdata *dd);
|
||||
struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
|
||||
|
||||
/* LED beaconing functions */
|
||||
void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
|
||||
@ -1963,6 +1968,7 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
|
||||
*/
|
||||
|
||||
extern const char ib_hfi1_version[];
|
||||
extern const struct attribute_group ib_hfi1_attr_group;
|
||||
|
||||
int hfi1_device_create(struct hfi1_devdata *dd);
|
||||
void hfi1_device_remove(struct hfi1_devdata *dd);
|
||||
@ -1974,16 +1980,15 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
|
||||
/* Hook for sysfs read of QSFP */
|
||||
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
|
||||
|
||||
int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent);
|
||||
void hfi1_clean_up_interrupts(struct hfi1_devdata *dd);
|
||||
int hfi1_pcie_init(struct hfi1_devdata *dd);
|
||||
void hfi1_pcie_cleanup(struct pci_dev *pdev);
|
||||
int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
|
||||
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
|
||||
int pcie_speeds(struct hfi1_devdata *dd);
|
||||
int request_msix(struct hfi1_devdata *dd, u32 msireq);
|
||||
int restore_pci_variables(struct hfi1_devdata *dd);
|
||||
int save_pci_variables(struct hfi1_devdata *dd);
|
||||
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
|
||||
void tune_pcie_caps(struct hfi1_devdata *dd);
|
||||
int parse_platform_config(struct hfi1_devdata *dd);
|
||||
int get_platform_config_field(struct hfi1_devdata *dd,
|
||||
enum platform_config_table_type_encoding
|
||||
@ -2124,19 +2129,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
|
||||
return base_sdma_integrity;
|
||||
}
|
||||
|
||||
/*
|
||||
* hfi1_early_err is used (only!) to print early errors before devdata is
|
||||
* allocated, or when dd->pcidev may not be valid, and at the tail end of
|
||||
* cleanup when devdata may have been freed, etc. hfi1_dev_porterr is
|
||||
* the same as dd_dev_err, but is used when the message really needs
|
||||
* the IB port# to be definitive as to what's happening..
|
||||
*/
|
||||
#define hfi1_early_err(dev, fmt, ...) \
|
||||
dev_err(dev, fmt, ##__VA_ARGS__)
|
||||
|
||||
#define hfi1_early_info(dev, fmt, ...) \
|
||||
dev_info(dev, fmt, ##__VA_ARGS__)
|
||||
|
||||
#define dd_dev_emerg(dd, fmt, ...) \
|
||||
dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
|
||||
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
|
||||
|
@ -83,6 +83,8 @@
|
||||
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
|
||||
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
|
||||
|
||||
#define NUM_IB_PORTS 1
|
||||
|
||||
/*
|
||||
* Number of user receive contexts we are configured to use (to allow for more
|
||||
* pio buffers per ctxt, etc.) Zero means use one user context per CPU.
|
||||
@ -654,9 +656,8 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
|
||||
ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
|
||||
|
||||
if (loopback) {
|
||||
hfi1_early_err(&pdev->dev,
|
||||
"Faking data partition 0x8001 in idx %u\n",
|
||||
!default_pkey_idx);
|
||||
dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n",
|
||||
!default_pkey_idx);
|
||||
ppd->pkeys[!default_pkey_idx] = 0x8001;
|
||||
}
|
||||
|
||||
@ -702,9 +703,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
|
||||
return;
|
||||
|
||||
bail:
|
||||
|
||||
hfi1_early_err(&pdev->dev,
|
||||
"Congestion Control Agent disabled for port %d\n", port);
|
||||
dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -832,6 +831,23 @@ static int create_workqueues(struct hfi1_devdata *dd)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/**
|
||||
* enable_general_intr() - Enable the IRQs that will be handled by the
|
||||
* general interrupt handler.
|
||||
* @dd: valid devdata
|
||||
*
|
||||
*/
|
||||
static void enable_general_intr(struct hfi1_devdata *dd)
|
||||
{
|
||||
set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
|
||||
set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
|
||||
set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
|
||||
set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
|
||||
set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
|
||||
set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
|
||||
set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* hfi1_init - do the actual initialization sequence on the chip
|
||||
* @dd: the hfi1_ib device
|
||||
@ -916,6 +932,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
|
||||
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
|
||||
ret = lastfail;
|
||||
}
|
||||
/* enable IRQ */
|
||||
hfi1_rcd_put(rcd);
|
||||
}
|
||||
|
||||
@ -954,7 +971,8 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
|
||||
HFI1_STATUS_INITTED;
|
||||
if (!ret) {
|
||||
/* enable all interrupts from the chip */
|
||||
set_intr_state(dd, 1);
|
||||
enable_general_intr(dd);
|
||||
init_qsfp_int(dd);
|
||||
|
||||
/* chip is OK for user apps; mark it as initialized */
|
||||
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
|
||||
@ -1051,9 +1069,9 @@ static void shutdown_device(struct hfi1_devdata *dd)
|
||||
}
|
||||
dd->flags &= ~HFI1_INITTED;
|
||||
|
||||
/* mask and clean up interrupts, but not errors */
|
||||
set_intr_state(dd, 0);
|
||||
hfi1_clean_up_interrupts(dd);
|
||||
/* mask and clean up interrupts */
|
||||
set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
|
||||
msix_clean_up_interrupts(dd);
|
||||
|
||||
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
|
||||
ppd = dd->pport + pidx;
|
||||
@ -1246,15 +1264,19 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
|
||||
kobject_put(&dd->kobj);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate our primary per-unit data structure. Must be done via verbs
|
||||
* allocator, because the verbs cleanup process both does cleanup and
|
||||
* free of the data structure.
|
||||
/**
|
||||
* hfi1_alloc_devdata - Allocate our primary per-unit data structure.
|
||||
* @pdev: Valid PCI device
|
||||
* @extra: How many bytes to alloc past the default
|
||||
*
|
||||
* Must be done via verbs allocator, because the verbs cleanup process
|
||||
* both does cleanup and free of the data structure.
|
||||
* "extra" is for chip-specific data.
|
||||
*
|
||||
* Use the idr mechanism to get a unit number for this unit.
|
||||
*/
|
||||
struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
|
||||
static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
|
||||
size_t extra)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct hfi1_devdata *dd;
|
||||
@ -1287,8 +1309,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
|
||||
idr_preload_end();
|
||||
|
||||
if (ret < 0) {
|
||||
hfi1_early_err(&pdev->dev,
|
||||
"Could not allocate unit ID: error %d\n", -ret);
|
||||
dev_err(&pdev->dev,
|
||||
"Could not allocate unit ID: error %d\n", -ret);
|
||||
goto bail;
|
||||
}
|
||||
rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
|
||||
@ -1309,6 +1331,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
|
||||
spin_lock_init(&dd->pio_map_lock);
|
||||
mutex_init(&dd->dc8051_lock);
|
||||
init_waitqueue_head(&dd->event_queue);
|
||||
spin_lock_init(&dd->irq_src_lock);
|
||||
|
||||
dd->int_counter = alloc_percpu(u64);
|
||||
if (!dd->int_counter) {
|
||||
@ -1481,9 +1504,6 @@ static int __init hfi1_mod_init(void)
|
||||
idr_init(&hfi1_unit_table);
|
||||
|
||||
hfi1_dbg_init();
|
||||
ret = hfi1_wss_init();
|
||||
if (ret < 0)
|
||||
goto bail_wss;
|
||||
ret = pci_register_driver(&hfi1_pci_driver);
|
||||
if (ret < 0) {
|
||||
pr_err("Unable to register driver: error %d\n", -ret);
|
||||
@ -1492,8 +1512,6 @@ static int __init hfi1_mod_init(void)
|
||||
goto bail; /* all OK */
|
||||
|
||||
bail_dev:
|
||||
hfi1_wss_exit();
|
||||
bail_wss:
|
||||
hfi1_dbg_exit();
|
||||
idr_destroy(&hfi1_unit_table);
|
||||
dev_cleanup();
|
||||
@ -1510,7 +1528,6 @@ static void __exit hfi1_mod_cleanup(void)
|
||||
{
|
||||
pci_unregister_driver(&hfi1_pci_driver);
|
||||
node_affinity_destroy_all();
|
||||
hfi1_wss_exit();
|
||||
hfi1_dbg_exit();
|
||||
|
||||
idr_destroy(&hfi1_unit_table);
|
||||
@ -1604,23 +1621,23 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
|
||||
hfi1_free_devdata(dd);
|
||||
}
|
||||
|
||||
static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
|
||||
static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
|
||||
{
|
||||
if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
|
||||
hfi1_early_err(dev, "Receive header queue count too small\n");
|
||||
dd_dev_err(dd, "Receive header queue count too small\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
|
||||
hfi1_early_err(dev,
|
||||
"Receive header queue count cannot be greater than %u\n",
|
||||
HFI1_MAX_HDRQ_EGRBUF_CNT);
|
||||
dd_dev_err(dd,
|
||||
"Receive header queue count cannot be greater than %u\n",
|
||||
HFI1_MAX_HDRQ_EGRBUF_CNT);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (thecnt % HDRQ_INCREMENT) {
|
||||
hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
|
||||
thecnt, HDRQ_INCREMENT);
|
||||
dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
|
||||
thecnt, HDRQ_INCREMENT);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -1639,22 +1656,29 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
/* Validate dev ids */
|
||||
if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
|
||||
ent->device == PCI_DEVICE_ID_INTEL1)) {
|
||||
hfi1_early_err(&pdev->dev,
|
||||
"Failing on unknown Intel deviceid 0x%x\n",
|
||||
ent->device);
|
||||
dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
|
||||
ent->device);
|
||||
ret = -ENODEV;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* Allocate the dd so we can get to work */
|
||||
dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
|
||||
sizeof(struct hfi1_pportdata));
|
||||
if (IS_ERR(dd)) {
|
||||
ret = PTR_ERR(dd);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* Validate some global module parameters */
|
||||
ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
|
||||
ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
|
||||
if (ret)
|
||||
goto bail;
|
||||
|
||||
/* use the encoding function as a sanitization check */
|
||||
if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
|
||||
hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
|
||||
hfi1_hdrq_entsize);
|
||||
dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
|
||||
hfi1_hdrq_entsize);
|
||||
ret = -EINVAL;
|
||||
goto bail;
|
||||
}
|
||||
@ -1676,10 +1700,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
clamp_val(eager_buffer_size,
|
||||
MIN_EAGER_BUFFER * 8,
|
||||
MAX_EAGER_BUFFER_TOTAL);
|
||||
hfi1_early_info(&pdev->dev, "Eager buffer size %u\n",
|
||||
eager_buffer_size);
|
||||
dd_dev_info(dd, "Eager buffer size %u\n",
|
||||
eager_buffer_size);
|
||||
} else {
|
||||
hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n");
|
||||
dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
|
||||
ret = -EINVAL;
|
||||
goto bail;
|
||||
}
|
||||
@ -1687,7 +1711,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
/* restrict value of hfi1_rcvarr_split */
|
||||
hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
|
||||
|
||||
ret = hfi1_pcie_init(pdev, ent);
|
||||
ret = hfi1_pcie_init(dd);
|
||||
if (ret)
|
||||
goto bail;
|
||||
|
||||
@ -1695,12 +1719,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
* Do device-specific initialization, function table setup, dd
|
||||
* allocation, etc.
|
||||
*/
|
||||
dd = hfi1_init_dd(pdev, ent);
|
||||
|
||||
if (IS_ERR(dd)) {
|
||||
ret = PTR_ERR(dd);
|
||||
ret = hfi1_init_dd(dd);
|
||||
if (ret)
|
||||
goto clean_bail; /* error already printed */
|
||||
}
|
||||
|
||||
ret = create_workqueues(dd);
|
||||
if (ret)
|
||||
@ -1731,7 +1752,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
|
||||
|
||||
if (initfail || ret) {
|
||||
hfi1_clean_up_interrupts(dd);
|
||||
msix_clean_up_interrupts(dd);
|
||||
stop_timers(dd);
|
||||
flush_workqueue(ib_wq);
|
||||
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
|
||||
|
94
drivers/infiniband/hw/hfi1/iowait.c
Normal file
94
drivers/infiniband/hw/hfi1/iowait.c
Normal file
@ -0,0 +1,94 @@
|
||||
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
|
||||
/*
|
||||
* Copyright(c) 2018 Intel Corporation.
|
||||
*
|
||||
*/
|
||||
#include "iowait.h"
|
||||
#include "trace_iowait.h"
|
||||
|
||||
/**
 * iowait_set_flag() - set one bit in the iowait flags word
 * @wait: iowait structure whose flags are updated
 * @flag: bit number to set
 *
 * The trace hook is called before the bit is set.
 */
void iowait_set_flag(struct iowait *wait, u32 flag)
{
	unsigned long *flag_word = &wait->flags;

	trace_hfi1_iowait_set(wait, flag);
	set_bit(flag, flag_word);
}
|
||||
|
||||
/**
 * iowait_flag_set() - test one bit in the iowait flags word
 * @wait: iowait structure whose flags are examined
 * @flag: bit number to test
 *
 * Return: the result of test_bit() on @wait->flags for @flag
 */
bool iowait_flag_set(struct iowait *wait, u32 flag)
{
	const unsigned long *flag_word = &wait->flags;

	return test_bit(flag, flag_word);
}
|
||||
|
||||
/**
 * iowait_clear_flag() - clear one bit in the iowait flags word
 * @wait: iowait structure whose flags are updated
 * @flag: bit number to clear
 *
 * The trace hook is called before the bit is cleared.
 *
 * This is an ordinary external function, like iowait_set_flag() and
 * iowait_flag_set(): it is not marked inline so that the definition
 * agrees with its plain prototype in iowait.h.
 */
void iowait_clear_flag(struct iowait *wait, u32 flag)
{
	trace_hfi1_iowait_clear(wait, flag);
	clear_bit(flag, &wait->flags);
}
|
||||
|
||||
/**
|
||||
* iowait_init() - initialize wait structure
|
||||
* @wait: wait struct to initialize
|
||||
* @tx_limit: limit for overflow queuing
|
||||
* @func: restart function for workqueue
|
||||
* @sleep: sleep function for no space
|
||||
* @resume: wakeup function for no space
|
||||
*
|
||||
* This function initializes the iowait
|
||||
* structure embedded in the QP or PQ.
|
||||
*
|
||||
*/
|
||||
void iowait_init(struct iowait *wait, u32 tx_limit,
|
||||
void (*func)(struct work_struct *work),
|
||||
void (*tidfunc)(struct work_struct *work),
|
||||
int (*sleep)(struct sdma_engine *sde,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *tx,
|
||||
uint seq,
|
||||
bool pkts_sent),
|
||||
void (*wakeup)(struct iowait *wait, int reason),
|
||||
void (*sdma_drained)(struct iowait *wait))
|
||||
{
|
||||
int i;
|
||||
|
||||
wait->count = 0;
|
||||
INIT_LIST_HEAD(&wait->list);
|
||||
init_waitqueue_head(&wait->wait_dma);
|
||||
init_waitqueue_head(&wait->wait_pio);
|
||||
atomic_set(&wait->sdma_busy, 0);
|
||||
atomic_set(&wait->pio_busy, 0);
|
||||
wait->tx_limit = tx_limit;
|
||||
wait->sleep = sleep;
|
||||
wait->wakeup = wakeup;
|
||||
wait->sdma_drained = sdma_drained;
|
||||
wait->flags = 0;
|
||||
for (i = 0; i < IOWAIT_SES; i++) {
|
||||
wait->wait[i].iow = wait;
|
||||
INIT_LIST_HEAD(&wait->wait[i].tx_head);
|
||||
if (i == IOWAIT_IB_SE)
|
||||
INIT_WORK(&wait->wait[i].iowork, func);
|
||||
else
|
||||
INIT_WORK(&wait->wait[i].iowork, tidfunc);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_cancel_work - cancel all work in iowait
|
||||
* @w: the iowait struct
|
||||
*/
|
||||
void iowait_cancel_work(struct iowait *w)
|
||||
{
|
||||
cancel_work_sync(&iowait_get_ib_work(w)->iowork);
|
||||
cancel_work_sync(&iowait_get_tid_work(w)->iowork);
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_set_work_flag - set work flag based on leg
|
||||
* @w - the iowait work struct
|
||||
*/
|
||||
int iowait_set_work_flag(struct iowait_work *w)
|
||||
{
|
||||
if (w == &w->iow->wait[IOWAIT_IB_SE]) {
|
||||
iowait_set_flag(w->iow, IOWAIT_PENDING_IB);
|
||||
return IOWAIT_IB_SE;
|
||||
}
|
||||
iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
|
||||
return IOWAIT_TID_SE;
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
#ifndef _HFI1_IOWAIT_H
|
||||
#define _HFI1_IOWAIT_H
|
||||
/*
|
||||
* Copyright(c) 2015, 2016 Intel Corporation.
|
||||
* Copyright(c) 2015 - 2018 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
@ -49,6 +49,7 @@
|
||||
|
||||
#include <linux/list.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include "sdma_txreq.h"
|
||||
@ -59,16 +60,47 @@
|
||||
*/
|
||||
typedef void (*restart_t)(struct work_struct *work);
|
||||
|
||||
#define IOWAIT_PENDING_IB 0x0
|
||||
#define IOWAIT_PENDING_TID 0x1
|
||||
|
||||
/*
|
||||
* A QP can have multiple Send Engines (SEs).
|
||||
*
|
||||
* The current use case is for supporting a TID RDMA
|
||||
* packet build/xmit mechanism independent from verbs.
|
||||
*/
|
||||
#define IOWAIT_SES 2
|
||||
#define IOWAIT_IB_SE 0
|
||||
#define IOWAIT_TID_SE 1
|
||||
|
||||
struct sdma_txreq;
|
||||
struct sdma_engine;
|
||||
/**
|
||||
* struct iowait - linkage for delayed progress/waiting
|
||||
* @iowork: the work struct
|
||||
* @tx_head: list of prebuilt packets
|
||||
* @iow: the parent iowait structure
|
||||
*
|
||||
* This structure is the work item (process) specific
|
||||
* details associated with the each of the two SEs of the
|
||||
* QP.
|
||||
*
|
||||
* The workstruct and the queued TXs are unique to each
|
||||
* SE.
|
||||
*/
|
||||
struct iowait;
|
||||
struct iowait_work {
|
||||
struct work_struct iowork;
|
||||
struct list_head tx_head;
|
||||
struct iowait *iow;
|
||||
};
|
||||
|
||||
/**
|
||||
* @list: used to add/insert into QP/PQ wait lists
|
||||
* @lock: uses to record the list head lock
|
||||
* @tx_head: overflow list of sdma_txreq's
|
||||
* @sleep: no space callback
|
||||
* @wakeup: space callback wakeup
|
||||
* @sdma_drained: sdma count drained
|
||||
* @lock: lock protected head of wait queue
|
||||
* @iowork: workqueue overhead
|
||||
* @wait_dma: wait for sdma_busy == 0
|
||||
* @wait_pio: wait for pio_busy == 0
|
||||
@ -76,6 +108,8 @@ struct sdma_engine;
|
||||
* @count: total number of descriptors in tx_head'ed list
|
||||
* @tx_limit: limit for overflow queuing
|
||||
* @tx_count: number of tx entry's in tx_head'ed list
|
||||
* @flags: wait flags (one per QP)
|
||||
* @wait: SE array
|
||||
*
|
||||
* This is to be embedded in user's state structure
|
||||
* (QP or PQ).
|
||||
@ -98,13 +132,11 @@ struct sdma_engine;
|
||||
* Waiters explicitly know that, but the destroy
|
||||
* code that unwaits QPs does not.
|
||||
*/
|
||||
|
||||
struct iowait {
|
||||
struct list_head list;
|
||||
struct list_head tx_head;
|
||||
int (*sleep)(
|
||||
struct sdma_engine *sde,
|
||||
struct iowait *wait,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *tx,
|
||||
uint seq,
|
||||
bool pkts_sent
|
||||
@ -112,7 +144,6 @@ struct iowait {
|
||||
void (*wakeup)(struct iowait *wait, int reason);
|
||||
void (*sdma_drained)(struct iowait *wait);
|
||||
seqlock_t *lock;
|
||||
struct work_struct iowork;
|
||||
wait_queue_head_t wait_dma;
|
||||
wait_queue_head_t wait_pio;
|
||||
atomic_t sdma_busy;
|
||||
@ -121,63 +152,37 @@ struct iowait {
|
||||
u32 tx_limit;
|
||||
u32 tx_count;
|
||||
u8 starved_cnt;
|
||||
unsigned long flags;
|
||||
struct iowait_work wait[IOWAIT_SES];
|
||||
};
|
||||
|
||||
#define SDMA_AVAIL_REASON 0
|
||||
|
||||
/**
|
||||
* iowait_init() - initialize wait structure
|
||||
* @wait: wait struct to initialize
|
||||
* @tx_limit: limit for overflow queuing
|
||||
* @func: restart function for workqueue
|
||||
* @sleep: sleep function for no space
|
||||
* @resume: wakeup function for no space
|
||||
*
|
||||
* This function initializes the iowait
|
||||
* structure embedded in the QP or PQ.
|
||||
*
|
||||
*/
|
||||
void iowait_set_flag(struct iowait *wait, u32 flag);
|
||||
bool iowait_flag_set(struct iowait *wait, u32 flag);
|
||||
void iowait_clear_flag(struct iowait *wait, u32 flag);
|
||||
|
||||
static inline void iowait_init(
|
||||
struct iowait *wait,
|
||||
u32 tx_limit,
|
||||
void (*func)(struct work_struct *work),
|
||||
int (*sleep)(
|
||||
struct sdma_engine *sde,
|
||||
struct iowait *wait,
|
||||
struct sdma_txreq *tx,
|
||||
uint seq,
|
||||
bool pkts_sent),
|
||||
void (*wakeup)(struct iowait *wait, int reason),
|
||||
void (*sdma_drained)(struct iowait *wait))
|
||||
{
|
||||
wait->count = 0;
|
||||
wait->lock = NULL;
|
||||
INIT_LIST_HEAD(&wait->list);
|
||||
INIT_LIST_HEAD(&wait->tx_head);
|
||||
INIT_WORK(&wait->iowork, func);
|
||||
init_waitqueue_head(&wait->wait_dma);
|
||||
init_waitqueue_head(&wait->wait_pio);
|
||||
atomic_set(&wait->sdma_busy, 0);
|
||||
atomic_set(&wait->pio_busy, 0);
|
||||
wait->tx_limit = tx_limit;
|
||||
wait->sleep = sleep;
|
||||
wait->wakeup = wakeup;
|
||||
wait->sdma_drained = sdma_drained;
|
||||
}
|
||||
void iowait_init(struct iowait *wait, u32 tx_limit,
|
||||
void (*func)(struct work_struct *work),
|
||||
void (*tidfunc)(struct work_struct *work),
|
||||
int (*sleep)(struct sdma_engine *sde,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *tx,
|
||||
uint seq,
|
||||
bool pkts_sent),
|
||||
void (*wakeup)(struct iowait *wait, int reason),
|
||||
void (*sdma_drained)(struct iowait *wait));
|
||||
|
||||
/**
|
||||
* iowait_schedule() - initialize wait structure
|
||||
* iowait_schedule() - schedule the default send engine work
|
||||
* @wait: wait struct to schedule
|
||||
* @wq: workqueue for schedule
|
||||
* @cpu: cpu
|
||||
*/
|
||||
static inline void iowait_schedule(
|
||||
struct iowait *wait,
|
||||
struct workqueue_struct *wq,
|
||||
int cpu)
|
||||
static inline bool iowait_schedule(struct iowait *wait,
|
||||
struct workqueue_struct *wq, int cpu)
|
||||
{
|
||||
queue_work_on(cpu, wq, &wait->iowork);
|
||||
return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -228,6 +233,8 @@ static inline void iowait_sdma_add(struct iowait *wait, int count)
|
||||
*/
|
||||
static inline int iowait_sdma_dec(struct iowait *wait)
|
||||
{
|
||||
if (!wait)
|
||||
return 0;
|
||||
return atomic_dec_and_test(&wait->sdma_busy);
|
||||
}
|
||||
|
||||
@ -267,11 +274,13 @@ static inline void iowait_pio_inc(struct iowait *wait)
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_sdma_dec - note pio complete
|
||||
* iowait_pio_dec - note pio complete
|
||||
* @wait: iowait structure
|
||||
*/
|
||||
static inline int iowait_pio_dec(struct iowait *wait)
|
||||
{
|
||||
if (!wait)
|
||||
return 0;
|
||||
return atomic_dec_and_test(&wait->pio_busy);
|
||||
}
|
||||
|
||||
@ -293,9 +302,9 @@ static inline void iowait_drain_wakeup(struct iowait *wait)
|
||||
/**
|
||||
* iowait_get_txhead() - get packet off of iowait list
|
||||
*
|
||||
* @wait wait struture
|
||||
* @wait: iowait_work structure
|
||||
*/
|
||||
static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
|
||||
static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
|
||||
{
|
||||
struct sdma_txreq *tx = NULL;
|
||||
|
||||
@ -309,6 +318,28 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
|
||||
return tx;
|
||||
}
|
||||
|
||||
static inline u16 iowait_get_desc(struct iowait_work *w)
|
||||
{
|
||||
u16 num_desc = 0;
|
||||
struct sdma_txreq *tx = NULL;
|
||||
|
||||
if (!list_empty(&w->tx_head)) {
|
||||
tx = list_first_entry(&w->tx_head, struct sdma_txreq,
|
||||
list);
|
||||
num_desc = tx->num_desc;
|
||||
}
|
||||
return num_desc;
|
||||
}
|
||||
|
||||
static inline u32 iowait_get_all_desc(struct iowait *w)
|
||||
{
|
||||
u32 num_desc = 0;
|
||||
|
||||
num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]);
|
||||
num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]);
|
||||
return num_desc;
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_queue - Put the iowait on a wait queue
|
||||
* @pkts_sent: have some packets been sent before queuing?
|
||||
@ -372,12 +403,57 @@ static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_packet_queued() - determine if a packet is already built
|
||||
* @wait: the wait structure
|
||||
* iowait_packet_queued() - determine if a packet is queued
|
||||
* @wait: the iowait_work structure
|
||||
*/
|
||||
static inline bool iowait_packet_queued(struct iowait *wait)
|
||||
static inline bool iowait_packet_queued(struct iowait_work *wait)
|
||||
{
|
||||
return !list_empty(&wait->tx_head);
|
||||
}
|
||||
|
||||
/**
|
||||
* inc_wait_count - increment wait counts
|
||||
* @w: the log work struct
|
||||
* @n: the count
|
||||
*/
|
||||
static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n)
|
||||
{
|
||||
if (!w)
|
||||
return;
|
||||
w->iow->tx_count++;
|
||||
w->iow->count += n;
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_get_tid_work - return iowait_work for tid SE
|
||||
* @w: the iowait struct
|
||||
*/
|
||||
static inline struct iowait_work *iowait_get_tid_work(struct iowait *w)
|
||||
{
|
||||
return &w->wait[IOWAIT_TID_SE];
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_get_ib_work - return iowait_work for ib SE
|
||||
* @w: the iowait struct
|
||||
*/
|
||||
static inline struct iowait_work *iowait_get_ib_work(struct iowait *w)
|
||||
{
|
||||
return &w->wait[IOWAIT_IB_SE];
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_ioww_to_iow - return iowait given iowait_work
|
||||
* @w: the iowait_work struct
|
||||
*/
|
||||
static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w)
|
||||
{
|
||||
if (likely(w))
|
||||
return w->iow;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void iowait_cancel_work(struct iowait *w);
|
||||
int iowait_set_work_flag(struct iowait_work *w);
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright(c) 2015-2017 Intel Corporation.
|
||||
* Copyright(c) 2015-2018 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
@ -4836,7 +4836,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
|
||||
int ret;
|
||||
int pkey_idx;
|
||||
int local_mad = 0;
|
||||
u32 resp_len = 0;
|
||||
u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
|
||||
struct hfi1_ibport *ibp = to_iport(ibdev, port);
|
||||
|
||||
pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
|
||||
|
363
drivers/infiniband/hw/hfi1/msix.c
Normal file
363
drivers/infiniband/hw/hfi1/msix.c
Normal file
@ -0,0 +1,363 @@
|
||||
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
|
||||
/*
|
||||
* Copyright(c) 2018 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "hfi.h"
|
||||
#include "affinity.h"
|
||||
#include "sdma.h"
|
||||
|
||||
/**
|
||||
* msix_initialize() - Calculate, request and configure MSIx IRQs
|
||||
* @dd: valid hfi1 devdata
|
||||
*
|
||||
*/
|
||||
int msix_initialize(struct hfi1_devdata *dd)
|
||||
{
|
||||
u32 total;
|
||||
int ret;
|
||||
struct hfi1_msix_entry *entries;
|
||||
|
||||
/*
|
||||
* MSIx interrupt count:
|
||||
* one for the general, "slow path" interrupt
|
||||
* one per used SDMA engine
|
||||
* one per kernel receive context
|
||||
* one for each VNIC context
|
||||
* ...any new IRQs should be added here.
|
||||
*/
|
||||
total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
|
||||
|
||||
if (total >= CCE_NUM_MSIX_VECTORS)
|
||||
return -EINVAL;
|
||||
|
||||
ret = pci_alloc_irq_vectors(dd->pcidev, total, total, PCI_IRQ_MSIX);
|
||||
if (ret < 0) {
|
||||
dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
entries = kcalloc(total, sizeof(*dd->msix_info.msix_entries),
|
||||
GFP_KERNEL);
|
||||
if (!entries) {
|
||||
pci_free_irq_vectors(dd->pcidev);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dd->msix_info.msix_entries = entries;
|
||||
spin_lock_init(&dd->msix_info.msix_lock);
|
||||
bitmap_zero(dd->msix_info.in_use_msix, total);
|
||||
dd->msix_info.max_requested = total;
|
||||
dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* msix_request_irq() - Allocate a free MSIx IRQ
|
||||
* @dd: valid devdata
|
||||
* @arg: context information for the IRQ
|
||||
* @handler: IRQ handler
|
||||
* @thread: IRQ thread handler (could be NULL)
|
||||
* @idx: zero base idx if multiple devices are needed
|
||||
* @type: affinty IRQ type
|
||||
*
|
||||
* Allocated an MSIx vector if available, and then create the appropriate
|
||||
* meta data needed to keep track of the pci IRQ request.
|
||||
*
|
||||
* Return:
|
||||
* < 0 Error
|
||||
* >= 0 MSIx vector
|
||||
*
|
||||
*/
|
||||
static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
|
||||
irq_handler_t handler, irq_handler_t thread,
|
||||
u32 idx, enum irq_type type)
|
||||
{
|
||||
unsigned long nr;
|
||||
int irq;
|
||||
int ret;
|
||||
const char *err_info;
|
||||
char name[MAX_NAME_SIZE];
|
||||
struct hfi1_msix_entry *me;
|
||||
|
||||
/* Allocate an MSIx vector */
|
||||
spin_lock(&dd->msix_info.msix_lock);
|
||||
nr = find_first_zero_bit(dd->msix_info.in_use_msix,
|
||||
dd->msix_info.max_requested);
|
||||
if (nr < dd->msix_info.max_requested)
|
||||
__set_bit(nr, dd->msix_info.in_use_msix);
|
||||
spin_unlock(&dd->msix_info.msix_lock);
|
||||
|
||||
if (nr == dd->msix_info.max_requested)
|
||||
return -ENOSPC;
|
||||
|
||||
/* Specific verification and determine the name */
|
||||
switch (type) {
|
||||
case IRQ_GENERAL:
|
||||
/* general interrupt must be MSIx vector 0 */
|
||||
if (nr) {
|
||||
spin_lock(&dd->msix_info.msix_lock);
|
||||
__clear_bit(nr, dd->msix_info.in_use_msix);
|
||||
spin_unlock(&dd->msix_info.msix_lock);
|
||||
dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
|
||||
nr);
|
||||
return -EINVAL;
|
||||
}
|
||||
snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
|
||||
err_info = "general";
|
||||
break;
|
||||
case IRQ_SDMA:
|
||||
snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
|
||||
dd->unit, idx);
|
||||
err_info = "sdma";
|
||||
break;
|
||||
case IRQ_RCVCTXT:
|
||||
snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
|
||||
dd->unit, idx);
|
||||
err_info = "receive context";
|
||||
break;
|
||||
case IRQ_OTHER:
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
name[sizeof(name) - 1] = 0;
|
||||
|
||||
irq = pci_irq_vector(dd->pcidev, nr);
|
||||
ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
|
||||
if (ret) {
|
||||
dd_dev_err(dd,
|
||||
"%s: request for IRQ %d failed, MSIx %d, err %d\n",
|
||||
err_info, irq, idx, ret);
|
||||
spin_lock(&dd->msix_info.msix_lock);
|
||||
__clear_bit(nr, dd->msix_info.in_use_msix);
|
||||
spin_unlock(&dd->msix_info.msix_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* assign arg after pci_request_irq call, so it will be
|
||||
* cleaned up
|
||||
*/
|
||||
me = &dd->msix_info.msix_entries[nr];
|
||||
me->irq = irq;
|
||||
me->arg = arg;
|
||||
me->type = type;
|
||||
|
||||
/* This is a request, so a failure is not fatal */
|
||||
ret = hfi1_get_irq_affinity(dd, me);
|
||||
if (ret)
|
||||
dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
/**
|
||||
* msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
|
||||
* @rcd: valid rcd context
|
||||
*
|
||||
*/
|
||||
int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
|
||||
{
|
||||
int nr;
|
||||
|
||||
nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
|
||||
receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
|
||||
if (nr < 0)
|
||||
return nr;
|
||||
|
||||
/*
|
||||
* Set the interrupt register and mask for this
|
||||
* context's interrupt.
|
||||
*/
|
||||
rcd->ireg = (IS_RCVAVAIL_START + rcd->ctxt) / 64;
|
||||
rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + rcd->ctxt) % 64);
|
||||
rcd->msix_intr = nr;
|
||||
remap_intr(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt, nr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* msix_request_smda_ira() - Helper for getting SDMA IRQ resources
|
||||
* @sde: valid sdma engine
|
||||
*
|
||||
*/
|
||||
int msix_request_sdma_irq(struct sdma_engine *sde)
|
||||
{
|
||||
int nr;
|
||||
|
||||
nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
|
||||
sde->this_idx, IRQ_SDMA);
|
||||
if (nr < 0)
|
||||
return nr;
|
||||
sde->msix_intr = nr;
|
||||
remap_sdma_interrupts(sde->dd, sde->this_idx, nr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* enable_sdma_src() - Helper to enable SDMA IRQ srcs
|
||||
* @dd: valid devdata structure
|
||||
* @i: index of SDMA engine
|
||||
*/
|
||||
static void enable_sdma_srcs(struct hfi1_devdata *dd, int i)
|
||||
{
|
||||
set_intr_bits(dd, IS_SDMA_START + i, IS_SDMA_START + i, true);
|
||||
set_intr_bits(dd, IS_SDMA_PROGRESS_START + i,
|
||||
IS_SDMA_PROGRESS_START + i, true);
|
||||
set_intr_bits(dd, IS_SDMA_IDLE_START + i, IS_SDMA_IDLE_START + i, true);
|
||||
set_intr_bits(dd, IS_SDMAENG_ERR_START + i, IS_SDMAENG_ERR_START + i,
|
||||
true);
|
||||
}
|
||||
|
||||
/**
|
||||
* msix_request_irqs() - Allocate all MSIx IRQs
|
||||
* @dd: valid devdata structure
|
||||
*
|
||||
* Helper function to request the used MSIx IRQs.
|
||||
*
|
||||
*/
|
||||
int msix_request_irqs(struct hfi1_devdata *dd)
|
||||
{
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
for (i = 0; i < dd->num_sdma; i++) {
|
||||
struct sdma_engine *sde = &dd->per_sdma[i];
|
||||
|
||||
ret = msix_request_sdma_irq(sde);
|
||||
if (ret)
|
||||
return ret;
|
||||
enable_sdma_srcs(sde->dd, i);
|
||||
}
|
||||
|
||||
for (i = 0; i < dd->n_krcv_queues; i++) {
|
||||
struct hfi1_ctxtdata *rcd = hfi1_rcd_get_by_index_safe(dd, i);
|
||||
|
||||
if (rcd)
|
||||
ret = msix_request_rcd_irq(rcd);
|
||||
hfi1_rcd_put(rcd);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* msix_free_irq() - Free the specified MSIx resources and IRQ
|
||||
* @dd: valid devdata
|
||||
* @msix_intr: MSIx vector to free.
|
||||
*
|
||||
*/
|
||||
void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr)
|
||||
{
|
||||
struct hfi1_msix_entry *me;
|
||||
|
||||
if (msix_intr >= dd->msix_info.max_requested)
|
||||
return;
|
||||
|
||||
me = &dd->msix_info.msix_entries[msix_intr];
|
||||
|
||||
if (!me->arg) /* => no irq, no affinity */
|
||||
return;
|
||||
|
||||
hfi1_put_irq_affinity(dd, me);
|
||||
pci_free_irq(dd->pcidev, msix_intr, me->arg);
|
||||
|
||||
me->arg = NULL;
|
||||
|
||||
spin_lock(&dd->msix_info.msix_lock);
|
||||
__clear_bit(msix_intr, dd->msix_info.in_use_msix);
|
||||
spin_unlock(&dd->msix_info.msix_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* hfi1_clean_up_msix_interrupts() - Free all MSIx IRQ resources
|
||||
* @dd: valid device data data structure
|
||||
*
|
||||
* Free the MSIx and associated PCI resources, if they have been allocated.
|
||||
*/
|
||||
void msix_clean_up_interrupts(struct hfi1_devdata *dd)
|
||||
{
|
||||
int i;
|
||||
struct hfi1_msix_entry *me = dd->msix_info.msix_entries;
|
||||
|
||||
/* remove irqs - must happen before disabling/turning off */
|
||||
for (i = 0; i < dd->msix_info.max_requested; i++, me++)
|
||||
msix_free_irq(dd, i);
|
||||
|
||||
/* clean structures */
|
||||
kfree(dd->msix_info.msix_entries);
|
||||
dd->msix_info.msix_entries = NULL;
|
||||
dd->msix_info.max_requested = 0;
|
||||
|
||||
pci_free_irq_vectors(dd->pcidev);
|
||||
}
|
||||
|
||||
/**
|
||||
* msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize
|
||||
* @dd: valid devdata
|
||||
*/
|
||||
void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < dd->vnic.num_ctxt; i++) {
|
||||
struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
|
||||
struct hfi1_msix_entry *me;
|
||||
|
||||
me = &dd->msix_info.msix_entries[rcd->msix_intr];
|
||||
|
||||
synchronize_irq(me->irq);
|
||||
}
|
||||
}
|
64
drivers/infiniband/hw/hfi1/msix.h
Normal file
64
drivers/infiniband/hw/hfi1/msix.h
Normal file
@ -0,0 +1,64 @@
|
||||
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
|
||||
/*
|
||||
* Copyright(c) 2018 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
#ifndef _HFI1_MSIX_H
|
||||
#define _HFI1_MSIX_H
|
||||
|
||||
#include "hfi.h"
|
||||
|
||||
/* MSIx interface */
|
||||
int msix_initialize(struct hfi1_devdata *dd);
|
||||
int msix_request_irqs(struct hfi1_devdata *dd);
|
||||
void msix_clean_up_interrupts(struct hfi1_devdata *dd);
|
||||
int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
|
||||
int msix_request_sdma_irq(struct sdma_engine *sde);
|
||||
void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
|
||||
|
||||
/* VNIC interface */
|
||||
void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
|
||||
|
||||
#endif
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright(c) 2015 - 2017 Intel Corporation.
|
||||
* Copyright(c) 2015 - 2018 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
@ -60,20 +60,13 @@
|
||||
* This file contains PCIe utility routines.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Code to adjust PCIe capabilities.
|
||||
*/
|
||||
static void tune_pcie_caps(struct hfi1_devdata *);
|
||||
|
||||
/*
|
||||
* Do all the common PCIe setup and initialization.
|
||||
* devdata is not yet allocated, and is not allocated until after this
|
||||
* routine returns success. Therefore dd_dev_err() can't be used for error
|
||||
* printing.
|
||||
*/
|
||||
int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
int hfi1_pcie_init(struct hfi1_devdata *dd)
|
||||
{
|
||||
int ret;
|
||||
struct pci_dev *pdev = dd->pcidev;
|
||||
|
||||
ret = pci_enable_device(pdev);
|
||||
if (ret) {
|
||||
@ -89,15 +82,13 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
* about that, it appears. If the original BAR was retained
|
||||
* in the kernel data structures, this may be OK.
|
||||
*/
|
||||
hfi1_early_err(&pdev->dev, "pci enable failed: error %d\n",
|
||||
-ret);
|
||||
goto done;
|
||||
dd_dev_err(dd, "pci enable failed: error %d\n", -ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = pci_request_regions(pdev, DRIVER_NAME);
|
||||
if (ret) {
|
||||
hfi1_early_err(&pdev->dev,
|
||||
"pci_request_regions fails: err %d\n", -ret);
|
||||
dd_dev_err(dd, "pci_request_regions fails: err %d\n", -ret);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
@ -110,8 +101,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
*/
|
||||
ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
if (ret) {
|
||||
hfi1_early_err(&pdev->dev,
|
||||
"Unable to set DMA mask: %d\n", ret);
|
||||
dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret);
|
||||
goto bail;
|
||||
}
|
||||
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
@ -119,18 +109,16 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
}
|
||||
if (ret) {
|
||||
hfi1_early_err(&pdev->dev,
|
||||
"Unable to set DMA consistent mask: %d\n", ret);
|
||||
dd_dev_err(dd, "Unable to set DMA consistent mask: %d\n", ret);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
pci_set_master(pdev);
|
||||
(void)pci_enable_pcie_error_reporting(pdev);
|
||||
goto done;
|
||||
return 0;
|
||||
|
||||
bail:
|
||||
hfi1_pcie_cleanup(pdev);
|
||||
done:
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -206,7 +194,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
|
||||
dd_dev_err(dd, "WC mapping of send buffers failed\n");
|
||||
goto nomem;
|
||||
}
|
||||
dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE);
|
||||
dd_dev_info(dd, "WC piobase: %p for %x\n", dd->piobase, TXE_PIO_SIZE);
|
||||
|
||||
dd->physaddr = addr; /* used for io_remap, etc. */
|
||||
|
||||
@ -344,26 +332,6 @@ int pcie_speeds(struct hfi1_devdata *dd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns:
|
||||
* - actual number of interrupts allocated or
|
||||
* - error
|
||||
*/
|
||||
int request_msix(struct hfi1_devdata *dd, u32 msireq)
|
||||
{
|
||||
int nvec;
|
||||
|
||||
nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX);
|
||||
if (nvec < 0) {
|
||||
dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
|
||||
return nvec;
|
||||
}
|
||||
|
||||
tune_pcie_caps(dd);
|
||||
|
||||
return nvec;
|
||||
}
|
||||
|
||||
/* restore command and BARs after a reset has wiped them out */
|
||||
int restore_pci_variables(struct hfi1_devdata *dd)
|
||||
{
|
||||
@ -479,14 +447,19 @@ int save_pci_variables(struct hfi1_devdata *dd)
|
||||
* Check and optionally adjust them to maximize our throughput.
|
||||
*/
|
||||
static int hfi1_pcie_caps;
|
||||
module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO);
|
||||
module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444);
|
||||
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
|
||||
|
||||
uint aspm_mode = ASPM_MODE_DISABLED;
|
||||
module_param_named(aspm, aspm_mode, uint, S_IRUGO);
|
||||
module_param_named(aspm, aspm_mode, uint, 0444);
|
||||
MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
|
||||
|
||||
static void tune_pcie_caps(struct hfi1_devdata *dd)
|
||||
/**
|
||||
* tune_pcie_caps() - Code to adjust PCIe capabilities.
|
||||
* @dd: Valid device data structure
|
||||
*
|
||||
*/
|
||||
void tune_pcie_caps(struct hfi1_devdata *dd)
|
||||
{
|
||||
struct pci_dev *parent;
|
||||
u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
|
||||
@ -1028,6 +1001,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
|
||||
const u8 (*ctle_tunings)[4];
|
||||
uint static_ctle_mode;
|
||||
int return_error = 0;
|
||||
u32 target_width;
|
||||
|
||||
/* PCIe Gen3 is for the ASIC only */
|
||||
if (dd->icode != ICODE_RTL_SILICON)
|
||||
@ -1067,6 +1041,9 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Previous Gen1/Gen2 bus width */
|
||||
target_width = dd->lbus_width;
|
||||
|
||||
/*
|
||||
* Do the Gen3 transition. Steps are those of the PCIe Gen3
|
||||
* recipe.
|
||||
@ -1435,11 +1412,12 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
|
||||
dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
|
||||
dd->lbus_info);
|
||||
|
||||
if (dd->lbus_speed != target_speed) { /* not target */
|
||||
if (dd->lbus_speed != target_speed ||
|
||||
dd->lbus_width < target_width) { /* not target */
|
||||
/* maybe retry */
|
||||
do_retry = retry_count < pcie_retry;
|
||||
dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n",
|
||||
pcie_target, do_retry ? ", retrying" : "");
|
||||
dd_dev_err(dd, "PCIe link speed or width did not match target%s\n",
|
||||
do_retry ? ", retrying" : "");
|
||||
retry_count++;
|
||||
if (do_retry) {
|
||||
msleep(100); /* allow time to settle */
|
||||
|
@ -71,14 +71,6 @@ void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl)
|
||||
}
|
||||
}
|
||||
|
||||
/* defined in header release 48 and higher */
|
||||
#ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT
|
||||
#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
|
||||
#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull
|
||||
#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
|
||||
<< SEND_CTRL_UNSUPPORTED_VL_SHIFT)
|
||||
#endif
|
||||
|
||||
/* global control of PIO send */
|
||||
void pio_send_control(struct hfi1_devdata *dd, int op)
|
||||
{
|
||||
|
@ -66,7 +66,7 @@ MODULE_PARM_DESC(qp_table_size, "QP table size");
|
||||
static void flush_tx_list(struct rvt_qp *qp);
|
||||
static int iowait_sleep(
|
||||
struct sdma_engine *sde,
|
||||
struct iowait *wait,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *stx,
|
||||
unsigned int seq,
|
||||
bool pkts_sent);
|
||||
@ -134,15 +134,13 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
|
||||
|
||||
};
|
||||
|
||||
static void flush_tx_list(struct rvt_qp *qp)
|
||||
static void flush_list_head(struct list_head *l)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
while (!list_empty(&priv->s_iowait.tx_head)) {
|
||||
while (!list_empty(l)) {
|
||||
struct sdma_txreq *tx;
|
||||
|
||||
tx = list_first_entry(
|
||||
&priv->s_iowait.tx_head,
|
||||
l,
|
||||
struct sdma_txreq,
|
||||
list);
|
||||
list_del_init(&tx->list);
|
||||
@ -151,6 +149,14 @@ static void flush_tx_list(struct rvt_qp *qp)
|
||||
}
|
||||
}
|
||||
|
||||
static void flush_tx_list(struct rvt_qp *qp)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
|
||||
flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
|
||||
}
|
||||
|
||||
static void flush_iowait(struct rvt_qp *qp)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
@ -282,33 +288,46 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
|
||||
}
|
||||
|
||||
/**
|
||||
* hfi1_check_send_wqe - validate wqe
|
||||
* hfi1_setup_wqe - set up the wqe
|
||||
* @qp - The qp
|
||||
* @wqe - The built wqe
|
||||
* @call_send - Determine if the send should be posted or scheduled.
|
||||
*
|
||||
* validate wqe. This is called
|
||||
* prior to inserting the wqe into
|
||||
* the ring but after the wqe has been
|
||||
* setup.
|
||||
* Perform setup of the wqe. This is called
|
||||
* prior to inserting the wqe into the ring but after
|
||||
* the wqe has been setup by RDMAVT. This function
|
||||
* allows the driver the opportunity to perform
|
||||
* validation and additional setup of the wqe.
|
||||
*
|
||||
* Returns 0 on success, -EINVAL on failure
|
||||
*
|
||||
*/
|
||||
int hfi1_check_send_wqe(struct rvt_qp *qp,
|
||||
struct rvt_swqe *wqe)
|
||||
int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
|
||||
{
|
||||
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
|
||||
struct rvt_ah *ah;
|
||||
struct hfi1_pportdata *ppd;
|
||||
struct hfi1_devdata *dd;
|
||||
|
||||
switch (qp->ibqp.qp_type) {
|
||||
case IB_QPT_RC:
|
||||
case IB_QPT_UC:
|
||||
if (wqe->length > 0x80000000U)
|
||||
return -EINVAL;
|
||||
if (wqe->length > qp->pmtu)
|
||||
*call_send = false;
|
||||
break;
|
||||
case IB_QPT_SMI:
|
||||
ah = ibah_to_rvtah(wqe->ud_wr.ah);
|
||||
if (wqe->length > (1 << ah->log_pmtu))
|
||||
/*
|
||||
* SM packets should exclusively use VL15 and their SL is
|
||||
* ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
|
||||
* is created, SL is 0 in most cases and as a result some
|
||||
* fields (vl and pmtu) in ah may not be set correctly,
|
||||
* depending on the SL2SC and SC2VL tables at the time.
|
||||
*/
|
||||
ppd = ppd_from_ibp(ibp);
|
||||
dd = dd_from_ppd(ppd);
|
||||
if (wqe->length > dd->vld[15].mtu)
|
||||
return -EINVAL;
|
||||
break;
|
||||
case IB_QPT_GSI:
|
||||
@ -321,7 +340,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return wqe->length <= piothreshold;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -333,7 +352,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
|
||||
* It is only used in the post send, which doesn't hold
|
||||
* the s_lock.
|
||||
*/
|
||||
void _hfi1_schedule_send(struct rvt_qp *qp)
|
||||
bool _hfi1_schedule_send(struct rvt_qp *qp)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
struct hfi1_ibport *ibp =
|
||||
@ -341,10 +360,10 @@ void _hfi1_schedule_send(struct rvt_qp *qp)
|
||||
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
|
||||
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
|
||||
|
||||
iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
|
||||
priv->s_sde ?
|
||||
priv->s_sde->cpu :
|
||||
cpumask_first(cpumask_of_node(dd->node)));
|
||||
return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
|
||||
priv->s_sde ?
|
||||
priv->s_sde->cpu :
|
||||
cpumask_first(cpumask_of_node(dd->node)));
|
||||
}
|
||||
|
||||
static void qp_pio_drain(struct rvt_qp *qp)
|
||||
@ -372,12 +391,32 @@ static void qp_pio_drain(struct rvt_qp *qp)
|
||||
*
|
||||
* This schedules qp progress and caller should hold
|
||||
* the s_lock.
|
||||
* @return true if the first leg is scheduled;
|
||||
* false if the first leg is not scheduled.
|
||||
*/
|
||||
void hfi1_schedule_send(struct rvt_qp *qp)
|
||||
bool hfi1_schedule_send(struct rvt_qp *qp)
|
||||
{
|
||||
lockdep_assert_held(&qp->s_lock);
|
||||
if (hfi1_send_ok(qp))
|
||||
if (hfi1_send_ok(qp)) {
|
||||
_hfi1_schedule_send(qp);
|
||||
return true;
|
||||
}
|
||||
if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
|
||||
iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
|
||||
IOWAIT_PENDING_IB);
|
||||
return false;
|
||||
}
|
||||
|
||||
static void hfi1_qp_schedule(struct rvt_qp *qp)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
bool ret;
|
||||
|
||||
if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
|
||||
ret = hfi1_schedule_send(qp);
|
||||
if (ret)
|
||||
iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
|
||||
}
|
||||
}
|
||||
|
||||
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
|
||||
@ -388,16 +427,22 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
|
||||
if (qp->s_flags & flag) {
|
||||
qp->s_flags &= ~flag;
|
||||
trace_hfi1_qpwakeup(qp, flag);
|
||||
hfi1_schedule_send(qp);
|
||||
hfi1_qp_schedule(qp);
|
||||
}
|
||||
spin_unlock_irqrestore(&qp->s_lock, flags);
|
||||
/* Notify hfi1_destroy_qp() if it is waiting. */
|
||||
rvt_put_qp(qp);
|
||||
}
|
||||
|
||||
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
|
||||
{
|
||||
if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
|
||||
qp->s_flags &= ~RVT_S_BUSY;
|
||||
}
|
||||
|
||||
static int iowait_sleep(
|
||||
struct sdma_engine *sde,
|
||||
struct iowait *wait,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *stx,
|
||||
uint seq,
|
||||
bool pkts_sent)
|
||||
@ -438,7 +483,7 @@ static int iowait_sleep(
|
||||
rvt_get_qp(qp);
|
||||
}
|
||||
write_sequnlock(&dev->iowait_lock);
|
||||
qp->s_flags &= ~RVT_S_BUSY;
|
||||
hfi1_qp_unbusy(qp, wait);
|
||||
spin_unlock_irqrestore(&qp->s_lock, flags);
|
||||
ret = -EBUSY;
|
||||
} else {
|
||||
@ -637,6 +682,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
|
||||
&priv->s_iowait,
|
||||
1,
|
||||
_hfi1_do_send,
|
||||
NULL,
|
||||
iowait_sleep,
|
||||
iowait_wakeup,
|
||||
iowait_sdma_drained);
|
||||
@ -686,7 +732,7 @@ void stop_send_queue(struct rvt_qp *qp)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
cancel_work_sync(&priv->s_iowait.iowork);
|
||||
iowait_cancel_work(&priv->s_iowait);
|
||||
}
|
||||
|
||||
void quiesce_qp(struct rvt_qp *qp)
|
||||
|
@ -57,18 +57,6 @@ extern unsigned int hfi1_qp_table_size;
|
||||
|
||||
extern const struct rvt_operation_params hfi1_post_parms[];
|
||||
|
||||
/*
|
||||
* Send if not busy or waiting for I/O and either
|
||||
* a RC response is pending or we can process send work requests.
|
||||
*/
|
||||
static inline int hfi1_send_ok(struct rvt_qp *qp)
|
||||
{
|
||||
return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
|
||||
(verbs_txreq_queued(qp) ||
|
||||
(qp->s_flags & RVT_S_RESP_PENDING) ||
|
||||
!(qp->s_flags & RVT_S_ANY_WAIT_SEND));
|
||||
}
|
||||
|
||||
/*
|
||||
* Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
|
||||
*
|
||||
@ -89,6 +77,20 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
|
||||
#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
|
||||
#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
|
||||
|
||||
/*
|
||||
* Send if not busy or waiting for I/O and either
|
||||
* a RC response is pending or we can process send work requests.
|
||||
*/
|
||||
static inline int hfi1_send_ok(struct rvt_qp *qp)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
return !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
|
||||
(verbs_txreq_queued(iowait_get_ib_work(&priv->s_iowait)) ||
|
||||
(qp->s_flags & RVT_S_RESP_PENDING) ||
|
||||
!(qp->s_flags & RVT_S_ANY_WAIT_SEND));
|
||||
}
|
||||
|
||||
/*
|
||||
* free_ahg - clear ahg from QP
|
||||
*/
|
||||
@ -129,8 +131,8 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
|
||||
|
||||
void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter);
|
||||
|
||||
void _hfi1_schedule_send(struct rvt_qp *qp);
|
||||
void hfi1_schedule_send(struct rvt_qp *qp);
|
||||
bool _hfi1_schedule_send(struct rvt_qp *qp);
|
||||
bool hfi1_schedule_send(struct rvt_qp *qp);
|
||||
|
||||
void hfi1_migrate_qp(struct rvt_qp *qp);
|
||||
|
||||
@ -150,4 +152,5 @@ void quiesce_qp(struct rvt_qp *qp);
|
||||
u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
|
||||
int mtu_to_path_mtu(u32 mtu);
|
||||
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
|
||||
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait);
|
||||
#endif /* _QP_H */
|
||||
|
@ -309,7 +309,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
|
||||
}
|
||||
clear_ahg(qp);
|
||||
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
|
||||
hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
|
||||
rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
|
||||
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
|
||||
/* will get called again */
|
||||
goto done_free_tx;
|
||||
@ -378,9 +378,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
|
||||
wqe->wr.ex.invalidate_rkey);
|
||||
local_ops = 1;
|
||||
}
|
||||
hfi1_send_complete(qp, wqe,
|
||||
err ? IB_WC_LOC_PROT_ERR
|
||||
: IB_WC_SUCCESS);
|
||||
rvt_send_complete(qp, wqe,
|
||||
err ? IB_WC_LOC_PROT_ERR
|
||||
: IB_WC_SUCCESS);
|
||||
if (local_ops)
|
||||
atomic_dec(&qp->local_ops_pending);
|
||||
goto done_free_tx;
|
||||
@ -1043,7 +1043,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
|
||||
hfi1_migrate_qp(qp);
|
||||
qp->s_retry = qp->s_retry_cnt;
|
||||
} else if (qp->s_last == qp->s_acked) {
|
||||
hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
|
||||
rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
|
||||
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
|
||||
return;
|
||||
} else { /* need to handle delayed completion */
|
||||
@ -1468,7 +1468,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
|
||||
ibp->rvp.n_other_naks++;
|
||||
class_b:
|
||||
if (qp->s_last == qp->s_acked) {
|
||||
hfi1_send_complete(qp, wqe, status);
|
||||
rvt_send_complete(qp, wqe, status);
|
||||
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
|
||||
}
|
||||
break;
|
||||
@ -1644,7 +1644,8 @@ static void rc_rcv_resp(struct hfi1_packet *packet)
|
||||
qp->s_rdma_read_len -= pmtu;
|
||||
update_last_psn(qp, psn);
|
||||
spin_unlock_irqrestore(&qp->s_lock, flags);
|
||||
hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false);
|
||||
rvt_copy_sge(qp, &qp->s_rdma_read_sge,
|
||||
data, pmtu, false, false);
|
||||
goto bail;
|
||||
|
||||
case OP(RDMA_READ_RESPONSE_ONLY):
|
||||
@ -1684,7 +1685,8 @@ static void rc_rcv_resp(struct hfi1_packet *packet)
|
||||
if (unlikely(tlen != qp->s_rdma_read_len))
|
||||
goto ack_len_err;
|
||||
aeth = be32_to_cpu(ohdr->u.aeth);
|
||||
hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false);
|
||||
rvt_copy_sge(qp, &qp->s_rdma_read_sge,
|
||||
data, tlen, false, false);
|
||||
WARN_ON(qp->s_rdma_read_sge.num_sge);
|
||||
(void)do_rc_ack(qp, aeth, psn,
|
||||
OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
|
||||
@ -1704,7 +1706,7 @@ static void rc_rcv_resp(struct hfi1_packet *packet)
|
||||
status = IB_WC_LOC_LEN_ERR;
|
||||
ack_err:
|
||||
if (qp->s_last == qp->s_acked) {
|
||||
hfi1_send_complete(qp, wqe, status);
|
||||
rvt_send_complete(qp, wqe, status);
|
||||
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
|
||||
}
|
||||
ack_done:
|
||||
@ -2144,7 +2146,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
|
||||
qp->r_rcv_len += pmtu;
|
||||
if (unlikely(qp->r_rcv_len > qp->r_len))
|
||||
goto nack_inv;
|
||||
hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
|
||||
rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
|
||||
break;
|
||||
|
||||
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
|
||||
@ -2200,7 +2202,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
|
||||
wc.byte_len = tlen + qp->r_rcv_len;
|
||||
if (unlikely(wc.byte_len > qp->r_len))
|
||||
goto nack_inv;
|
||||
hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last);
|
||||
rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
|
||||
rvt_put_ss(&qp->r_sge);
|
||||
qp->r_msn++;
|
||||
if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
|
||||
|
@ -155,333 +155,6 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ruc_loopback - handle UC and RC loopback requests
|
||||
* @sqp: the sending QP
|
||||
*
|
||||
* This is called from hfi1_do_send() to
|
||||
* forward a WQE addressed to the same HFI.
|
||||
* Note that although we are single threaded due to the send engine, we still
|
||||
* have to protect against post_send(). We don't have to worry about
|
||||
* receive interrupts since this is a connected protocol and all packets
|
||||
* will pass through here.
|
||||
*/
|
||||
static void ruc_loopback(struct rvt_qp *sqp)
|
||||
{
|
||||
struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
|
||||
struct rvt_qp *qp;
|
||||
struct rvt_swqe *wqe;
|
||||
struct rvt_sge *sge;
|
||||
unsigned long flags;
|
||||
struct ib_wc wc;
|
||||
u64 sdata;
|
||||
atomic64_t *maddr;
|
||||
enum ib_wc_status send_status;
|
||||
bool release;
|
||||
int ret;
|
||||
bool copy_last = false;
|
||||
int local_ops = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
/*
|
||||
* Note that we check the responder QP state after
|
||||
* checking the requester's state.
|
||||
*/
|
||||
qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
|
||||
sqp->remote_qpn);
|
||||
|
||||
spin_lock_irqsave(&sqp->s_lock, flags);
|
||||
|
||||
/* Return if we are already busy processing a work request. */
|
||||
if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) ||
|
||||
!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
|
||||
goto unlock;
|
||||
|
||||
sqp->s_flags |= RVT_S_BUSY;
|
||||
|
||||
again:
|
||||
if (sqp->s_last == READ_ONCE(sqp->s_head))
|
||||
goto clr_busy;
|
||||
wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
|
||||
|
||||
/* Return if it is not OK to start a new work request. */
|
||||
if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
|
||||
if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
|
||||
goto clr_busy;
|
||||
/* We are in the error state, flush the work request. */
|
||||
send_status = IB_WC_WR_FLUSH_ERR;
|
||||
goto flush_send;
|
||||
}
|
||||
|
||||
/*
|
||||
* We can rely on the entry not changing without the s_lock
|
||||
* being held until we update s_last.
|
||||
* We increment s_cur to indicate s_last is in progress.
|
||||
*/
|
||||
if (sqp->s_last == sqp->s_cur) {
|
||||
if (++sqp->s_cur >= sqp->s_size)
|
||||
sqp->s_cur = 0;
|
||||
}
|
||||
spin_unlock_irqrestore(&sqp->s_lock, flags);
|
||||
|
||||
if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
|
||||
qp->ibqp.qp_type != sqp->ibqp.qp_type) {
|
||||
ibp->rvp.n_pkt_drops++;
|
||||
/*
|
||||
* For RC, the requester would timeout and retry so
|
||||
* shortcut the timeouts and just signal too many retries.
|
||||
*/
|
||||
if (sqp->ibqp.qp_type == IB_QPT_RC)
|
||||
send_status = IB_WC_RETRY_EXC_ERR;
|
||||
else
|
||||
send_status = IB_WC_SUCCESS;
|
||||
goto serr;
|
||||
}
|
||||
|
||||
memset(&wc, 0, sizeof(wc));
|
||||
send_status = IB_WC_SUCCESS;
|
||||
|
||||
release = true;
|
||||
sqp->s_sge.sge = wqe->sg_list[0];
|
||||
sqp->s_sge.sg_list = wqe->sg_list + 1;
|
||||
sqp->s_sge.num_sge = wqe->wr.num_sge;
|
||||
sqp->s_len = wqe->length;
|
||||
switch (wqe->wr.opcode) {
|
||||
case IB_WR_REG_MR:
|
||||
goto send_comp;
|
||||
|
||||
case IB_WR_LOCAL_INV:
|
||||
if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
|
||||
if (rvt_invalidate_rkey(sqp,
|
||||
wqe->wr.ex.invalidate_rkey))
|
||||
send_status = IB_WC_LOC_PROT_ERR;
|
||||
local_ops = 1;
|
||||
}
|
||||
goto send_comp;
|
||||
|
||||
case IB_WR_SEND_WITH_INV:
|
||||
if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
|
||||
wc.wc_flags = IB_WC_WITH_INVALIDATE;
|
||||
wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
|
||||
}
|
||||
goto send;
|
||||
|
||||
case IB_WR_SEND_WITH_IMM:
|
||||
wc.wc_flags = IB_WC_WITH_IMM;
|
||||
wc.ex.imm_data = wqe->wr.ex.imm_data;
|
||||
/* FALLTHROUGH */
|
||||
case IB_WR_SEND:
|
||||
send:
|
||||
ret = rvt_get_rwqe(qp, false);
|
||||
if (ret < 0)
|
||||
goto op_err;
|
||||
if (!ret)
|
||||
goto rnr_nak;
|
||||
break;
|
||||
|
||||
case IB_WR_RDMA_WRITE_WITH_IMM:
|
||||
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
|
||||
goto inv_err;
|
||||
wc.wc_flags = IB_WC_WITH_IMM;
|
||||
wc.ex.imm_data = wqe->wr.ex.imm_data;
|
||||
ret = rvt_get_rwqe(qp, true);
|
||||
if (ret < 0)
|
||||
goto op_err;
|
||||
if (!ret)
|
||||
goto rnr_nak;
|
||||
/* skip copy_last set and qp_access_flags recheck */
|
||||
goto do_write;
|
||||
case IB_WR_RDMA_WRITE:
|
||||
copy_last = rvt_is_user_qp(qp);
|
||||
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
|
||||
goto inv_err;
|
||||
do_write:
|
||||
if (wqe->length == 0)
|
||||
break;
|
||||
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
|
||||
wqe->rdma_wr.remote_addr,
|
||||
wqe->rdma_wr.rkey,
|
||||
IB_ACCESS_REMOTE_WRITE)))
|
||||
goto acc_err;
|
||||
qp->r_sge.sg_list = NULL;
|
||||
qp->r_sge.num_sge = 1;
|
||||
qp->r_sge.total_len = wqe->length;
|
||||
break;
|
||||
|
||||
case IB_WR_RDMA_READ:
|
||||
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
|
||||
goto inv_err;
|
||||
if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
|
||||
wqe->rdma_wr.remote_addr,
|
||||
wqe->rdma_wr.rkey,
|
||||
IB_ACCESS_REMOTE_READ)))
|
||||
goto acc_err;
|
||||
release = false;
|
||||
sqp->s_sge.sg_list = NULL;
|
||||
sqp->s_sge.num_sge = 1;
|
||||
qp->r_sge.sge = wqe->sg_list[0];
|
||||
qp->r_sge.sg_list = wqe->sg_list + 1;
|
||||
qp->r_sge.num_sge = wqe->wr.num_sge;
|
||||
qp->r_sge.total_len = wqe->length;
|
||||
break;
|
||||
|
||||
case IB_WR_ATOMIC_CMP_AND_SWP:
|
||||
case IB_WR_ATOMIC_FETCH_AND_ADD:
|
||||
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
|
||||
goto inv_err;
|
||||
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
|
||||
wqe->atomic_wr.remote_addr,
|
||||
wqe->atomic_wr.rkey,
|
||||
IB_ACCESS_REMOTE_ATOMIC)))
|
||||
goto acc_err;
|
||||
/* Perform atomic OP and save result. */
|
||||
maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
|
||||
sdata = wqe->atomic_wr.compare_add;
|
||||
*(u64 *)sqp->s_sge.sge.vaddr =
|
||||
(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
|
||||
(u64)atomic64_add_return(sdata, maddr) - sdata :
|
||||
(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
|
||||
sdata, wqe->atomic_wr.swap);
|
||||
rvt_put_mr(qp->r_sge.sge.mr);
|
||||
qp->r_sge.num_sge = 0;
|
||||
goto send_comp;
|
||||
|
||||
default:
|
||||
send_status = IB_WC_LOC_QP_OP_ERR;
|
||||
goto serr;
|
||||
}
|
||||
|
||||
sge = &sqp->s_sge.sge;
|
||||
while (sqp->s_len) {
|
||||
u32 len = sqp->s_len;
|
||||
|
||||
if (len > sge->length)
|
||||
len = sge->length;
|
||||
if (len > sge->sge_length)
|
||||
len = sge->sge_length;
|
||||
WARN_ON_ONCE(len == 0);
|
||||
hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
|
||||
sge->vaddr += len;
|
||||
sge->length -= len;
|
||||
sge->sge_length -= len;
|
||||
if (sge->sge_length == 0) {
|
||||
if (!release)
|
||||
rvt_put_mr(sge->mr);
|
||||
if (--sqp->s_sge.num_sge)
|
||||
*sge = *sqp->s_sge.sg_list++;
|
||||
} else if (sge->length == 0 && sge->mr->lkey) {
|
||||
if (++sge->n >= RVT_SEGSZ) {
|
||||
if (++sge->m >= sge->mr->mapsz)
|
||||
break;
|
||||
sge->n = 0;
|
||||
}
|
||||
sge->vaddr =
|
||||
sge->mr->map[sge->m]->segs[sge->n].vaddr;
|
||||
sge->length =
|
||||
sge->mr->map[sge->m]->segs[sge->n].length;
|
||||
}
|
||||
sqp->s_len -= len;
|
||||
}
|
||||
if (release)
|
||||
rvt_put_ss(&qp->r_sge);
|
||||
|
||||
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
|
||||
goto send_comp;
|
||||
|
||||
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
|
||||
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
|
||||
else
|
||||
wc.opcode = IB_WC_RECV;
|
||||
wc.wr_id = qp->r_wr_id;
|
||||
wc.status = IB_WC_SUCCESS;
|
||||
wc.byte_len = wqe->length;
|
||||
wc.qp = &qp->ibqp;
|
||||
wc.src_qp = qp->remote_qpn;
|
||||
wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
|
||||
wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
|
||||
wc.port_num = 1;
|
||||
/* Signal completion event if the solicited bit is set. */
|
||||
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
|
||||
wqe->wr.send_flags & IB_SEND_SOLICITED);
|
||||
|
||||
send_comp:
|
||||
spin_lock_irqsave(&sqp->s_lock, flags);
|
||||
ibp->rvp.n_loop_pkts++;
|
||||
flush_send:
|
||||
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
|
||||
hfi1_send_complete(sqp, wqe, send_status);
|
||||
if (local_ops) {
|
||||
atomic_dec(&sqp->local_ops_pending);
|
||||
local_ops = 0;
|
||||
}
|
||||
goto again;
|
||||
|
||||
rnr_nak:
|
||||
/* Handle RNR NAK */
|
||||
if (qp->ibqp.qp_type == IB_QPT_UC)
|
||||
goto send_comp;
|
||||
ibp->rvp.n_rnr_naks++;
|
||||
/*
|
||||
* Note: we don't need the s_lock held since the BUSY flag
|
||||
* makes this single threaded.
|
||||
*/
|
||||
if (sqp->s_rnr_retry == 0) {
|
||||
send_status = IB_WC_RNR_RETRY_EXC_ERR;
|
||||
goto serr;
|
||||
}
|
||||
if (sqp->s_rnr_retry_cnt < 7)
|
||||
sqp->s_rnr_retry--;
|
||||
spin_lock_irqsave(&sqp->s_lock, flags);
|
||||
if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
|
||||
goto clr_busy;
|
||||
rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
|
||||
IB_AETH_CREDIT_SHIFT);
|
||||
goto clr_busy;
|
||||
|
||||
op_err:
|
||||
send_status = IB_WC_REM_OP_ERR;
|
||||
wc.status = IB_WC_LOC_QP_OP_ERR;
|
||||
goto err;
|
||||
|
||||
inv_err:
|
||||
send_status = IB_WC_REM_INV_REQ_ERR;
|
||||
wc.status = IB_WC_LOC_QP_OP_ERR;
|
||||
goto err;
|
||||
|
||||
acc_err:
|
||||
send_status = IB_WC_REM_ACCESS_ERR;
|
||||
wc.status = IB_WC_LOC_PROT_ERR;
|
||||
err:
|
||||
/* responder goes to error state */
|
||||
rvt_rc_error(qp, wc.status);
|
||||
|
||||
serr:
|
||||
spin_lock_irqsave(&sqp->s_lock, flags);
|
||||
hfi1_send_complete(sqp, wqe, send_status);
|
||||
if (sqp->ibqp.qp_type == IB_QPT_RC) {
|
||||
int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
|
||||
|
||||
sqp->s_flags &= ~RVT_S_BUSY;
|
||||
spin_unlock_irqrestore(&sqp->s_lock, flags);
|
||||
if (lastwqe) {
|
||||
struct ib_event ev;
|
||||
|
||||
ev.device = sqp->ibqp.device;
|
||||
ev.element.qp = &sqp->ibqp;
|
||||
ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
|
||||
sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
|
||||
}
|
||||
goto done;
|
||||
}
|
||||
clr_busy:
|
||||
sqp->s_flags &= ~RVT_S_BUSY;
|
||||
unlock:
|
||||
spin_unlock_irqrestore(&sqp->s_lock, flags);
|
||||
done:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* hfi1_make_grh - construct a GRH header
|
||||
* @ibp: a pointer to the IB port
|
||||
@ -825,8 +498,8 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp)
|
||||
|
||||
void _hfi1_do_send(struct work_struct *work)
|
||||
{
|
||||
struct iowait *wait = container_of(work, struct iowait, iowork);
|
||||
struct rvt_qp *qp = iowait_to_qp(wait);
|
||||
struct iowait_work *w = container_of(work, struct iowait_work, iowork);
|
||||
struct rvt_qp *qp = iowait_to_qp(w->iow);
|
||||
|
||||
hfi1_do_send(qp, true);
|
||||
}
|
||||
@ -850,6 +523,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
|
||||
ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
|
||||
ps.ppd = ppd_from_ibp(ps.ibp);
|
||||
ps.in_thread = in_thread;
|
||||
ps.wait = iowait_get_ib_work(&priv->s_iowait);
|
||||
|
||||
trace_hfi1_rc_do_send(qp, in_thread);
|
||||
|
||||
@ -858,7 +532,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
|
||||
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
|
||||
~((1 << ps.ppd->lmc) - 1)) ==
|
||||
ps.ppd->lid)) {
|
||||
ruc_loopback(qp);
|
||||
rvt_ruc_loopback(qp);
|
||||
return;
|
||||
}
|
||||
make_req = hfi1_make_rc_req;
|
||||
@ -868,7 +542,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
|
||||
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
|
||||
~((1 << ps.ppd->lmc) - 1)) ==
|
||||
ps.ppd->lid)) {
|
||||
ruc_loopback(qp);
|
||||
rvt_ruc_loopback(qp);
|
||||
return;
|
||||
}
|
||||
make_req = hfi1_make_uc_req;
|
||||
@ -883,6 +557,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
|
||||
|
||||
/* Return if we are already busy processing a work request. */
|
||||
if (!hfi1_send_ok(qp)) {
|
||||
if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
|
||||
iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
|
||||
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
|
||||
return;
|
||||
}
|
||||
@ -896,7 +572,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
|
||||
ps.pkts_sent = false;
|
||||
|
||||
/* ensure a pre-built packet is handled */
|
||||
ps.s_txreq = get_waiting_verbs_txreq(qp);
|
||||
ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
|
||||
do {
|
||||
/* Check for a constructed packet to be sent. */
|
||||
if (ps.s_txreq) {
|
||||
@ -907,6 +583,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
|
||||
*/
|
||||
if (hfi1_verbs_send(qp, &ps))
|
||||
return;
|
||||
|
||||
/* allow other tasks to run */
|
||||
if (schedule_send_yield(qp, &ps))
|
||||
return;
|
||||
@ -917,44 +594,3 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
|
||||
iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
|
||||
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* This should be called with s_lock held.
|
||||
*/
|
||||
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
|
||||
enum ib_wc_status status)
|
||||
{
|
||||
u32 old_last, last;
|
||||
|
||||
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
|
||||
return;
|
||||
|
||||
last = qp->s_last;
|
||||
old_last = last;
|
||||
trace_hfi1_qp_send_completion(qp, wqe, last);
|
||||
if (++last >= qp->s_size)
|
||||
last = 0;
|
||||
trace_hfi1_qp_send_completion(qp, wqe, last);
|
||||
qp->s_last = last;
|
||||
/* See post_send() */
|
||||
barrier();
|
||||
rvt_put_swqe(wqe);
|
||||
if (qp->ibqp.qp_type == IB_QPT_UD ||
|
||||
qp->ibqp.qp_type == IB_QPT_SMI ||
|
||||
qp->ibqp.qp_type == IB_QPT_GSI)
|
||||
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
|
||||
|
||||
rvt_qp_swqe_complete(qp,
|
||||
wqe,
|
||||
ib_hfi1_wc_opcode[wqe->wr.opcode],
|
||||
status);
|
||||
|
||||
if (qp->s_acked == old_last)
|
||||
qp->s_acked = last;
|
||||
if (qp->s_cur == old_last)
|
||||
qp->s_cur = last;
|
||||
if (qp->s_tail == old_last)
|
||||
qp->s_tail = last;
|
||||
if (qp->state == IB_QPS_SQD && last == qp->s_cur)
|
||||
qp->s_draining = 0;
|
||||
}
|
||||
|
@ -378,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde,
|
||||
__sdma_txclean(sde->dd, tx);
|
||||
if (complete)
|
||||
(*complete)(tx, res);
|
||||
if (wait && iowait_sdma_dec(wait))
|
||||
if (iowait_sdma_dec(wait))
|
||||
iowait_drain_wakeup(wait);
|
||||
}
|
||||
|
||||
@ -1758,7 +1758,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
|
||||
struct iowait *wait, *nw;
|
||||
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
|
||||
uint i, n = 0, seq, max_idx = 0;
|
||||
struct sdma_txreq *stx;
|
||||
struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
|
||||
u8 max_starved_cnt = 0;
|
||||
|
||||
@ -1779,19 +1778,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
|
||||
nw,
|
||||
&sde->dmawait,
|
||||
list) {
|
||||
u16 num_desc = 0;
|
||||
u32 num_desc;
|
||||
|
||||
if (!wait->wakeup)
|
||||
continue;
|
||||
if (n == ARRAY_SIZE(waits))
|
||||
break;
|
||||
if (!list_empty(&wait->tx_head)) {
|
||||
stx = list_first_entry(
|
||||
&wait->tx_head,
|
||||
struct sdma_txreq,
|
||||
list);
|
||||
num_desc = stx->num_desc;
|
||||
}
|
||||
num_desc = iowait_get_all_desc(wait);
|
||||
if (num_desc > avail)
|
||||
break;
|
||||
avail -= num_desc;
|
||||
@ -2346,7 +2339,7 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
|
||||
*/
|
||||
static int sdma_check_progress(
|
||||
struct sdma_engine *sde,
|
||||
struct iowait *wait,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *tx,
|
||||
bool pkts_sent)
|
||||
{
|
||||
@ -2356,12 +2349,12 @@ static int sdma_check_progress(
|
||||
if (tx->num_desc <= sde->desc_avail)
|
||||
return -EAGAIN;
|
||||
/* pulse the head_lock */
|
||||
if (wait && wait->sleep) {
|
||||
if (wait && iowait_ioww_to_iow(wait)->sleep) {
|
||||
unsigned seq;
|
||||
|
||||
seq = raw_seqcount_begin(
|
||||
(const seqcount_t *)&sde->head_lock.seqcount);
|
||||
ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
|
||||
ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
|
||||
if (ret == -EAGAIN)
|
||||
sde->desc_avail = sdma_descq_freecnt(sde);
|
||||
} else {
|
||||
@ -2373,7 +2366,7 @@ static int sdma_check_progress(
|
||||
/**
|
||||
* sdma_send_txreq() - submit a tx req to ring
|
||||
* @sde: sdma engine to use
|
||||
* @wait: wait structure to use when full (may be NULL)
|
||||
* @wait: SE wait structure to use when full (may be NULL)
|
||||
* @tx: sdma_txreq to submit
|
||||
* @pkts_sent: has any packet been sent yet?
|
||||
*
|
||||
@ -2386,7 +2379,7 @@ static int sdma_check_progress(
|
||||
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
|
||||
*/
|
||||
int sdma_send_txreq(struct sdma_engine *sde,
|
||||
struct iowait *wait,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *tx,
|
||||
bool pkts_sent)
|
||||
{
|
||||
@ -2397,7 +2390,7 @@ int sdma_send_txreq(struct sdma_engine *sde,
|
||||
/* user should have supplied entire packet */
|
||||
if (unlikely(tx->tlen))
|
||||
return -EINVAL;
|
||||
tx->wait = wait;
|
||||
tx->wait = iowait_ioww_to_iow(wait);
|
||||
spin_lock_irqsave(&sde->tail_lock, flags);
|
||||
retry:
|
||||
if (unlikely(!__sdma_running(sde)))
|
||||
@ -2406,14 +2399,14 @@ int sdma_send_txreq(struct sdma_engine *sde,
|
||||
goto nodesc;
|
||||
tail = submit_tx(sde, tx);
|
||||
if (wait)
|
||||
iowait_sdma_inc(wait);
|
||||
iowait_sdma_inc(iowait_ioww_to_iow(wait));
|
||||
sdma_update_tail(sde, tail);
|
||||
unlock:
|
||||
spin_unlock_irqrestore(&sde->tail_lock, flags);
|
||||
return ret;
|
||||
unlock_noconn:
|
||||
if (wait)
|
||||
iowait_sdma_inc(wait);
|
||||
iowait_sdma_inc(iowait_ioww_to_iow(wait));
|
||||
tx->next_descq_idx = 0;
|
||||
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
|
||||
tx->sn = sde->tail_sn++;
|
||||
@ -2422,10 +2415,7 @@ int sdma_send_txreq(struct sdma_engine *sde,
|
||||
spin_lock(&sde->flushlist_lock);
|
||||
list_add_tail(&tx->list, &sde->flushlist);
|
||||
spin_unlock(&sde->flushlist_lock);
|
||||
if (wait) {
|
||||
wait->tx_count++;
|
||||
wait->count += tx->num_desc;
|
||||
}
|
||||
iowait_inc_wait_count(wait, tx->num_desc);
|
||||
schedule_work(&sde->flush_worker);
|
||||
ret = -ECOMM;
|
||||
goto unlock;
|
||||
@ -2442,9 +2432,9 @@ int sdma_send_txreq(struct sdma_engine *sde,
|
||||
/**
|
||||
* sdma_send_txlist() - submit a list of tx req to ring
|
||||
* @sde: sdma engine to use
|
||||
* @wait: wait structure to use when full (may be NULL)
|
||||
* @wait: SE wait structure to use when full (may be NULL)
|
||||
* @tx_list: list of sdma_txreqs to submit
|
||||
* @count: pointer to a u32 which, after return will contain the total number of
|
||||
* @count: pointer to a u16 which, after return will contain the total number of
|
||||
* sdma_txreqs removed from the tx_list. This will include sdma_txreqs
|
||||
* whose SDMA descriptors are submitted to the ring and the sdma_txreqs
|
||||
* which are added to SDMA engine flush list if the SDMA engine state is
|
||||
@ -2467,8 +2457,8 @@ int sdma_send_txreq(struct sdma_engine *sde,
|
||||
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
|
||||
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
|
||||
*/
|
||||
int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
|
||||
struct list_head *tx_list, u32 *count_out)
|
||||
int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
|
||||
struct list_head *tx_list, u16 *count_out)
|
||||
{
|
||||
struct sdma_txreq *tx, *tx_next;
|
||||
int ret = 0;
|
||||
@ -2479,7 +2469,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
|
||||
spin_lock_irqsave(&sde->tail_lock, flags);
|
||||
retry:
|
||||
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
|
||||
tx->wait = wait;
|
||||
tx->wait = iowait_ioww_to_iow(wait);
|
||||
if (unlikely(!__sdma_running(sde)))
|
||||
goto unlock_noconn;
|
||||
if (unlikely(tx->num_desc > sde->desc_avail))
|
||||
@ -2500,8 +2490,9 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
|
||||
update_tail:
|
||||
total_count = submit_count + flush_count;
|
||||
if (wait) {
|
||||
iowait_sdma_add(wait, total_count);
|
||||
iowait_starve_clear(submit_count > 0, wait);
|
||||
iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
|
||||
iowait_starve_clear(submit_count > 0,
|
||||
iowait_ioww_to_iow(wait));
|
||||
}
|
||||
if (tail != INVALID_TAIL)
|
||||
sdma_update_tail(sde, tail);
|
||||
@ -2511,7 +2502,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
|
||||
unlock_noconn:
|
||||
spin_lock(&sde->flushlist_lock);
|
||||
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
|
||||
tx->wait = wait;
|
||||
tx->wait = iowait_ioww_to_iow(wait);
|
||||
list_del_init(&tx->list);
|
||||
tx->next_descq_idx = 0;
|
||||
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
|
||||
@ -2520,10 +2511,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
|
||||
#endif
|
||||
list_add_tail(&tx->list, &sde->flushlist);
|
||||
flush_count++;
|
||||
if (wait) {
|
||||
wait->tx_count++;
|
||||
wait->count += tx->num_desc;
|
||||
}
|
||||
iowait_inc_wait_count(wait, tx->num_desc);
|
||||
}
|
||||
spin_unlock(&sde->flushlist_lock);
|
||||
schedule_work(&sde->flush_worker);
|
||||
|
@ -1,7 +1,7 @@
|
||||
#ifndef _HFI1_SDMA_H
|
||||
#define _HFI1_SDMA_H
|
||||
/*
|
||||
* Copyright(c) 2015, 2016 Intel Corporation.
|
||||
* Copyright(c) 2015 - 2018 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
@ -62,16 +62,6 @@
|
||||
/* Hardware limit for SDMA packet size */
|
||||
#define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
|
||||
|
||||
#define SDMA_TXREQ_S_OK 0
|
||||
#define SDMA_TXREQ_S_SENDERROR 1
|
||||
#define SDMA_TXREQ_S_ABORTED 2
|
||||
#define SDMA_TXREQ_S_SHUTDOWN 3
|
||||
|
||||
/* flags bits */
|
||||
#define SDMA_TXREQ_F_URGENT 0x0001
|
||||
#define SDMA_TXREQ_F_AHG_COPY 0x0002
|
||||
#define SDMA_TXREQ_F_USE_AHG 0x0004
|
||||
|
||||
#define SDMA_MAP_NONE 0
|
||||
#define SDMA_MAP_SINGLE 1
|
||||
#define SDMA_MAP_PAGE 2
|
||||
@ -415,6 +405,7 @@ struct sdma_engine {
|
||||
struct list_head flushlist;
|
||||
struct cpumask cpu_mask;
|
||||
struct kobject kobj;
|
||||
u32 msix_intr;
|
||||
};
|
||||
|
||||
int sdma_init(struct hfi1_devdata *dd, u8 port);
|
||||
@ -849,16 +840,16 @@ static inline int sdma_txadd_kvaddr(
|
||||
dd, SDMA_MAP_SINGLE, tx, addr, len);
|
||||
}
|
||||
|
||||
struct iowait;
|
||||
struct iowait_work;
|
||||
|
||||
int sdma_send_txreq(struct sdma_engine *sde,
|
||||
struct iowait *wait,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *tx,
|
||||
bool pkts_sent);
|
||||
int sdma_send_txlist(struct sdma_engine *sde,
|
||||
struct iowait *wait,
|
||||
struct iowait_work *wait,
|
||||
struct list_head *tx_list,
|
||||
u32 *count);
|
||||
u16 *count_out);
|
||||
|
||||
int sdma_ahg_alloc(struct sdma_engine *sde);
|
||||
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
|
||||
|
@ -494,17 +494,18 @@ static struct kobj_type hfi1_vl2mtu_ktype = {
|
||||
* Start of per-unit (or driver, in some cases, but replicated
|
||||
* per unit) functions (these get a device *)
|
||||
*/
|
||||
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
|
||||
char *buf)
|
||||
static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct hfi1_ibdev *dev =
|
||||
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
|
||||
|
||||
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
|
||||
}
|
||||
static DEVICE_ATTR_RO(hw_rev);
|
||||
|
||||
static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
|
||||
char *buf)
|
||||
static ssize_t board_id_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hfi1_ibdev *dev =
|
||||
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
|
||||
@ -517,8 +518,9 @@ static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
|
||||
ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
|
||||
return ret;
|
||||
}
|
||||
static DEVICE_ATTR_RO(board_id);
|
||||
|
||||
static ssize_t show_boardversion(struct device *device,
|
||||
static ssize_t boardversion_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hfi1_ibdev *dev =
|
||||
@ -528,8 +530,9 @@ static ssize_t show_boardversion(struct device *device,
|
||||
/* The string printed here is already newline-terminated. */
|
||||
return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
|
||||
}
|
||||
static DEVICE_ATTR_RO(boardversion);
|
||||
|
||||
static ssize_t show_nctxts(struct device *device,
|
||||
static ssize_t nctxts_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hfi1_ibdev *dev =
|
||||
@ -546,8 +549,9 @@ static ssize_t show_nctxts(struct device *device,
|
||||
min(dd->num_user_contexts,
|
||||
(u32)dd->sc_sizes[SC_USER].count));
|
||||
}
|
||||
static DEVICE_ATTR_RO(nctxts);
|
||||
|
||||
static ssize_t show_nfreectxts(struct device *device,
|
||||
static ssize_t nfreectxts_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hfi1_ibdev *dev =
|
||||
@ -557,8 +561,9 @@ static ssize_t show_nfreectxts(struct device *device,
|
||||
/* Return the number of free user ports (contexts) available. */
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
|
||||
}
|
||||
static DEVICE_ATTR_RO(nfreectxts);
|
||||
|
||||
static ssize_t show_serial(struct device *device,
|
||||
static ssize_t serial_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hfi1_ibdev *dev =
|
||||
@ -567,8 +572,9 @@ static ssize_t show_serial(struct device *device,
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
|
||||
}
|
||||
static DEVICE_ATTR_RO(serial);
|
||||
|
||||
static ssize_t store_chip_reset(struct device *device,
|
||||
static ssize_t chip_reset_store(struct device *device,
|
||||
struct device_attribute *attr, const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
@ -586,6 +592,7 @@ static ssize_t store_chip_reset(struct device *device,
|
||||
bail:
|
||||
return ret < 0 ? ret : count;
|
||||
}
|
||||
static DEVICE_ATTR_WO(chip_reset);
|
||||
|
||||
/*
|
||||
* Convert the reported temperature from an integer (reported in
|
||||
@ -598,7 +605,7 @@ static ssize_t store_chip_reset(struct device *device,
|
||||
/*
|
||||
* Dump tempsense values, in decimal, to ease shell-scripts.
|
||||
*/
|
||||
static ssize_t show_tempsense(struct device *device,
|
||||
static ssize_t tempsense_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hfi1_ibdev *dev =
|
||||
@ -622,6 +629,7 @@ static ssize_t show_tempsense(struct device *device,
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
static DEVICE_ATTR_RO(tempsense);
|
||||
|
||||
/*
|
||||
* end of per-unit (or driver, in some cases, but replicated
|
||||
@ -629,24 +637,20 @@ static ssize_t show_tempsense(struct device *device,
|
||||
*/
|
||||
|
||||
/* start of per-unit file structures and support code */
|
||||
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
|
||||
static DEVICE_ATTR(board_id, S_IRUGO, show_hfi, NULL);
|
||||
static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
|
||||
static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
|
||||
static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
|
||||
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
|
||||
static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
|
||||
static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
|
||||
static struct attribute *hfi1_attributes[] = {
|
||||
&dev_attr_hw_rev.attr,
|
||||
&dev_attr_board_id.attr,
|
||||
&dev_attr_nctxts.attr,
|
||||
&dev_attr_nfreectxts.attr,
|
||||
&dev_attr_serial.attr,
|
||||
&dev_attr_boardversion.attr,
|
||||
&dev_attr_tempsense.attr,
|
||||
&dev_attr_chip_reset.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct device_attribute *hfi1_attributes[] = {
|
||||
&dev_attr_hw_rev,
|
||||
&dev_attr_board_id,
|
||||
&dev_attr_nctxts,
|
||||
&dev_attr_nfreectxts,
|
||||
&dev_attr_serial,
|
||||
&dev_attr_boardversion,
|
||||
&dev_attr_tempsense,
|
||||
&dev_attr_chip_reset,
|
||||
const struct attribute_group ib_hfi1_attr_group = {
|
||||
.attrs = hfi1_attributes,
|
||||
};
|
||||
|
||||
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
|
||||
@ -832,12 +836,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
|
||||
struct device *class_dev = &dev->dev;
|
||||
int i, j, ret;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
|
||||
ret = device_create_file(&dev->dev, hfi1_attributes[i]);
|
||||
if (ret)
|
||||
goto bail;
|
||||
}
|
||||
|
||||
for (i = 0; i < dd->num_sdma; i++) {
|
||||
ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
|
||||
&sde_ktype, &class_dev->kobj,
|
||||
@ -855,9 +853,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
|
||||
device_remove_file(&dev->dev, hfi1_attributes[i]);
|
||||
|
||||
for (i = 0; i < dd->num_sdma; i++)
|
||||
kobject_del(&dd->per_sdma[i].kobj);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright(c) 2015 - 2017 Intel Corporation.
|
||||
* Copyright(c) 2015 - 2018 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
@ -62,3 +62,4 @@ __print_symbolic(etype, \
|
||||
#include "trace_rx.h"
|
||||
#include "trace_tx.h"
|
||||
#include "trace_mmu.h"
|
||||
#include "trace_iowait.h"
|
||||
|
54
drivers/infiniband/hw/hfi1/trace_iowait.h
Normal file
54
drivers/infiniband/hw/hfi1/trace_iowait.h
Normal file
@ -0,0 +1,54 @@
|
||||
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
|
||||
/*
|
||||
* Copyright(c) 2018 Intel Corporation.
|
||||
*
|
||||
*/
|
||||
#if !defined(__HFI1_TRACE_IOWAIT_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define __HFI1_TRACE_IOWAIT_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
#include "iowait.h"
|
||||
#include "verbs.h"
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM hfi1_iowait
|
||||
|
||||
DECLARE_EVENT_CLASS(hfi1_iowait_template,
|
||||
TP_PROTO(struct iowait *wait, u32 flag),
|
||||
TP_ARGS(wait, flag),
|
||||
TP_STRUCT__entry(/* entry */
|
||||
__field(unsigned long, addr)
|
||||
__field(unsigned long, flags)
|
||||
__field(u32, flag)
|
||||
__field(u32, qpn)
|
||||
),
|
||||
TP_fast_assign(/* assign */
|
||||
__entry->addr = (unsigned long)wait;
|
||||
__entry->flags = wait->flags;
|
||||
__entry->flag = (1 << flag);
|
||||
__entry->qpn = iowait_to_qp(wait)->ibqp.qp_num;
|
||||
),
|
||||
TP_printk(/* print */
|
||||
"iowait 0x%lx qp %u flags 0x%lx flag 0x%x",
|
||||
__entry->addr,
|
||||
__entry->qpn,
|
||||
__entry->flags,
|
||||
__entry->flag
|
||||
)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_set,
|
||||
TP_PROTO(struct iowait *wait, u32 flag),
|
||||
TP_ARGS(wait, flag));
|
||||
|
||||
DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_clear,
|
||||
TP_PROTO(struct iowait *wait, u32 flag),
|
||||
TP_ARGS(wait, flag));
|
||||
|
||||
#endif /* __HFI1_TRACE_IOWAIT_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#undef TRACE_INCLUDE_FILE
|
||||
#define TRACE_INCLUDE_PATH .
|
||||
#define TRACE_INCLUDE_FILE trace_iowait
|
||||
#include <trace/define_trace.h>
|
@ -88,7 +88,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
|
||||
}
|
||||
clear_ahg(qp);
|
||||
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
|
||||
hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
|
||||
rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
|
||||
goto done_free_tx;
|
||||
}
|
||||
|
||||
@ -140,7 +140,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
|
||||
qp, wqe->wr.ex.invalidate_rkey);
|
||||
local_ops = 1;
|
||||
}
|
||||
hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
|
||||
rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
|
||||
: IB_WC_SUCCESS);
|
||||
if (local_ops)
|
||||
atomic_dec(&qp->local_ops_pending);
|
||||
@ -426,7 +426,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
|
||||
qp->r_rcv_len += pmtu;
|
||||
if (unlikely(qp->r_rcv_len > qp->r_len))
|
||||
goto rewind;
|
||||
hfi1_copy_sge(&qp->r_sge, data, pmtu, false, false);
|
||||
rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
|
||||
break;
|
||||
|
||||
case OP(SEND_LAST_WITH_IMMEDIATE):
|
||||
@ -449,7 +449,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
|
||||
if (unlikely(wc.byte_len > qp->r_len))
|
||||
goto rewind;
|
||||
wc.opcode = IB_WC_RECV;
|
||||
hfi1_copy_sge(&qp->r_sge, data, tlen, false, false);
|
||||
rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
|
||||
rvt_put_ss(&qp->s_rdma_read_sge);
|
||||
last_imm:
|
||||
wc.wr_id = qp->r_wr_id;
|
||||
@ -523,7 +523,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
|
||||
qp->r_rcv_len += pmtu;
|
||||
if (unlikely(qp->r_rcv_len > qp->r_len))
|
||||
goto drop;
|
||||
hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
|
||||
rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
|
||||
break;
|
||||
|
||||
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
|
||||
@ -550,7 +550,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
|
||||
}
|
||||
wc.byte_len = qp->r_len;
|
||||
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
|
||||
hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
|
||||
rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
|
||||
rvt_put_ss(&qp->r_sge);
|
||||
goto last_imm;
|
||||
|
||||
@ -564,7 +564,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
|
||||
tlen -= (hdrsize + extra_bytes);
|
||||
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
|
||||
goto drop;
|
||||
hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
|
||||
rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
|
||||
rvt_put_ss(&qp->r_sge);
|
||||
break;
|
||||
|
||||
|
@ -210,8 +210,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
|
||||
}
|
||||
|
||||
hfi1_make_grh(ibp, &grh, &grd, 0, 0);
|
||||
hfi1_copy_sge(&qp->r_sge, &grh,
|
||||
sizeof(grh), true, false);
|
||||
rvt_copy_sge(qp, &qp->r_sge, &grh,
|
||||
sizeof(grh), true, false);
|
||||
wc.wc_flags |= IB_WC_GRH;
|
||||
} else {
|
||||
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
|
||||
@ -228,7 +228,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
|
||||
if (len > sge->sge_length)
|
||||
len = sge->sge_length;
|
||||
WARN_ON_ONCE(len == 0);
|
||||
hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, true, false);
|
||||
rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
|
||||
sge->vaddr += len;
|
||||
sge->length -= len;
|
||||
sge->sge_length -= len;
|
||||
@ -518,7 +518,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
|
||||
goto bail;
|
||||
}
|
||||
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
|
||||
hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
|
||||
rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
|
||||
goto done_free_tx;
|
||||
}
|
||||
|
||||
@ -560,7 +560,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
|
||||
ud_loopback(qp, wqe);
|
||||
spin_lock_irqsave(&qp->s_lock, tflags);
|
||||
ps->flags = tflags;
|
||||
hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
|
||||
rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
|
||||
goto done_free_tx;
|
||||
}
|
||||
}
|
||||
@ -1019,8 +1019,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
|
||||
goto drop;
|
||||
}
|
||||
if (packet->grh) {
|
||||
hfi1_copy_sge(&qp->r_sge, packet->grh,
|
||||
sizeof(struct ib_grh), true, false);
|
||||
rvt_copy_sge(qp, &qp->r_sge, packet->grh,
|
||||
sizeof(struct ib_grh), true, false);
|
||||
wc.wc_flags |= IB_WC_GRH;
|
||||
} else if (packet->etype == RHF_RCV_TYPE_BYPASS) {
|
||||
struct ib_grh grh;
|
||||
@ -1030,14 +1030,14 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
|
||||
* out when creating 16B, add back the GRH here.
|
||||
*/
|
||||
hfi1_make_ext_grh(packet, &grh, slid, dlid);
|
||||
hfi1_copy_sge(&qp->r_sge, &grh,
|
||||
sizeof(struct ib_grh), true, false);
|
||||
rvt_copy_sge(qp, &qp->r_sge, &grh,
|
||||
sizeof(struct ib_grh), true, false);
|
||||
wc.wc_flags |= IB_WC_GRH;
|
||||
} else {
|
||||
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
|
||||
}
|
||||
hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
|
||||
true, false);
|
||||
rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
|
||||
true, false);
|
||||
rvt_put_ss(&qp->r_sge);
|
||||
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
|
||||
return;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright(c) 2015 - 2017 Intel Corporation.
|
||||
* Copyright(c) 2015 - 2018 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
@ -76,8 +76,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
|
||||
|
||||
static unsigned initial_pkt_count = 8;
|
||||
|
||||
static int user_sdma_send_pkts(struct user_sdma_request *req,
|
||||
unsigned maxpkts);
|
||||
static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
|
||||
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
|
||||
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
|
||||
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
|
||||
@ -101,7 +100,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
|
||||
|
||||
static int defer_packet_queue(
|
||||
struct sdma_engine *sde,
|
||||
struct iowait *wait,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *txreq,
|
||||
uint seq,
|
||||
bool pkts_sent);
|
||||
@ -124,13 +123,13 @@ static struct mmu_rb_ops sdma_rb_ops = {
|
||||
|
||||
static int defer_packet_queue(
|
||||
struct sdma_engine *sde,
|
||||
struct iowait *wait,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *txreq,
|
||||
uint seq,
|
||||
bool pkts_sent)
|
||||
{
|
||||
struct hfi1_user_sdma_pkt_q *pq =
|
||||
container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
|
||||
container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
|
||||
struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
|
||||
struct user_sdma_txreq *tx =
|
||||
container_of(txreq, struct user_sdma_txreq, txreq);
|
||||
@ -187,13 +186,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
|
||||
pq->ctxt = uctxt->ctxt;
|
||||
pq->subctxt = fd->subctxt;
|
||||
pq->n_max_reqs = hfi1_sdma_comp_ring_size;
|
||||
pq->state = SDMA_PKT_Q_INACTIVE;
|
||||
atomic_set(&pq->n_reqs, 0);
|
||||
init_waitqueue_head(&pq->wait);
|
||||
atomic_set(&pq->n_locked, 0);
|
||||
pq->mm = fd->mm;
|
||||
|
||||
iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
|
||||
iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
|
||||
activate_packet_queue, NULL);
|
||||
pq->reqidx = 0;
|
||||
|
||||
@ -276,7 +274,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
|
||||
/* Wait until all requests have been freed. */
|
||||
wait_event_interruptible(
|
||||
pq->wait,
|
||||
(READ_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
|
||||
!atomic_read(&pq->n_reqs));
|
||||
kfree(pq->reqs);
|
||||
kfree(pq->req_in_use);
|
||||
kmem_cache_destroy(pq->txreq_cache);
|
||||
@ -312,6 +310,13 @@ static u8 dlid_to_selector(u16 dlid)
|
||||
return mapping[hash];
|
||||
}
|
||||
|
||||
/**
|
||||
* hfi1_user_sdma_process_request() - Process and start a user sdma request
|
||||
* @fd: valid file descriptor
|
||||
* @iovec: array of io vectors to process
|
||||
* @dim: overall iovec array size
|
||||
* @count: number of io vector array entries processed
|
||||
*/
|
||||
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
|
||||
struct iovec *iovec, unsigned long dim,
|
||||
unsigned long *count)
|
||||
@ -328,7 +333,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
|
||||
u8 opcode, sc, vl;
|
||||
u16 pkey;
|
||||
u32 slid;
|
||||
int req_queued = 0;
|
||||
u16 dlid;
|
||||
u32 selector;
|
||||
|
||||
@ -392,7 +396,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
|
||||
req->data_len = 0;
|
||||
req->pq = pq;
|
||||
req->cq = cq;
|
||||
req->status = -1;
|
||||
req->ahg_idx = -1;
|
||||
req->iov_idx = 0;
|
||||
req->sent = 0;
|
||||
@ -400,12 +403,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
|
||||
req->seqcomp = 0;
|
||||
req->seqsubmitted = 0;
|
||||
req->tids = NULL;
|
||||
req->done = 0;
|
||||
req->has_error = 0;
|
||||
INIT_LIST_HEAD(&req->txps);
|
||||
|
||||
memcpy(&req->info, &info, sizeof(info));
|
||||
|
||||
/* The request is initialized, count it */
|
||||
atomic_inc(&pq->n_reqs);
|
||||
|
||||
if (req_opcode(info.ctrl) == EXPECTED) {
|
||||
/* expected must have a TID info and at least one data vector */
|
||||
if (req->data_iovs < 2) {
|
||||
@ -500,7 +505,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
|
||||
ret = pin_vector_pages(req, &req->iovs[i]);
|
||||
if (ret) {
|
||||
req->data_iovs = i;
|
||||
req->status = ret;
|
||||
goto free_req;
|
||||
}
|
||||
req->data_len += req->iovs[i].iov.iov_len;
|
||||
@ -561,23 +565,11 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
|
||||
req->ahg_idx = sdma_ahg_alloc(req->sde);
|
||||
|
||||
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
|
||||
atomic_inc(&pq->n_reqs);
|
||||
req_queued = 1;
|
||||
pq->state = SDMA_PKT_Q_ACTIVE;
|
||||
/* Send the first N packets in the request to buy us some time */
|
||||
ret = user_sdma_send_pkts(req, pcount);
|
||||
if (unlikely(ret < 0 && ret != -EBUSY)) {
|
||||
req->status = ret;
|
||||
if (unlikely(ret < 0 && ret != -EBUSY))
|
||||
goto free_req;
|
||||
}
|
||||
|
||||
/*
|
||||
* It is possible that the SDMA engine would have processed all the
|
||||
* submitted packets by the time we get here. Therefore, only set
|
||||
* packet queue state to ACTIVE if there are still uncompleted
|
||||
* requests.
|
||||
*/
|
||||
if (atomic_read(&pq->n_reqs))
|
||||
xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
|
||||
|
||||
/*
|
||||
* This is a somewhat blocking send implementation.
|
||||
@ -588,14 +580,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
|
||||
while (req->seqsubmitted != req->info.npkts) {
|
||||
ret = user_sdma_send_pkts(req, pcount);
|
||||
if (ret < 0) {
|
||||
if (ret != -EBUSY) {
|
||||
req->status = ret;
|
||||
WRITE_ONCE(req->has_error, 1);
|
||||
if (READ_ONCE(req->seqcomp) ==
|
||||
req->seqsubmitted - 1)
|
||||
goto free_req;
|
||||
return ret;
|
||||
}
|
||||
if (ret != -EBUSY)
|
||||
goto free_req;
|
||||
wait_event_interruptible_timeout(
|
||||
pq->busy.wait_dma,
|
||||
(pq->state == SDMA_PKT_Q_ACTIVE),
|
||||
@ -606,10 +592,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
|
||||
*count += idx;
|
||||
return 0;
|
||||
free_req:
|
||||
user_sdma_free_request(req, true);
|
||||
if (req_queued)
|
||||
/*
|
||||
* If the submitted seqsubmitted == npkts, the completion routine
|
||||
* controls the final state. If sequbmitted < npkts, wait for any
|
||||
* outstanding packets to finish before cleaning up.
|
||||
*/
|
||||
if (req->seqsubmitted < req->info.npkts) {
|
||||
if (req->seqsubmitted)
|
||||
wait_event(pq->busy.wait_dma,
|
||||
(req->seqcomp == req->seqsubmitted - 1));
|
||||
user_sdma_free_request(req, true);
|
||||
pq_update(pq);
|
||||
set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
|
||||
set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -760,9 +755,10 @@ static int user_sdma_txadd(struct user_sdma_request *req,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
|
||||
static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
|
||||
{
|
||||
int ret = 0, count;
|
||||
int ret = 0;
|
||||
u16 count;
|
||||
unsigned npkts = 0;
|
||||
struct user_sdma_txreq *tx = NULL;
|
||||
struct hfi1_user_sdma_pkt_q *pq = NULL;
|
||||
@ -864,8 +860,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
|
||||
|
||||
changes = set_txreq_header_ahg(req, tx,
|
||||
datalen);
|
||||
if (changes < 0)
|
||||
if (changes < 0) {
|
||||
ret = changes;
|
||||
goto free_tx;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
|
||||
@ -914,10 +912,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
|
||||
npkts++;
|
||||
}
|
||||
dosend:
|
||||
ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
|
||||
ret = sdma_send_txlist(req->sde,
|
||||
iowait_get_ib_work(&pq->busy),
|
||||
&req->txps, &count);
|
||||
req->seqsubmitted += count;
|
||||
if (req->seqsubmitted == req->info.npkts) {
|
||||
WRITE_ONCE(req->done, 1);
|
||||
/*
|
||||
* The txreq has already been submitted to the HW queue
|
||||
* so we can free the AHG entry now. Corruption will not
|
||||
@ -1365,11 +1364,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
|
||||
return idx;
|
||||
}
|
||||
|
||||
/*
|
||||
* SDMA tx request completion callback. Called when the SDMA progress
|
||||
* state machine gets notification that the SDMA descriptors for this
|
||||
* tx request have been processed by the DMA engine. Called in
|
||||
* interrupt context.
|
||||
/**
|
||||
* user_sdma_txreq_cb() - SDMA tx request completion callback.
|
||||
* @txreq: valid sdma tx request
|
||||
* @status: success/failure of request
|
||||
*
|
||||
* Called when the SDMA progress state machine gets notification that
|
||||
* the SDMA descriptors for this tx request have been processed by the
|
||||
* DMA engine. Called in interrupt context.
|
||||
* Only do work on completed sequences.
|
||||
*/
|
||||
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
|
||||
{
|
||||
@ -1378,7 +1381,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
|
||||
struct user_sdma_request *req;
|
||||
struct hfi1_user_sdma_pkt_q *pq;
|
||||
struct hfi1_user_sdma_comp_q *cq;
|
||||
u16 idx;
|
||||
enum hfi1_sdma_comp_state state = COMPLETE;
|
||||
|
||||
if (!tx->req)
|
||||
return;
|
||||
@ -1391,39 +1394,25 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
|
||||
SDMA_DBG(req, "SDMA completion with error %d",
|
||||
status);
|
||||
WRITE_ONCE(req->has_error, 1);
|
||||
state = ERROR;
|
||||
}
|
||||
|
||||
req->seqcomp = tx->seqnum;
|
||||
kmem_cache_free(pq->txreq_cache, tx);
|
||||
tx = NULL;
|
||||
|
||||
idx = req->info.comp_idx;
|
||||
if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
|
||||
if (req->seqcomp == req->info.npkts - 1) {
|
||||
req->status = 0;
|
||||
user_sdma_free_request(req, false);
|
||||
pq_update(pq);
|
||||
set_comp_state(pq, cq, idx, COMPLETE, 0);
|
||||
}
|
||||
} else {
|
||||
if (status != SDMA_TXREQ_S_OK)
|
||||
req->status = status;
|
||||
if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) &&
|
||||
(READ_ONCE(req->done) ||
|
||||
READ_ONCE(req->has_error))) {
|
||||
user_sdma_free_request(req, false);
|
||||
pq_update(pq);
|
||||
set_comp_state(pq, cq, idx, ERROR, req->status);
|
||||
}
|
||||
}
|
||||
/* sequence isn't complete? We are done */
|
||||
if (req->seqcomp != req->info.npkts - 1)
|
||||
return;
|
||||
|
||||
user_sdma_free_request(req, false);
|
||||
set_comp_state(pq, cq, req->info.comp_idx, state, status);
|
||||
pq_update(pq);
|
||||
}
|
||||
|
||||
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
|
||||
{
|
||||
if (atomic_dec_and_test(&pq->n_reqs)) {
|
||||
xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
|
||||
if (atomic_dec_and_test(&pq->n_reqs))
|
||||
wake_up(&pq->wait);
|
||||
}
|
||||
}
|
||||
|
||||
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
|
||||
@ -1448,6 +1437,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
|
||||
if (!node)
|
||||
continue;
|
||||
|
||||
req->iovs[i].node = NULL;
|
||||
|
||||
if (unpin)
|
||||
hfi1_mmu_rb_remove(req->pq->handler,
|
||||
&node->rb);
|
||||
|
@ -105,9 +105,10 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size,
|
||||
#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
|
||||
#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
|
||||
|
||||
#define SDMA_PKT_Q_INACTIVE BIT(0)
|
||||
#define SDMA_PKT_Q_ACTIVE BIT(1)
|
||||
#define SDMA_PKT_Q_DEFERRED BIT(2)
|
||||
enum pkt_q_sdma_state {
|
||||
SDMA_PKT_Q_ACTIVE,
|
||||
SDMA_PKT_Q_DEFERRED,
|
||||
};
|
||||
|
||||
/*
|
||||
* Maximum retry attempts to submit a TX request
|
||||
@ -133,7 +134,7 @@ struct hfi1_user_sdma_pkt_q {
|
||||
struct user_sdma_request *reqs;
|
||||
unsigned long *req_in_use;
|
||||
struct iowait busy;
|
||||
unsigned state;
|
||||
enum pkt_q_sdma_state state;
|
||||
wait_queue_head_t wait;
|
||||
unsigned long unpinned;
|
||||
struct mmu_rb_handler *handler;
|
||||
@ -203,14 +204,12 @@ struct user_sdma_request {
|
||||
s8 ahg_idx;
|
||||
|
||||
/* Writeable fields shared with interrupt */
|
||||
u64 seqcomp ____cacheline_aligned_in_smp;
|
||||
u64 seqsubmitted;
|
||||
/* status of the last txreq completed */
|
||||
int status;
|
||||
u16 seqcomp ____cacheline_aligned_in_smp;
|
||||
u16 seqsubmitted;
|
||||
|
||||
/* Send side fields */
|
||||
struct list_head txps ____cacheline_aligned_in_smp;
|
||||
u64 seqnum;
|
||||
u16 seqnum;
|
||||
/*
|
||||
* KDETH.OFFSET (TID) field
|
||||
* The offset can cover multiple packets, depending on the
|
||||
@ -228,7 +227,6 @@ struct user_sdma_request {
|
||||
u16 tididx;
|
||||
/* progress index moving along the iovs array */
|
||||
u8 iov_idx;
|
||||
u8 done;
|
||||
u8 has_error;
|
||||
|
||||
struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
|
||||
@ -248,7 +246,7 @@ struct user_sdma_txreq {
|
||||
struct user_sdma_request *req;
|
||||
u16 flags;
|
||||
unsigned int busycount;
|
||||
u64 seqnum;
|
||||
u16 seqnum;
|
||||
};
|
||||
|
||||
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
|
||||
|
@ -129,8 +129,6 @@ unsigned short piothreshold = 256;
|
||||
module_param(piothreshold, ushort, S_IRUGO);
|
||||
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
|
||||
|
||||
#define COPY_CACHELESS 1
|
||||
#define COPY_ADAPTIVE 2
|
||||
static unsigned int sge_copy_mode;
|
||||
module_param(sge_copy_mode, uint, S_IRUGO);
|
||||
MODULE_PARM_DESC(sge_copy_mode,
|
||||
@ -151,159 +149,13 @@ static int pio_wait(struct rvt_qp *qp,
|
||||
/* 16B trailing buffer */
|
||||
static const u8 trail_buf[MAX_16B_PADDING];
|
||||
|
||||
static uint wss_threshold;
|
||||
static uint wss_threshold = 80;
|
||||
module_param(wss_threshold, uint, S_IRUGO);
|
||||
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
|
||||
static uint wss_clean_period = 256;
|
||||
module_param(wss_clean_period, uint, S_IRUGO);
|
||||
MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
|
||||
|
||||
/* memory working set size */
|
||||
struct hfi1_wss {
|
||||
unsigned long *entries;
|
||||
atomic_t total_count;
|
||||
atomic_t clean_counter;
|
||||
atomic_t clean_entry;
|
||||
|
||||
int threshold;
|
||||
int num_entries;
|
||||
long pages_mask;
|
||||
};
|
||||
|
||||
static struct hfi1_wss wss;
|
||||
|
||||
int hfi1_wss_init(void)
|
||||
{
|
||||
long llc_size;
|
||||
long llc_bits;
|
||||
long table_size;
|
||||
long table_bits;
|
||||
|
||||
/* check for a valid percent range - default to 80 if none or invalid */
|
||||
if (wss_threshold < 1 || wss_threshold > 100)
|
||||
wss_threshold = 80;
|
||||
/* reject a wildly large period */
|
||||
if (wss_clean_period > 1000000)
|
||||
wss_clean_period = 256;
|
||||
/* reject a zero period */
|
||||
if (wss_clean_period == 0)
|
||||
wss_clean_period = 1;
|
||||
|
||||
/*
|
||||
* Calculate the table size - the next power of 2 larger than the
|
||||
* LLC size. LLC size is in KiB.
|
||||
*/
|
||||
llc_size = wss_llc_size() * 1024;
|
||||
table_size = roundup_pow_of_two(llc_size);
|
||||
|
||||
/* one bit per page in rounded up table */
|
||||
llc_bits = llc_size / PAGE_SIZE;
|
||||
table_bits = table_size / PAGE_SIZE;
|
||||
wss.pages_mask = table_bits - 1;
|
||||
wss.num_entries = table_bits / BITS_PER_LONG;
|
||||
|
||||
wss.threshold = (llc_bits * wss_threshold) / 100;
|
||||
if (wss.threshold == 0)
|
||||
wss.threshold = 1;
|
||||
|
||||
atomic_set(&wss.clean_counter, wss_clean_period);
|
||||
|
||||
wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
|
||||
GFP_KERNEL);
|
||||
if (!wss.entries) {
|
||||
hfi1_wss_exit();
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void hfi1_wss_exit(void)
|
||||
{
|
||||
/* coded to handle partially initialized and repeat callers */
|
||||
kfree(wss.entries);
|
||||
wss.entries = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance the clean counter. When the clean period has expired,
|
||||
* clean an entry.
|
||||
*
|
||||
* This is implemented in atomics to avoid locking. Because multiple
|
||||
* variables are involved, it can be racy which can lead to slightly
|
||||
* inaccurate information. Since this is only a heuristic, this is
|
||||
* OK. Any innaccuracies will clean themselves out as the counter
|
||||
* advances. That said, it is unlikely the entry clean operation will
|
||||
* race - the next possible racer will not start until the next clean
|
||||
* period.
|
||||
*
|
||||
* The clean counter is implemented as a decrement to zero. When zero
|
||||
* is reached an entry is cleaned.
|
||||
*/
|
||||
static void wss_advance_clean_counter(void)
|
||||
{
|
||||
int entry;
|
||||
int weight;
|
||||
unsigned long bits;
|
||||
|
||||
/* become the cleaner if we decrement the counter to zero */
|
||||
if (atomic_dec_and_test(&wss.clean_counter)) {
|
||||
/*
|
||||
* Set, not add, the clean period. This avoids an issue
|
||||
* where the counter could decrement below the clean period.
|
||||
* Doing a set can result in lost decrements, slowing the
|
||||
* clean advance. Since this a heuristic, this possible
|
||||
* slowdown is OK.
|
||||
*
|
||||
* An alternative is to loop, advancing the counter by a
|
||||
* clean period until the result is > 0. However, this could
|
||||
* lead to several threads keeping another in the clean loop.
|
||||
* This could be mitigated by limiting the number of times
|
||||
* we stay in the loop.
|
||||
*/
|
||||
atomic_set(&wss.clean_counter, wss_clean_period);
|
||||
|
||||
/*
|
||||
* Uniquely grab the entry to clean and move to next.
|
||||
* The current entry is always the lower bits of
|
||||
* wss.clean_entry. The table size, wss.num_entries,
|
||||
* is always a power-of-2.
|
||||
*/
|
||||
entry = (atomic_inc_return(&wss.clean_entry) - 1)
|
||||
& (wss.num_entries - 1);
|
||||
|
||||
/* clear the entry and count the bits */
|
||||
bits = xchg(&wss.entries[entry], 0);
|
||||
weight = hweight64((u64)bits);
|
||||
/* only adjust the contended total count if needed */
|
||||
if (weight)
|
||||
atomic_sub(weight, &wss.total_count);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert the given address into the working set array.
|
||||
*/
|
||||
static void wss_insert(void *address)
|
||||
{
|
||||
u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
|
||||
u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
|
||||
u32 nr = page & (BITS_PER_LONG - 1);
|
||||
|
||||
if (!test_and_set_bit(nr, &wss.entries[entry]))
|
||||
atomic_inc(&wss.total_count);
|
||||
|
||||
wss_advance_clean_counter();
|
||||
}
|
||||
|
||||
/*
|
||||
* Is the working set larger than the threshold?
|
||||
*/
|
||||
static inline bool wss_exceeds_threshold(void)
|
||||
{
|
||||
return atomic_read(&wss.total_count) >= wss.threshold;
|
||||
}
|
||||
|
||||
/*
|
||||
* Translate ib_wr_opcode into ib_wc_opcode.
|
||||
*/
|
||||
@ -438,79 +290,6 @@ static const u32 pio_opmask[BIT(3)] = {
|
||||
*/
|
||||
__be64 ib_hfi1_sys_image_guid;
|
||||
|
||||
/**
|
||||
* hfi1_copy_sge - copy data to SGE memory
|
||||
* @ss: the SGE state
|
||||
* @data: the data to copy
|
||||
* @length: the length of the data
|
||||
* @release: boolean to release MR
|
||||
* @copy_last: do a separate copy of the last 8 bytes
|
||||
*/
|
||||
void hfi1_copy_sge(
|
||||
struct rvt_sge_state *ss,
|
||||
void *data, u32 length,
|
||||
bool release,
|
||||
bool copy_last)
|
||||
{
|
||||
struct rvt_sge *sge = &ss->sge;
|
||||
int i;
|
||||
bool in_last = false;
|
||||
bool cacheless_copy = false;
|
||||
|
||||
if (sge_copy_mode == COPY_CACHELESS) {
|
||||
cacheless_copy = length >= PAGE_SIZE;
|
||||
} else if (sge_copy_mode == COPY_ADAPTIVE) {
|
||||
if (length >= PAGE_SIZE) {
|
||||
/*
|
||||
* NOTE: this *assumes*:
|
||||
* o The first vaddr is the dest.
|
||||
* o If multiple pages, then vaddr is sequential.
|
||||
*/
|
||||
wss_insert(sge->vaddr);
|
||||
if (length >= (2 * PAGE_SIZE))
|
||||
wss_insert(sge->vaddr + PAGE_SIZE);
|
||||
|
||||
cacheless_copy = wss_exceeds_threshold();
|
||||
} else {
|
||||
wss_advance_clean_counter();
|
||||
}
|
||||
}
|
||||
if (copy_last) {
|
||||
if (length > 8) {
|
||||
length -= 8;
|
||||
} else {
|
||||
copy_last = false;
|
||||
in_last = true;
|
||||
}
|
||||
}
|
||||
|
||||
again:
|
||||
while (length) {
|
||||
u32 len = rvt_get_sge_length(sge, length);
|
||||
|
||||
WARN_ON_ONCE(len == 0);
|
||||
if (unlikely(in_last)) {
|
||||
/* enforce byte transfer ordering */
|
||||
for (i = 0; i < len; i++)
|
||||
((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
|
||||
} else if (cacheless_copy) {
|
||||
cacheless_memcpy(sge->vaddr, data, len);
|
||||
} else {
|
||||
memcpy(sge->vaddr, data, len);
|
||||
}
|
||||
rvt_update_sge(ss, len, release);
|
||||
data += len;
|
||||
length -= len;
|
||||
}
|
||||
|
||||
if (copy_last) {
|
||||
copy_last = false;
|
||||
in_last = true;
|
||||
length = 8;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure the QP is ready and able to accept the given opcode.
|
||||
*/
|
||||
@ -713,7 +492,7 @@ static void verbs_sdma_complete(
|
||||
|
||||
spin_lock(&qp->s_lock);
|
||||
if (tx->wqe) {
|
||||
hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
|
||||
rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
|
||||
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
|
||||
struct hfi1_opa_header *hdr;
|
||||
|
||||
@ -737,7 +516,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
|
||||
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
|
||||
write_seqlock(&dev->iowait_lock);
|
||||
list_add_tail(&ps->s_txreq->txreq.list,
|
||||
&priv->s_iowait.tx_head);
|
||||
&ps->wait->tx_head);
|
||||
if (list_empty(&priv->s_iowait.list)) {
|
||||
if (list_empty(&dev->memwait))
|
||||
mod_timer(&dev->mem_timer, jiffies + 1);
|
||||
@ -748,7 +527,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
|
||||
rvt_get_qp(qp);
|
||||
}
|
||||
write_sequnlock(&dev->iowait_lock);
|
||||
qp->s_flags &= ~RVT_S_BUSY;
|
||||
hfi1_qp_unbusy(qp, ps->wait);
|
||||
ret = -EBUSY;
|
||||
}
|
||||
spin_unlock_irqrestore(&qp->s_lock, flags);
|
||||
@ -950,8 +729,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
|
||||
if (unlikely(ret))
|
||||
goto bail_build;
|
||||
}
|
||||
ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq,
|
||||
ps->pkts_sent);
|
||||
ret = sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent);
|
||||
if (unlikely(ret < 0)) {
|
||||
if (ret == -ECOMM)
|
||||
goto bail_ecomm;
|
||||
@ -1001,7 +779,7 @@ static int pio_wait(struct rvt_qp *qp,
|
||||
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
|
||||
write_seqlock(&dev->iowait_lock);
|
||||
list_add_tail(&ps->s_txreq->txreq.list,
|
||||
&priv->s_iowait.tx_head);
|
||||
&ps->wait->tx_head);
|
||||
if (list_empty(&priv->s_iowait.list)) {
|
||||
struct hfi1_ibdev *dev = &dd->verbs_dev;
|
||||
int was_empty;
|
||||
@ -1020,7 +798,7 @@ static int pio_wait(struct rvt_qp *qp,
|
||||
hfi1_sc_wantpiobuf_intr(sc, 1);
|
||||
}
|
||||
write_sequnlock(&dev->iowait_lock);
|
||||
qp->s_flags &= ~RVT_S_BUSY;
|
||||
hfi1_qp_unbusy(qp, ps->wait);
|
||||
ret = -EBUSY;
|
||||
}
|
||||
spin_unlock_irqrestore(&qp->s_lock, flags);
|
||||
@ -1160,7 +938,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
|
||||
pio_bail:
|
||||
if (qp->s_wqe) {
|
||||
spin_lock_irqsave(&qp->s_lock, flags);
|
||||
hfi1_send_complete(qp, qp->s_wqe, wc_status);
|
||||
rvt_send_complete(qp, qp->s_wqe, wc_status);
|
||||
spin_unlock_irqrestore(&qp->s_lock, flags);
|
||||
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
|
||||
spin_lock_irqsave(&qp->s_lock, flags);
|
||||
@ -1367,7 +1145,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
|
||||
hfi1_cdbg(PIO, "%s() Failed. Completing with err",
|
||||
__func__);
|
||||
spin_lock_irqsave(&qp->s_lock, flags);
|
||||
hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
|
||||
rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
|
||||
spin_unlock_irqrestore(&qp->s_lock, flags);
|
||||
}
|
||||
return -EINVAL;
|
||||
@ -1943,7 +1721,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
|
||||
dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
|
||||
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
|
||||
dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
|
||||
dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
|
||||
dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe;
|
||||
dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
|
||||
hfi1_comp_vect_mappings_lookup;
|
||||
|
||||
@ -1956,10 +1734,16 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
|
||||
dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
|
||||
dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
|
||||
dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
|
||||
dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
|
||||
dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
|
||||
dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
|
||||
|
||||
/* post send table */
|
||||
dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
|
||||
|
||||
/* opcode translation table */
|
||||
dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode;
|
||||
|
||||
ppd = dd->pport;
|
||||
for (i = 0; i < dd->num_pports; i++, ppd++)
|
||||
rvt_init_port(&dd->verbs_dev.rdi,
|
||||
@ -1967,6 +1751,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
|
||||
i,
|
||||
ppd->pkeys);
|
||||
|
||||
rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev,
|
||||
&ib_hfi1_attr_group);
|
||||
|
||||
ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
|
||||
if (ret)
|
||||
goto err_verbs_txreq;
|
||||
|
@ -166,11 +166,13 @@ struct hfi1_qp_priv {
|
||||
* This structure is used to hold commonly lookedup and computed values during
|
||||
* the send engine progress.
|
||||
*/
|
||||
struct iowait_work;
|
||||
struct hfi1_pkt_state {
|
||||
struct hfi1_ibdev *dev;
|
||||
struct hfi1_ibport *ibp;
|
||||
struct hfi1_pportdata *ppd;
|
||||
struct verbs_txreq *s_txreq;
|
||||
struct iowait_work *wait;
|
||||
unsigned long flags;
|
||||
unsigned long timeout;
|
||||
unsigned long timeout_int;
|
||||
@ -247,7 +249,7 @@ static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
|
||||
return container_of(rdi, struct hfi1_ibdev, rdi);
|
||||
}
|
||||
|
||||
static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
|
||||
static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
|
||||
{
|
||||
struct hfi1_qp_priv *priv;
|
||||
|
||||
@ -313,9 +315,6 @@ void hfi1_put_txreq(struct verbs_txreq *tx);
|
||||
|
||||
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
|
||||
|
||||
void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
|
||||
bool release, bool copy_last);
|
||||
|
||||
void hfi1_cnp_rcv(struct hfi1_packet *packet);
|
||||
|
||||
void hfi1_uc_rcv(struct hfi1_packet *packet);
|
||||
@ -343,7 +342,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
|
||||
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
|
||||
int attr_mask, struct ib_udata *udata);
|
||||
void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
|
||||
int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
|
||||
int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
|
||||
bool *call_send);
|
||||
|
||||
extern const u32 rc_only_opcode;
|
||||
extern const u32 uc_only_opcode;
|
||||
@ -363,9 +363,6 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp);
|
||||
|
||||
void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
|
||||
|
||||
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
|
||||
enum ib_wc_status status);
|
||||
|
||||
void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn);
|
||||
|
||||
int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
|
||||
@ -390,28 +387,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
|
||||
int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
|
||||
u64 pbc);
|
||||
|
||||
int hfi1_wss_init(void);
|
||||
void hfi1_wss_exit(void);
|
||||
|
||||
/* platform specific: return the lowest level cache (llc) size, in KiB */
|
||||
static inline int wss_llc_size(void)
|
||||
{
|
||||
/* assume that the boot CPU value is universal for all CPUs */
|
||||
return boot_cpu_data.x86_cache_size;
|
||||
}
|
||||
|
||||
/* platform specific: cacheless copy */
|
||||
static inline void cacheless_memcpy(void *dst, void *src, size_t n)
|
||||
{
|
||||
/*
|
||||
* Use the only available X64 cacheless copy. Add a __user cast
|
||||
* to quiet sparse. The src agument is already in the kernel so
|
||||
* there are no security issues. The extra fault recovery machinery
|
||||
* is not invoked.
|
||||
*/
|
||||
__copy_user_nocache(dst, (void __user *)src, n, 0);
|
||||
}
|
||||
|
||||
static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
|
||||
{
|
||||
return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
|
||||
|
@ -102,22 +102,19 @@ static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
|
||||
return &tx->txreq;
|
||||
}
|
||||
|
||||
static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp)
|
||||
static inline struct verbs_txreq *get_waiting_verbs_txreq(struct iowait_work *w)
|
||||
{
|
||||
struct sdma_txreq *stx;
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
stx = iowait_get_txhead(&priv->s_iowait);
|
||||
stx = iowait_get_txhead(w);
|
||||
if (stx)
|
||||
return container_of(stx, struct verbs_txreq, txreq);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline bool verbs_txreq_queued(struct rvt_qp *qp)
|
||||
static inline bool verbs_txreq_queued(struct iowait_work *w)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
return iowait_packet_queued(&priv->s_iowait);
|
||||
return iowait_packet_queued(w);
|
||||
}
|
||||
|
||||
void hfi1_put_txreq(struct verbs_txreq *tx);
|
||||
|
@ -120,7 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
|
||||
uctxt->seq_cnt = 1;
|
||||
uctxt->is_vnic = true;
|
||||
|
||||
hfi1_set_vnic_msix_info(uctxt);
|
||||
msix_request_rcd_irq(uctxt);
|
||||
|
||||
hfi1_stats.sps_ctxts++;
|
||||
dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
|
||||
@ -135,8 +135,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
|
||||
dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
|
||||
flush_wc();
|
||||
|
||||
hfi1_reset_vnic_msix_info(uctxt);
|
||||
|
||||
/*
|
||||
* Disable receive context and interrupt available, reset all
|
||||
* RcvCtxtCtrl bits to default values.
|
||||
@ -148,6 +146,10 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
|
||||
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
|
||||
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
|
||||
|
||||
/* msix_intr will always be > 0, only clean up if this is true */
|
||||
if (uctxt->msix_intr)
|
||||
msix_free_irq(dd, uctxt->msix_intr);
|
||||
|
||||
uctxt->event_flags = 0;
|
||||
|
||||
hfi1_clear_tids(uctxt);
|
||||
@ -626,7 +628,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
|
||||
idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
|
||||
|
||||
/* ensure irqs see the change */
|
||||
hfi1_vnic_synchronize_irq(dd);
|
||||
msix_vnic_synchronize_irq(dd);
|
||||
|
||||
/* remove unread skbs */
|
||||
for (i = 0; i < vinfo->num_rx_q; i++) {
|
||||
@ -690,8 +692,6 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
|
||||
rc = hfi1_vnic_txreq_init(dd);
|
||||
if (rc)
|
||||
goto txreq_fail;
|
||||
|
||||
dd->vnic.msix_idx = dd->first_dyn_msix_idx;
|
||||
}
|
||||
|
||||
for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright(c) 2017 Intel Corporation.
|
||||
* Copyright(c) 2017 - 2018 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
@ -198,8 +198,8 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
|
||||
goto free_desc;
|
||||
tx->retry_count = 0;
|
||||
|
||||
ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq,
|
||||
vnic_sdma->pkts_sent);
|
||||
ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
|
||||
&tx->txreq, vnic_sdma->pkts_sent);
|
||||
/* When -ECOMM, sdma callback will be called with ABORT status */
|
||||
if (unlikely(ret && unlikely(ret != -ECOMM)))
|
||||
goto free_desc;
|
||||
@ -230,13 +230,13 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
|
||||
* become available.
|
||||
*/
|
||||
static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
|
||||
struct iowait *wait,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *txreq,
|
||||
uint seq,
|
||||
bool pkts_sent)
|
||||
{
|
||||
struct hfi1_vnic_sdma *vnic_sdma =
|
||||
container_of(wait, struct hfi1_vnic_sdma, wait);
|
||||
container_of(wait->iow, struct hfi1_vnic_sdma, wait);
|
||||
struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
|
||||
struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
|
||||
|
||||
@ -247,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
|
||||
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
|
||||
write_seqlock(&dev->iowait_lock);
|
||||
if (list_empty(&vnic_sdma->wait.list))
|
||||
iowait_queue(pkts_sent, wait, &sde->dmawait);
|
||||
iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
|
||||
write_sequnlock(&dev->iowait_lock);
|
||||
return -EBUSY;
|
||||
}
|
||||
@ -285,7 +285,8 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
|
||||
for (i = 0; i < vinfo->num_tx_q; i++) {
|
||||
struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
|
||||
|
||||
iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep,
|
||||
iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
|
||||
hfi1_vnic_sdma_sleep,
|
||||
hfi1_vnic_sdma_wakeup, NULL);
|
||||
vnic_sdma->sde = &vinfo->dd->per_sdma[i];
|
||||
vnic_sdma->dd = vinfo->dd;
|
||||
@ -295,10 +296,12 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
|
||||
|
||||
/* Add a free descriptor watermark for wakeups */
|
||||
if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
|
||||
struct iowait_work *work;
|
||||
|
||||
INIT_LIST_HEAD(&vnic_sdma->stx.list);
|
||||
vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
|
||||
list_add_tail(&vnic_sdma->stx.list,
|
||||
&vnic_sdma->wait.tx_head);
|
||||
work = iowait_get_ib_work(&vnic_sdma->wait);
|
||||
list_add_tail(&vnic_sdma->stx.list, &work->tx_head);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
config INFINIBAND_HNS
|
||||
tristate "HNS RoCE Driver"
|
||||
depends on NET_VENDOR_HISILICON
|
||||
depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
|
||||
depends on ARM64 || (COMPILE_TEST && 64BIT)
|
||||
---help---
|
||||
This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
|
||||
|
@ -49,6 +49,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
|
||||
struct hns_roce_ah *ah;
|
||||
u16 vlan_tag = 0xffff;
|
||||
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
|
||||
bool vlan_en = false;
|
||||
|
||||
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
|
||||
if (!ah)
|
||||
@ -58,8 +59,10 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
|
||||
memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
|
||||
|
||||
gid_attr = ah_attr->grh.sgid_attr;
|
||||
if (is_vlan_dev(gid_attr->ndev))
|
||||
if (is_vlan_dev(gid_attr->ndev)) {
|
||||
vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
|
||||
vlan_en = true;
|
||||
}
|
||||
|
||||
if (vlan_tag < 0x1000)
|
||||
vlan_tag |= (rdma_ah_get_sl(ah_attr) &
|
||||
@ -71,6 +74,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
|
||||
HNS_ROCE_PORT_NUM_SHIFT));
|
||||
ah->av.gid_index = grh->sgid_index;
|
||||
ah->av.vlan = cpu_to_le16(vlan_tag);
|
||||
ah->av.vlan_en = vlan_en;
|
||||
dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
|
||||
ah->av.vlan);
|
||||
|
||||
|
@ -88,8 +88,11 @@
|
||||
#define BITMAP_RR 1
|
||||
|
||||
#define MR_TYPE_MR 0x00
|
||||
#define MR_TYPE_FRMR 0x01
|
||||
#define MR_TYPE_DMA 0x03
|
||||
|
||||
#define HNS_ROCE_FRMR_MAX_PA 512
|
||||
|
||||
#define PKEY_ID 0xffff
|
||||
#define GUID_LEN 8
|
||||
#define NODE_DESC_SIZE 64
|
||||
@ -193,6 +196,9 @@ enum {
|
||||
HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
|
||||
HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
|
||||
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
|
||||
HNS_ROCE_CAP_FLAG_MW = BIT(7),
|
||||
HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
|
||||
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
|
||||
};
|
||||
|
||||
enum hns_roce_mtt_type {
|
||||
@ -219,19 +225,11 @@ struct hns_roce_uar {
|
||||
unsigned long logic_idx;
|
||||
};
|
||||
|
||||
struct hns_roce_vma_data {
|
||||
struct list_head list;
|
||||
struct vm_area_struct *vma;
|
||||
struct mutex *vma_list_mutex;
|
||||
};
|
||||
|
||||
struct hns_roce_ucontext {
|
||||
struct ib_ucontext ibucontext;
|
||||
struct hns_roce_uar uar;
|
||||
struct list_head page_list;
|
||||
struct mutex page_mutex;
|
||||
struct list_head vma_list;
|
||||
struct mutex vma_list_mutex;
|
||||
};
|
||||
|
||||
struct hns_roce_pd {
|
||||
@ -293,6 +291,16 @@ struct hns_roce_mtt {
|
||||
enum hns_roce_mtt_type mtt_type;
|
||||
};
|
||||
|
||||
struct hns_roce_mw {
|
||||
struct ib_mw ibmw;
|
||||
u32 pdn;
|
||||
u32 rkey;
|
||||
int enabled; /* MW's active status */
|
||||
u32 pbl_hop_num;
|
||||
u32 pbl_ba_pg_sz;
|
||||
u32 pbl_buf_pg_sz;
|
||||
};
|
||||
|
||||
/* Only support 4K page size for mr register */
|
||||
#define MR_SIZE_4K 0
|
||||
|
||||
@ -304,6 +312,7 @@ struct hns_roce_mr {
|
||||
u32 key; /* Key of MR */
|
||||
u32 pd; /* PD num of MR */
|
||||
u32 access;/* Access permission of MR */
|
||||
u32 npages;
|
||||
int enabled; /* MR's active status */
|
||||
int type; /* MR's register type */
|
||||
u64 *pbl_buf;/* MR's PBL space */
|
||||
@ -457,6 +466,7 @@ struct hns_roce_av {
|
||||
u8 dgid[HNS_ROCE_GID_SIZE];
|
||||
u8 mac[6];
|
||||
__le16 vlan;
|
||||
bool vlan_en;
|
||||
};
|
||||
|
||||
struct hns_roce_ah {
|
||||
@ -656,6 +666,7 @@ struct hns_roce_eq_table {
|
||||
};
|
||||
|
||||
struct hns_roce_caps {
|
||||
u64 fw_ver;
|
||||
u8 num_ports;
|
||||
int gid_table_len[HNS_ROCE_MAX_PORTS];
|
||||
int pkey_table_len[HNS_ROCE_MAX_PORTS];
|
||||
@ -665,7 +676,9 @@ struct hns_roce_caps {
|
||||
u32 max_sq_sg; /* 2 */
|
||||
u32 max_sq_inline; /* 32 */
|
||||
u32 max_rq_sg; /* 2 */
|
||||
u32 max_extend_sg;
|
||||
int num_qps; /* 256k */
|
||||
int reserved_qps;
|
||||
u32 max_wqes; /* 16k */
|
||||
u32 max_sq_desc_sz; /* 64 */
|
||||
u32 max_rq_desc_sz; /* 64 */
|
||||
@ -738,6 +751,7 @@ struct hns_roce_work {
|
||||
struct hns_roce_dev *hr_dev;
|
||||
struct work_struct work;
|
||||
u32 qpn;
|
||||
u32 cqn;
|
||||
int event_type;
|
||||
int sub_type;
|
||||
};
|
||||
@ -764,6 +778,8 @@ struct hns_roce_hw {
|
||||
struct hns_roce_mr *mr, int flags, u32 pdn,
|
||||
int mr_access_flags, u64 iova, u64 size,
|
||||
void *mb_buf);
|
||||
int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
|
||||
int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
|
||||
void (*write_cqc)(struct hns_roce_dev *hr_dev,
|
||||
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
|
||||
dma_addr_t dma_handle, int nent, u32 vector);
|
||||
@ -863,6 +879,11 @@ static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr)
|
||||
return container_of(ibmr, struct hns_roce_mr, ibmr);
|
||||
}
|
||||
|
||||
static inline struct hns_roce_mw *to_hr_mw(struct ib_mw *ibmw)
|
||||
{
|
||||
return container_of(ibmw, struct hns_roce_mw, ibmw);
|
||||
}
|
||||
|
||||
static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp)
|
||||
{
|
||||
return container_of(ibqp, struct hns_roce_qp, ibqp);
|
||||
@ -968,12 +989,20 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
|
||||
int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
|
||||
u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
|
||||
struct ib_udata *udata);
|
||||
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
|
||||
u32 max_num_sg);
|
||||
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
|
||||
unsigned int *sg_offset);
|
||||
int hns_roce_dereg_mr(struct ib_mr *ibmr);
|
||||
int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
|
||||
struct hns_roce_cmd_mailbox *mailbox,
|
||||
unsigned long mpt_index);
|
||||
unsigned long key_to_hw_index(u32 key);
|
||||
|
||||
struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
|
||||
struct ib_udata *udata);
|
||||
int hns_roce_dealloc_mw(struct ib_mw *ibmw);
|
||||
|
||||
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
|
||||
struct hns_roce_buf *buf);
|
||||
int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
|
||||
|
@ -731,7 +731,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
|
||||
cq_init_attr.comp_vector = 0;
|
||||
cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL);
|
||||
if (IS_ERR(cq)) {
|
||||
dev_err(dev, "Create cq for reseved loop qp failed!");
|
||||
dev_err(dev, "Create cq for reserved loop qp failed!");
|
||||
return -ENOMEM;
|
||||
}
|
||||
free_mr->mr_free_cq = to_hr_cq(cq);
|
||||
@ -744,7 +744,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
|
||||
|
||||
pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
|
||||
if (IS_ERR(pd)) {
|
||||
dev_err(dev, "Create pd for reseved loop qp failed!");
|
||||
dev_err(dev, "Create pd for reserved loop qp failed!");
|
||||
ret = -ENOMEM;
|
||||
goto alloc_pd_failed;
|
||||
}
|
||||
|
@ -54,6 +54,59 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
|
||||
dseg->len = cpu_to_le32(sg->length);
|
||||
}
|
||||
|
||||
static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
|
||||
struct hns_roce_wqe_frmr_seg *fseg,
|
||||
const struct ib_reg_wr *wr)
|
||||
{
|
||||
struct hns_roce_mr *mr = to_hr_mr(wr->mr);
|
||||
|
||||
/* use ib_access_flags */
|
||||
roce_set_bit(rc_sq_wqe->byte_4,
|
||||
V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
|
||||
wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
|
||||
roce_set_bit(rc_sq_wqe->byte_4,
|
||||
V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
|
||||
wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
|
||||
roce_set_bit(rc_sq_wqe->byte_4,
|
||||
V2_RC_FRMR_WQE_BYTE_4_RR_S,
|
||||
wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
|
||||
roce_set_bit(rc_sq_wqe->byte_4,
|
||||
V2_RC_FRMR_WQE_BYTE_4_RW_S,
|
||||
wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
|
||||
roce_set_bit(rc_sq_wqe->byte_4,
|
||||
V2_RC_FRMR_WQE_BYTE_4_LW_S,
|
||||
wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
|
||||
|
||||
/* Data structure reuse may lead to confusion */
|
||||
rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff);
|
||||
rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32);
|
||||
|
||||
rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
|
||||
rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
|
||||
rc_sq_wqe->rkey = cpu_to_le32(wr->key);
|
||||
rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
|
||||
|
||||
fseg->pbl_size = cpu_to_le32(mr->pbl_size);
|
||||
roce_set_field(fseg->mode_buf_pg_sz,
|
||||
V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
|
||||
V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
|
||||
mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
|
||||
roce_set_bit(fseg->mode_buf_pg_sz,
|
||||
V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
|
||||
}
|
||||
|
||||
static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
|
||||
const struct ib_atomic_wr *wr)
|
||||
{
|
||||
if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
|
||||
aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
|
||||
aseg->cmp_data = cpu_to_le64(wr->compare_add);
|
||||
} else {
|
||||
aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
|
||||
aseg->cmp_data = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
|
||||
unsigned int *sge_ind)
|
||||
{
|
||||
@ -121,6 +174,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
|
||||
}
|
||||
|
||||
if (wr->opcode == IB_WR_RDMA_READ) {
|
||||
*bad_wr = wr;
|
||||
dev_err(hr_dev->dev, "Not support inline data!\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -179,6 +233,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
|
||||
struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
|
||||
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
|
||||
struct hns_roce_qp *qp = to_hr_qp(ibqp);
|
||||
struct hns_roce_wqe_frmr_seg *fseg;
|
||||
struct device *dev = hr_dev->dev;
|
||||
struct hns_roce_v2_db sq_db;
|
||||
struct ib_qp_attr attr;
|
||||
@ -191,6 +246,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
|
||||
int attr_mask;
|
||||
u32 tmp_len;
|
||||
int ret = 0;
|
||||
u32 hr_op;
|
||||
u8 *smac;
|
||||
int nreq;
|
||||
int i;
|
||||
@ -356,6 +412,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
|
||||
V2_UD_SEND_WQE_BYTE_40_PORTN_S,
|
||||
qp->port);
|
||||
|
||||
roce_set_bit(ud_sq_wqe->byte_40,
|
||||
V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
|
||||
ah->av.vlan_en ? 1 : 0);
|
||||
roce_set_field(ud_sq_wqe->byte_48,
|
||||
V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
|
||||
V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
|
||||
@ -406,99 +465,100 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
|
||||
roce_set_bit(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
|
||||
|
||||
wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
|
||||
switch (wr->opcode) {
|
||||
case IB_WR_RDMA_READ:
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
|
||||
HNS_ROCE_V2_WQE_OP_RDMA_READ);
|
||||
hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ;
|
||||
rc_sq_wqe->rkey =
|
||||
cpu_to_le32(rdma_wr(wr)->rkey);
|
||||
rc_sq_wqe->va =
|
||||
cpu_to_le64(rdma_wr(wr)->remote_addr);
|
||||
break;
|
||||
case IB_WR_RDMA_WRITE:
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
|
||||
HNS_ROCE_V2_WQE_OP_RDMA_WRITE);
|
||||
hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE;
|
||||
rc_sq_wqe->rkey =
|
||||
cpu_to_le32(rdma_wr(wr)->rkey);
|
||||
rc_sq_wqe->va =
|
||||
cpu_to_le64(rdma_wr(wr)->remote_addr);
|
||||
break;
|
||||
case IB_WR_RDMA_WRITE_WITH_IMM:
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
|
||||
HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM);
|
||||
hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM;
|
||||
rc_sq_wqe->rkey =
|
||||
cpu_to_le32(rdma_wr(wr)->rkey);
|
||||
rc_sq_wqe->va =
|
||||
cpu_to_le64(rdma_wr(wr)->remote_addr);
|
||||
break;
|
||||
case IB_WR_SEND:
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
|
||||
HNS_ROCE_V2_WQE_OP_SEND);
|
||||
hr_op = HNS_ROCE_V2_WQE_OP_SEND;
|
||||
break;
|
||||
case IB_WR_SEND_WITH_INV:
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
|
||||
HNS_ROCE_V2_WQE_OP_SEND_WITH_INV);
|
||||
hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_INV;
|
||||
break;
|
||||
case IB_WR_SEND_WITH_IMM:
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
|
||||
HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM);
|
||||
hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM;
|
||||
break;
|
||||
case IB_WR_LOCAL_INV:
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
|
||||
HNS_ROCE_V2_WQE_OP_LOCAL_INV);
|
||||
hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV;
|
||||
roce_set_bit(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
|
||||
rc_sq_wqe->inv_key =
|
||||
cpu_to_le32(wr->ex.invalidate_rkey);
|
||||
break;
|
||||
case IB_WR_REG_MR:
|
||||
hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
|
||||
fseg = wqe;
|
||||
set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
|
||||
break;
|
||||
case IB_WR_ATOMIC_CMP_AND_SWP:
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
|
||||
HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP);
|
||||
hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
|
||||
rc_sq_wqe->rkey =
|
||||
cpu_to_le32(atomic_wr(wr)->rkey);
|
||||
rc_sq_wqe->va =
|
||||
cpu_to_le64(atomic_wr(wr)->remote_addr);
|
||||
break;
|
||||
case IB_WR_ATOMIC_FETCH_AND_ADD:
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
|
||||
HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD);
|
||||
hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD;
|
||||
rc_sq_wqe->rkey =
|
||||
cpu_to_le32(atomic_wr(wr)->rkey);
|
||||
rc_sq_wqe->va =
|
||||
cpu_to_le64(atomic_wr(wr)->remote_addr);
|
||||
break;
|
||||
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
|
||||
HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP);
|
||||
hr_op =
|
||||
HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP;
|
||||
break;
|
||||
case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
|
||||
HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD);
|
||||
hr_op =
|
||||
HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD;
|
||||
break;
|
||||
default:
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
|
||||
HNS_ROCE_V2_WQE_OP_MASK);
|
||||
hr_op = HNS_ROCE_V2_WQE_OP_MASK;
|
||||
break;
|
||||
}
|
||||
|
||||
wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
|
||||
roce_set_field(rc_sq_wqe->byte_4,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
|
||||
V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
|
||||
|
||||
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
|
||||
wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
|
||||
struct hns_roce_v2_wqe_data_seg *dseg;
|
||||
|
||||
dseg = wqe;
|
||||
set_data_seg_v2(dseg, wr->sg_list);
|
||||
wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
|
||||
set_atomic_seg(wqe, atomic_wr(wr));
|
||||
roce_set_field(rc_sq_wqe->byte_16,
|
||||
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
|
||||
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
|
||||
wr->num_sge);
|
||||
} else if (wr->opcode != IB_WR_REG_MR) {
|
||||
ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
|
||||
wqe, &sge_ind, bad_wr);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
|
||||
&sge_ind, bad_wr);
|
||||
if (ret)
|
||||
goto out;
|
||||
ind++;
|
||||
} else {
|
||||
dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
|
||||
@ -935,7 +995,24 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
|
||||
|
||||
resp = (struct hns_roce_query_version *)desc.data;
|
||||
hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version);
|
||||
hr_dev->vendor_id = le32_to_cpu(resp->rocee_vendor_id);
|
||||
hr_dev->vendor_id = hr_dev->pci_dev->vendor;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
|
||||
{
|
||||
struct hns_roce_query_fw_info *resp;
|
||||
struct hns_roce_cmq_desc desc;
|
||||
int ret;
|
||||
|
||||
hns_roce_cmq_setup_basic_desc(&desc, HNS_QUERY_FW_VER, true);
|
||||
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
resp = (struct hns_roce_query_fw_info *)desc.data;
|
||||
hr_dev->caps.fw_ver = (u64)(le32_to_cpu(resp->fw_ver));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1157,6 +1234,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
|
||||
int ret;
|
||||
|
||||
ret = hns_roce_cmq_query_hw_info(hr_dev);
|
||||
if (ret) {
|
||||
dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n",
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = hns_roce_query_fw_ver(hr_dev);
|
||||
if (ret) {
|
||||
dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n",
|
||||
ret);
|
||||
@ -1185,14 +1269,16 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
hr_dev->vendor_part_id = 0;
|
||||
hr_dev->sys_image_guid = 0;
|
||||
|
||||
hr_dev->vendor_part_id = hr_dev->pci_dev->device;
|
||||
hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
|
||||
|
||||
caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM;
|
||||
caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
|
||||
caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM;
|
||||
caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
|
||||
caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
|
||||
caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
|
||||
caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
|
||||
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
|
||||
caps->num_uars = HNS_ROCE_V2_UAR_NUM;
|
||||
@ -1222,6 +1308,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
|
||||
caps->reserved_mrws = 1;
|
||||
caps->reserved_uars = 0;
|
||||
caps->reserved_cqs = 0;
|
||||
caps->reserved_qps = HNS_ROCE_V2_RSV_QPS;
|
||||
|
||||
caps->qpc_ba_pg_sz = 0;
|
||||
caps->qpc_buf_pg_sz = 0;
|
||||
@ -1255,6 +1342,11 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
|
||||
HNS_ROCE_CAP_FLAG_RQ_INLINE |
|
||||
HNS_ROCE_CAP_FLAG_RECORD_DB |
|
||||
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
|
||||
|
||||
if (hr_dev->pci_dev->revision == 0x21)
|
||||
caps->flags |= HNS_ROCE_CAP_FLAG_MW |
|
||||
HNS_ROCE_CAP_FLAG_FRMR;
|
||||
|
||||
caps->pkey_table_len[0] = 1;
|
||||
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
|
||||
caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
|
||||
@ -1262,6 +1354,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
|
||||
caps->local_ca_ack_delay = 0;
|
||||
caps->max_mtu = IB_MTU_4096;
|
||||
|
||||
if (hr_dev->pci_dev->revision == 0x21)
|
||||
caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
|
||||
|
||||
ret = hns_roce_v2_set_bt(hr_dev);
|
||||
if (ret)
|
||||
dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
|
||||
@ -1690,10 +1785,11 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
|
||||
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0);
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0);
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S,
|
||||
(mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 0);
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S,
|
||||
mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
|
||||
(mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
|
||||
@ -1817,6 +1913,88 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
|
||||
{
|
||||
struct hns_roce_v2_mpt_entry *mpt_entry;
|
||||
|
||||
mpt_entry = mb_buf;
|
||||
memset(mpt_entry, 0, sizeof(*mpt_entry));
|
||||
|
||||
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
|
||||
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
|
||||
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
|
||||
V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
|
||||
roce_set_field(mpt_entry->byte_4_pd_hop_st,
|
||||
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
|
||||
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
|
||||
mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
|
||||
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
|
||||
V2_MPT_BYTE_4_PD_S, mr->pd);
|
||||
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
|
||||
|
||||
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
|
||||
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
|
||||
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
|
||||
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
|
||||
|
||||
mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
|
||||
|
||||
mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
|
||||
roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
|
||||
V2_MPT_BYTE_48_PBL_BA_H_S,
|
||||
upper_32_bits(mr->pbl_ba >> 3));
|
||||
|
||||
roce_set_field(mpt_entry->byte_64_buf_pa1,
|
||||
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
|
||||
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
|
||||
mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
|
||||
{
|
||||
struct hns_roce_v2_mpt_entry *mpt_entry;
|
||||
|
||||
mpt_entry = mb_buf;
|
||||
memset(mpt_entry, 0, sizeof(*mpt_entry));
|
||||
|
||||
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
|
||||
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
|
||||
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
|
||||
V2_MPT_BYTE_4_PD_S, mw->pdn);
|
||||
roce_set_field(mpt_entry->byte_4_pd_hop_st,
|
||||
V2_MPT_BYTE_4_PBL_HOP_NUM_M,
|
||||
V2_MPT_BYTE_4_PBL_HOP_NUM_S,
|
||||
mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ?
|
||||
0 : mw->pbl_hop_num);
|
||||
roce_set_field(mpt_entry->byte_4_pd_hop_st,
|
||||
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
|
||||
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
|
||||
mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
|
||||
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
|
||||
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
|
||||
|
||||
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
|
||||
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
|
||||
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
|
||||
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S,
|
||||
mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
|
||||
|
||||
roce_set_field(mpt_entry->byte_64_buf_pa1,
|
||||
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
|
||||
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
|
||||
mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
|
||||
|
||||
mpt_entry->lkey = cpu_to_le32(mw->rkey);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
|
||||
{
|
||||
return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
|
||||
@ -2274,6 +2452,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
|
||||
wc->src_qp = (u8)roce_get_field(cqe->byte_32,
|
||||
V2_CQE_BYTE_32_RMT_QPN_M,
|
||||
V2_CQE_BYTE_32_RMT_QPN_S);
|
||||
wc->slid = 0;
|
||||
wc->wc_flags |= (roce_get_bit(cqe->byte_32,
|
||||
V2_CQE_BYTE_32_GRH_S) ?
|
||||
IB_WC_GRH : 0);
|
||||
@ -2287,7 +2466,14 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
|
||||
wc->smac[5] = roce_get_field(cqe->byte_28,
|
||||
V2_CQE_BYTE_28_SMAC_5_M,
|
||||
V2_CQE_BYTE_28_SMAC_5_S);
|
||||
wc->vlan_id = 0xffff;
|
||||
if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) {
|
||||
wc->vlan_id = (u16)roce_get_field(cqe->byte_28,
|
||||
V2_CQE_BYTE_28_VID_M,
|
||||
V2_CQE_BYTE_28_VID_S);
|
||||
} else {
|
||||
wc->vlan_id = 0xffff;
|
||||
}
|
||||
|
||||
wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
|
||||
wc->network_hdr_type = roce_get_field(cqe->byte_28,
|
||||
V2_CQE_BYTE_28_PORT_TYPE_M,
|
||||
@ -2589,21 +2775,16 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
|
||||
roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0);
|
||||
|
||||
roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_MAPID_M,
|
||||
V2_QPC_BYTE_60_MAPID_S, 0);
|
||||
roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_TEMPID_M,
|
||||
V2_QPC_BYTE_60_TEMPID_S, 0);
|
||||
|
||||
roce_set_bit(qpc_mask->byte_60_qpst_mapid,
|
||||
V2_QPC_BYTE_60_INNER_MAP_IND_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_MAP_IND_S,
|
||||
0);
|
||||
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_RQ_MAP_IND_S,
|
||||
0);
|
||||
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_EXT_MAP_IND_S,
|
||||
0);
|
||||
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_RLS_IND_S,
|
||||
0);
|
||||
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_EXT_IND_S,
|
||||
0);
|
||||
roce_set_field(qpc_mask->byte_60_qpst_tempid,
|
||||
V2_QPC_BYTE_60_SCC_TOKEN_M, V2_QPC_BYTE_60_SCC_TOKEN_S,
|
||||
0);
|
||||
roce_set_bit(qpc_mask->byte_60_qpst_tempid,
|
||||
V2_QPC_BYTE_60_SQ_DB_DOING_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_60_qpst_tempid,
|
||||
V2_QPC_BYTE_60_RQ_DB_DOING_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0);
|
||||
|
||||
@ -2685,7 +2866,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
|
||||
roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M,
|
||||
V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0);
|
||||
|
||||
roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RSVD_RAQ_MAP_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S,
|
||||
0);
|
||||
roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M,
|
||||
V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0);
|
||||
roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M,
|
||||
@ -2694,8 +2876,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
|
||||
roce_set_field(qpc_mask->byte_144_raq,
|
||||
V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M,
|
||||
V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S,
|
||||
0);
|
||||
roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M,
|
||||
V2_QPC_BYTE_144_RAQ_CREDIT_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0);
|
||||
@ -2721,14 +2901,12 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
|
||||
V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M,
|
||||
V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0);
|
||||
|
||||
roce_set_field(context->byte_168_irrl_idx,
|
||||
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
|
||||
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
|
||||
ilog2((unsigned int)hr_qp->sq.wqe_cnt));
|
||||
roce_set_field(qpc_mask->byte_168_irrl_idx,
|
||||
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
|
||||
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
|
||||
|
||||
roce_set_bit(qpc_mask->byte_168_irrl_idx,
|
||||
V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_168_irrl_idx,
|
||||
V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_168_irrl_idx,
|
||||
V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_168_irrl_idx,
|
||||
V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_168_irrl_idx,
|
||||
@ -2746,6 +2924,9 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
|
||||
roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S,
|
||||
0);
|
||||
|
||||
roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
|
||||
roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0);
|
||||
|
||||
roce_set_field(qpc_mask->byte_176_msg_pktn,
|
||||
V2_QPC_BYTE_176_MSG_USE_PKTN_M,
|
||||
V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0);
|
||||
@ -2790,6 +2971,13 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
|
||||
V2_QPC_BYTE_232_IRRL_SGE_IDX_M,
|
||||
V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0);
|
||||
|
||||
roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_SO_LP_VLD_S,
|
||||
0);
|
||||
roce_set_bit(qpc_mask->byte_232_irrl_sge,
|
||||
V2_QPC_BYTE_232_FENCE_LP_VLD_S, 0);
|
||||
roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_IRRL_LP_VLD_S,
|
||||
0);
|
||||
|
||||
qpc_mask->irrl_cur_sge_offset = 0;
|
||||
|
||||
roce_set_field(qpc_mask->byte_240_irrl_tail,
|
||||
@ -2955,13 +3143,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
|
||||
roce_set_field(qpc_mask->byte_56_dqpn_err,
|
||||
V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
|
||||
}
|
||||
roce_set_field(context->byte_168_irrl_idx,
|
||||
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
|
||||
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
|
||||
ilog2((unsigned int)hr_qp->sq.wqe_cnt));
|
||||
roce_set_field(qpc_mask->byte_168_irrl_idx,
|
||||
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
|
||||
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
|
||||
}
|
||||
|
||||
static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
|
||||
@ -3271,13 +3452,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
|
||||
* we should set all bits of the relevant fields in context mask to
|
||||
* 0 at the same time, else set them to 0x1.
|
||||
*/
|
||||
roce_set_field(context->byte_60_qpst_mapid,
|
||||
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
|
||||
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, attr->retry_cnt);
|
||||
roce_set_field(qpc_mask->byte_60_qpst_mapid,
|
||||
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
|
||||
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, 0);
|
||||
|
||||
context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
|
||||
roce_set_field(context->byte_168_irrl_idx,
|
||||
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
|
||||
@ -3538,6 +3712,17 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
|
||||
memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
|
||||
}
|
||||
|
||||
if (is_vlan_dev(gid_attr->ndev)) {
|
||||
roce_set_bit(context->byte_76_srqn_op_en,
|
||||
V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
|
||||
roce_set_bit(qpc_mask->byte_76_srqn_op_en,
|
||||
V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
|
||||
roce_set_bit(context->byte_168_irrl_idx,
|
||||
V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
|
||||
roce_set_bit(qpc_mask->byte_168_irrl_idx,
|
||||
V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
|
||||
}
|
||||
|
||||
roce_set_field(context->byte_24_mtu_tc,
|
||||
V2_QPC_BYTE_24_VLAN_ID_M,
|
||||
V2_QPC_BYTE_24_VLAN_ID_S, vlan);
|
||||
@ -3584,8 +3769,15 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
|
||||
V2_QPC_BYTE_24_HOP_LIMIT_M,
|
||||
V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
|
||||
|
||||
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
|
||||
V2_QPC_BYTE_24_TC_S, grh->traffic_class);
|
||||
if (hr_dev->pci_dev->revision == 0x21 &&
|
||||
gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
|
||||
roce_set_field(context->byte_24_mtu_tc,
|
||||
V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
|
||||
grh->traffic_class >> 2);
|
||||
else
|
||||
roce_set_field(context->byte_24_mtu_tc,
|
||||
V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
|
||||
grh->traffic_class);
|
||||
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
|
||||
V2_QPC_BYTE_24_TC_S, 0);
|
||||
roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
|
||||
@ -3606,9 +3798,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
|
||||
set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
|
||||
|
||||
/* Every status migrate must change state */
|
||||
roce_set_field(context->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M,
|
||||
roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
|
||||
V2_QPC_BYTE_60_QP_ST_S, new_state);
|
||||
roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M,
|
||||
roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
|
||||
V2_QPC_BYTE_60_QP_ST_S, 0);
|
||||
|
||||
/* SW pass context to HW */
|
||||
@ -3728,7 +3920,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
|
||||
goto out;
|
||||
}
|
||||
|
||||
state = roce_get_field(context->byte_60_qpst_mapid,
|
||||
state = roce_get_field(context->byte_60_qpst_tempid,
|
||||
V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S);
|
||||
tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
|
||||
if (tmp_qp_state == -1) {
|
||||
@ -3995,13 +4187,103 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
|
||||
{
|
||||
struct hns_roce_work *irq_work =
|
||||
container_of(work, struct hns_roce_work, work);
|
||||
struct device *dev = irq_work->hr_dev->dev;
|
||||
u32 qpn = irq_work->qpn;
|
||||
u32 cqn = irq_work->cqn;
|
||||
|
||||
switch (irq_work->event_type) {
|
||||
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
|
||||
dev_info(dev, "Path migrated succeeded.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
|
||||
dev_warn(dev, "Path migration failed.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_COMM_EST:
|
||||
dev_info(dev, "Communication established.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
|
||||
dev_warn(dev, "Send queue drained.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
|
||||
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
|
||||
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
|
||||
dev_err(dev, "Local work queue catastrophic error.\n");
|
||||
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
|
||||
switch (irq_work->sub_type) {
|
||||
case HNS_ROCE_LWQCE_QPC_ERROR:
|
||||
dev_err(dev, "QP %d, QPC error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LWQCE_MTU_ERROR:
|
||||
dev_err(dev, "QP %d, MTU error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
|
||||
dev_err(dev, "QP %d, WQE BA addr error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
|
||||
dev_err(dev, "QP %d, WQE addr error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
|
||||
dev_err(dev, "QP %d, WQE shift error.\n", qpn);
|
||||
break;
|
||||
default:
|
||||
dev_err(dev, "Unhandled sub_event type %d.\n",
|
||||
irq_work->sub_type);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
|
||||
dev_err(dev, "Invalid request local work queue error.\n");
|
||||
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
|
||||
dev_err(dev, "Local access violation work queue error.\n");
|
||||
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
|
||||
switch (irq_work->sub_type) {
|
||||
case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
|
||||
dev_err(dev, "QP %d, R_key violation.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LAVWQE_LENGTH_ERROR:
|
||||
dev_err(dev, "QP %d, length error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LAVWQE_VA_ERROR:
|
||||
dev_err(dev, "QP %d, VA error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LAVWQE_PD_ERROR:
|
||||
dev_err(dev, "QP %d, PD error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
|
||||
dev_err(dev, "QP %d, rw acc error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
|
||||
dev_err(dev, "QP %d, key state error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
|
||||
dev_err(dev, "QP %d, MR operation error.\n", qpn);
|
||||
break;
|
||||
default:
|
||||
dev_err(dev, "Unhandled sub_event type %d.\n",
|
||||
irq_work->sub_type);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
|
||||
dev_warn(dev, "SRQ limit reach.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
|
||||
dev_warn(dev, "SRQ last wqe reach.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
|
||||
dev_err(dev, "SRQ catas error.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
|
||||
dev_err(dev, "CQ 0x%x access err.\n", cqn);
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
|
||||
dev_warn(dev, "CQ 0x%x overflow\n", cqn);
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
|
||||
dev_warn(dev, "DB overflow.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_FLR:
|
||||
dev_warn(dev, "Function level reset.\n");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@ -4011,7 +4293,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
|
||||
}
|
||||
|
||||
static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
|
||||
struct hns_roce_eq *eq, u32 qpn)
|
||||
struct hns_roce_eq *eq,
|
||||
u32 qpn, u32 cqn)
|
||||
{
|
||||
struct hns_roce_work *irq_work;
|
||||
|
||||
@ -4022,6 +4305,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
|
||||
INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
|
||||
irq_work->hr_dev = hr_dev;
|
||||
irq_work->qpn = qpn;
|
||||
irq_work->cqn = cqn;
|
||||
irq_work->event_type = eq->event_type;
|
||||
irq_work->sub_type = eq->sub_type;
|
||||
queue_work(hr_dev->irq_workq, &(irq_work->work));
|
||||
@ -4058,124 +4342,6 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
|
||||
hns_roce_write64_k(doorbell, eq->doorbell);
|
||||
}
|
||||
|
||||
static void hns_roce_v2_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
|
||||
struct hns_roce_aeqe *aeqe,
|
||||
u32 qpn)
|
||||
{
|
||||
struct device *dev = hr_dev->dev;
|
||||
int sub_type;
|
||||
|
||||
dev_warn(dev, "Local work queue catastrophic error.\n");
|
||||
sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
|
||||
HNS_ROCE_V2_AEQE_SUB_TYPE_S);
|
||||
switch (sub_type) {
|
||||
case HNS_ROCE_LWQCE_QPC_ERROR:
|
||||
dev_warn(dev, "QP %d, QPC error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LWQCE_MTU_ERROR:
|
||||
dev_warn(dev, "QP %d, MTU error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
|
||||
dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
|
||||
dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
|
||||
break;
|
||||
case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
|
||||
dev_warn(dev, "QP %d, WQE shift error.\n", qpn);
|
||||
break;
|
||||
default:
|
||||
dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Report a "local access violation work queue error" asynchronous event.
 *
 * Reads the sub-type from the AEQE's asyn word and prints the matching
 * diagnostic for the QP; this function only logs.
 *
 * @hr_dev: device that owns the event queue (its ->dev is used for logging)
 * @aeqe:   asynchronous event queue entry holding the sub-type field
 * @qpn:    number of the affected QP, echoed in the message
 */
static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
					    struct hns_roce_aeqe *aeqe, u32 qpn)
{
	struct device *log_dev = hr_dev->dev;
	int cause = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
				   HNS_ROCE_V2_AEQE_SUB_TYPE_S);

	dev_warn(log_dev, "Local access violation work queue error.\n");

	switch (cause) {
	case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
		dev_warn(log_dev, "QP %d, R_key violation.\n", qpn);
		break;
	case HNS_ROCE_LAVWQE_LENGTH_ERROR:
		dev_warn(log_dev, "QP %d, length error.\n", qpn);
		break;
	case HNS_ROCE_LAVWQE_VA_ERROR:
		dev_warn(log_dev, "QP %d, VA error.\n", qpn);
		break;
	case HNS_ROCE_LAVWQE_PD_ERROR:
		/* The PD case is logged at error level, unlike its siblings. */
		dev_err(log_dev, "QP %d, PD error.\n", qpn);
		break;
	case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
		dev_warn(log_dev, "QP %d, rw acc error.\n", qpn);
		break;
	case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
		dev_warn(log_dev, "QP %d, key state error.\n", qpn);
		break;
	case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
		dev_warn(log_dev, "QP %d, MR operation error.\n", qpn);
		break;
	default:
		dev_err(log_dev, "Unhandled sub_event type %d.\n", cause);
		break;
	}
}
|
||||
|
||||
/*
 * Log a QP-related asynchronous event and forward it to the QP layer.
 *
 * Event types with sub-types are delegated to the dedicated helpers; the
 * rest are logged directly. Whatever the type (including ones not listed
 * here), the event is always passed on through hns_roce_qp_event().
 *
 * @hr_dev:     device that owns the event queue
 * @aeqe:       asynchronous event queue entry, handed to the sub-type helpers
 * @event_type: HNS_ROCE_EVENT_TYPE_* value of the event
 * @qpn:        number of the affected QP
 */
static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
				      struct hns_roce_aeqe *aeqe,
				      int event_type, u32 qpn)
{
	struct device *log_dev = hr_dev->dev;

	switch (event_type) {
	case HNS_ROCE_EVENT_TYPE_COMM_EST:
		dev_warn(log_dev, "Communication established.\n");
		break;
	case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
		dev_warn(log_dev, "Send queue drained.\n");
		break;
	case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
		hns_roce_v2_wq_catas_err_handle(hr_dev, aeqe, qpn);
		break;
	case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
		dev_warn(log_dev, "Invalid request local work queue error.\n");
		break;
	case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
		hns_roce_v2_local_wq_access_err_handle(hr_dev, aeqe, qpn);
		break;
	default:
		/* Nothing to log; the event is still dispatched below. */
		break;
	}

	hns_roce_qp_event(hr_dev, qpn, event_type);
}
|
||||
|
||||
/*
 * Log a CQ-related asynchronous event and forward it to the CQ layer.
 *
 * Only the access-error and overflow types produce a log line; every event
 * type is passed on through hns_roce_cq_event().
 *
 * @hr_dev:     device that owns the event queue
 * @aeqe:       asynchronous event queue entry (not read by this function)
 * @event_type: HNS_ROCE_EVENT_TYPE_* value of the event
 * @cqn:        number of the affected CQ
 */
static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
				      struct hns_roce_aeqe *aeqe,
				      int event_type, u32 cqn)
{
	if (event_type == HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR)
		dev_warn(hr_dev->dev, "CQ 0x%x access err.\n", cqn);
	else if (event_type == HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW)
		dev_warn(hr_dev->dev, "CQ 0x%x overflow\n", cqn);

	hns_roce_cq_event(hr_dev, cqn, event_type);
}
|
||||
|
||||
static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
|
||||
{
|
||||
u32 buf_chk_sz;
|
||||
@ -4251,31 +4417,23 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
|
||||
|
||||
switch (event_type) {
|
||||
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
|
||||
dev_warn(dev, "Path migrated succeeded.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
|
||||
dev_warn(dev, "Path migration failed.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_COMM_EST:
|
||||
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
|
||||
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
|
||||
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
|
||||
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
|
||||
hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type,
|
||||
qpn);
|
||||
hns_roce_qp_event(hr_dev, qpn, event_type);
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
|
||||
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
|
||||
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
|
||||
dev_warn(dev, "SRQ not support.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
|
||||
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
|
||||
hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type,
|
||||
cqn);
|
||||
hns_roce_cq_event(hr_dev, cqn, event_type);
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
|
||||
dev_warn(dev, "DB overflow.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_MB:
|
||||
hns_roce_cmd_event(hr_dev,
|
||||
@ -4284,10 +4442,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
|
||||
le64_to_cpu(aeqe->event.cmd.out_param));
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
|
||||
dev_warn(dev, "CEQ overflow.\n");
|
||||
break;
|
||||
case HNS_ROCE_EVENT_TYPE_FLR:
|
||||
dev_warn(dev, "Function level reset.\n");
|
||||
break;
|
||||
default:
|
||||
dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
|
||||
@ -4304,7 +4460,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
|
||||
dev_warn(dev, "cons_index overflow, set back to 0.\n");
|
||||
eq->cons_index = 0;
|
||||
}
|
||||
hns_roce_v2_init_irq_work(hr_dev, eq, qpn);
|
||||
hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn);
|
||||
}
|
||||
|
||||
set_eq_cons_index_v2(eq);
|
||||
@ -5125,6 +5281,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
|
||||
create_singlethread_workqueue("hns_roce_irq_workqueue");
|
||||
if (!hr_dev->irq_workq) {
|
||||
dev_err(dev, "Create irq workqueue failed!\n");
|
||||
ret = -ENOMEM;
|
||||
goto err_request_irq_fail;
|
||||
}
|
||||
|
||||
@ -5195,6 +5352,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
|
||||
.set_mac = hns_roce_v2_set_mac,
|
||||
.write_mtpt = hns_roce_v2_write_mtpt,
|
||||
.rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt,
|
||||
.frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt,
|
||||
.mw_write_mtpt = hns_roce_v2_mw_write_mtpt,
|
||||
.write_cqc = hns_roce_v2_write_cqc,
|
||||
.set_hem = hns_roce_v2_set_hem,
|
||||
.clear_hem = hns_roce_v2_clear_hem,
|
||||
|
@ -50,6 +50,7 @@
|
||||
#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000
|
||||
#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100
|
||||
#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff
|
||||
#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
|
||||
#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
|
||||
#define HNS_ROCE_V2_UAR_NUM 256
|
||||
#define HNS_ROCE_V2_PHY_UAR_NUM 1
|
||||
@ -78,6 +79,7 @@
|
||||
#define HNS_ROCE_INVALID_LKEY 0x100
|
||||
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
|
||||
#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
|
||||
#define HNS_ROCE_V2_RSV_QPS 8
|
||||
|
||||
#define HNS_ROCE_CONTEXT_HOP_NUM 1
|
||||
#define HNS_ROCE_MTT_HOP_NUM 1
|
||||
@ -201,6 +203,7 @@ enum {
|
||||
|
||||
/* CMQ command */
|
||||
enum hns_roce_opcode_type {
|
||||
HNS_QUERY_FW_VER = 0x0001,
|
||||
HNS_ROCE_OPC_QUERY_HW_VER = 0x8000,
|
||||
HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001,
|
||||
HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
|
||||
@ -324,6 +327,7 @@ struct hns_roce_v2_cq_context {
|
||||
|
||||
enum{
|
||||
V2_MPT_ST_VALID = 0x1,
|
||||
V2_MPT_ST_FREE = 0x2,
|
||||
};
|
||||
|
||||
enum hns_roce_v2_qp_state {
|
||||
@ -350,7 +354,7 @@ struct hns_roce_v2_qp_context {
|
||||
__le32 dmac;
|
||||
__le32 byte_52_udpspn_dmac;
|
||||
__le32 byte_56_dqpn_err;
|
||||
__le32 byte_60_qpst_mapid;
|
||||
__le32 byte_60_qpst_tempid;
|
||||
__le32 qkey_xrcd;
|
||||
__le32 byte_68_rq_db;
|
||||
__le32 rq_db_record_addr;
|
||||
@ -492,26 +496,15 @@ struct hns_roce_v2_qp_context {
|
||||
#define V2_QPC_BYTE_56_LP_PKTN_INI_S 28
|
||||
#define V2_QPC_BYTE_56_LP_PKTN_INI_M GENMASK(31, 28)
|
||||
|
||||
#define V2_QPC_BYTE_60_MAPID_S 0
|
||||
#define V2_QPC_BYTE_60_MAPID_M GENMASK(12, 0)
|
||||
#define V2_QPC_BYTE_60_TEMPID_S 0
|
||||
#define V2_QPC_BYTE_60_TEMPID_M GENMASK(7, 0)
|
||||
|
||||
#define V2_QPC_BYTE_60_INNER_MAP_IND_S 13
|
||||
#define V2_QPC_BYTE_60_SCC_TOKEN_S 8
|
||||
#define V2_QPC_BYTE_60_SCC_TOKEN_M GENMASK(26, 8)
|
||||
|
||||
#define V2_QPC_BYTE_60_SQ_MAP_IND_S 14
|
||||
#define V2_QPC_BYTE_60_SQ_DB_DOING_S 27
|
||||
|
||||
#define V2_QPC_BYTE_60_RQ_MAP_IND_S 15
|
||||
|
||||
#define V2_QPC_BYTE_60_TEMPID_S 16
|
||||
#define V2_QPC_BYTE_60_TEMPID_M GENMASK(22, 16)
|
||||
|
||||
#define V2_QPC_BYTE_60_EXT_MAP_IND_S 23
|
||||
|
||||
#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S 24
|
||||
#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M GENMASK(26, 24)
|
||||
|
||||
#define V2_QPC_BYTE_60_SQ_RLS_IND_S 27
|
||||
|
||||
#define V2_QPC_BYTE_60_SQ_EXT_IND_S 28
|
||||
#define V2_QPC_BYTE_60_RQ_DB_DOING_S 28
|
||||
|
||||
#define V2_QPC_BYTE_60_QP_ST_S 29
|
||||
#define V2_QPC_BYTE_60_QP_ST_M GENMASK(31, 29)
|
||||
@ -534,6 +527,7 @@ struct hns_roce_v2_qp_context {
|
||||
|
||||
#define V2_QPC_BYTE_76_RQIE_S 28
|
||||
|
||||
#define V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
|
||||
#define V2_QPC_BYTE_80_RX_CQN_S 0
|
||||
#define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
|
||||
|
||||
@ -588,7 +582,7 @@ struct hns_roce_v2_qp_context {
|
||||
#define V2_QPC_BYTE_140_RR_MAX_S 12
|
||||
#define V2_QPC_BYTE_140_RR_MAX_M GENMASK(14, 12)
|
||||
|
||||
#define V2_QPC_BYTE_140_RSVD_RAQ_MAP_S 15
|
||||
#define V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S 15
|
||||
|
||||
#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S 16
|
||||
#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M GENMASK(23, 16)
|
||||
@ -599,8 +593,6 @@ struct hns_roce_v2_qp_context {
|
||||
#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S 0
|
||||
#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M GENMASK(23, 0)
|
||||
|
||||
#define V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S 24
|
||||
|
||||
#define V2_QPC_BYTE_144_RAQ_CREDIT_S 25
|
||||
#define V2_QPC_BYTE_144_RAQ_CREDIT_M GENMASK(29, 25)
|
||||
|
||||
@ -637,9 +629,10 @@ struct hns_roce_v2_qp_context {
|
||||
#define V2_QPC_BYTE_168_LP_SGEN_INI_S 22
|
||||
#define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 22)
|
||||
|
||||
#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_S 24
|
||||
#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_M GENMASK(27, 24)
|
||||
|
||||
#define V2_QPC_BYTE_168_SQ_VLAN_EN_S 24
|
||||
#define V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S 25
|
||||
#define V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S 26
|
||||
#define V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S 27
|
||||
#define V2_QPC_BYTE_168_IRRL_IDX_LSB_S 28
|
||||
#define V2_QPC_BYTE_168_IRRL_IDX_LSB_M GENMASK(31, 28)
|
||||
|
||||
@ -725,6 +718,10 @@ struct hns_roce_v2_qp_context {
|
||||
#define V2_QPC_BYTE_232_IRRL_SGE_IDX_S 20
|
||||
#define V2_QPC_BYTE_232_IRRL_SGE_IDX_M GENMASK(28, 20)
|
||||
|
||||
#define V2_QPC_BYTE_232_SO_LP_VLD_S 29
|
||||
#define V2_QPC_BYTE_232_FENCE_LP_VLD_S 30
|
||||
#define V2_QPC_BYTE_232_IRRL_LP_VLD_S 31
|
||||
|
||||
#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_S 0
|
||||
#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_M GENMASK(7, 0)
|
||||
|
||||
@ -743,6 +740,9 @@ struct hns_roce_v2_qp_context {
|
||||
#define V2_QPC_BYTE_244_RNR_CNT_S 27
|
||||
#define V2_QPC_BYTE_244_RNR_CNT_M GENMASK(29, 27)
|
||||
|
||||
#define V2_QPC_BYTE_244_LCL_OP_FLG_S 30
|
||||
#define V2_QPC_BYTE_244_IRRL_RD_FLG_S 31
|
||||
|
||||
#define V2_QPC_BYTE_248_IRRL_PSN_S 0
|
||||
#define V2_QPC_BYTE_248_IRRL_PSN_M GENMASK(23, 0)
|
||||
|
||||
@ -818,6 +818,11 @@ struct hns_roce_v2_cqe {
|
||||
#define V2_CQE_BYTE_28_PORT_TYPE_S 16
|
||||
#define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16)
|
||||
|
||||
#define V2_CQE_BYTE_28_VID_S 18
|
||||
#define V2_CQE_BYTE_28_VID_M GENMASK(29, 18)
|
||||
|
||||
#define V2_CQE_BYTE_28_VID_VLD_S 30
|
||||
|
||||
#define V2_CQE_BYTE_32_RMT_QPN_S 0
|
||||
#define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0)
|
||||
|
||||
@ -878,8 +883,19 @@ struct hns_roce_v2_mpt_entry {
|
||||
|
||||
#define V2_MPT_BYTE_8_LW_EN_S 7
|
||||
|
||||
#define V2_MPT_BYTE_8_MW_CNT_S 8
|
||||
#define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8)
|
||||
|
||||
#define V2_MPT_BYTE_12_FRE_S 0
|
||||
|
||||
#define V2_MPT_BYTE_12_PA_S 1
|
||||
|
||||
#define V2_MPT_BYTE_12_MR_MW_S 4
|
||||
|
||||
#define V2_MPT_BYTE_12_BPD_S 5
|
||||
|
||||
#define V2_MPT_BYTE_12_BQP_S 6
|
||||
|
||||
#define V2_MPT_BYTE_12_INNER_PA_VLD_S 7
|
||||
|
||||
#define V2_MPT_BYTE_12_MW_BIND_QPN_S 8
|
||||
@ -988,6 +1004,8 @@ struct hns_roce_v2_ud_send_wqe {
|
||||
#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24
|
||||
#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24)
|
||||
|
||||
#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
|
||||
|
||||
#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
|
||||
|
||||
#define V2_UD_SEND_WQE_DMAC_0_S 0
|
||||
@ -1042,6 +1060,16 @@ struct hns_roce_v2_rc_send_wqe {
|
||||
|
||||
#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
|
||||
|
||||
#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19
|
||||
|
||||
#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20
|
||||
|
||||
#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21
|
||||
|
||||
#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22
|
||||
|
||||
#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
|
||||
|
||||
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
|
||||
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
|
||||
|
||||
@ -1051,6 +1079,16 @@ struct hns_roce_v2_rc_send_wqe {
|
||||
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
|
||||
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
|
||||
|
||||
struct hns_roce_wqe_frmr_seg {
|
||||
__le32 pbl_size;
|
||||
__le32 mode_buf_pg_sz;
|
||||
};
|
||||
|
||||
#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4
|
||||
#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M GENMASK(7, 4)
|
||||
|
||||
#define V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S 8
|
||||
|
||||
struct hns_roce_v2_wqe_data_seg {
|
||||
__le32 len;
|
||||
__le32 lkey;
|
||||
@ -1068,6 +1106,11 @@ struct hns_roce_query_version {
|
||||
__le32 rsv[5];
|
||||
};
|
||||
|
||||
struct hns_roce_query_fw_info {
|
||||
__le32 fw_ver;
|
||||
__le32 rsv[5];
|
||||
};
|
||||
|
||||
struct hns_roce_cfg_llm_a {
|
||||
__le32 base_addr_l;
|
||||
__le32 base_addr_h;
|
||||
@ -1564,4 +1607,9 @@ struct hns_roce_eq_context {
|
||||
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
|
||||
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
|
||||
|
||||
struct hns_roce_wqe_atomic_seg {
|
||||
__le64 fetchadd_swap_data;
|
||||
__le64 cmp_data;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -196,6 +196,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
|
||||
|
||||
memset(props, 0, sizeof(*props));
|
||||
|
||||
props->fw_ver = hr_dev->caps.fw_ver;
|
||||
props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
|
||||
props->max_mr_size = (u64)(~(0ULL));
|
||||
props->page_size_cap = hr_dev->caps.page_size_cap;
|
||||
@ -215,7 +216,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
|
||||
props->max_pd = hr_dev->caps.num_pds;
|
||||
props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
|
||||
props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
|
||||
props->atomic_cap = IB_ATOMIC_NONE;
|
||||
props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ?
|
||||
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
|
||||
props->max_pkeys = 1;
|
||||
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
|
||||
|
||||
@ -344,8 +346,6 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
|
||||
if (ret)
|
||||
goto error_fail_uar_alloc;
|
||||
|
||||
INIT_LIST_HEAD(&context->vma_list);
|
||||
mutex_init(&context->vma_list_mutex);
|
||||
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
|
||||
INIT_LIST_HEAD(&context->page_list);
|
||||
mutex_init(&context->page_mutex);
|
||||
@ -376,76 +376,34 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hns_roce_vma_open(struct vm_area_struct *vma)
|
||||
{
|
||||
vma->vm_ops = NULL;
|
||||
}
|
||||
|
||||
static void hns_roce_vma_close(struct vm_area_struct *vma)
|
||||
{
|
||||
struct hns_roce_vma_data *vma_data;
|
||||
|
||||
vma_data = (struct hns_roce_vma_data *)vma->vm_private_data;
|
||||
vma_data->vma = NULL;
|
||||
mutex_lock(vma_data->vma_list_mutex);
|
||||
list_del(&vma_data->list);
|
||||
mutex_unlock(vma_data->vma_list_mutex);
|
||||
kfree(vma_data);
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct hns_roce_vm_ops = {
|
||||
.open = hns_roce_vma_open,
|
||||
.close = hns_roce_vma_close,
|
||||
};
|
||||
|
||||
static int hns_roce_set_vma_data(struct vm_area_struct *vma,
|
||||
struct hns_roce_ucontext *context)
|
||||
{
|
||||
struct list_head *vma_head = &context->vma_list;
|
||||
struct hns_roce_vma_data *vma_data;
|
||||
|
||||
vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL);
|
||||
if (!vma_data)
|
||||
return -ENOMEM;
|
||||
|
||||
vma_data->vma = vma;
|
||||
vma_data->vma_list_mutex = &context->vma_list_mutex;
|
||||
vma->vm_private_data = vma_data;
|
||||
vma->vm_ops = &hns_roce_vm_ops;
|
||||
|
||||
mutex_lock(&context->vma_list_mutex);
|
||||
list_add(&vma_data->list, vma_head);
|
||||
mutex_unlock(&context->vma_list_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hns_roce_mmap(struct ib_ucontext *context,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
|
||||
|
||||
if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0)
|
||||
return -EINVAL;
|
||||
switch (vma->vm_pgoff) {
|
||||
case 0:
|
||||
return rdma_user_mmap_io(context, vma,
|
||||
to_hr_ucontext(context)->uar.pfn,
|
||||
PAGE_SIZE,
|
||||
pgprot_noncached(vma->vm_page_prot));
|
||||
|
||||
if (vma->vm_pgoff == 0) {
|
||||
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
||||
if (io_remap_pfn_range(vma, vma->vm_start,
|
||||
to_hr_ucontext(context)->uar.pfn,
|
||||
PAGE_SIZE, vma->vm_page_prot))
|
||||
return -EAGAIN;
|
||||
} else if (vma->vm_pgoff == 1 && hr_dev->tptr_dma_addr &&
|
||||
hr_dev->tptr_size) {
|
||||
/* vm_pgoff: 1 -- TPTR */
|
||||
if (io_remap_pfn_range(vma, vma->vm_start,
|
||||
hr_dev->tptr_dma_addr >> PAGE_SHIFT,
|
||||
hr_dev->tptr_size,
|
||||
vma->vm_page_prot))
|
||||
return -EAGAIN;
|
||||
} else
|
||||
return -EINVAL;
|
||||
/* vm_pgoff: 1 -- TPTR */
|
||||
case 1:
|
||||
if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size)
|
||||
return -EINVAL;
|
||||
/*
|
||||
* FIXME: using io_remap_pfn_range on the dma address returned
|
||||
* by dma_alloc_coherent is totally wrong.
|
||||
*/
|
||||
return rdma_user_mmap_io(context, vma,
|
||||
hr_dev->tptr_dma_addr >> PAGE_SHIFT,
|
||||
hr_dev->tptr_size,
|
||||
vma->vm_page_prot);
|
||||
|
||||
return hns_roce_set_vma_data(vma, to_hr_ucontext(context));
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
|
||||
@ -471,21 +429,6 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
|
||||
|
||||
static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
|
||||
{
|
||||
struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
|
||||
struct hns_roce_vma_data *vma_data, *n;
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
mutex_lock(&context->vma_list_mutex);
|
||||
list_for_each_entry_safe(vma_data, n, &context->vma_list, list) {
|
||||
vma = vma_data->vma;
|
||||
zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
|
||||
|
||||
vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
|
||||
vma->vm_ops = NULL;
|
||||
list_del(&vma_data->list);
|
||||
kfree(vma_data);
|
||||
}
|
||||
mutex_unlock(&context->vma_list_mutex);
|
||||
}
|
||||
|
||||
static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
|
||||
@ -508,7 +451,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
|
||||
spin_lock_init(&iboe->lock);
|
||||
|
||||
ib_dev = &hr_dev->ib_dev;
|
||||
strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX);
|
||||
|
||||
ib_dev->owner = THIS_MODULE;
|
||||
ib_dev->node_type = RDMA_NODE_IB_CA;
|
||||
@ -584,12 +526,27 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
|
||||
ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
|
||||
}
|
||||
|
||||
/* MW */
|
||||
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) {
|
||||
ib_dev->alloc_mw = hns_roce_alloc_mw;
|
||||
ib_dev->dealloc_mw = hns_roce_dealloc_mw;
|
||||
ib_dev->uverbs_cmd_mask |=
|
||||
(1ULL << IB_USER_VERBS_CMD_ALLOC_MW) |
|
||||
(1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
|
||||
}
|
||||
|
||||
/* FRMR */
|
||||
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
|
||||
ib_dev->alloc_mr = hns_roce_alloc_mr;
|
||||
ib_dev->map_mr_sg = hns_roce_map_mr_sg;
|
||||
}
|
||||
|
||||
/* OTHERS */
|
||||
ib_dev->get_port_immutable = hns_roce_port_immutable;
|
||||
ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext;
|
||||
|
||||
ib_dev->driver_id = RDMA_DRIVER_HNS;
|
||||
ret = ib_register_device(ib_dev, NULL);
|
||||
ret = ib_register_device(ib_dev, "hns_%d", NULL);
|
||||
if (ret) {
|
||||
dev_err(dev, "ib_register_device failed!\n");
|
||||
return ret;
|
||||
|
@ -329,7 +329,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
|
||||
u64 bt_idx;
|
||||
u64 size;
|
||||
|
||||
mhop_num = hr_dev->caps.pbl_hop_num;
|
||||
mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
|
||||
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
|
||||
pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
|
||||
|
||||
@ -351,7 +351,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
|
||||
|
||||
mr->pbl_size = npages;
|
||||
mr->pbl_ba = mr->pbl_dma_addr;
|
||||
mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
|
||||
mr->pbl_hop_num = mhop_num;
|
||||
mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
|
||||
mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
|
||||
return 0;
|
||||
@ -511,7 +511,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
|
||||
mr->key = hw_index_to_key(index); /* MR key */
|
||||
|
||||
if (size == ~0ull) {
|
||||
mr->type = MR_TYPE_DMA;
|
||||
mr->pbl_buf = NULL;
|
||||
mr->pbl_dma_addr = 0;
|
||||
/* PBL multi-hop addressing parameters */
|
||||
@ -522,7 +521,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
|
||||
mr->pbl_l1_dma_addr = NULL;
|
||||
mr->pbl_l0_dma_addr = 0;
|
||||
} else {
|
||||
mr->type = MR_TYPE_MR;
|
||||
if (!hr_dev->caps.pbl_hop_num) {
|
||||
mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
|
||||
&(mr->pbl_dma_addr),
|
||||
@ -548,9 +546,9 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
|
||||
u32 mhop_num;
|
||||
u64 bt_idx;
|
||||
|
||||
npages = ib_umem_page_count(mr->umem);
|
||||
npages = mr->pbl_size;
|
||||
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
|
||||
mhop_num = hr_dev->caps.pbl_hop_num;
|
||||
mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
|
||||
|
||||
if (mhop_num == HNS_ROCE_HOP_NUM_0)
|
||||
return;
|
||||
@ -636,7 +634,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
|
||||
}
|
||||
|
||||
if (mr->size != ~0ULL) {
|
||||
npages = ib_umem_page_count(mr->umem);
|
||||
if (mr->type == MR_TYPE_MR)
|
||||
npages = ib_umem_page_count(mr->umem);
|
||||
|
||||
if (!hr_dev->caps.pbl_hop_num)
|
||||
dma_free_coherent(dev, (unsigned int)(npages * 8),
|
||||
@ -674,7 +673,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
|
||||
goto err_table;
|
||||
}
|
||||
|
||||
ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
|
||||
if (mr->type != MR_TYPE_FRMR)
|
||||
ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
|
||||
else
|
||||
ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
|
||||
if (ret) {
|
||||
dev_err(dev, "Write mtpt fail!\n");
|
||||
goto err_page;
|
||||
@ -855,6 +857,8 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
|
||||
if (mr == NULL)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
mr->type = MR_TYPE_DMA;
|
||||
|
||||
/* Allocate memory region key */
|
||||
ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
|
||||
~0ULL, acc, 0, mr);
|
||||
@ -1031,6 +1035,8 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
|
||||
}
|
||||
}
|
||||
|
||||
mr->type = MR_TYPE_MR;
|
||||
|
||||
ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
|
||||
access_flags, n, mr);
|
||||
if (ret)
|
||||
@ -1201,3 +1207,193 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr)
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
|
||||
u32 max_num_sg)
|
||||
{
|
||||
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
|
||||
struct device *dev = hr_dev->dev;
|
||||
struct hns_roce_mr *mr;
|
||||
u64 length;
|
||||
u32 page_size;
|
||||
int ret;
|
||||
|
||||
page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
|
||||
length = max_num_sg * page_size;
|
||||
|
||||
if (mr_type != IB_MR_TYPE_MEM_REG)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
|
||||
dev_err(dev, "max_num_sg larger than %d\n",
|
||||
HNS_ROCE_FRMR_MAX_PA);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
|
||||
if (!mr)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
mr->type = MR_TYPE_FRMR;
|
||||
|
||||
/* Allocate memory region key */
|
||||
ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
|
||||
0, max_num_sg, mr);
|
||||
if (ret)
|
||||
goto err_free;
|
||||
|
||||
ret = hns_roce_mr_enable(hr_dev, mr);
|
||||
if (ret)
|
||||
goto err_mr;
|
||||
|
||||
mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
|
||||
mr->umem = NULL;
|
||||
|
||||
return &mr->ibmr;
|
||||
|
||||
err_mr:
|
||||
hns_roce_mr_free(to_hr_dev(pd->device), mr);
|
||||
|
||||
err_free:
|
||||
kfree(mr);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/*
 * ib_sg_to_pages() callback: append one page address to the MR's PBL.
 *
 * @ibmr: memory region being mapped
 * @addr: DMA address of the page to record
 *
 * Returns 0 on success, or -ENOMEM once the PBL is full so that
 * ib_sg_to_pages() stops instead of writing past the end of pbl_buf.
 */
static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct hns_roce_mr *mr = to_hr_mr(ibmr);

	/*
	 * pbl_size is the entry count recorded by hns_roce_mhop_alloc().
	 * NOTE(review): confirm it is also populated when the PBL is
	 * allocated without multi-hop addressing.
	 */
	if (mr->npages >= mr->pbl_size)
		return -ENOMEM;

	mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);

	return 0;
}
|
||||
|
||||
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
|
||||
unsigned int *sg_offset)
|
||||
{
|
||||
struct hns_roce_mr *mr = to_hr_mr(ibmr);
|
||||
|
||||
mr->npages = 0;
|
||||
|
||||
return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
|
||||
}
|
||||
|
||||
static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
|
||||
struct hns_roce_mw *mw)
|
||||
{
|
||||
struct device *dev = hr_dev->dev;
|
||||
int ret;
|
||||
|
||||
if (mw->enabled) {
|
||||
ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey)
|
||||
& (hr_dev->caps.num_mtpts - 1));
|
||||
if (ret)
|
||||
dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret);
|
||||
|
||||
hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
|
||||
key_to_hw_index(mw->rkey));
|
||||
}
|
||||
|
||||
hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
|
||||
key_to_hw_index(mw->rkey), BITMAP_NO_RR);
|
||||
}
|
||||
|
||||
static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
|
||||
struct hns_roce_mw *mw)
|
||||
{
|
||||
struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
|
||||
struct hns_roce_cmd_mailbox *mailbox;
|
||||
struct device *dev = hr_dev->dev;
|
||||
unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
|
||||
int ret;
|
||||
|
||||
/* prepare HEM entry memory */
|
||||
ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
|
||||
if (IS_ERR(mailbox)) {
|
||||
ret = PTR_ERR(mailbox);
|
||||
goto err_table;
|
||||
}
|
||||
|
||||
ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
|
||||
if (ret) {
|
||||
dev_err(dev, "MW write mtpt fail!\n");
|
||||
goto err_page;
|
||||
}
|
||||
|
||||
ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
|
||||
mtpt_idx & (hr_dev->caps.num_mtpts - 1));
|
||||
if (ret) {
|
||||
dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret);
|
||||
goto err_page;
|
||||
}
|
||||
|
||||
mw->enabled = 1;
|
||||
|
||||
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
|
||||
|
||||
return 0;
|
||||
|
||||
err_page:
|
||||
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
|
||||
|
||||
err_table:
|
||||
hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
|
||||
struct ib_udata *udata)
|
||||
{
|
||||
struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
|
||||
struct hns_roce_mw *mw;
|
||||
unsigned long index = 0;
|
||||
int ret;
|
||||
|
||||
mw = kmalloc(sizeof(*mw), GFP_KERNEL);
|
||||
if (!mw)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/* Allocate a key for mw from bitmap */
|
||||
ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
|
||||
if (ret)
|
||||
goto err_bitmap;
|
||||
|
||||
mw->rkey = hw_index_to_key(index);
|
||||
|
||||
mw->ibmw.rkey = mw->rkey;
|
||||
mw->ibmw.type = type;
|
||||
mw->pdn = to_hr_pd(ib_pd)->pdn;
|
||||
mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
|
||||
mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
|
||||
mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
|
||||
|
||||
ret = hns_roce_mw_enable(hr_dev, mw);
|
||||
if (ret)
|
||||
goto err_mw;
|
||||
|
||||
return &mw->ibmw;
|
||||
|
||||
err_mw:
|
||||
hns_roce_mw_free(hr_dev, mw);
|
||||
|
||||
err_bitmap:
|
||||
kfree(mw);
|
||||
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
int hns_roce_dealloc_mw(struct ib_mw *ibmw)
|
||||
{
|
||||
struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
|
||||
struct hns_roce_mw *mw = to_hr_mw(ibmw);
|
||||
|
||||
hns_roce_mw_free(hr_dev, mw);
|
||||
kfree(mw);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -31,6 +31,7 @@
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <rdma/ib_addr.h>
|
||||
#include <rdma/ib_umem.h>
|
||||
@ -343,6 +344,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
|
||||
{
|
||||
u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
|
||||
u8 max_sq_stride = ilog2(roundup_sq_stride);
|
||||
u32 ex_sge_num;
|
||||
u32 page_size;
|
||||
u32 max_cnt;
|
||||
|
||||
@ -372,7 +374,18 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
|
||||
if (hr_qp->sq.max_gs > 2)
|
||||
hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
|
||||
(hr_qp->sq.max_gs - 2));
|
||||
|
||||
if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
|
||||
if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
|
||||
dev_err(hr_dev->dev,
|
||||
"The extended sge cnt error! sge_cnt=%d\n",
|
||||
hr_qp->sge.sge_cnt);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
hr_qp->sge.sge_shift = 4;
|
||||
ex_sge_num = hr_qp->sge.sge_cnt;
|
||||
|
||||
/* Get buf size, SQ and RQ are aligned to page_size */
|
||||
if (hr_dev->caps.max_sq_sg <= 2) {
|
||||
@ -386,6 +399,8 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
|
||||
hr_qp->sq.wqe_shift), PAGE_SIZE);
|
||||
} else {
|
||||
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
|
||||
hr_qp->sge.sge_cnt =
|
||||
max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num);
|
||||
hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
|
||||
hr_qp->rq.wqe_shift), page_size) +
|
||||
HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
|
||||
@ -394,7 +409,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
|
||||
hr_qp->sq.wqe_shift), page_size);
|
||||
|
||||
hr_qp->sq.offset = 0;
|
||||
if (hr_qp->sge.sge_cnt) {
|
||||
if (ex_sge_num) {
|
||||
hr_qp->sge.offset = HNS_ROCE_ALOGN_UP(
|
||||
(hr_qp->sq.wqe_cnt <<
|
||||
hr_qp->sq.wqe_shift),
|
||||
@ -465,6 +480,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
|
||||
hr_qp->sge.sge_shift = 4;
|
||||
}
|
||||
|
||||
if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
|
||||
if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
|
||||
dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
|
||||
hr_qp->sge.sge_cnt);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
|
||||
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
|
||||
hr_qp->sq.offset = 0;
|
||||
@ -472,6 +495,8 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
|
||||
page_size);
|
||||
|
||||
if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
|
||||
hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
|
||||
(u32)hr_qp->sge.sge_cnt);
|
||||
hr_qp->sge.offset = size;
|
||||
size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt <<
|
||||
hr_qp->sge.sge_shift, page_size);
|
||||
@ -952,8 +977,8 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
|
||||
}
|
||||
}
|
||||
|
||||
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
|
||||
IB_LINK_LAYER_ETHERNET)) {
|
||||
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
|
||||
attr_mask)) {
|
||||
dev_err(dev, "ib_modify_qp_is_ok failed\n");
|
||||
goto out;
|
||||
}
|
||||
@ -1106,14 +1131,20 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
|
||||
{
|
||||
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
|
||||
int reserved_from_top = 0;
|
||||
int reserved_from_bot;
|
||||
int ret;
|
||||
|
||||
spin_lock_init(&qp_table->lock);
|
||||
INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC);
|
||||
|
||||
/* A port include two SQP, six port total 12 */
|
||||
/* In hw v1, a port includes two SQPs; six ports total 12 */
|
||||
if (hr_dev->caps.max_sq_sg <= 2)
|
||||
reserved_from_bot = SQP_NUM;
|
||||
else
|
||||
reserved_from_bot = hr_dev->caps.reserved_qps;
|
||||
|
||||
ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
|
||||
hr_dev->caps.num_qps - 1, SQP_NUM,
|
||||
hr_dev->caps.num_qps - 1, reserved_from_bot,
|
||||
reserved_from_top);
|
||||
if (ret) {
|
||||
dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n",
|
||||
|
@ -1689,7 +1689,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
|
||||
unsigned long flags;
|
||||
|
||||
rtnl_lock();
|
||||
for_each_netdev_rcu(&init_net, ip_dev) {
|
||||
for_each_netdev(&init_net, ip_dev) {
|
||||
if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) &&
|
||||
(rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
|
||||
(ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
|
||||
|
@ -2135,10 +2135,10 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
|
||||
}
|
||||
|
||||
/**
|
||||
* i40iw_show_rev
|
||||
* hw_rev_show
|
||||
*/
|
||||
static ssize_t i40iw_show_rev(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
static ssize_t hw_rev_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct i40iw_ib_device *iwibdev = container_of(dev,
|
||||
struct i40iw_ib_device,
|
||||
@ -2147,34 +2147,37 @@ static ssize_t i40iw_show_rev(struct device *dev,
|
||||
|
||||
return sprintf(buf, "%x\n", hw_rev);
|
||||
}
|
||||
static DEVICE_ATTR_RO(hw_rev);
|
||||
|
||||
/**
|
||||
* i40iw_show_hca
|
||||
* hca_type_show
|
||||
*/
|
||||
static ssize_t i40iw_show_hca(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
static ssize_t hca_type_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "I40IW\n");
|
||||
}
|
||||
static DEVICE_ATTR_RO(hca_type);
|
||||
|
||||
/**
|
||||
* i40iw_show_board
|
||||
* board_id_show
|
||||
*/
|
||||
static ssize_t i40iw_show_board(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
static ssize_t board_id_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
|
||||
}
|
||||
static DEVICE_ATTR_RO(board_id);
|
||||
|
||||
static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
|
||||
static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
|
||||
static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
|
||||
static struct attribute *i40iw_dev_attributes[] = {
|
||||
&dev_attr_hw_rev.attr,
|
||||
&dev_attr_hca_type.attr,
|
||||
&dev_attr_board_id.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct device_attribute *i40iw_dev_attributes[] = {
|
||||
&dev_attr_hw_rev,
|
||||
&dev_attr_hca_type,
|
||||
&dev_attr_board_id
|
||||
static const struct attribute_group i40iw_attr_group = {
|
||||
.attrs = i40iw_dev_attributes,
|
||||
};
|
||||
|
||||
/**
|
||||
@ -2752,7 +2755,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
|
||||
i40iw_pr_err("iwdev == NULL\n");
|
||||
return NULL;
|
||||
}
|
||||
strlcpy(iwibdev->ibdev.name, "i40iw%d", IB_DEVICE_NAME_MAX);
|
||||
iwibdev->ibdev.owner = THIS_MODULE;
|
||||
iwdev->iwibdev = iwibdev;
|
||||
iwibdev->iwdev = iwdev;
|
||||
@ -2850,20 +2852,6 @@ void i40iw_port_ibevent(struct i40iw_device *iwdev)
|
||||
ib_dispatch_event(&event);
|
||||
}
|
||||
|
||||
/**
|
||||
* i40iw_unregister_rdma_device - unregister of iwarp from IB
|
||||
* @iwibdev: rdma device ptr
|
||||
*/
|
||||
static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i)
|
||||
device_remove_file(&iwibdev->ibdev.dev,
|
||||
i40iw_dev_attributes[i]);
|
||||
ib_unregister_device(&iwibdev->ibdev);
|
||||
}
|
||||
|
||||
/**
|
||||
* i40iw_destroy_rdma_device - destroy rdma device and free resources
|
||||
* @iwibdev: IB device ptr
|
||||
@ -2873,7 +2861,7 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
|
||||
if (!iwibdev)
|
||||
return;
|
||||
|
||||
i40iw_unregister_rdma_device(iwibdev);
|
||||
ib_unregister_device(&iwibdev->ibdev);
|
||||
kfree(iwibdev->ibdev.iwcm);
|
||||
iwibdev->ibdev.iwcm = NULL;
|
||||
wait_event_timeout(iwibdev->iwdev->close_wq,
|
||||
@ -2888,32 +2876,19 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
|
||||
*/
|
||||
int i40iw_register_rdma_device(struct i40iw_device *iwdev)
|
||||
{
|
||||
int i, ret;
|
||||
int ret;
|
||||
struct i40iw_ib_device *iwibdev;
|
||||
|
||||
iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
|
||||
if (!iwdev->iwibdev)
|
||||
return -ENOMEM;
|
||||
iwibdev = iwdev->iwibdev;
|
||||
|
||||
rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
|
||||
iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
|
||||
ret = ib_register_device(&iwibdev->ibdev, NULL);
|
||||
ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", NULL);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) {
|
||||
ret =
|
||||
device_create_file(&iwibdev->ibdev.dev,
|
||||
i40iw_dev_attributes[i]);
|
||||
if (ret) {
|
||||
while (i > 0) {
|
||||
i--;
|
||||
device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]);
|
||||
}
|
||||
ib_unregister_device(&iwibdev->ibdev);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
error:
|
||||
kfree(iwdev->iwibdev->ibdev.iwcm);
|
||||
|
@ -1,6 +1,7 @@
|
||||
config MLX4_INFINIBAND
|
||||
tristate "Mellanox ConnectX HCA support"
|
||||
depends on NETDEVICES && ETHERNET && PCI && INET
|
||||
depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
|
||||
depends on MAY_USE_DEVLINK
|
||||
select NET_VENDOR_MELLANOX
|
||||
select MLX4_CORE
|
||||
|
@ -807,15 +807,17 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
|
||||
int err;
|
||||
struct ib_port_attr pattr;
|
||||
|
||||
if (in_wc && in_wc->qp->qp_num) {
|
||||
pr_debug("received MAD: slid:%d sqpn:%d "
|
||||
"dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
|
||||
in_wc->slid, in_wc->src_qp,
|
||||
in_wc->dlid_path_bits,
|
||||
in_wc->qp->qp_num,
|
||||
in_wc->wc_flags,
|
||||
in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
|
||||
be16_to_cpu(in_mad->mad_hdr.attr_id));
|
||||
if (in_wc && in_wc->qp) {
|
||||
pr_debug("received MAD: port:%d slid:%d sqpn:%d "
|
||||
"dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n",
|
||||
port_num,
|
||||
in_wc->slid, in_wc->src_qp,
|
||||
in_wc->dlid_path_bits,
|
||||
in_wc->qp->qp_num,
|
||||
in_wc->wc_flags,
|
||||
be64_to_cpu(in_mad->mad_hdr.tid),
|
||||
in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
|
||||
be16_to_cpu(in_mad->mad_hdr.attr_id));
|
||||
if (in_wc->wc_flags & IB_WC_GRH) {
|
||||
pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
|
||||
be64_to_cpu(in_grh->sgid.global.subnet_prefix),
|
||||
|
@ -1140,144 +1140,50 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mlx4_ib_vma_open(struct vm_area_struct *area)
|
||||
{
|
||||
/* vma_open is called when a new VMA is created on top of our VMA.
|
||||
* This is done through either mremap flow or split_vma (usually due
|
||||
* to mlock, madvise, munmap, etc.). We do not support a clone of the
|
||||
* vma, as this VMA is strongly hardware related. Therefore we set the
|
||||
* vm_ops of the newly created/cloned VMA to NULL, to prevent it from
|
||||
* calling us again and trying to do incorrect actions. We assume that
|
||||
* the original vma size is exactly a single page that there will be no
|
||||
* "splitting" operations on.
|
||||
*/
|
||||
area->vm_ops = NULL;
|
||||
}
|
||||
|
||||
static void mlx4_ib_vma_close(struct vm_area_struct *area)
|
||||
{
|
||||
struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
|
||||
|
||||
/* It's guaranteed that all VMAs opened on a FD are closed before the
|
||||
* file itself is closed, therefore no sync is needed with the regular
|
||||
* closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
|
||||
* with accessing the vma as part of mlx4_ib_disassociate_ucontext.
|
||||
* The close operation is usually called under mm->mmap_sem except when
|
||||
* process is exiting. The exiting case is handled explicitly as part
|
||||
* of mlx4_ib_disassociate_ucontext.
|
||||
*/
|
||||
mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
|
||||
area->vm_private_data;
|
||||
|
||||
/* set the vma context pointer to null in the mlx4_ib driver's private
|
||||
* data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
|
||||
*/
|
||||
mlx4_ib_vma_priv_data->vma = NULL;
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct mlx4_ib_vm_ops = {
|
||||
.open = mlx4_ib_vma_open,
|
||||
.close = mlx4_ib_vma_close
|
||||
};
|
||||
|
||||
static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
|
||||
{
|
||||
int i;
|
||||
struct vm_area_struct *vma;
|
||||
struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
|
||||
|
||||
/* need to protect from a race on closing the vma as part of
|
||||
* mlx4_ib_vma_close().
|
||||
*/
|
||||
for (i = 0; i < HW_BAR_COUNT; i++) {
|
||||
vma = context->hw_bar_info[i].vma;
|
||||
if (!vma)
|
||||
continue;
|
||||
|
||||
zap_vma_ptes(context->hw_bar_info[i].vma,
|
||||
context->hw_bar_info[i].vma->vm_start, PAGE_SIZE);
|
||||
|
||||
context->hw_bar_info[i].vma->vm_flags &=
|
||||
~(VM_SHARED | VM_MAYSHARE);
|
||||
/* context going to be destroyed, should not access ops any more */
|
||||
context->hw_bar_info[i].vma->vm_ops = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
|
||||
struct mlx4_ib_vma_private_data *vma_private_data)
|
||||
{
|
||||
vma_private_data->vma = vma;
|
||||
vma->vm_private_data = vma_private_data;
|
||||
vma->vm_ops = &mlx4_ib_vm_ops;
|
||||
}
|
||||
|
||||
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
|
||||
{
|
||||
struct mlx4_ib_dev *dev = to_mdev(context->device);
|
||||
struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
|
||||
|
||||
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
switch (vma->vm_pgoff) {
|
||||
case 0:
|
||||
return rdma_user_mmap_io(context, vma,
|
||||
to_mucontext(context)->uar.pfn,
|
||||
PAGE_SIZE,
|
||||
pgprot_noncached(vma->vm_page_prot));
|
||||
|
||||
if (vma->vm_pgoff == 0) {
|
||||
/* We prevent double mmaping on same context */
|
||||
if (mucontext->hw_bar_info[HW_BAR_DB].vma)
|
||||
case 1:
|
||||
if (dev->dev->caps.bf_reg_size == 0)
|
||||
return -EINVAL;
|
||||
return rdma_user_mmap_io(
|
||||
context, vma,
|
||||
to_mucontext(context)->uar.pfn +
|
||||
dev->dev->caps.num_uars,
|
||||
PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot));
|
||||
|
||||
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
||||
|
||||
if (io_remap_pfn_range(vma, vma->vm_start,
|
||||
to_mucontext(context)->uar.pfn,
|
||||
PAGE_SIZE, vma->vm_page_prot))
|
||||
return -EAGAIN;
|
||||
|
||||
mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
|
||||
|
||||
} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
|
||||
/* We prevent double mmaping on same context */
|
||||
if (mucontext->hw_bar_info[HW_BAR_BF].vma)
|
||||
return -EINVAL;
|
||||
|
||||
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
|
||||
|
||||
if (io_remap_pfn_range(vma, vma->vm_start,
|
||||
to_mucontext(context)->uar.pfn +
|
||||
dev->dev->caps.num_uars,
|
||||
PAGE_SIZE, vma->vm_page_prot))
|
||||
return -EAGAIN;
|
||||
|
||||
mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
|
||||
|
||||
} else if (vma->vm_pgoff == 3) {
|
||||
case 3: {
|
||||
struct mlx4_clock_params params;
|
||||
int ret;
|
||||
|
||||
/* We prevent double mmaping on same context */
|
||||
if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
|
||||
return -EINVAL;
|
||||
|
||||
ret = mlx4_get_internal_clock_params(dev->dev, ¶ms);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
||||
if (io_remap_pfn_range(vma, vma->vm_start,
|
||||
(pci_resource_start(dev->dev->persist->pdev,
|
||||
params.bar) +
|
||||
params.offset)
|
||||
>> PAGE_SHIFT,
|
||||
PAGE_SIZE, vma->vm_page_prot))
|
||||
return -EAGAIN;
|
||||
|
||||
mlx4_ib_set_vma_data(vma,
|
||||
&mucontext->hw_bar_info[HW_BAR_CLOCK]);
|
||||
} else {
|
||||
return -EINVAL;
|
||||
return rdma_user_mmap_io(
|
||||
context, vma,
|
||||
(pci_resource_start(dev->dev->persist->pdev,
|
||||
params.bar) +
|
||||
params.offset) >>
|
||||
PAGE_SHIFT,
|
||||
PAGE_SIZE, pgprot_noncached(vma->vm_page_prot));
|
||||
}
|
||||
|
||||
return 0;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
|
||||
@ -2133,39 +2039,43 @@ static int init_node_data(struct mlx4_ib_dev *dev)
|
||||
return err;
|
||||
}
|
||||
|
||||
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
|
||||
char *buf)
|
||||
static ssize_t hca_type_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct mlx4_ib_dev *dev =
|
||||
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
|
||||
return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
|
||||
}
|
||||
static DEVICE_ATTR_RO(hca_type);
|
||||
|
||||
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
|
||||
char *buf)
|
||||
static ssize_t hw_rev_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct mlx4_ib_dev *dev =
|
||||
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
|
||||
return sprintf(buf, "%x\n", dev->dev->rev_id);
|
||||
}
|
||||
static DEVICE_ATTR_RO(hw_rev);
|
||||
|
||||
static ssize_t show_board(struct device *device, struct device_attribute *attr,
|
||||
char *buf)
|
||||
static ssize_t board_id_show(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct mlx4_ib_dev *dev =
|
||||
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
|
||||
return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
|
||||
dev->dev->board_id);
|
||||
}
|
||||
static DEVICE_ATTR_RO(board_id);
|
||||
|
||||
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
|
||||
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
|
||||
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
|
||||
static struct attribute *mlx4_class_attributes[] = {
|
||||
&dev_attr_hw_rev.attr,
|
||||
&dev_attr_hca_type.attr,
|
||||
&dev_attr_board_id.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct device_attribute *mlx4_class_attributes[] = {
|
||||
&dev_attr_hw_rev,
|
||||
&dev_attr_hca_type,
|
||||
&dev_attr_board_id
|
||||
static const struct attribute_group mlx4_attr_group = {
|
||||
.attrs = mlx4_class_attributes,
|
||||
};
|
||||
|
||||
struct diag_counter {
|
||||
@ -2636,7 +2546,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
|
||||
ibdev->dev = dev;
|
||||
ibdev->bond_next_port = 0;
|
||||
|
||||
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
|
||||
ibdev->ib_dev.owner = THIS_MODULE;
|
||||
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
|
||||
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
|
||||
@ -2898,8 +2807,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
|
||||
if (mlx4_ib_alloc_diag_counters(ibdev))
|
||||
goto err_steer_free_bitmap;
|
||||
|
||||
rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
|
||||
ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
|
||||
if (ib_register_device(&ibdev->ib_dev, NULL))
|
||||
if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", NULL))
|
||||
goto err_diag_counters;
|
||||
|
||||
if (mlx4_ib_mad_init(ibdev))
|
||||
@ -2922,12 +2832,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
|
||||
goto err_notif;
|
||||
}
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
|
||||
if (device_create_file(&ibdev->ib_dev.dev,
|
||||
mlx4_class_attributes[j]))
|
||||
goto err_notif;
|
||||
}
|
||||
|
||||
ibdev->ib_active = true;
|
||||
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
|
||||
devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
|
||||
|
@ -673,7 +673,7 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
|
||||
if (!list_empty(&group->pending_list))
|
||||
req = list_first_entry(&group->pending_list,
|
||||
struct mcast_req, group_list);
|
||||
if ((method == IB_MGMT_METHOD_GET_RESP)) {
|
||||
if (method == IB_MGMT_METHOD_GET_RESP) {
|
||||
if (req) {
|
||||
send_reply_to_slave(req->func, group, &req->sa_mad, status);
|
||||
--group->func[req->func].num_pend_reqs;
|
||||
|
@ -80,16 +80,11 @@ enum hw_bar_type {
|
||||
HW_BAR_COUNT
|
||||
};
|
||||
|
||||
struct mlx4_ib_vma_private_data {
|
||||
struct vm_area_struct *vma;
|
||||
};
|
||||
|
||||
struct mlx4_ib_ucontext {
|
||||
struct ib_ucontext ibucontext;
|
||||
struct mlx4_uar uar;
|
||||
struct list_head db_page_list;
|
||||
struct mutex db_page_mutex;
|
||||
struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
|
||||
struct list_head wqn_ranges_list;
|
||||
struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */
|
||||
};
|
||||
|
@ -2629,7 +2629,6 @@ enum {
|
||||
static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
|
||||
int attr_mask, struct ib_udata *udata)
|
||||
{
|
||||
enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
|
||||
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
|
||||
struct mlx4_ib_qp *qp = to_mqp(ibqp);
|
||||
enum ib_qp_state cur_state, new_state;
|
||||
@ -2639,13 +2638,8 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
|
||||
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
|
||||
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
|
||||
|
||||
if (cur_state != new_state || cur_state != IB_QPS_RESET) {
|
||||
int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
|
||||
ll = rdma_port_get_link_layer(&dev->ib_dev, port);
|
||||
}
|
||||
|
||||
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
|
||||
attr_mask, ll)) {
|
||||
attr_mask)) {
|
||||
pr_debug("qpn 0x%x: invalid attribute mask specified "
|
||||
"for transition %d to %d. qp_type %d,"
|
||||
" attr_mask 0x%x\n",
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user