linux_dsm_epyc7002/drivers/infiniband/hw/hfi1/netdev_rx.c
Dennis Dalessandro b46925a24a IB/hfi1: Restore kfree in dummy_netdev cleanup
We need to do some rework on the dummy netdev. Calling the free_netdev()
would normally make sense, and that will be addressed in an upcoming
patch. For now just revert the behavior to what it was before keeping the
unused variable removal part of the patch.

The dd->dummy_netdev is mainly used for packet receiving and, unlike typical
net devices, is not allocated through alloc_netdev_mqs(). As a result, it
should be freed with kfree() instead of free_netdev(), which leads to a
crash when unloading the hfi1 module:

  BUG: kernel NULL pointer dereference, address: 0000000000000000
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 8000000855b54067 P4D 8000000855b54067 PUD 84a4f5067 PMD 0
  Oops: 0000 [#1] SMP PTI
  CPU: 73 PID: 10299 Comm: modprobe Not tainted 5.6.0-rc5+ #1
  Hardware name: Intel Corporation S2600WT2R/S2600WT2R, BIOS SE5C610.86B.01.01.0016.033120161139 03/31/2016
  RIP: 0010:__hw_addr_flush+0x12/0x80
  Code: 40 00 48 83 c4 08 4c 89 e7 5b 5d 41 5c e9 76 77 18 00 66 0f 1f 44 00 00 0f 1f 44 00 00 41 54 49 89 fc 55 53 48 8b 1f 48 39 df <48> 8b 2b 75 08 eb 4a 48 89 eb 48 89 c5 48 89 df e8 99 bf d0 ff 84
  RSP: 0018:ffffb40e08783db8 EFLAGS: 00010282
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000002
  RDX: ffffb40e00000000 RSI: 0000000000000246 RDI: ffff88ab13662298
  RBP: ffff88ab13662000 R08: 0000000000001549 R09: 0000000000001549
  R10: 0000000000000001 R11: 0000000000aaaaaa R12: ffff88ab13662298
  R13: ffff88ab1b259e20 R14: ffff88ab1b259e42 R15: 0000000000000000
  FS:  00007fb39b534740(0000) GS:ffff88b31f940000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000000000000 CR3: 000000084d3ea004 CR4: 00000000003606e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   dev_addr_flush+0x15/0x30
   free_netdev+0x7e/0x130
   hfi1_netdev_free+0x59/0x70 [hfi1]
   remove_one+0x65/0x110 [hfi1]
   pci_device_remove+0x3b/0xc0
   device_release_driver_internal+0xec/0x1b0
   driver_detach+0x46/0x90
   bus_remove_driver+0x58/0xd0
   pci_unregister_driver+0x26/0xa0
   hfi1_mod_cleanup+0xc/0xd54 [hfi1]
   __x64_sys_delete_module+0x16c/0x260
   ? exit_to_usermode_loop+0xa4/0xc0
   do_syscall_64+0x5b/0x200
   entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 193ba03141 ("IB/hfi1: Use free_netdev() in hfi1_netdev_free()")
Link: https://lore.kernel.org/r/20200623203224.106975.16926.stgit@awfm-01.aw.intel.com
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2020-06-24 15:54:08 -03:00

482 lines
12 KiB
C

// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* Copyright(c) 2020 Intel Corporation.
*
*/
/*
* This file contains HFI1 support for netdev RX functionality
*/
#include "sdma.h"
#include "verbs.h"
#include "netdev.h"
#include "hfi.h"
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <rdma/ib_verbs.h>
/*
 * Prepare a receive context for netdev use: install the netdev receive
 * handlers, allocate the receive header queue and the eager buffers, then
 * program the initial receive control operations.
 *
 * Returns 0 on success, or the error code of the allocation step that failed.
 */
static int hfi1_netdev_setup_ctxt(struct hfi1_netdev_priv *priv,
				  struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = priv->dd;
	unsigned int ops;
	int rc;

	uctxt->rhf_rcv_function_map = netdev_rhf_rcv_functions;
	uctxt->do_interrupt = &handle_receive_interrupt_napi_sp;

	/* Now allocate the RcvHdr queue and eager buffers. */
	rc = hfi1_create_rcvhdrq(dd, uctxt);
	if (rc)
		return rc;

	rc = hfi1_setup_eagerbufs(uctxt);
	if (rc)
		return rc;

	clear_rcvhdrtail(uctxt);

	/* Base operations, extended below according to the context flags. */
	ops = HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS;

	if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
		ops |= HFI1_RCVCTRL_TAILUPD_ENB;

	hfi1_rcvctrl(uctxt->dd, ops, uctxt);

	return 0;
}
/*
 * Create a receive context for netdev use and hand it back through @ctxt.
 *
 * Returns -EIO when the device is flagged HFI1_FROZEN, -ENOMEM when the
 * context data cannot be created, and 0 on success.
 */
static int hfi1_netdev_allocate_ctxt(struct hfi1_devdata *dd,
				     struct hfi1_ctxtdata **ctxt)
{
	struct hfi1_ctxtdata *rcd;

	if (dd->flags & HFI1_FROZEN)
		return -EIO;

	if (hfi1_create_ctxtdata(dd->pport, dd->node, &rcd) < 0) {
		dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
		return -ENOMEM;
	}

	rcd->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
		     HFI1_CAP_KGET(NODROP_RHQ_FULL) |
		     HFI1_CAP_KGET(NODROP_EGR_FULL) |
		     HFI1_CAP_KGET(DMA_RTAIL);

	/* Netdev contexts are always NO_RDMA_RTAIL */
	rcd->fast_handler = handle_receive_interrupt_napi_fp;
	rcd->slow_handler = handle_receive_interrupt_napi_sp;
	hfi1_set_seq_cnt(rcd, 1);
	rcd->is_vnic = true;

	hfi1_stats.sps_ctxts++;

	dd_dev_info(dd, "created netdev context %d\n", rcd->ctxt);
	*ctxt = rcd;

	return 0;
}
/*
 * Tear down a netdev receive context: disable it in hardware, release its
 * MSI-X interrupt if one is assigned, clear its TIDs and pkey, and free it.
 */
static void hfi1_netdev_deallocate_ctxt(struct hfi1_devdata *dd,
					struct hfi1_ctxtdata *uctxt)
{
	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	const unsigned int disable_ops = HFI1_RCVCTRL_CTXT_DIS |
					 HFI1_RCVCTRL_TIDFLOW_DIS |
					 HFI1_RCVCTRL_INTRAVAIL_DIS |
					 HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
					 HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
					 HFI1_RCVCTRL_NO_EGR_DROP_DIS;

	flush_wc();
	hfi1_rcvctrl(dd, disable_ops, uctxt);

	/* CCE_NUM_MSIX_VECTORS marks "no interrupt assigned" here. */
	if (uctxt->msix_intr != CCE_NUM_MSIX_VECTORS) {
		msix_free_irq(dd, uctxt->msix_intr);
		uctxt->msix_intr = CCE_NUM_MSIX_VECTORS;
	}

	uctxt->event_flags = 0;

	hfi1_clear_tids(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	hfi1_stats.sps_ctxts--;

	hfi1_free_ctxt(uctxt);
}
/*
 * Allocate a netdev receive context and set it up.
 *
 * On setup failure the freshly allocated context is deallocated again and
 * *@ctxt is reset to NULL. Returns 0 on success or the failing step's error.
 */
static int hfi1_netdev_allot_ctxt(struct hfi1_netdev_priv *priv,
				  struct hfi1_ctxtdata **ctxt)
{
	struct hfi1_devdata *dd = priv->dd;
	int err;

	err = hfi1_netdev_allocate_ctxt(dd, ctxt);
	if (err) {
		dd_dev_err(dd, "netdev ctxt alloc failed %d\n", err);
		return err;
	}

	err = hfi1_netdev_setup_ctxt(priv, *ctxt);
	if (!err)
		return 0;

	/* Setup failed: undo the allocation so the caller sees no context. */
	dd_dev_err(dd, "netdev ctxt setup failed %d\n", err);
	hfi1_netdev_deallocate_ctxt(dd, *ctxt);
	*ctxt = NULL;

	return err;
}
/**
 * hfi1_num_netdev_contexts - Count of netdev recv contexts to use.
 * @dd: device on which to allocate netdev contexts
 * @available_contexts: count of available receive contexts
 * @cpu_mask: mask of possible cpus to include for contexts
 *
 * The result is the smallest of: the number of CPUs in @cpu_mask that belong
 * to the node of the device's PCI bus, @available_contexts, and
 * HFI1_MAX_NETDEV_CTXTS.
 *
 * Return: the number of receive contexts to use for netdevs. 0 is returned
 * when the AIP capability is not set, when no receive contexts are
 * available, or when the temporary cpumask cannot be allocated.
 */
u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
			     struct cpumask *cpu_mask)
{
	cpumask_var_t local_mask;
	unsigned int ncpus;

	if (!HFI1_CAP_IS_KSET(AIP))
		return 0;

	/* Always give user contexts priority over netdev contexts */
	if (!available_contexts) {
		dd_dev_info(dd, "No receive contexts available for netdevs.\n");
		return 0;
	}

	if (!zalloc_cpumask_var(&local_mask, GFP_KERNEL)) {
		dd_dev_err(dd, "Unable to allocate cpu_mask for netdevs.\n");
		return 0;
	}

	/* Keep only the caller's CPUs that sit on the device's node. */
	cpumask_and(local_mask, cpu_mask,
		    cpumask_of_node(pcibus_to_node(dd->pcidev->bus)));
	ncpus = cpumask_weight(local_mask);
	free_cpumask_var(local_mask);

	return min3(ncpus, available_contexts, (u32)HFI1_MAX_NETDEV_CTXTS);
}
/*
 * hfi1_netdev_rxq_init - allocate and set up the netdev receive queues
 * @dev: net device whose private data (hfi1_netdev_priv) owns the queues
 *
 * Allocates one hfi1_netdev_rxq per dd->num_netdev_contexts. For each queue
 * it allots a receive context, takes a reference on it, registers the
 * queue's NAPI instance on @dev and requests the context's MSI-X interrupt.
 *
 * On any failure every queue set up so far is fully unwound (NAPI deleted,
 * context deallocated, reference dropped), the queue array is freed, and
 * priv->rxq / priv->num_rx_q are reset so that later queue walks
 * (enable/disable/deinit) see an empty set instead of a NULL array with a
 * stale count.
 *
 * Return: 0 on success, -ENOMEM if the queue array cannot be allocated, or
 * the error from context allotment / interrupt request.
 */
static int hfi1_netdev_rxq_init(struct net_device *dev)
{
	int i;
	int rc;
	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dev);
	struct hfi1_devdata *dd = priv->dd;

	priv->num_rx_q = dd->num_netdev_contexts;
	priv->rxq = kcalloc_node(priv->num_rx_q, sizeof(struct hfi1_netdev_rxq),
				 GFP_KERNEL, dd->node);

	if (!priv->rxq) {
		dd_dev_err(dd, "Unable to allocate netdev queue data\n");
		/* No array: make sure nobody iterates over it. */
		priv->num_rx_q = 0;
		return -ENOMEM;
	}

	for (i = 0; i < priv->num_rx_q; i++) {
		struct hfi1_netdev_rxq *rxq = &priv->rxq[i];

		rc = hfi1_netdev_allot_ctxt(priv, &rxq->rcd);
		if (rc)
			goto bail_context_irq_failure;

		hfi1_rcd_get(rxq->rcd);
		rxq->priv = priv;
		rxq->rcd->napi = &rxq->napi;
		dd_dev_info(dd, "Setting rcv queue %d napi to context %d\n",
			    i, rxq->rcd->ctxt);
		/*
		 * Disable BUSY_POLL on this NAPI as this is not supported
		 * right now.
		 */
		set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
		netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
		rc = msix_netdev_request_rcd_irq(rxq->rcd);
		if (rc)
			goto bail_context_irq_failure;
	}

	return 0;

bail_context_irq_failure:
	dd_dev_err(dd, "Unable to allot receive context\n");
	for (; i >= 0; i--) {
		struct hfi1_netdev_rxq *rxq = &priv->rxq[i];

		/*
		 * A non-NULL rcd means the allot step succeeded for this
		 * queue, and netif_napi_add() always runs before the next
		 * possible failure point, so the NAPI instance is registered
		 * and must be removed before its memory is freed below.
		 */
		if (rxq->rcd) {
			netif_napi_del(&rxq->napi);
			hfi1_netdev_deallocate_ctxt(dd, rxq->rcd);
			hfi1_rcd_put(rxq->rcd);
			rxq->rcd = NULL;
		}
	}
	kfree(priv->rxq);
	priv->rxq = NULL;
	priv->num_rx_q = 0;

	return rc;
}
/*
 * Undo hfi1_netdev_rxq_init(): for every receive queue delete its NAPI
 * instance, deallocate its receive context and drop the reference, then
 * free the queue array and reset the queue count.
 */
static void hfi1_netdev_rxq_deinit(struct net_device *dev)
{
	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dev);
	struct hfi1_devdata *dd = priv->dd;
	int q;

	for (q = 0; q < priv->num_rx_q; q++) {
		struct hfi1_netdev_rxq *queue = &priv->rxq[q];

		netif_napi_del(&queue->napi);
		hfi1_netdev_deallocate_ctxt(dd, queue->rcd);
		hfi1_rcd_put(queue->rcd);
		queue->rcd = NULL;
	}

	kfree(priv->rxq);
	priv->rxq = NULL;
	priv->num_rx_q = 0;
}
/*
 * Enable NAPI on every receive queue, then enable the queue's receive
 * context together with its interrupt-available notification.
 */
static void enable_queues(struct hfi1_netdev_priv *priv)
{
	const unsigned int enable_ops = HFI1_RCVCTRL_CTXT_ENB |
					HFI1_RCVCTRL_INTRAVAIL_ENB;
	int q;

	for (q = 0; q < priv->num_rx_q; q++) {
		struct hfi1_netdev_rxq *queue = &priv->rxq[q];

		dd_dev_info(priv->dd, "enabling queue %d on context %d\n", q,
			    queue->rcd->ctxt);
		napi_enable(&queue->napi);
		hfi1_rcvctrl(priv->dd, enable_ops, queue->rcd);
	}
}
/*
 * Synchronize the netdev interrupts, then for every receive queue disable
 * its receive context and interrupt-available notification before
 * quiescing and disabling its NAPI instance.
 */
static void disable_queues(struct hfi1_netdev_priv *priv)
{
	const unsigned int disable_ops = HFI1_RCVCTRL_CTXT_DIS |
					 HFI1_RCVCTRL_INTRAVAIL_DIS;
	int q;

	msix_netdev_synchronize_irq(priv->dd);

	for (q = 0; q < priv->num_rx_q; q++) {
		struct hfi1_netdev_rxq *queue = &priv->rxq[q];

		dd_dev_info(priv->dd, "disabling queue %d on context %d\n", q,
			    queue->rcd->ctxt);

		hfi1_rcvctrl(priv->dd, disable_ops, queue->rcd);

		/* wait for napi if it was scheduled */
		napi_synchronize(&queue->napi);
		napi_disable(&queue->napi);
	}
}
/**
 * hfi1_netdev_rx_init - Increments the netdevs counter. When called for the
 * first time (counter was 0), it initializes the dummy netdev, allocates the
 * receive queue data and calls netif_napi_add for each queue.
 *
 * @dd: hfi1 dev data
 *
 * Return: 0 if the counter was already non-zero, otherwise the result of
 * the receive queue initialization.
 */
int hfi1_netdev_rx_init(struct hfi1_devdata *dd)
{
	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
	int ret;

	/* Only the caller that moves the counter off zero does the setup. */
	if (atomic_fetch_inc(&priv->netdevs) != 0)
		return 0;

	mutex_lock(&hfi1_mutex);
	init_dummy_netdev(dd->dummy_netdev);
	ret = hfi1_netdev_rxq_init(dd->dummy_netdev);
	mutex_unlock(&hfi1_mutex);

	return ret;
}
/**
 * hfi1_netdev_rx_destroy - Decrements the netdevs counter (unless it is
 * already 0). When the counter drops from 1 to 0, napi is deleted and the
 * receive queues memory is freed.
 *
 * @dd: hfi1 dev data
 *
 * Return: always 0.
 */
int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd)
{
	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);

	/* destroy the RX queues only if it is the last netdev going away */
	if (atomic_fetch_add_unless(&priv->netdevs, -1, 0) != 1)
		return 0;

	mutex_lock(&hfi1_mutex);
	hfi1_netdev_rxq_deinit(dd->dummy_netdev);
	mutex_unlock(&hfi1_mutex);

	return 0;
}
/**
 * hfi1_netdev_alloc - Allocates netdev and private data. It is required
 * because RMT index and MSI-X interrupt can be set only
 * during driver initialization.
 *
 * The dummy netdev and its hfi1_netdev_priv are carved out of a single
 * zeroed allocation on dd->node.
 *
 * @dd: hfi1 dev data
 *
 * Return: 0 on success, -ENOMEM if the allocation fails.
 */
int hfi1_netdev_alloc(struct hfi1_devdata *dd)
{
	const int total_size = sizeof(*dd->dummy_netdev) +
			       sizeof(struct hfi1_netdev_priv);
	struct hfi1_netdev_priv *priv;

	dd_dev_info(dd, "allocating netdev size %d\n", total_size);

	dd->dummy_netdev = kcalloc_node(1, total_size, GFP_KERNEL, dd->node);
	if (!dd->dummy_netdev)
		return -ENOMEM;

	priv = hfi1_netdev_priv(dd->dummy_netdev);
	priv->dd = dd;
	xa_init(&priv->dev_tbl);
	atomic_set(&priv->enabled, 0);
	atomic_set(&priv->netdevs, 0);

	return 0;
}
/*
 * Release the dummy netdev allocated by hfi1_netdev_alloc(), if any, and
 * clear the pointer. The memory came from kcalloc_node(), so it is returned
 * with kfree().
 */
void hfi1_netdev_free(struct hfi1_devdata *dd)
{
	if (!dd->dummy_netdev)
		return;

	dd_dev_info(dd, "hfi1 netdev freed\n");
	kfree(dd->dummy_netdev);
	dd->dummy_netdev = NULL;
}
/**
 * hfi1_netdev_enable_queues - This is napi enable function.
 * It increments the "enabled" atomic counter on every call. Only the call
 * that finds the counter at 0 enables the napi objects associated with the
 * queues, under hfi1_mutex. Does nothing when there is no dummy netdev.
 *
 * @dd: hfi1 dev data
 */
void hfi1_netdev_enable_queues(struct hfi1_devdata *dd)
{
	struct hfi1_netdev_priv *priv;

	if (!dd->dummy_netdev)
		return;

	priv = hfi1_netdev_priv(dd->dummy_netdev);

	/* Queues are already enabled if the counter was non-zero. */
	if (atomic_fetch_inc(&priv->enabled) != 0)
		return;

	mutex_lock(&hfi1_mutex);
	enable_queues(priv);
	mutex_unlock(&hfi1_mutex);
}
/*
 * Counterpart of hfi1_netdev_enable_queues(): decrement the "enabled"
 * counter if it is positive and, only when that decrement yields 0, disable
 * every queue under hfi1_mutex. Does nothing when there is no dummy netdev.
 */
void hfi1_netdev_disable_queues(struct hfi1_devdata *dd)
{
	struct hfi1_netdev_priv *priv;

	if (!dd->dummy_netdev)
		return;

	priv = hfi1_netdev_priv(dd->dummy_netdev);

	/* Any non-zero result means this caller must not disable the queues. */
	if (atomic_dec_if_positive(&priv->enabled) != 0)
		return;

	mutex_lock(&hfi1_mutex);
	disable_queues(priv);
	mutex_unlock(&hfi1_mutex);
}
/**
 * hfi1_netdev_add_data - Registers data with unique identifier
 * to be requested later this is needed for VNIC and IPoIB VLANs
 * implementations.
 * The entry is stored in the dev_tbl xarray of the dummy netdev's private
 * data using xa_insert() with GFP_NOWAIT.
 *
 * @dd: hfi1 dev data
 * @id: requested integer id up to INT_MAX
 * @data: data to be associated with index
 *
 * Return: the result of xa_insert().
 */
int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data)
{
	struct hfi1_netdev_priv *priv;

	priv = hfi1_netdev_priv(dd->dummy_netdev);

	return xa_insert(&priv->dev_tbl, id, data, GFP_NOWAIT);
}
/**
 * hfi1_netdev_remove_data - Removes data with previously given id.
 *
 * @dd: hfi1 dev data
 * @id: requested integer id up to INT_MAX
 *
 * Return: the reference to the removed entry, as returned by xa_erase().
 */
void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id)
{
	struct hfi1_netdev_priv *priv;

	priv = hfi1_netdev_priv(dd->dummy_netdev);

	return xa_erase(&priv->dev_tbl, id);
}
/**
 * hfi1_netdev_get_data - Gets data with given id
 *
 * @dd: hfi1 dev data
 * @id: requested integer id up to INT_MAX
 *
 * Return: the entry stored at @id, as returned by xa_load().
 */
void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id)
{
	struct hfi1_netdev_priv *priv;

	priv = hfi1_netdev_priv(dd->dummy_netdev);

	return xa_load(&priv->dev_tbl, id);
}
/**
 * hfi1_netdev_get_first_data - Gets first entry with greater or equal id.
 *
 * @dd: hfi1 dev data
 * @start_id: in: id to start searching from (up to INT_MAX);
 *            out: the search index after xa_find(), cast back to int
 *
 * Return: the entry found by xa_find().
 */
void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id)
{
	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
	unsigned long idx = *start_id;
	void *entry;

	entry = xa_find(&priv->dev_tbl, &idx, UINT_MAX, XA_PRESENT);
	*start_id = (int)idx;

	return entry;
}