linux_dsm_epyc7002/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
Sunil Goutham 773225388d net: thunderx: Optimize page recycling for XDP
Driver follows a method of taking one extra reference on the
page for recycling which is fine in usual packet path where
each 64KB page is segmented into multiple receive buffers.

But in XDP mode since there is just one receive buffer per
page taking extra page reference itself becomes big bottleneck
consuming ~50% of CPU cycles due to atomic operations.

This patch adds a internal ref count in pgcache for each
page and additional page references are taken in a batch
instead of just one at a time. Internal i.e 'pgcache->ref_count'
and page's i.e 'page->_refcount' counters are compared to check
page's recyclability.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-02 15:41:22 -04:00

1948 lines
51 KiB
C

/*
* Copyright (C) 2015 Cavium, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License
* as published by the Free Software Foundation.
*/
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/ip.h>
#include <linux/etherdevice.h>
#include <linux/iommu.h>
#include <net/ip.h>
#include <net/tso.h>
#include "nic_reg.h"
#include "nic.h"
#include "q_struct.h"
#include "nicvf_queues.h"
static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
int size, u64 data);
static void nicvf_get_page(struct nicvf *nic)
{
if (!nic->rb_pageref || !nic->rb_page)
return;
page_ref_add(nic->rb_page, nic->rb_pageref);
nic->rb_pageref = 0;
}
/* Poll a register for a specific value */
static int nicvf_poll_reg(struct nicvf *nic, int qidx,
u64 reg, int bit_pos, int bits, int val)
{
u64 bit_mask;
u64 reg_val;
int timeout = 10;
bit_mask = (1ULL << bits) - 1;
bit_mask = (bit_mask << bit_pos);
while (timeout) {
reg_val = nicvf_queue_reg_read(nic, reg, qidx);
if (((reg_val & bit_mask) >> bit_pos) == val)
return 0;
usleep_range(1000, 2000);
timeout--;
}
netdev_err(nic->netdev, "Poll on reg 0x%llx failed\n", reg);
return 1;
}
/* Allocate memory for a queue's descriptors */
static int nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem,
int q_len, int desc_size, int align_bytes)
{
dmem->q_len = q_len;
dmem->size = (desc_size * q_len) + align_bytes;
/* Save address, need it while freeing */
dmem->unalign_base = dma_zalloc_coherent(&nic->pdev->dev, dmem->size,
&dmem->dma, GFP_KERNEL);
if (!dmem->unalign_base)
return -ENOMEM;
/* Align memory address for 'align_bytes' */
dmem->phys_base = NICVF_ALIGNED_ADDR((u64)dmem->dma, align_bytes);
dmem->base = dmem->unalign_base + (dmem->phys_base - dmem->dma);
return 0;
}
/* Free queue's descriptor memory */
static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
{
if (!dmem)
return;
dma_free_coherent(&nic->pdev->dev, dmem->size,
dmem->unalign_base, dmem->dma);
dmem->unalign_base = NULL;
dmem->base = NULL;
}
#define XDP_PAGE_REFCNT_REFILL 256
/* Allocate a new page or recycle one if possible
*
* We cannot optimize dma mapping here, since
* 1. It's only one RBDR ring for 8 Rx queues.
* 2. CQE_RX gives address of the buffer where pkt has been DMA'ed
* and not idx into RBDR ring, so can't refer to saved info.
* 3. There are multiple receive buffers per page
*/
static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic,
struct rbdr *rbdr, gfp_t gfp)
{
int ref_count;
struct page *page = NULL;
struct pgcache *pgcache, *next;
/* Check if page is already allocated */
pgcache = &rbdr->pgcache[rbdr->pgidx];
page = pgcache->page;
/* Check if page can be recycled */
if (page) {
ref_count = page_ref_count(page);
/* Check if this page has been used once i.e 'put_page'
* called after packet transmission i.e internal ref_count
* and page's ref_count are equal i.e page can be recycled.
*/
if (rbdr->is_xdp && (ref_count == pgcache->ref_count))
pgcache->ref_count--;
else
page = NULL;
/* In non-XDP mode, page's ref_count needs to be '1' for it
* to be recycled.
*/
if (!rbdr->is_xdp && (ref_count != 1))
page = NULL;
}
if (!page) {
page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 0);
if (!page)
return NULL;
this_cpu_inc(nic->pnicvf->drv_stats->page_alloc);
/* Check for space */
if (rbdr->pgalloc >= rbdr->pgcnt) {
/* Page can still be used */
nic->rb_page = page;
return NULL;
}
/* Save the page in page cache */
pgcache->page = page;
pgcache->dma_addr = 0;
pgcache->ref_count = 0;
rbdr->pgalloc++;
}
/* Take additional page references for recycling */
if (rbdr->is_xdp) {
/* Since there is single RBDR (i.e single core doing
* page recycling) per 8 Rx queues, in XDP mode adjusting
* page references atomically is the biggest bottleneck, so
* take bunch of references at a time.
*
* So here, below reference counts defer by '1'.
*/
if (!pgcache->ref_count) {
pgcache->ref_count = XDP_PAGE_REFCNT_REFILL;
page_ref_add(page, XDP_PAGE_REFCNT_REFILL);
}
} else {
/* In non-XDP case, single 64K page is divided across multiple
* receive buffers, so cost of recycling is less anyway.
* So we can do with just one extra reference.
*/
page_ref_add(page, 1);
}
rbdr->pgidx++;
rbdr->pgidx &= (rbdr->pgcnt - 1);
/* Prefetch refcount of next page in page cache */
next = &rbdr->pgcache[rbdr->pgidx];
page = next->page;
if (page)
prefetch(&page->_refcount);
return pgcache;
}
/* Allocate buffer for packet reception */
static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
gfp_t gfp, u32 buf_len, u64 *rbuf)
{
struct pgcache *pgcache = NULL;
/* Check if request can be accomodated in previous allocated page.
* But in XDP mode only one buffer per page is permitted.
*/
if (!rbdr->is_xdp && nic->rb_page &&
((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) {
nic->rb_pageref++;
goto ret;
}
nicvf_get_page(nic);
nic->rb_page = NULL;
/* Get new page, either recycled or new one */
pgcache = nicvf_alloc_page(nic, rbdr, gfp);
if (!pgcache && !nic->rb_page) {
this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures);
return -ENOMEM;
}
nic->rb_page_offset = 0;
/* Reserve space for header modifications by BPF program */
if (rbdr->is_xdp)
buf_len += XDP_PACKET_HEADROOM;
/* Check if it's recycled */
if (pgcache)
nic->rb_page = pgcache->page;
ret:
if (rbdr->is_xdp && pgcache && pgcache->dma_addr) {
*rbuf = pgcache->dma_addr;
} else {
/* HW will ensure data coherency, CPU sync not required */
*rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
nic->rb_page_offset, buf_len,
DMA_FROM_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) {
if (!nic->rb_page_offset)
__free_pages(nic->rb_page, 0);
nic->rb_page = NULL;
return -ENOMEM;
}
if (pgcache)
pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM;
nic->rb_page_offset += buf_len;
}
return 0;
}
/* Build skb around receive buffer */
static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic,
u64 rb_ptr, int len)
{
void *data;
struct sk_buff *skb;
data = phys_to_virt(rb_ptr);
/* Now build an skb to give to stack */
skb = build_skb(data, RCV_FRAG_LEN);
if (!skb) {
put_page(virt_to_page(data));
return NULL;
}
prefetch(skb->data);
return skb;
}
/* Allocate RBDR ring and populate receive buffers */
static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
int ring_len, int buf_size)
{
int idx;
u64 rbuf;
struct rbdr_entry_t *desc;
int err;
err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len,
sizeof(struct rbdr_entry_t),
NICVF_RCV_BUF_ALIGN_BYTES);
if (err)
return err;
rbdr->desc = rbdr->dmem.base;
/* Buffer size has to be in multiples of 128 bytes */
rbdr->dma_size = buf_size;
rbdr->enable = true;
rbdr->thresh = RBDR_THRESH;
rbdr->head = 0;
rbdr->tail = 0;
/* Initialize page recycling stuff.
*
* Can't use single buffer per page especially with 64K pages.
* On embedded platforms i.e 81xx/83xx available memory itself
* is low and minimum ring size of RBDR is 8K, that takes away
* lots of memory.
*
* But for XDP it has to be a single buffer per page.
*/
if (!nic->pnicvf->xdp_prog) {
rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size);
rbdr->is_xdp = false;
} else {
rbdr->pgcnt = ring_len;
rbdr->is_xdp = true;
}
rbdr->pgcnt = roundup_pow_of_two(rbdr->pgcnt);
rbdr->pgcache = kzalloc(sizeof(*rbdr->pgcache) *
rbdr->pgcnt, GFP_KERNEL);
if (!rbdr->pgcache)
return -ENOMEM;
rbdr->pgidx = 0;
rbdr->pgalloc = 0;
nic->rb_page = NULL;
for (idx = 0; idx < ring_len; idx++) {
err = nicvf_alloc_rcv_buffer(nic, rbdr, GFP_KERNEL,
RCV_FRAG_LEN, &rbuf);
if (err) {
/* To free already allocated and mapped ones */
rbdr->tail = idx - 1;
return err;
}
desc = GET_RBDR_DESC(rbdr, idx);
desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
}
nicvf_get_page(nic);
return 0;
}
/* Free RBDR ring and its receive buffers */
static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
{
int head, tail;
u64 buf_addr, phys_addr;
struct pgcache *pgcache;
struct rbdr_entry_t *desc;
if (!rbdr)
return;
rbdr->enable = false;
if (!rbdr->dmem.base)
return;
head = rbdr->head;
tail = rbdr->tail;
/* Release page references */
while (head != tail) {
desc = GET_RBDR_DESC(rbdr, head);
buf_addr = desc->buf_addr;
phys_addr = nicvf_iova_to_phys(nic, buf_addr);
dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
if (phys_addr)
put_page(virt_to_page(phys_to_virt(phys_addr)));
head++;
head &= (rbdr->dmem.q_len - 1);
}
/* Release buffer of tail desc */
desc = GET_RBDR_DESC(rbdr, tail);
buf_addr = desc->buf_addr;
phys_addr = nicvf_iova_to_phys(nic, buf_addr);
dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
if (phys_addr)
put_page(virt_to_page(phys_to_virt(phys_addr)));
/* Sync page cache info */
smp_rmb();
/* Release additional page references held for recycling */
head = 0;
while (head < rbdr->pgcnt) {
pgcache = &rbdr->pgcache[head];
if (pgcache->page && page_ref_count(pgcache->page) != 0) {
if (!rbdr->is_xdp) {
put_page(pgcache->page);
continue;
}
page_ref_sub(pgcache->page, pgcache->ref_count - 1);
put_page(pgcache->page);
}
head++;
}
/* Free RBDR ring */
nicvf_free_q_desc_mem(nic, &rbdr->dmem);
}
/* Refill receive buffer descriptors with new buffers.
*/
static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp)
{
struct queue_set *qs = nic->qs;
int rbdr_idx = qs->rbdr_cnt;
int tail, qcount;
int refill_rb_cnt;
struct rbdr *rbdr;
struct rbdr_entry_t *desc;
u64 rbuf;
int new_rb = 0;
refill:
if (!rbdr_idx)
return;
rbdr_idx--;
rbdr = &qs->rbdr[rbdr_idx];
/* Check if it's enabled */
if (!rbdr->enable)
goto next_rbdr;
/* Get no of desc's to be refilled */
qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx);
qcount &= 0x7FFFF;
/* Doorbell can be ringed with a max of ring size minus 1 */
if (qcount >= (qs->rbdr_len - 1))
goto next_rbdr;
else
refill_rb_cnt = qs->rbdr_len - qcount - 1;
/* Sync page cache info */
smp_rmb();
/* Start filling descs from tail */
tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
while (refill_rb_cnt) {
tail++;
tail &= (rbdr->dmem.q_len - 1);
if (nicvf_alloc_rcv_buffer(nic, rbdr, gfp, RCV_FRAG_LEN, &rbuf))
break;
desc = GET_RBDR_DESC(rbdr, tail);
desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
refill_rb_cnt--;
new_rb++;
}
nicvf_get_page(nic);
/* make sure all memory stores are done before ringing doorbell */
smp_wmb();
/* Check if buffer allocation failed */
if (refill_rb_cnt)
nic->rb_alloc_fail = true;
else
nic->rb_alloc_fail = false;
/* Notify HW */
nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
rbdr_idx, new_rb);
next_rbdr:
/* Re-enable RBDR interrupts only if buffer allocation is success */
if (!nic->rb_alloc_fail && rbdr->enable &&
netif_running(nic->pnicvf->netdev))
nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);
if (rbdr_idx)
goto refill;
}
/* Alloc rcv buffers in non-atomic mode for better success */
void nicvf_rbdr_work(struct work_struct *work)
{
struct nicvf *nic = container_of(work, struct nicvf, rbdr_work.work);
nicvf_refill_rbdr(nic, GFP_KERNEL);
if (nic->rb_alloc_fail)
schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
else
nic->rb_work_scheduled = false;
}
/* In Softirq context, alloc rcv buffers in atomic mode */
void nicvf_rbdr_task(unsigned long data)
{
struct nicvf *nic = (struct nicvf *)data;
nicvf_refill_rbdr(nic, GFP_ATOMIC);
if (nic->rb_alloc_fail) {
nic->rb_work_scheduled = true;
schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
}
}
/* Initialize completion queue */
static int nicvf_init_cmp_queue(struct nicvf *nic,
struct cmp_queue *cq, int q_len)
{
int err;
err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE,
NICVF_CQ_BASE_ALIGN_BYTES);
if (err)
return err;
cq->desc = cq->dmem.base;
cq->thresh = pass1_silicon(nic->pdev) ? 0 : CMP_QUEUE_CQE_THRESH;
nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;
return 0;
}
static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq)
{
if (!cq)
return;
if (!cq->dmem.base)
return;
nicvf_free_q_desc_mem(nic, &cq->dmem);
}
/* Initialize transmit queue */
static int nicvf_init_snd_queue(struct nicvf *nic,
struct snd_queue *sq, int q_len, int qidx)
{
int err;
err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE,
NICVF_SQ_BASE_ALIGN_BYTES);
if (err)
return err;
sq->desc = sq->dmem.base;
sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
if (!sq->skbuff)
return -ENOMEM;
sq->head = 0;
sq->tail = 0;
sq->thresh = SND_QUEUE_THRESH;
/* Check if this SQ is a XDP TX queue */
if (nic->sqs_mode)
qidx += ((nic->sqs_id + 1) * MAX_SND_QUEUES_PER_QS);
if (qidx < nic->pnicvf->xdp_tx_queues) {
/* Alloc memory to save page pointers for XDP_TX */
sq->xdp_page = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
if (!sq->xdp_page)
return -ENOMEM;
sq->xdp_desc_cnt = 0;
sq->xdp_free_cnt = q_len - 1;
sq->is_xdp = true;
} else {
sq->xdp_page = NULL;
sq->xdp_desc_cnt = 0;
sq->xdp_free_cnt = 0;
sq->is_xdp = false;
atomic_set(&sq->free_cnt, q_len - 1);
/* Preallocate memory for TSO segment's header */
sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev,
q_len * TSO_HEADER_SIZE,
&sq->tso_hdrs_phys,
GFP_KERNEL);
if (!sq->tso_hdrs)
return -ENOMEM;
}
return 0;
}
void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
int hdr_sqe, u8 subdesc_cnt)
{
u8 idx;
struct sq_gather_subdesc *gather;
/* Unmap DMA mapped skb data buffers */
for (idx = 0; idx < subdesc_cnt; idx++) {
hdr_sqe++;
hdr_sqe &= (sq->dmem.q_len - 1);
gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe);
/* HW will ensure data coherency, CPU sync not required */
dma_unmap_page_attrs(&nic->pdev->dev, gather->addr,
gather->size, DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
}
}
static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
{
struct sk_buff *skb;
struct page *page;
struct sq_hdr_subdesc *hdr;
struct sq_hdr_subdesc *tso_sqe;
if (!sq)
return;
if (!sq->dmem.base)
return;
if (sq->tso_hdrs)
dma_free_coherent(&nic->pdev->dev,
sq->dmem.q_len * TSO_HEADER_SIZE,
sq->tso_hdrs, sq->tso_hdrs_phys);
/* Free pending skbs in the queue */
smp_rmb();
while (sq->head != sq->tail) {
skb = (struct sk_buff *)sq->skbuff[sq->head];
if (!skb || !sq->xdp_page)
goto next;
page = (struct page *)sq->xdp_page[sq->head];
if (!page)
goto next;
else
put_page(page);
hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
/* Check for dummy descriptor used for HW TSO offload on 88xx */
if (hdr->dont_send) {
/* Get actual TSO descriptors and unmap them */
tso_sqe =
(struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
tso_sqe->subdesc_cnt);
} else {
nicvf_unmap_sndq_buffers(nic, sq, sq->head,
hdr->subdesc_cnt);
}
if (skb)
dev_kfree_skb_any(skb);
next:
sq->head++;
sq->head &= (sq->dmem.q_len - 1);
}
kfree(sq->skbuff);
kfree(sq->xdp_page);
nicvf_free_q_desc_mem(nic, &sq->dmem);
}
static void nicvf_reclaim_snd_queue(struct nicvf *nic,
struct queue_set *qs, int qidx)
{
/* Disable send queue */
nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0);
/* Check if SQ is stopped */
if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01))
return;
/* Reset send queue */
nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
}
static void nicvf_reclaim_rcv_queue(struct nicvf *nic,
struct queue_set *qs, int qidx)
{
union nic_mbx mbx = {};
/* Make sure all packets in the pipeline are written back into mem */
mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC;
nicvf_send_msg_to_pf(nic, &mbx);
}
static void nicvf_reclaim_cmp_queue(struct nicvf *nic,
struct queue_set *qs, int qidx)
{
/* Disable timer threshold (doesn't get reset upon CQ reset */
nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0);
/* Disable completion queue */
nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0);
/* Reset completion queue */
nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
}
static void nicvf_reclaim_rbdr(struct nicvf *nic,
struct rbdr *rbdr, int qidx)
{
u64 tmp, fifo_state;
int timeout = 10;
/* Save head and tail pointers for feeing up buffers */
rbdr->head = nicvf_queue_reg_read(nic,
NIC_QSET_RBDR_0_1_HEAD,
qidx) >> 3;
rbdr->tail = nicvf_queue_reg_read(nic,
NIC_QSET_RBDR_0_1_TAIL,
qidx) >> 3;
/* If RBDR FIFO is in 'FAIL' state then do a reset first
* before relaiming.
*/
fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx);
if (((fifo_state >> 62) & 0x03) == 0x3)
nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
qidx, NICVF_RBDR_RESET);
/* Disable RBDR */
nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0);
if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
return;
while (1) {
tmp = nicvf_queue_reg_read(nic,
NIC_QSET_RBDR_0_1_PREFETCH_STATUS,
qidx);
if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF))
break;
usleep_range(1000, 2000);
timeout--;
if (!timeout) {
netdev_err(nic->netdev,
"Failed polling on prefetch status\n");
return;
}
}
nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
qidx, NICVF_RBDR_RESET);
if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02))
return;
nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00);
if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
return;
}
void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features)
{
u64 rq_cfg;
int sqs;
rq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_RQ_GEN_CFG, 0);
/* Enable first VLAN stripping */
if (features & NETIF_F_HW_VLAN_CTAG_RX)
rq_cfg |= (1ULL << 25);
else
rq_cfg &= ~(1ULL << 25);
nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
/* Configure Secondary Qsets, if any */
for (sqs = 0; sqs < nic->sqs_count; sqs++)
if (nic->snicvf[sqs])
nicvf_queue_reg_write(nic->snicvf[sqs],
NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
}
static void nicvf_reset_rcv_queue_stats(struct nicvf *nic)
{
union nic_mbx mbx = {};
/* Reset all RQ/SQ and VF stats */
mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
mbx.reset_stat.rx_stat_mask = 0x3FFF;
mbx.reset_stat.tx_stat_mask = 0x1F;
mbx.reset_stat.rq_stat_mask = 0xFFFF;
mbx.reset_stat.sq_stat_mask = 0xFFFF;
nicvf_send_msg_to_pf(nic, &mbx);
}
/* Configures receive queue */
static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
int qidx, bool enable)
{
union nic_mbx mbx = {};
struct rcv_queue *rq;
struct rq_cfg rq_cfg;
rq = &qs->rq[qidx];
rq->enable = enable;
/* Disable receive queue */
nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0);
if (!rq->enable) {
nicvf_reclaim_rcv_queue(nic, qs, qidx);
return;
}
rq->cq_qs = qs->vnic_id;
rq->cq_idx = qidx;
rq->start_rbdr_qs = qs->vnic_id;
rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1;
rq->cont_rbdr_qs = qs->vnic_id;
rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1;
/* all writes of RBDR data to be loaded into L2 Cache as well*/
rq->caching = 1;
/* Send a mailbox msg to PF to config RQ */
mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
mbx.rq.qs_num = qs->vnic_id;
mbx.rq.rq_num = qidx;
mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) |
(rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
(rq->cont_qs_rbdr_idx << 8) |
(rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx);
nicvf_send_msg_to_pf(nic, &mbx);
mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
(RQ_PASS_RBDR_LVL << 16) | (RQ_PASS_CQ_LVL << 8) |
(qs->vnic_id << 0);
nicvf_send_msg_to_pf(nic, &mbx);
/* RQ drop config
* Enable CQ drop to reserve sufficient CQEs for all tx packets
*/
mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
(RQ_PASS_RBDR_LVL << 40) | (RQ_DROP_RBDR_LVL << 32) |
(RQ_PASS_CQ_LVL << 16) | (RQ_DROP_CQ_LVL << 8);
nicvf_send_msg_to_pf(nic, &mbx);
if (!nic->sqs_mode && (qidx == 0)) {
/* Enable checking L3/L4 length and TCP/UDP checksums
* Also allow IPv6 pkts with zero UDP checksum.
*/
nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0,
(BIT(24) | BIT(23) | BIT(21) | BIT(20)));
nicvf_config_vlan_stripping(nic, nic->netdev->features);
}
/* Enable Receive queue */
memset(&rq_cfg, 0, sizeof(struct rq_cfg));
rq_cfg.ena = 1;
rq_cfg.tcp_ena = 0;
nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg);
}
/* Configures completion queue */
void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
int qidx, bool enable)
{
struct cmp_queue *cq;
struct cq_cfg cq_cfg;
cq = &qs->cq[qidx];
cq->enable = enable;
if (!cq->enable) {
nicvf_reclaim_cmp_queue(nic, qs, qidx);
return;
}
/* Reset completion queue */
nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
if (!cq->enable)
return;
spin_lock_init(&cq->lock);
/* Set completion queue base address */
nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE,
qidx, (u64)(cq->dmem.phys_base));
/* Enable Completion queue */
memset(&cq_cfg, 0, sizeof(struct cq_cfg));
cq_cfg.ena = 1;
cq_cfg.reset = 0;
cq_cfg.caching = 0;
cq_cfg.qsize = ilog2(qs->cq_len >> 10);
cq_cfg.avg_con = 0;
nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(u64 *)&cq_cfg);
/* Set threshold value for interrupt generation */
nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2,
qidx, CMP_QUEUE_TIMER_THRESH);
}
/* Configures transmit queue */
static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
int qidx, bool enable)
{
union nic_mbx mbx = {};
struct snd_queue *sq;
struct sq_cfg sq_cfg;
sq = &qs->sq[qidx];
sq->enable = enable;
if (!sq->enable) {
nicvf_reclaim_snd_queue(nic, qs, qidx);
return;
}
/* Reset send queue */
nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
sq->cq_qs = qs->vnic_id;
sq->cq_idx = qidx;
/* Send a mailbox msg to PF to config SQ */
mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
mbx.sq.qs_num = qs->vnic_id;
mbx.sq.sq_num = qidx;
mbx.sq.sqs_mode = nic->sqs_mode;
mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
nicvf_send_msg_to_pf(nic, &mbx);
/* Set queue base address */
nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE,
qidx, (u64)(sq->dmem.phys_base));
/* Enable send queue & set queue size */
memset(&sq_cfg, 0, sizeof(struct sq_cfg));
sq_cfg.ena = 1;
sq_cfg.reset = 0;
sq_cfg.ldwb = 0;
sq_cfg.qsize = ilog2(qs->sq_len >> 10);
sq_cfg.tstmp_bgx_intf = 0;
/* CQ's level at which HW will stop processing SQEs to avoid
* transmitting a pkt with no space in CQ to post CQE_TX.
*/
sq_cfg.cq_limit = (CMP_QUEUE_PIPELINE_RSVD * 256) / qs->cq_len;
nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg);
/* Set threshold value for interrupt generation */
nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh);
/* Set queue:cpu affinity for better load distribution */
if (cpu_online(qidx)) {
cpumask_set_cpu(qidx, &sq->affinity_mask);
netif_set_xps_queue(nic->netdev,
&sq->affinity_mask, qidx);
}
}
/* Configures receive buffer descriptor ring */
static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs,
int qidx, bool enable)
{
struct rbdr *rbdr;
struct rbdr_cfg rbdr_cfg;
rbdr = &qs->rbdr[qidx];
nicvf_reclaim_rbdr(nic, rbdr, qidx);
if (!enable)
return;
/* Set descriptor base address */
nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE,
qidx, (u64)(rbdr->dmem.phys_base));
/* Enable RBDR & set queue size */
/* Buffer size should be in multiples of 128 bytes */
memset(&rbdr_cfg, 0, sizeof(struct rbdr_cfg));
rbdr_cfg.ena = 1;
rbdr_cfg.reset = 0;
rbdr_cfg.ldwb = 0;
rbdr_cfg.qsize = RBDR_SIZE;
rbdr_cfg.avg_con = 0;
rbdr_cfg.lines = rbdr->dma_size / 128;
nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
qidx, *(u64 *)&rbdr_cfg);
/* Notify HW */
nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
qidx, qs->rbdr_len - 1);
/* Set threshold value for interrupt generation */
nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH,
qidx, rbdr->thresh - 1);
}
/* Requests PF to assign and enable Qset */
void nicvf_qset_config(struct nicvf *nic, bool enable)
{
union nic_mbx mbx = {};
struct queue_set *qs = nic->qs;
struct qs_cfg *qs_cfg;
if (!qs) {
netdev_warn(nic->netdev,
"Qset is still not allocated, don't init queues\n");
return;
}
qs->enable = enable;
qs->vnic_id = nic->vf_id;
/* Send a mailbox msg to PF to config Qset */
mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
mbx.qs.num = qs->vnic_id;
mbx.qs.sqs_count = nic->sqs_count;
mbx.qs.cfg = 0;
qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
if (qs->enable) {
qs_cfg->ena = 1;
#ifdef __BIG_ENDIAN
qs_cfg->be = 1;
#endif
qs_cfg->vnic = qs->vnic_id;
}
nicvf_send_msg_to_pf(nic, &mbx);
}
static void nicvf_free_resources(struct nicvf *nic)
{
int qidx;
struct queue_set *qs = nic->qs;
/* Free receive buffer descriptor ring */
for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
nicvf_free_rbdr(nic, &qs->rbdr[qidx]);
/* Free completion queue */
for (qidx = 0; qidx < qs->cq_cnt; qidx++)
nicvf_free_cmp_queue(nic, &qs->cq[qidx]);
/* Free send queue */
for (qidx = 0; qidx < qs->sq_cnt; qidx++)
nicvf_free_snd_queue(nic, &qs->sq[qidx]);
}
static int nicvf_alloc_resources(struct nicvf *nic)
{
int qidx;
struct queue_set *qs = nic->qs;
/* Alloc receive buffer descriptor ring */
for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len,
DMA_BUFFER_LEN))
goto alloc_fail;
}
/* Alloc send queue */
for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx))
goto alloc_fail;
}
/* Alloc completion queue */
for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len))
goto alloc_fail;
}
return 0;
alloc_fail:
nicvf_free_resources(nic);
return -ENOMEM;
}
int nicvf_set_qset_resources(struct nicvf *nic)
{
struct queue_set *qs;
qs = devm_kzalloc(&nic->pdev->dev, sizeof(*qs), GFP_KERNEL);
if (!qs)
return -ENOMEM;
nic->qs = qs;
/* Set count of each queue */
qs->rbdr_cnt = DEFAULT_RBDR_CNT;
qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus());
qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus());
qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt);
/* Set queue lengths */
qs->rbdr_len = RCV_BUF_COUNT;
qs->sq_len = SND_QUEUE_LEN;
qs->cq_len = CMP_QUEUE_LEN;
nic->rx_queues = qs->rq_cnt;
nic->tx_queues = qs->sq_cnt;
nic->xdp_tx_queues = 0;
return 0;
}
int nicvf_config_data_transfer(struct nicvf *nic, bool enable)
{
bool disable = false;
struct queue_set *qs = nic->qs;
struct queue_set *pqs = nic->pnicvf->qs;
int qidx;
if (!qs)
return 0;
/* Take primary VF's queue lengths.
* This is needed to take queue lengths set from ethtool
* into consideration.
*/
if (nic->sqs_mode && pqs) {
qs->cq_len = pqs->cq_len;
qs->sq_len = pqs->sq_len;
}
if (enable) {
if (nicvf_alloc_resources(nic))
return -ENOMEM;
for (qidx = 0; qidx < qs->sq_cnt; qidx++)
nicvf_snd_queue_config(nic, qs, qidx, enable);
for (qidx = 0; qidx < qs->cq_cnt; qidx++)
nicvf_cmp_queue_config(nic, qs, qidx, enable);
for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
nicvf_rbdr_config(nic, qs, qidx, enable);
for (qidx = 0; qidx < qs->rq_cnt; qidx++)
nicvf_rcv_queue_config(nic, qs, qidx, enable);
} else {
for (qidx = 0; qidx < qs->rq_cnt; qidx++)
nicvf_rcv_queue_config(nic, qs, qidx, disable);
for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
nicvf_rbdr_config(nic, qs, qidx, disable);
for (qidx = 0; qidx < qs->sq_cnt; qidx++)
nicvf_snd_queue_config(nic, qs, qidx, disable);
for (qidx = 0; qidx < qs->cq_cnt; qidx++)
nicvf_cmp_queue_config(nic, qs, qidx, disable);
nicvf_free_resources(nic);
}
/* Reset RXQ's stats.
* SQ's stats will get reset automatically once SQ is reset.
*/
nicvf_reset_rcv_queue_stats(nic);
return 0;
}
/* Get a free desc from SQ
* returns descriptor ponter & descriptor number
*/
static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
{
int qentry;
qentry = sq->tail;
if (!sq->is_xdp)
atomic_sub(desc_cnt, &sq->free_cnt);
else
sq->xdp_free_cnt -= desc_cnt;
sq->tail += desc_cnt;
sq->tail &= (sq->dmem.q_len - 1);
return qentry;
}
/* Rollback to previous tail pointer when descriptors not used */
static inline void nicvf_rollback_sq_desc(struct snd_queue *sq,
int qentry, int desc_cnt)
{
sq->tail = qentry;
atomic_add(desc_cnt, &sq->free_cnt);
}
/* Free descriptor back to SQ for future use */
void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
{
if (!sq->is_xdp)
atomic_add(desc_cnt, &sq->free_cnt);
else
sq->xdp_free_cnt += desc_cnt;
sq->head += desc_cnt;
sq->head &= (sq->dmem.q_len - 1);
}
static inline int nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry)
{
qentry++;
qentry &= (sq->dmem.q_len - 1);
return qentry;
}
void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx)
{
u64 sq_cfg;
sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
sq_cfg |= NICVF_SQ_EN;
nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
/* Ring doorbell so that H/W restarts processing SQEs */
nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0);
}
void nicvf_sq_disable(struct nicvf *nic, int qidx)
{
u64 sq_cfg;
sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
sq_cfg &= ~NICVF_SQ_EN;
nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
}
void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq,
int qidx)
{
u64 head, tail;
struct sk_buff *skb;
struct nicvf *nic = netdev_priv(netdev);
struct sq_hdr_subdesc *hdr;
head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4;
tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4;
while (sq->head != head) {
hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
nicvf_put_sq_desc(sq, 1);
continue;
}
skb = (struct sk_buff *)sq->skbuff[sq->head];
if (skb)
dev_kfree_skb_any(skb);
atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets);
atomic64_add(hdr->tot_len,
(atomic64_t *)&netdev->stats.tx_bytes);
nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
}
}
/* XDP Transmit APIs */
void nicvf_xdp_sq_doorbell(struct nicvf *nic,
struct snd_queue *sq, int sq_num)
{
if (!sq->xdp_desc_cnt)
return;
/* make sure all memory stores are done before ringing doorbell */
wmb();
/* Inform HW to xmit all TSO segments */
nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
sq_num, sq->xdp_desc_cnt);
sq->xdp_desc_cnt = 0;
}
static inline void
nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
int subdesc_cnt, u64 data, int len)
{
struct sq_hdr_subdesc *hdr;
hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
memset(hdr, 0, SND_QUEUE_DESC_SIZE);
hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
hdr->subdesc_cnt = subdesc_cnt;
hdr->tot_len = len;
hdr->post_cqe = 1;
sq->xdp_page[qentry] = (u64)virt_to_page((void *)data);
}
int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
u64 bufaddr, u64 dma_addr, u16 len)
{
int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
int qentry;
if (subdesc_cnt > sq->xdp_free_cnt)
return 0;
qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
nicvf_xdp_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, bufaddr, len);
qentry = nicvf_get_nxt_sqentry(sq, qentry);
nicvf_sq_add_gather_subdesc(sq, qentry, len, dma_addr);
sq->xdp_desc_cnt += subdesc_cnt;
return 1;
}
/* Calculate no of SQ subdescriptors needed to transmit all
* segments of this TSO packet.
* Taken from 'Tilera network driver' with a minor modification.
*/
static int nicvf_tso_count_subdescs(struct sk_buff *skb)
{
struct skb_shared_info *sh = skb_shinfo(skb);
unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
unsigned int data_len = skb->len - sh_len;
unsigned int p_len = sh->gso_size;
long f_id = -1; /* id of the current fragment */
long f_size = skb_headlen(skb) - sh_len; /* current fragment size */
long f_used = 0; /* bytes used from the current fragment */
long n; /* size of the current piece of payload */
int num_edescs = 0;
int segment;
for (segment = 0; segment < sh->gso_segs; segment++) {
unsigned int p_used = 0;
/* One edesc for header and for each piece of the payload. */
for (num_edescs++; p_used < p_len; num_edescs++) {
/* Advance as needed. */
while (f_used >= f_size) {
f_id++;
f_size = skb_frag_size(&sh->frags[f_id]);
f_used = 0;
}
/* Use bytes from the current fragment. */
n = p_len - p_used;
if (n > f_size - f_used)
n = f_size - f_used;
f_used += n;
p_used += n;
}
/* The last segment may be less than gso_size. */
data_len -= p_len;
if (data_len < p_len)
p_len = data_len;
}
/* '+ gso_segs' for SQ_HDR_SUDESCs for each segment */
return num_edescs + sh->gso_segs;
}
#define POST_CQE_DESC_COUNT 2
/* Get the number of SQ descriptors needed to xmit this skb */
static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
{
int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
if (skb_shinfo(skb)->gso_size && !nic->hw_tso) {
subdesc_cnt = nicvf_tso_count_subdescs(skb);
return subdesc_cnt;
}
/* Dummy descriptors to get TSO pkt completion notification */
if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size)
subdesc_cnt += POST_CQE_DESC_COUNT;
if (skb_shinfo(skb)->nr_frags)
subdesc_cnt += skb_shinfo(skb)->nr_frags;
return subdesc_cnt;
}
/* Add SQ HEADER subdescriptor.
* First subdescriptor for every send descriptor.
*/
static inline void
nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
int subdesc_cnt, struct sk_buff *skb, int len)
{
int proto;
struct sq_hdr_subdesc *hdr;
union {
struct iphdr *v4;
struct ipv6hdr *v6;
unsigned char *hdr;
} ip;
ip.hdr = skb_network_header(skb);
hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
memset(hdr, 0, SND_QUEUE_DESC_SIZE);
hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) {
/* post_cqe = 0, to avoid HW posting a CQE for every TSO
* segment transmitted on 88xx.
*/
hdr->subdesc_cnt = subdesc_cnt - POST_CQE_DESC_COUNT;
} else {
sq->skbuff[qentry] = (u64)skb;
/* Enable notification via CQE after processing SQE */
hdr->post_cqe = 1;
/* No of subdescriptors following this */
hdr->subdesc_cnt = subdesc_cnt;
}
hdr->tot_len = len;
/* Offload checksum calculation to HW */
if (skb->ip_summed == CHECKSUM_PARTIAL) {
hdr->csum_l3 = 1; /* Enable IP csum calculation */
hdr->l3_offset = skb_network_offset(skb);
hdr->l4_offset = skb_transport_offset(skb);
proto = (ip.v4->version == 4) ? ip.v4->protocol :
ip.v6->nexthdr;
switch (proto) {
case IPPROTO_TCP:
hdr->csum_l4 = SEND_L4_CSUM_TCP;
break;
case IPPROTO_UDP:
hdr->csum_l4 = SEND_L4_CSUM_UDP;
break;
case IPPROTO_SCTP:
hdr->csum_l4 = SEND_L4_CSUM_SCTP;
break;
}
}
if (nic->hw_tso && skb_shinfo(skb)->gso_size) {
hdr->tso = 1;
hdr->tso_start = skb_transport_offset(skb) + tcp_hdrlen(skb);
hdr->tso_max_paysize = skb_shinfo(skb)->gso_size;
/* For non-tunneled pkts, point this to L2 ethertype */
hdr->inner_l3_offset = skb_network_offset(skb) - 2;
this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
}
}
/* SQ GATHER subdescriptor
* Must follow HDR descriptor
*/
static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
int size, u64 data)
{
struct sq_gather_subdesc *gather;
qentry &= (sq->dmem.q_len - 1);
gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry);
memset(gather, 0, SND_QUEUE_DESC_SIZE);
gather->subdesc_type = SQ_DESC_TYPE_GATHER;
gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
gather->size = size;
gather->addr = data;
}
/* Add HDR + IMMEDIATE subdescriptors right after descriptors of a TSO
* packet so that a CQE is posted as a notifation for transmission of
* TSO packet.
*/
static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry,
int tso_sqe, struct sk_buff *skb)
{
struct sq_imm_subdesc *imm;
struct sq_hdr_subdesc *hdr;
sq->skbuff[qentry] = (u64)skb;
hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
memset(hdr, 0, SND_QUEUE_DESC_SIZE);
hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
/* Enable notification via CQE after processing SQE */
hdr->post_cqe = 1;
/* There is no packet to transmit here */
hdr->dont_send = 1;
hdr->subdesc_cnt = POST_CQE_DESC_COUNT - 1;
hdr->tot_len = 1;
/* Actual TSO header SQE index, needed for cleanup */
hdr->rsvd2 = tso_sqe;
qentry = nicvf_get_nxt_sqentry(sq, qentry);
imm = (struct sq_imm_subdesc *)GET_SQ_DESC(sq, qentry);
memset(imm, 0, SND_QUEUE_DESC_SIZE);
imm->subdesc_type = SQ_DESC_TYPE_IMMEDIATE;
imm->len = 1;
}
static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb,
int sq_num, int desc_cnt)
{
struct netdev_queue *txq;
txq = netdev_get_tx_queue(nic->pnicvf->netdev,
skb_get_queue_mapping(skb));
netdev_tx_sent_queue(txq, skb->len);
/* make sure all memory stores are done before ringing doorbell */
smp_wmb();
/* Inform HW to xmit all TSO segments */
nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
sq_num, desc_cnt);
}
/* Segment a TSO packet into 'gso_size' segments and append
* them to SQ for transfer
*/
static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
int sq_num, int qentry, struct sk_buff *skb)
{
struct tso_t tso;
int seg_subdescs = 0, desc_cnt = 0;
int seg_len, total_len, data_left;
int hdr_qentry = qentry;
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
tso_start(skb, &tso);
total_len = skb->len - hdr_len;
while (total_len > 0) {
char *hdr;
/* Save Qentry for adding HDR_SUBDESC at the end */
hdr_qentry = qentry;
data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
total_len -= data_left;
/* Add segment's header */
qentry = nicvf_get_nxt_sqentry(sq, qentry);
hdr = sq->tso_hdrs + qentry * TSO_HEADER_SIZE;
tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
nicvf_sq_add_gather_subdesc(sq, qentry, hdr_len,
sq->tso_hdrs_phys +
qentry * TSO_HEADER_SIZE);
/* HDR_SUDESC + GATHER */
seg_subdescs = 2;
seg_len = hdr_len;
/* Add segment's payload fragments */
while (data_left > 0) {
int size;
size = min_t(int, tso.size, data_left);
qentry = nicvf_get_nxt_sqentry(sq, qentry);
nicvf_sq_add_gather_subdesc(sq, qentry, size,
virt_to_phys(tso.data));
seg_subdescs++;
seg_len += size;
data_left -= size;
tso_build_data(skb, &tso, size);
}
nicvf_sq_add_hdr_subdesc(nic, sq, hdr_qentry,
seg_subdescs - 1, skb, seg_len);
sq->skbuff[hdr_qentry] = (u64)NULL;
qentry = nicvf_get_nxt_sqentry(sq, qentry);
desc_cnt += seg_subdescs;
}
/* Save SKB in the last segment for freeing */
sq->skbuff[hdr_qentry] = (u64)skb;
nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt);
this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
return 1;
}
/* Append an skb to a SQ for packet transfer. */
int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
struct sk_buff *skb, u8 sq_num)
{
int i, size;
int subdesc_cnt, hdr_sqe = 0;
int qentry;
u64 dma_addr;
subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
if (subdesc_cnt > atomic_read(&sq->free_cnt))
goto append_fail;
qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
/* Check if its a TSO packet */
if (skb_shinfo(skb)->gso_size && !nic->hw_tso)
return nicvf_sq_append_tso(nic, sq, sq_num, qentry, skb);
/* Add SQ header subdesc */
nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
skb, skb->len);
hdr_sqe = qentry;
/* Add SQ gather subdescs */
qentry = nicvf_get_nxt_sqentry(sq, qentry);
size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
/* HW will ensure data coherency, CPU sync not required */
dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data),
offset_in_page(skb->data), size,
DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
return 0;
}
nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
/* Check for scattered buffer */
if (!skb_is_nonlinear(skb))
goto doorbell;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
const struct skb_frag_struct *frag;
frag = &skb_shinfo(skb)->frags[i];
qentry = nicvf_get_nxt_sqentry(sq, qentry);
size = skb_frag_size(frag);
dma_addr = dma_map_page_attrs(&nic->pdev->dev,
skb_frag_page(frag),
frag->page_offset, size,
DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
/* Free entire chain of mapped buffers
* here 'i' = frags mapped + above mapped skb->data
*/
nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i);
nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
return 0;
}
nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
}
doorbell:
if (nic->t88 && skb_shinfo(skb)->gso_size) {
qentry = nicvf_get_nxt_sqentry(sq, qentry);
nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb);
}
nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);
return 1;
append_fail:
/* Use original PCI dev for debug log */
nic = nic->pnicvf;
netdev_dbg(nic->netdev, "Not enough SQ descriptors to xmit pkt\n");
return 0;
}
static inline unsigned frag_num(unsigned i)
{
#ifdef __BIG_ENDIAN
return (i & ~3) + 3 - (i & 3);
#else
return i;
#endif
}
static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr,
u64 buf_addr, bool xdp)
{
struct page *page = NULL;
int len = RCV_FRAG_LEN;
if (xdp) {
page = virt_to_page(phys_to_virt(buf_addr));
/* Check if it's a recycled page, if not
* unmap the DMA mapping.
*
* Recycled page holds an extra reference.
*/
if (page_ref_count(page) != 1)
return;
len += XDP_PACKET_HEADROOM;
/* Receive buffers in XDP mode are mapped from page start */
dma_addr &= PAGE_MASK;
}
dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, len,
DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
}
/* Returns SKB for a received packet */
struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic,
struct cqe_rx_t *cqe_rx, bool xdp)
{
int frag;
int payload_len = 0;
struct sk_buff *skb = NULL;
struct page *page;
int offset;
u16 *rb_lens = NULL;
u64 *rb_ptrs = NULL;
u64 phys_addr;
rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
/* Except 88xx pass1 on all other chips CQE_RX2_S is added to
* CQE_RX at word6, hence buffer pointers move by word
*
* Use existing 'hw_tso' flag which will be set for all chips
* except 88xx pass1 instead of a additional cache line
* access (or miss) by using pci dev's revision.
*/
if (!nic->hw_tso)
rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
else
rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
payload_len = rb_lens[frag_num(frag)];
phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs);
if (!phys_addr) {
if (skb)
dev_kfree_skb_any(skb);
return NULL;
}
if (!frag) {
/* First fragment */
nicvf_unmap_rcv_buffer(nic,
*rb_ptrs - cqe_rx->align_pad,
phys_addr, xdp);
skb = nicvf_rb_ptr_to_skb(nic,
phys_addr - cqe_rx->align_pad,
payload_len);
if (!skb)
return NULL;
skb_reserve(skb, cqe_rx->align_pad);
skb_put(skb, payload_len);
} else {
/* Add fragments */
nicvf_unmap_rcv_buffer(nic, *rb_ptrs, phys_addr, xdp);
page = virt_to_page(phys_to_virt(phys_addr));
offset = phys_to_virt(phys_addr) - page_address(page);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
offset, payload_len, RCV_FRAG_LEN);
}
/* Next buffer pointer */
rb_ptrs++;
}
return skb;
}
static u64 nicvf_int_type_to_mask(int int_type, int q_idx)
{
u64 reg_val;
switch (int_type) {
case NICVF_INTR_CQ:
reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
break;
case NICVF_INTR_SQ:
reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
break;
case NICVF_INTR_RBDR:
reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
break;
case NICVF_INTR_PKT_DROP:
reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
break;
case NICVF_INTR_TCP_TIMER:
reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
break;
case NICVF_INTR_MBOX:
reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT);
break;
case NICVF_INTR_QS_ERR:
reg_val = (1ULL << NICVF_INTR_QS_ERR_SHIFT);
break;
default:
reg_val = 0;
}
return reg_val;
}
/* Enable interrupt */
void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
{
u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
if (!mask) {
netdev_dbg(nic->netdev,
"Failed to enable interrupt: unknown type\n");
return;
}
nicvf_reg_write(nic, NIC_VF_ENA_W1S,
nicvf_reg_read(nic, NIC_VF_ENA_W1S) | mask);
}
/* Disable interrupt */
void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
{
u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
if (!mask) {
netdev_dbg(nic->netdev,
"Failed to disable interrupt: unknown type\n");
return;
}
nicvf_reg_write(nic, NIC_VF_ENA_W1C, mask);
}
/* Clear interrupt */
void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
{
u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
if (!mask) {
netdev_dbg(nic->netdev,
"Failed to clear interrupt: unknown type\n");
return;
}
nicvf_reg_write(nic, NIC_VF_INT, mask);
}
/* Check if interrupt is enabled */
int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
{
u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
/* If interrupt type is unknown, we treat it disabled. */
if (!mask) {
netdev_dbg(nic->netdev,
"Failed to check interrupt enable: unknown type\n");
return 0;
}
return mask & nicvf_reg_read(nic, NIC_VF_ENA_W1S);
}
void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
{
struct rcv_queue *rq;
#define GET_RQ_STATS(reg) \
nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
(rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
rq = &nic->qs->rq[rq_idx];
rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS);
rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS);
}
void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
{
struct snd_queue *sq;
#define GET_SQ_STATS(reg) \
nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
(sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
sq = &nic->qs->sq[sq_idx];
sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS);
sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS);
}
/* Check for errors in the receive cmp.queue entry */
int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
{
if (netif_msg_rx_err(nic))
netdev_err(nic->netdev,
"%s: RX error CQE err_level 0x%x err_opcode 0x%x\n",
nic->netdev->name,
cqe_rx->err_level, cqe_rx->err_opcode);
switch (cqe_rx->err_opcode) {
case CQ_RX_ERROP_RE_PARTIAL:
this_cpu_inc(nic->drv_stats->rx_bgx_truncated_pkts);
break;
case CQ_RX_ERROP_RE_JABBER:
this_cpu_inc(nic->drv_stats->rx_jabber_errs);
break;
case CQ_RX_ERROP_RE_FCS:
this_cpu_inc(nic->drv_stats->rx_fcs_errs);
break;
case CQ_RX_ERROP_RE_RX_CTL:
this_cpu_inc(nic->drv_stats->rx_bgx_errs);
break;
case CQ_RX_ERROP_PREL2_ERR:
this_cpu_inc(nic->drv_stats->rx_prel2_errs);
break;
case CQ_RX_ERROP_L2_MAL:
this_cpu_inc(nic->drv_stats->rx_l2_hdr_malformed);
break;
case CQ_RX_ERROP_L2_OVERSIZE:
this_cpu_inc(nic->drv_stats->rx_oversize);
break;
case CQ_RX_ERROP_L2_UNDERSIZE:
this_cpu_inc(nic->drv_stats->rx_undersize);
break;
case CQ_RX_ERROP_L2_LENMISM:
this_cpu_inc(nic->drv_stats->rx_l2_len_mismatch);
break;
case CQ_RX_ERROP_L2_PCLP:
this_cpu_inc(nic->drv_stats->rx_l2_pclp);
break;
case CQ_RX_ERROP_IP_NOT:
this_cpu_inc(nic->drv_stats->rx_ip_ver_errs);
break;
case CQ_RX_ERROP_IP_CSUM_ERR:
this_cpu_inc(nic->drv_stats->rx_ip_csum_errs);
break;
case CQ_RX_ERROP_IP_MAL:
this_cpu_inc(nic->drv_stats->rx_ip_hdr_malformed);
break;
case CQ_RX_ERROP_IP_MALD:
this_cpu_inc(nic->drv_stats->rx_ip_payload_malformed);
break;
case CQ_RX_ERROP_IP_HOP:
this_cpu_inc(nic->drv_stats->rx_ip_ttl_errs);
break;
case CQ_RX_ERROP_L3_PCLP:
this_cpu_inc(nic->drv_stats->rx_l3_pclp);
break;
case CQ_RX_ERROP_L4_MAL:
this_cpu_inc(nic->drv_stats->rx_l4_malformed);
break;
case CQ_RX_ERROP_L4_CHK:
this_cpu_inc(nic->drv_stats->rx_l4_csum_errs);
break;
case CQ_RX_ERROP_UDP_LEN:
this_cpu_inc(nic->drv_stats->rx_udp_len_errs);
break;
case CQ_RX_ERROP_L4_PORT:
this_cpu_inc(nic->drv_stats->rx_l4_port_errs);
break;
case CQ_RX_ERROP_TCP_FLAG:
this_cpu_inc(nic->drv_stats->rx_tcp_flag_errs);
break;
case CQ_RX_ERROP_TCP_OFFSET:
this_cpu_inc(nic->drv_stats->rx_tcp_offset_errs);
break;
case CQ_RX_ERROP_L4_PCLP:
this_cpu_inc(nic->drv_stats->rx_l4_pclp);
break;
case CQ_RX_ERROP_RBDR_TRUNC:
this_cpu_inc(nic->drv_stats->rx_truncated_pkts);
break;
}
return 1;
}
/* Check for errors in the send cmp.queue entry */
int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx)
{
switch (cqe_tx->send_status) {
case CQ_TX_ERROP_DESC_FAULT:
this_cpu_inc(nic->drv_stats->tx_desc_fault);
break;
case CQ_TX_ERROP_HDR_CONS_ERR:
this_cpu_inc(nic->drv_stats->tx_hdr_cons_err);
break;
case CQ_TX_ERROP_SUBDC_ERR:
this_cpu_inc(nic->drv_stats->tx_subdesc_err);
break;
case CQ_TX_ERROP_MAX_SIZE_VIOL:
this_cpu_inc(nic->drv_stats->tx_max_size_exceeded);
break;
case CQ_TX_ERROP_IMM_SIZE_OFLOW:
this_cpu_inc(nic->drv_stats->tx_imm_size_oflow);
break;
case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
this_cpu_inc(nic->drv_stats->tx_data_seq_err);
break;
case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
this_cpu_inc(nic->drv_stats->tx_mem_seq_err);
break;
case CQ_TX_ERROP_LOCK_VIOL:
this_cpu_inc(nic->drv_stats->tx_lock_viol);
break;
case CQ_TX_ERROP_DATA_FAULT:
this_cpu_inc(nic->drv_stats->tx_data_fault);
break;
case CQ_TX_ERROP_TSTMP_CONFLICT:
this_cpu_inc(nic->drv_stats->tx_tstmp_conflict);
break;
case CQ_TX_ERROP_TSTMP_TIMEOUT:
this_cpu_inc(nic->drv_stats->tx_tstmp_timeout);
break;
case CQ_TX_ERROP_MEM_FAULT:
this_cpu_inc(nic->drv_stats->tx_mem_fault);
break;
case CQ_TX_ERROP_CK_OVERLAP:
this_cpu_inc(nic->drv_stats->tx_csum_overlap);
break;
case CQ_TX_ERROP_CK_OFLOW:
this_cpu_inc(nic->drv_stats->tx_csum_overflow);
break;
}
return 1;
}