mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-30 04:56:43 +07:00
bnx2: switch to build_skb() infrastructure
This is very similar to bnx2x conversion, but bnx2 only requires 16bytes alignement at start of the received frame to store its l2_fhdr, so goal was not to reduce skb truesize (in fact it should not change after this patch) Using build_skb() reduces cache line misses in the driver, since we use cache hot skb instead of cold ones. Number of in-flight sk_buff structures is lower, they are more likely recycled in SLUB caches while still hot. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> CC: Michael Chan <mchan@broadcom.com> CC: Eilon Greenstein <eilong@broadcom.com> Reviewed-by: Michael Chan <mchan@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
adc9300e78
commit
dd2bc8e9c0
@ -2725,31 +2725,27 @@ bnx2_free_rx_page(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index)
|
||||
}
|
||||
|
||||
static inline int
|
||||
bnx2_alloc_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index, gfp_t gfp)
|
||||
bnx2_alloc_rx_data(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index, gfp_t gfp)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
u8 *data;
|
||||
struct sw_bd *rx_buf = &rxr->rx_buf_ring[index];
|
||||
dma_addr_t mapping;
|
||||
struct rx_bd *rxbd = &rxr->rx_desc_ring[RX_RING(index)][RX_IDX(index)];
|
||||
unsigned long align;
|
||||
|
||||
skb = __netdev_alloc_skb(bp->dev, bp->rx_buf_size, gfp);
|
||||
if (skb == NULL) {
|
||||
data = kmalloc(bp->rx_buf_size, gfp);
|
||||
if (!data)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (unlikely((align = (unsigned long) skb->data & (BNX2_RX_ALIGN - 1))))
|
||||
skb_reserve(skb, BNX2_RX_ALIGN - align);
|
||||
|
||||
mapping = dma_map_single(&bp->pdev->dev, skb->data, bp->rx_buf_use_size,
|
||||
mapping = dma_map_single(&bp->pdev->dev,
|
||||
get_l2_fhdr(data),
|
||||
bp->rx_buf_use_size,
|
||||
PCI_DMA_FROMDEVICE);
|
||||
if (dma_mapping_error(&bp->pdev->dev, mapping)) {
|
||||
dev_kfree_skb(skb);
|
||||
kfree(data);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
rx_buf->skb = skb;
|
||||
rx_buf->desc = (struct l2_fhdr *) skb->data;
|
||||
rx_buf->data = data;
|
||||
dma_unmap_addr_set(rx_buf, mapping, mapping);
|
||||
|
||||
rxbd->rx_bd_haddr_hi = (u64) mapping >> 32;
|
||||
@ -2956,8 +2952,8 @@ bnx2_reuse_rx_skb_pages(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
|
||||
}
|
||||
|
||||
static inline void
|
||||
bnx2_reuse_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
|
||||
struct sk_buff *skb, u16 cons, u16 prod)
|
||||
bnx2_reuse_rx_data(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
|
||||
u8 *data, u16 cons, u16 prod)
|
||||
{
|
||||
struct sw_bd *cons_rx_buf, *prod_rx_buf;
|
||||
struct rx_bd *cons_bd, *prod_bd;
|
||||
@ -2971,8 +2967,7 @@ bnx2_reuse_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
|
||||
|
||||
rxr->rx_prod_bseq += bp->rx_buf_use_size;
|
||||
|
||||
prod_rx_buf->skb = skb;
|
||||
prod_rx_buf->desc = (struct l2_fhdr *) skb->data;
|
||||
prod_rx_buf->data = data;
|
||||
|
||||
if (cons == prod)
|
||||
return;
|
||||
@ -2986,33 +2981,39 @@ bnx2_reuse_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
|
||||
prod_bd->rx_bd_haddr_lo = cons_bd->rx_bd_haddr_lo;
|
||||
}
|
||||
|
||||
static int
|
||||
bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
|
||||
static struct sk_buff *
|
||||
bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u8 *data,
|
||||
unsigned int len, unsigned int hdr_len, dma_addr_t dma_addr,
|
||||
u32 ring_idx)
|
||||
{
|
||||
int err;
|
||||
u16 prod = ring_idx & 0xffff;
|
||||
struct sk_buff *skb;
|
||||
|
||||
err = bnx2_alloc_rx_skb(bp, rxr, prod, GFP_ATOMIC);
|
||||
err = bnx2_alloc_rx_data(bp, rxr, prod, GFP_ATOMIC);
|
||||
if (unlikely(err)) {
|
||||
bnx2_reuse_rx_skb(bp, rxr, skb, (u16) (ring_idx >> 16), prod);
|
||||
bnx2_reuse_rx_data(bp, rxr, data, (u16) (ring_idx >> 16), prod);
|
||||
error:
|
||||
if (hdr_len) {
|
||||
unsigned int raw_len = len + 4;
|
||||
int pages = PAGE_ALIGN(raw_len - hdr_len) >> PAGE_SHIFT;
|
||||
|
||||
bnx2_reuse_rx_skb_pages(bp, rxr, NULL, pages);
|
||||
}
|
||||
return err;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
skb_reserve(skb, BNX2_RX_OFFSET);
|
||||
dma_unmap_single(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size,
|
||||
PCI_DMA_FROMDEVICE);
|
||||
|
||||
skb = build_skb(data);
|
||||
if (!skb) {
|
||||
kfree(data);
|
||||
goto error;
|
||||
}
|
||||
skb_reserve(skb, ((u8 *)get_l2_fhdr(data) - data) + BNX2_RX_OFFSET);
|
||||
if (hdr_len == 0) {
|
||||
skb_put(skb, len);
|
||||
return 0;
|
||||
return skb;
|
||||
} else {
|
||||
unsigned int i, frag_len, frag_size, pages;
|
||||
struct sw_pg *rx_pg;
|
||||
@ -3043,7 +3044,7 @@ bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
|
||||
skb_frag_size_sub(frag, tail);
|
||||
skb->data_len -= tail;
|
||||
}
|
||||
return 0;
|
||||
return skb;
|
||||
}
|
||||
rx_pg = &rxr->rx_pg_ring[pg_cons];
|
||||
|
||||
@ -3065,7 +3066,7 @@ bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
|
||||
rxr->rx_pg_prod = pg_prod;
|
||||
bnx2_reuse_rx_skb_pages(bp, rxr, skb,
|
||||
pages - i);
|
||||
return err;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
dma_unmap_page(&bp->pdev->dev, mapping_old,
|
||||
@ -3082,7 +3083,7 @@ bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
|
||||
rxr->rx_pg_prod = pg_prod;
|
||||
rxr->rx_pg_cons = pg_cons;
|
||||
}
|
||||
return 0;
|
||||
return skb;
|
||||
}
|
||||
|
||||
static inline u16
|
||||
@ -3121,19 +3122,17 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
|
||||
struct sw_bd *rx_buf, *next_rx_buf;
|
||||
struct sk_buff *skb;
|
||||
dma_addr_t dma_addr;
|
||||
u8 *data;
|
||||
|
||||
sw_ring_cons = RX_RING_IDX(sw_cons);
|
||||
sw_ring_prod = RX_RING_IDX(sw_prod);
|
||||
|
||||
rx_buf = &rxr->rx_buf_ring[sw_ring_cons];
|
||||
skb = rx_buf->skb;
|
||||
prefetchw(skb);
|
||||
data = rx_buf->data;
|
||||
rx_buf->data = NULL;
|
||||
|
||||
next_rx_buf =
|
||||
&rxr->rx_buf_ring[RX_RING_IDX(NEXT_RX_BD(sw_cons))];
|
||||
prefetch(next_rx_buf->desc);
|
||||
|
||||
rx_buf->skb = NULL;
|
||||
rx_hdr = get_l2_fhdr(data);
|
||||
prefetch(rx_hdr);
|
||||
|
||||
dma_addr = dma_unmap_addr(rx_buf, mapping);
|
||||
|
||||
@ -3141,7 +3140,10 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
|
||||
BNX2_RX_OFFSET + BNX2_RX_COPY_THRESH,
|
||||
PCI_DMA_FROMDEVICE);
|
||||
|
||||
rx_hdr = rx_buf->desc;
|
||||
next_rx_buf =
|
||||
&rxr->rx_buf_ring[RX_RING_IDX(NEXT_RX_BD(sw_cons))];
|
||||
prefetch(get_l2_fhdr(next_rx_buf->data));
|
||||
|
||||
len = rx_hdr->l2_fhdr_pkt_len;
|
||||
status = rx_hdr->l2_fhdr_status;
|
||||
|
||||
@ -3160,7 +3162,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
|
||||
L2_FHDR_ERRORS_TOO_SHORT |
|
||||
L2_FHDR_ERRORS_GIANT_FRAME))) {
|
||||
|
||||
bnx2_reuse_rx_skb(bp, rxr, skb, sw_ring_cons,
|
||||
bnx2_reuse_rx_data(bp, rxr, data, sw_ring_cons,
|
||||
sw_ring_prod);
|
||||
if (pg_ring_used) {
|
||||
int pages;
|
||||
@ -3175,30 +3177,29 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
|
||||
len -= 4;
|
||||
|
||||
if (len <= bp->rx_copy_thresh) {
|
||||
struct sk_buff *new_skb;
|
||||
|
||||
new_skb = netdev_alloc_skb(bp->dev, len + 6);
|
||||
if (new_skb == NULL) {
|
||||
bnx2_reuse_rx_skb(bp, rxr, skb, sw_ring_cons,
|
||||
skb = netdev_alloc_skb(bp->dev, len + 6);
|
||||
if (skb == NULL) {
|
||||
bnx2_reuse_rx_data(bp, rxr, data, sw_ring_cons,
|
||||
sw_ring_prod);
|
||||
goto next_rx;
|
||||
}
|
||||
|
||||
/* aligned copy */
|
||||
skb_copy_from_linear_data_offset(skb,
|
||||
BNX2_RX_OFFSET - 6,
|
||||
new_skb->data, len + 6);
|
||||
skb_reserve(new_skb, 6);
|
||||
skb_put(new_skb, len);
|
||||
memcpy(skb->data,
|
||||
(u8 *)rx_hdr + BNX2_RX_OFFSET - 6,
|
||||
len + 6);
|
||||
skb_reserve(skb, 6);
|
||||
skb_put(skb, len);
|
||||
|
||||
bnx2_reuse_rx_skb(bp, rxr, skb,
|
||||
bnx2_reuse_rx_data(bp, rxr, data,
|
||||
sw_ring_cons, sw_ring_prod);
|
||||
|
||||
skb = new_skb;
|
||||
} else if (unlikely(bnx2_rx_skb(bp, rxr, skb, len, hdr_len,
|
||||
dma_addr, (sw_ring_cons << 16) | sw_ring_prod)))
|
||||
goto next_rx;
|
||||
|
||||
} else {
|
||||
skb = bnx2_rx_skb(bp, rxr, data, len, hdr_len, dma_addr,
|
||||
(sw_ring_cons << 16) | sw_ring_prod);
|
||||
if (!skb)
|
||||
goto next_rx;
|
||||
}
|
||||
if ((status & L2_FHDR_STATUS_L2_VLAN_TAG) &&
|
||||
!(bp->rx_mode & BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG))
|
||||
__vlan_hwaccel_put_tag(skb, rx_hdr->l2_fhdr_vlan_tag);
|
||||
@ -5225,7 +5226,7 @@ bnx2_init_rx_ring(struct bnx2 *bp, int ring_num)
|
||||
|
||||
ring_prod = prod = rxr->rx_prod;
|
||||
for (i = 0; i < bp->rx_ring_size; i++) {
|
||||
if (bnx2_alloc_rx_skb(bp, rxr, ring_prod, GFP_KERNEL) < 0) {
|
||||
if (bnx2_alloc_rx_data(bp, rxr, ring_prod, GFP_KERNEL) < 0) {
|
||||
netdev_warn(bp->dev, "init'ed rx ring %d with %d/%d skbs only\n",
|
||||
ring_num, i, bp->rx_ring_size);
|
||||
break;
|
||||
@ -5320,7 +5321,7 @@ bnx2_set_rx_ring_size(struct bnx2 *bp, u32 size)
|
||||
rx_size = bp->dev->mtu + ETH_HLEN + BNX2_RX_OFFSET + 8;
|
||||
|
||||
rx_space = SKB_DATA_ALIGN(rx_size + BNX2_RX_ALIGN) + NET_SKB_PAD +
|
||||
sizeof(struct skb_shared_info);
|
||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
|
||||
bp->rx_copy_thresh = BNX2_RX_COPY_THRESH;
|
||||
bp->rx_pg_ring_size = 0;
|
||||
@ -5342,8 +5343,9 @@ bnx2_set_rx_ring_size(struct bnx2 *bp, u32 size)
|
||||
}
|
||||
|
||||
bp->rx_buf_use_size = rx_size;
|
||||
/* hw alignment */
|
||||
bp->rx_buf_size = bp->rx_buf_use_size + BNX2_RX_ALIGN;
|
||||
/* hw alignment + build_skb() overhead*/
|
||||
bp->rx_buf_size = SKB_DATA_ALIGN(bp->rx_buf_use_size + BNX2_RX_ALIGN) +
|
||||
NET_SKB_PAD + SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
bp->rx_jumbo_thresh = rx_size - BNX2_RX_OFFSET;
|
||||
bp->rx_ring_size = size;
|
||||
bp->rx_max_ring = bnx2_find_max_ring(size, MAX_RX_RINGS);
|
||||
@ -5409,9 +5411,9 @@ bnx2_free_rx_skbs(struct bnx2 *bp)
|
||||
|
||||
for (j = 0; j < bp->rx_max_ring_idx; j++) {
|
||||
struct sw_bd *rx_buf = &rxr->rx_buf_ring[j];
|
||||
struct sk_buff *skb = rx_buf->skb;
|
||||
u8 *data = rx_buf->data;
|
||||
|
||||
if (skb == NULL)
|
||||
if (data == NULL)
|
||||
continue;
|
||||
|
||||
dma_unmap_single(&bp->pdev->dev,
|
||||
@ -5419,9 +5421,9 @@ bnx2_free_rx_skbs(struct bnx2 *bp)
|
||||
bp->rx_buf_use_size,
|
||||
PCI_DMA_FROMDEVICE);
|
||||
|
||||
rx_buf->skb = NULL;
|
||||
rx_buf->data = NULL;
|
||||
|
||||
dev_kfree_skb(skb);
|
||||
kfree(data);
|
||||
}
|
||||
for (j = 0; j < bp->rx_max_pg_ring_idx; j++)
|
||||
bnx2_free_rx_page(bp, rxr, j);
|
||||
@ -5727,7 +5729,8 @@ static int
|
||||
bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
|
||||
{
|
||||
unsigned int pkt_size, num_pkts, i;
|
||||
struct sk_buff *skb, *rx_skb;
|
||||
struct sk_buff *skb;
|
||||
u8 *data;
|
||||
unsigned char *packet;
|
||||
u16 rx_start_idx, rx_idx;
|
||||
dma_addr_t map;
|
||||
@ -5819,14 +5822,14 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
|
||||
}
|
||||
|
||||
rx_buf = &rxr->rx_buf_ring[rx_start_idx];
|
||||
rx_skb = rx_buf->skb;
|
||||
data = rx_buf->data;
|
||||
|
||||
rx_hdr = rx_buf->desc;
|
||||
skb_reserve(rx_skb, BNX2_RX_OFFSET);
|
||||
rx_hdr = get_l2_fhdr(data);
|
||||
data = (u8 *)rx_hdr + BNX2_RX_OFFSET;
|
||||
|
||||
dma_sync_single_for_cpu(&bp->pdev->dev,
|
||||
dma_unmap_addr(rx_buf, mapping),
|
||||
bp->rx_buf_size, PCI_DMA_FROMDEVICE);
|
||||
bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
|
||||
|
||||
if (rx_hdr->l2_fhdr_status &
|
||||
(L2_FHDR_ERRORS_BAD_CRC |
|
||||
@ -5843,7 +5846,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
|
||||
}
|
||||
|
||||
for (i = 14; i < pkt_size; i++) {
|
||||
if (*(rx_skb->data + i) != (unsigned char) (i & 0xff)) {
|
||||
if (*(data + i) != (unsigned char) (i & 0xff)) {
|
||||
goto loopback_test_done;
|
||||
}
|
||||
}
|
||||
|
@ -6563,12 +6563,25 @@ struct l2_fhdr {
|
||||
#define MB_TX_CID_ADDR MB_GET_CID_ADDR(TX_CID)
|
||||
#define MB_RX_CID_ADDR MB_GET_CID_ADDR(RX_CID)
|
||||
|
||||
/*
|
||||
* This driver uses new build_skb() API :
|
||||
* RX ring buffer contains pointer to kmalloc() data only,
|
||||
* skb are built only after Hardware filled the frame.
|
||||
*/
|
||||
struct sw_bd {
|
||||
struct sk_buff *skb;
|
||||
struct l2_fhdr *desc;
|
||||
u8 *data;
|
||||
DEFINE_DMA_UNMAP_ADDR(mapping);
|
||||
};
|
||||
|
||||
/* Its faster to compute this from data than storing it in sw_bd
|
||||
* (less cache misses)
|
||||
*/
|
||||
static inline struct l2_fhdr *get_l2_fhdr(u8 *data)
|
||||
{
|
||||
return (struct l2_fhdr *)(PTR_ALIGN(data, BNX2_RX_ALIGN) + NET_SKB_PAD);
|
||||
}
|
||||
|
||||
|
||||
struct sw_pg {
|
||||
struct page *page;
|
||||
DEFINE_DMA_UNMAP_ADDR(mapping);
|
||||
|
Loading…
Reference in New Issue
Block a user