mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-17 10:52:26 +07:00
mlx4: avoid unnecessary dirtying of critical fields
While stressing a 40Gbit mlx4 NIC with busy polling, I found false sharing in the mlx4 driver that can be easily avoided.

This patch brings an additional 7% performance improvement in the UDP_RR workload.

1) If we received no frame during one mlx4_en_process_rx_cq() invocation, there is no need to call mlx4_cq_set_ci() and/or dirty ring->cons.

2) Do not refill rx buffers if we have plenty of them. This avoids false sharing and allows some bulk/batch optimizations. Page allocator and its locks will thank us.

Finally, mlx4_en_poll_rx_cq() should not return 0 if it determined that the CPU handling the NIC IRQ should be changed. We should return budget-1 instead, to not fool net_rx_action() and its netdev_budget.

v2: keep AVG_PERF_COUNTER(... polled) even if polled is 0

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
b668534c1d
commit
dad42c3038
@ -688,18 +688,23 @@ static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb)
|
|||||||
dev_kfree_skb_any(skb);
|
dev_kfree_skb_any(skb);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
|
static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
|
||||||
struct mlx4_en_rx_ring *ring)
|
struct mlx4_en_rx_ring *ring)
|
||||||
{
|
{
|
||||||
int index = ring->prod & ring->size_mask;
|
u32 missing = ring->actual_size - (ring->prod - ring->cons);
|
||||||
|
|
||||||
while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
|
/* Try to batch allocations, but not too much. */
|
||||||
if (mlx4_en_prepare_rx_desc(priv, ring, index,
|
if (missing < 8)
|
||||||
|
return false;
|
||||||
|
do {
|
||||||
|
if (mlx4_en_prepare_rx_desc(priv, ring,
|
||||||
|
ring->prod & ring->size_mask,
|
||||||
GFP_ATOMIC | __GFP_COLD))
|
GFP_ATOMIC | __GFP_COLD))
|
||||||
break;
|
break;
|
||||||
ring->prod++;
|
ring->prod++;
|
||||||
index = ring->prod & ring->size_mask;
|
} while (--missing);
|
||||||
}
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* When hardware doesn't strip the vlan, we need to calculate the checksum
|
/* When hardware doesn't strip the vlan, we need to calculate the checksum
|
||||||
@ -1081,15 +1086,20 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
|
|||||||
|
|
||||||
out:
|
out:
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
if (doorbell_pending)
|
|
||||||
mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
|
|
||||||
|
|
||||||
|
if (polled) {
|
||||||
|
if (doorbell_pending)
|
||||||
|
mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
|
||||||
|
|
||||||
|
mlx4_cq_set_ci(&cq->mcq);
|
||||||
|
wmb(); /* ensure HW sees CQ consumer before we post new buffers */
|
||||||
|
ring->cons = cq->mcq.cons_index;
|
||||||
|
}
|
||||||
AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
|
AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
|
||||||
mlx4_cq_set_ci(&cq->mcq);
|
|
||||||
wmb(); /* ensure HW sees CQ consumer before we post new buffers */
|
if (mlx4_en_refill_rx_buffers(priv, ring))
|
||||||
ring->cons = cq->mcq.cons_index;
|
mlx4_en_update_rx_prod_db(ring);
|
||||||
mlx4_en_refill_rx_buffers(priv, ring);
|
|
||||||
mlx4_en_update_rx_prod_db(ring);
|
|
||||||
return polled;
|
return polled;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1131,10 +1141,13 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
|
|||||||
return budget;
|
return budget;
|
||||||
|
|
||||||
/* Current cpu is not according to smp_irq_affinity -
|
/* Current cpu is not according to smp_irq_affinity -
|
||||||
* probably affinity changed. need to stop this NAPI
|
* probably affinity changed. Need to stop this NAPI
|
||||||
* poll, and restart it on the right CPU
|
* poll, and restart it on the right CPU.
|
||||||
|
* Try to avoid returning a too small value (like 0),
|
||||||
|
* to not fool net_rx_action() and its netdev_budget
|
||||||
*/
|
*/
|
||||||
done = 0;
|
if (done)
|
||||||
|
done--;
|
||||||
}
|
}
|
||||||
/* Done for now */
|
/* Done for now */
|
||||||
if (napi_complete_done(napi, done))
|
if (napi_complete_done(napi, done))
|
||||||
|
Loading…
Reference in New Issue
Block a user