mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-19 07:26:45 +07:00
nvme-rdma: support devices with queue size < 32
In the case of small NVMe-oF queue size (<32) we may enter a deadlock caused by the fact that the IB completions aren't sent waiting for 32 and the send queue will fill up. The error is seen as (using mlx5): [ 2048.693355] mlx5_0:mlx5_ib_post_send:3765:(pid 7273): [ 2048.693360] nvme nvme1: nvme_rdma_post_send failed with error code -12 This patch changes the way the signaling is done so that it depends on the queue depth now. The magic define has been removed completely. Cc: stable@vger.kernel.org Signed-off-by: Marta Rybczynska <marta.rybczynska@kalray.eu> Signed-off-by: Samuel Jones <sjones@kalray.eu> Acked-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
parent
08332893e3
commit
0544f5494a
@ -1038,6 +1038,19 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
|
||||
nvme_rdma_wr_error(cq, wc, "SEND");
|
||||
}
|
||||
|
||||
static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
|
||||
{
|
||||
int sig_limit;
|
||||
|
||||
/*
|
||||
* We signal completion every queue depth/2 and also handle the
|
||||
* degenerated case of a device with queue_depth=1, where we
|
||||
* would need to signal every message.
|
||||
*/
|
||||
sig_limit = max(queue->queue_size / 2, 1);
|
||||
return (++queue->sig_count % sig_limit) == 0;
|
||||
}
|
||||
|
||||
static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
|
||||
struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
|
||||
struct ib_send_wr *first, bool flush)
|
||||
@ -1065,9 +1078,6 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
|
||||
* Would have been way to obvious to handle this in hardware or
|
||||
* at least the RDMA stack..
|
||||
*
|
||||
* This messy and racy code sniplet is copy and pasted from the iSER
|
||||
* initiator, and the magic '32' comes from there as well.
|
||||
*
|
||||
* Always signal the flushes. The magic request used for the flush
|
||||
* sequencer is not allocated in our driver's tagset and it's
|
||||
* triggered to be freed by blk_cleanup_queue(). So we need to
|
||||
@ -1075,7 +1085,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
|
||||
* embedded in request's payload, is not freed when __ib_process_cq()
|
||||
* calls wr_cqe->done().
|
||||
*/
|
||||
if ((++queue->sig_count % 32) == 0 || flush)
|
||||
if (nvme_rdma_queue_sig_limit(queue) || flush)
|
||||
wr.send_flags |= IB_SEND_SIGNALED;
|
||||
|
||||
if (first)
|
||||
|
Loading…
Reference in New Issue
Block a user