From 44e43d91ad4731d9e2e70c60eecc5982d6671e8c Mon Sep 17 00:00:00 2001
From: Mitko Haralanov
Date: Thu, 24 Jan 2019 06:09:46 -0800
Subject: [PATCH 1/6] IB/hfi1: OPFN support discovery

OPFN (Omni Path Feature Negotiation) support discovery allows an RC QP
to announce that it supports OPFN and also to discover whether OPFN is
supported by the peer QP. OPFN parameter negotiation is skipped unless
OPFN support is first discovered. OPFN support is announced by claiming
what was the reserved bit in dword 1 of the OmniPath modified base
transport header in requests and responses.

Reviewed-by: Mike Marciniszyn
Signed-off-by: Ashutosh Dixit
Signed-off-by: Mitko Haralanov
Signed-off-by: Kaike Wan
Signed-off-by: Dennis Dalessandro
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hfi1/hfi.h   |  1 +
 drivers/infiniband/hw/hfi1/opfn.h  | 53 ++++++++++++++++++++++++++++++
 drivers/infiniband/hw/hfi1/rc.c    | 16 ++++---
 drivers/infiniband/hw/hfi1/ruc.c   | 16 ++++-----
 drivers/infiniband/hw/hfi1/uc.c    |  3 +-
 drivers/infiniband/hw/hfi1/verbs.h |  3 +-
 6 files changed, 77 insertions(+), 15 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/opfn.h

diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 6db2276f5c13..ddfcf2fe40ca 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -73,6 +73,7 @@
 #include "chip_registers.h"
 #include "common.h"
+#include "opfn.h"
 #include "verbs.h"
 #include "pio.h"
 #include "chip.h"
diff --git a/drivers/infiniband/hw/hfi1/opfn.h b/drivers/infiniband/hw/hfi1/opfn.h
new file mode 100644
index 000000000000..1a2b3449df67
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/opfn.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#ifndef _HFI1_OPFN_H
+#define _HFI1_OPFN_H
+
+/**
+ * DOC: Omni Path Feature Negotiation (OPFN)
+ *
+ * OPFN is a discovery protocol for Intel Omni-Path fabric that
+ * allows two RC QPs to negotiate a common feature that both QPs
+ * can support. Currently, the only OPA feature that OPFN
+ * supports is TID RDMA.
+ *
+ * Architecture
+ *
+ * OPFN involves the communication between two QPs on the HFI
+ * level on an Omni-Path fabric, and ULPs have no knowledge of
+ * OPFN at all.
+ *
+ * Implementation
+ *
+ * OPFN extends the existing IB RC protocol with the following
+ * changes:
+ * -- Uses Bit 24 (reserved) of DWORD 1 of Base Transport
+ *    Header (BTH1) to indicate that the RC QP supports OPFN;
+ * -- Uses a combination of RC COMPARE_SWAP opcode (0x13) and
+ *    the address U64_MAX (0xFFFFFFFFFFFFFFFF) as an OPFN
+ *    request; The 64-bit data carried with the request/response
+ *    contains the parameters for negotiation and will be
+ *    defined in tid_rdma.c file;
+ * -- Defines IB_WR_RESERVED3 as IB_WR_OPFN.
+ *
+ * The OPFN communication will be triggered when an RC QP
+ * receives a request with Bit 24 of BTH1 set. The responder QP
+ * will then post send an OPFN request with its local
+ * parameters, which will be sent to the requester QP once all
+ * existing requests on the responder QP side have been sent.
+ * Once the requester QP receives the OPFN request, it will
+ * keep a copy of the responder QP's parameters, and return a
+ * response packet with its own local parameters. The responder
+ * QP receives the response packet and keeps a copy of the requester
+ * QP's parameters.
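+ *
+ * A sketch of the exchange described above:
+ *
+ *   responder QP                                requester QP
+ *       |-- COMPARE_SWAP, vaddr = U64_MAX,          |
+ *       |   data = responder's parameters --------->|
+ *       |<-- atomic response,                       |
+ *       |    data = requester's parameters ---------|
+ *
+ *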
After this exchange, each side has the parameters + * for both sides and therefore can select the right parameters + * for future transactions + */ + +/* STL Verbs Extended */ +#define IB_BTHE_E_SHIFT 24 + +#endif /* _HFI1_OPFN_H */ diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index be603f35d7e4..940e9236c328 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -89,8 +89,8 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, struct rvt_ack_entry *e; u32 hwords; u32 len; - u32 bth0; - u32 bth2; + u32 bth0, bth2; + u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT); int middle = 0; u32 pmtu = qp->pmtu; struct hfi1_qp_priv *priv = qp->priv; @@ -229,7 +229,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, ps->s_txreq->sde = priv->s_sde; ps->s_txreq->s_cur_size = len; ps->s_txreq->hdr_dwords = hwords; - hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps); + hfi1_make_ruc_header(qp, ohdr, bth0, bth1, bth2, middle, ps); return 1; bail: @@ -262,8 +262,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) struct rvt_swqe *wqe; u32 hwords; u32 len; - u32 bth0 = 0; - u32 bth2; + u32 bth0 = 0, bth2; + u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT); u32 pmtu = qp->pmtu; char newreq; int middle = 0; @@ -693,6 +693,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp, ohdr, bth0 | (qp->s_state << 24), + bth1, bth2, middle, ps); @@ -796,6 +797,11 @@ static inline void hfi1_make_rc_ack_9B(struct hfi1_packet *packet, if (qp->s_mig_state == IB_MIG_MIGRATED) bth0 |= IB_BTH_MIG_REQ; bth1 = (!!is_fecn) << IB_BECN_SHIFT; + /* + * Inline ACKs go out without the use of the Verbs send engine, so + * we need to set the STL Verbs Extended bit here + */ + bth1 |= HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT; hfi1_make_bth_aeth(qp, ohdr, bth0, bth1); } diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 7fb317c711df..f96c0f544cb0 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -250,7 +250,6 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth1, u32 bth2) { - bth1 |= qp->remote_qpn; ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(bth1); ohdr->bth[2] = cpu_to_be32(bth2); @@ -272,13 +271,13 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp, */ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, struct ib_other_headers *ohdr, - u32 bth0, u32 bth2, int middle, + u32 bth0, u32 bth1, u32 bth2, + int middle, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibport *ibp = ps->ibp; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u32 bth1 = 0; u32 slid; u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); u8 l4 = OPA_16B_L4_IB_LOCAL; @@ -360,12 +359,12 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, */ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, struct ib_other_headers *ohdr, - u32 bth0, u32 bth2, int middle, + u32 bth0, u32 bth1, u32 bth2, + int middle, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibport *ibp = ps->ibp; - u32 bth1 = 0; u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); u16 lrh0 = HFI1_LRH_BTH; u8 extra_bytes = -ps->s_txreq->s_cur_size & 3; @@ -415,7 +414,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, typedef void (*hfi1_make_ruc_hdr)(struct rvt_qp *qp, 
struct ib_other_headers *ohdr, - u32 bth0, u32 bth2, int middle, + u32 bth0, u32 bth1, u32 bth2, int middle, struct hfi1_pkt_state *ps); /* We support only two types - 9B and 16B for now */ @@ -425,7 +424,7 @@ static const hfi1_make_ruc_hdr hfi1_ruc_header_tbl[2] = { }; void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, - u32 bth0, u32 bth2, int middle, + u32 bth0, u32 bth1, u32 bth2, int middle, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; @@ -446,7 +445,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, priv->s_ahg->ahgidx = 0; /* Make the appropriate header */ - hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth2, middle, ps); + hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth1, bth2, middle, + ps); } /* when sending, force a reschedule every one of these periods */ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 6ba47037c424..4ed4fcfabd6c 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -271,7 +271,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ps->s_txreq->ss = &qp->s_sge; ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), - mask_psn(qp->s_psn++), middle, ps); + qp->remote_qpn, mask_psn(qp->s_psn++), + middle, ps); return 1; done_free_tx: diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 1ad0b14bdb3c..8834119184b3 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -72,6 +72,7 @@ struct hfi1_packet; #include "iowait.h" #include "tid_rdma.h" +#include "opfn.h" #define HFI1_MAX_RDMA_ATOMIC 16 @@ -356,7 +357,7 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, const struct ib_global_route *grh, u32 hwords, u32 nwords); void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, - u32 bth0, u32 bth2, int middle, + u32 bth0, u32 bth1, u32 bth2, int middle, struct hfi1_pkt_state *ps); void _hfi1_do_send(struct work_struct *work); From d22a207d74adb0b43742f83d025079207425928b Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 23 Jan 2019 19:20:42 -0800 Subject: [PATCH 2/6] IB/hfi1: Add OPFN helper functions for TID RDMA feature This patch adds the OPFN helper functions to initialize, encode, decode, and reset OPFN parameters for the TID RDMA feature. 
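The encode/decode pair added below packs all negotiated values into the
single 64-bit atomic payload with per-field masks and shifts, leaving the
low nibble free for the OPFN capability code. A stand-alone sketch of the
round-trip (the field values are hypothetical; the masks and shifts are the
ones defined in this patch):

#include <assert.h>
#include <stdint.h>

#define TID_OPFN_TIMEOUT_MASK	0x1f
#define TID_OPFN_TIMEOUT_SHIFT	32
#define TID_OPFN_JKEY_MASK	0x3f
#define TID_OPFN_JKEY_SHIFT	16

int main(void)
{
	uint64_t data = 0;

	/* encode: mask each value, then shift it into its field */
	data |= ((uint64_t)14 & TID_OPFN_TIMEOUT_MASK) << TID_OPFN_TIMEOUT_SHIFT;
	data |= ((uint64_t)32 & TID_OPFN_JKEY_MASK) << TID_OPFN_JKEY_SHIFT;

	/*
	 * Bits 0-3 stay clear here; the OPFN core (patch 3) later ORs
	 * the capability code into that nibble:
	 * data = (data & ~0xf) | capcode.
	 */
	assert((data & 0xf) == 0);

	/* decode: shift each field down, then mask everything else off */
	assert(((data >> TID_OPFN_TIMEOUT_SHIFT) & TID_OPFN_TIMEOUT_MASK) == 14);
	assert(((data >> TID_OPFN_JKEY_SHIFT) & TID_OPFN_JKEY_MASK) == 32);
	return 0;
}

Masking before the shift on encode and after the shift on decode means an
oversized input can never spill into a neighboring field.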
Reviewed-by: Mike Marciniszyn Signed-off-by: Ashutosh Dixit Signed-off-by: Mitko Haralanov Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 11 ++ drivers/infiniband/hw/hfi1/chip.h | 3 +- drivers/infiniband/hw/hfi1/init.c | 2 + drivers/infiniband/hw/hfi1/opfn.h | 5 + drivers/infiniband/hw/hfi1/tid_rdma.c | 202 ++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/tid_rdma.h | 28 +++- drivers/infiniband/hw/hfi1/verbs.h | 5 + 7 files changed, 254 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b443642eac02..4d40311f082e 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -5222,6 +5222,17 @@ int is_bx(struct hfi1_devdata *dd) return (chip_rev_minor & 0xF0) == 0x10; } +/* return true is kernel urg disabled for rcd */ +bool is_urg_masked(struct hfi1_ctxtdata *rcd) +{ + u64 mask; + u32 is = IS_RCVURGENT_START + rcd->ctxt; + u8 bit = is % 64; + + mask = read_csr(rcd->dd, CCE_INT_MASK + (8 * (is / 64))); + return !(mask & BIT_ULL(bit)); +} + /* * Append string s to buffer buf. Arguments curp and len are the current * position and remaining length, respectively. diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 6b9c8f12dff8..ba3d99e6e33b 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1,7 +1,7 @@ #ifndef _CHIP_H #define _CHIP_H /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -804,6 +804,7 @@ void clear_linkup_counters(struct hfi1_devdata *dd); u32 hdrqempty(struct hfi1_ctxtdata *rcd); int is_ax(struct hfi1_devdata *dd); int is_bx(struct hfi1_devdata *dd); +bool is_urg_masked(struct hfi1_ctxtdata *rcd); u32 read_physical_state(struct hfi1_devdata *dd); u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate); const char *opa_lstate_name(u32 lstate); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 7835eb52e7c5..2ba5a2a8b68f 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -927,6 +927,8 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) lastfail = hfi1_create_rcvhdrq(dd, rcd); if (!lastfail) lastfail = hfi1_setup_eagerbufs(rcd); + if (!lastfail) + lastfail = hfi1_kern_exp_rcv_init(rcd, reinit); if (lastfail) { dd_dev_err(dd, "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); diff --git a/drivers/infiniband/hw/hfi1/opfn.h b/drivers/infiniband/hw/hfi1/opfn.h index 1a2b3449df67..1927c9862b8f 100644 --- a/drivers/infiniband/hw/hfi1/opfn.h +++ b/drivers/infiniband/hw/hfi1/opfn.h @@ -50,4 +50,9 @@ /* STL Verbs Extended */ #define IB_BTHE_E_SHIFT 24 +struct hfi1_opfn_data { + /* serialize opfn function calls */ + spinlock_t lock; +}; + #endif /* _HFI1_OPFN_H */ diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index da1ecb68a928..a8fd66f31fee 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -8,6 +8,208 @@ #include "verbs.h" #include "tid_rdma.h" +/* + * J_KEY for kernel contexts when TID RDMA is used. + * See generate_jkey() in hfi.h for more information. 
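+ * (TID_RDMA_JKEY below is one fixed value taken from the kernel's
+ * administrative J_KEY range; each side advertises it in its OPFN
+ * parameters so that the peer knows which J_KEY this context will
+ * accept on TID RDMA packets.)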
+ */ +#define TID_RDMA_JKEY 32 +#define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE +#define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1) + +#define TID_RDMA_MAX_READ_SEGS_PER_REQ 6 +#define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4 + +#define TID_OPFN_QP_CTXT_MASK 0xff +#define TID_OPFN_QP_CTXT_SHIFT 56 +#define TID_OPFN_QP_KDETH_MASK 0xff +#define TID_OPFN_QP_KDETH_SHIFT 48 +#define TID_OPFN_MAX_LEN_MASK 0x7ff +#define TID_OPFN_MAX_LEN_SHIFT 37 +#define TID_OPFN_TIMEOUT_MASK 0x1f +#define TID_OPFN_TIMEOUT_SHIFT 32 +#define TID_OPFN_RESERVED_MASK 0x3f +#define TID_OPFN_RESERVED_SHIFT 26 +#define TID_OPFN_URG_MASK 0x1 +#define TID_OPFN_URG_SHIFT 25 +#define TID_OPFN_VER_MASK 0x7 +#define TID_OPFN_VER_SHIFT 22 +#define TID_OPFN_JKEY_MASK 0x3f +#define TID_OPFN_JKEY_SHIFT 16 +#define TID_OPFN_MAX_READ_MASK 0x3f +#define TID_OPFN_MAX_READ_SHIFT 10 +#define TID_OPFN_MAX_WRITE_MASK 0x3f +#define TID_OPFN_MAX_WRITE_SHIFT 4 + +/* + * OPFN TID layout + * + * 63 47 31 15 + * NNNNNNNNKKKKKKKK MMMMMMMMMMMTTTTT DDDDDDUVVVJJJJJJ RRRRRRWWWWWWCCCC + * 3210987654321098 7654321098765432 1098765432109876 5432109876543210 + * N - the context Number + * K - the Kdeth_qp + * M - Max_len + * T - Timeout + * D - reserveD + * V - version + * U - Urg capable + * J - Jkey + * R - max_Read + * W - max_Write + * C - Capcode + */ + +static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) +{ + return + (((u64)p->qp & TID_OPFN_QP_CTXT_MASK) << + TID_OPFN_QP_CTXT_SHIFT) | + ((((u64)p->qp >> 16) & TID_OPFN_QP_KDETH_MASK) << + TID_OPFN_QP_KDETH_SHIFT) | + (((u64)((p->max_len >> PAGE_SHIFT) - 1) & + TID_OPFN_MAX_LEN_MASK) << TID_OPFN_MAX_LEN_SHIFT) | + (((u64)p->timeout & TID_OPFN_TIMEOUT_MASK) << + TID_OPFN_TIMEOUT_SHIFT) | + (((u64)p->urg & TID_OPFN_URG_MASK) << TID_OPFN_URG_SHIFT) | + (((u64)p->jkey & TID_OPFN_JKEY_MASK) << TID_OPFN_JKEY_SHIFT) | + (((u64)p->max_read & TID_OPFN_MAX_READ_MASK) << + TID_OPFN_MAX_READ_SHIFT) | + (((u64)p->max_write & TID_OPFN_MAX_WRITE_MASK) << + TID_OPFN_MAX_WRITE_SHIFT); +} + +static void tid_rdma_opfn_decode(struct tid_rdma_params *p, u64 data) +{ + p->max_len = (((data >> TID_OPFN_MAX_LEN_SHIFT) & + TID_OPFN_MAX_LEN_MASK) + 1) << PAGE_SHIFT; + p->jkey = (data >> TID_OPFN_JKEY_SHIFT) & TID_OPFN_JKEY_MASK; + p->max_write = (data >> TID_OPFN_MAX_WRITE_SHIFT) & + TID_OPFN_MAX_WRITE_MASK; + p->max_read = (data >> TID_OPFN_MAX_READ_SHIFT) & + TID_OPFN_MAX_READ_MASK; + p->qp = + ((((data >> TID_OPFN_QP_KDETH_SHIFT) & TID_OPFN_QP_KDETH_MASK) + << 16) | + ((data >> TID_OPFN_QP_CTXT_SHIFT) & TID_OPFN_QP_CTXT_MASK)); + p->urg = (data >> TID_OPFN_URG_SHIFT) & TID_OPFN_URG_MASK; + p->timeout = (data >> TID_OPFN_TIMEOUT_SHIFT) & TID_OPFN_TIMEOUT_MASK; +} + +void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p) +{ + struct hfi1_qp_priv *priv = qp->priv; + + p->qp = (kdeth_qp << 16) | priv->rcd->ctxt; + p->max_len = TID_RDMA_MAX_SEGMENT_SIZE; + p->jkey = priv->rcd->jkey; + p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ; + p->max_write = TID_RDMA_MAX_WRITE_SEGS_PER_REQ; + p->timeout = qp->timeout; + p->urg = is_urg_masked(priv->rcd); +} + +bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data) +{ + struct hfi1_qp_priv *priv = qp->priv; + + *data = tid_rdma_opfn_encode(&priv->tid_rdma.local); + return true; +} + +bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct tid_rdma_params *remote, *old; + bool ret = true; + + old = rcu_dereference_protected(priv->tid_rdma.remote, + lockdep_is_held(&priv->opfn.lock)); + data &= ~0xfULL; + /* + * 
If data passed in is zero, return true so as not to continue the + * negotiation process + */ + if (!data || !HFI1_CAP_IS_KSET(TID_RDMA)) + goto null; + /* + * If kzalloc fails, return false. This will result in: + * * at the requester a new OPFN request being generated to retry + * the negotiation + * * at the responder, 0 being returned to the requester so as to + * disable TID RDMA at both the requester and the responder + */ + remote = kzalloc(sizeof(*remote), GFP_ATOMIC); + if (!remote) { + ret = false; + goto null; + } + + tid_rdma_opfn_decode(remote, data); + priv->tid_timer_timeout_jiffies = + usecs_to_jiffies((((4096UL * (1UL << remote->timeout)) / + 1000UL) << 3) * 7); + rcu_assign_pointer(priv->tid_rdma.remote, remote); + /* + * A TID RDMA READ request's segment size is not equal to + * remote->max_len only when the request's data length is smaller + * than remote->max_len. In that case, there will be only one segment. + * Therefore, when priv->pkts_ps is used to calculate req->cur_seg + * during retry, it will lead to req->cur_seg = 0, which is exactly + * what is expected. + */ + priv->pkts_ps = (u16)rvt_div_mtu(qp, remote->max_len); + priv->timeout_shift = ilog2(priv->pkts_ps - 1) + 1; + goto free; +null: + RCU_INIT_POINTER(priv->tid_rdma.remote, NULL); + priv->timeout_shift = 0; +free: + if (old) + kfree_rcu(old, rcu_head); + return ret; +} + +bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data) +{ + bool ret; + + ret = tid_rdma_conn_reply(qp, *data); + *data = 0; + /* + * If tid_rdma_conn_reply() returns error, set *data as 0 to indicate + * TID RDMA could not be enabled. This will result in TID RDMA being + * disabled at the requester too. + */ + if (ret) + (void)tid_rdma_conn_req(qp, data); + return ret; +} + +void tid_rdma_conn_error(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct tid_rdma_params *old; + + old = rcu_dereference_protected(priv->tid_rdma.remote, + lockdep_is_held(&priv->opfn.lock)); + RCU_INIT_POINTER(priv->tid_rdma.remote, NULL); + if (old) + kfree_rcu(old, rcu_head); +} + +/* This is called at context initialization time */ +int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit) +{ + if (reinit) + return 0; + + BUILD_BUG_ON(TID_RDMA_JKEY < HFI1_KERNEL_MIN_JKEY); + BUILD_BUG_ON(TID_RDMA_JKEY > HFI1_KERNEL_MAX_JKEY); + rcd->jkey = TID_RDMA_JKEY; + hfi1_set_ctxt_jkey(rcd->dd, rcd, rcd->jkey); + return 0; +} + /** * qp_to_rcd - determine the receive context used by a qp * @qp - the qp diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h index 6fcd3adcdcc3..18c6d4333f1e 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.h +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -6,8 +6,34 @@ #ifndef HFI1_TID_RDMA_H #define HFI1_TID_RDMA_H +#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ + +struct tid_rdma_params { + struct rcu_head rcu_head; + u32 qp; + u32 max_len; + u16 jkey; + u8 max_read; + u8 max_write; + u8 timeout; + u8 urg; + u8 version; +}; + +struct tid_rdma_qp_params { + struct tid_rdma_params local; + struct tid_rdma_params __rcu *remote; +}; + +bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data); +bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data); +bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data); +void tid_rdma_conn_error(struct rvt_qp *qp); +void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p); + +int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit); + int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct 
ib_qp_init_attr *init_attr); #endif /* HFI1_TID_RDMA_H */ - diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 8834119184b3..c8baa1e38ff6 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -161,8 +161,13 @@ struct hfi1_qp_priv { struct hfi1_ctxtdata *rcd; /* QP's receive context */ u8 s_sc; /* SC[0..4] for next packet */ struct iowait s_iowait; + struct hfi1_opfn_data opfn; + struct tid_rdma_qp_params tid_rdma; struct rvt_qp *owner; u8 hdr_type; /* 9B or 16B */ + unsigned long tid_timer_timeout_jiffies; + u16 pkts_ps; /* packets per segment */ + u8 timeout_shift; /* account for number of packets per segment */ }; /* From f01b4d5a43da47a9f61618a81a4ff3258ddc2c01 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 23 Jan 2019 19:20:52 -0800 Subject: [PATCH 3/6] IB/hfi1: OPFN interface OPFN allows a pair of connected RC QPs to exchange a set of parameters in succession. The parameter exchange itself is done using the IB compare and swap request with a special virtual address. The request is triggered using a reserved IB work request opcode. This patch implements the OPFN interface to initialize, start, process, and reset the OPFN request. Reviewed-by: Mike Marciniszyn Signed-off-by: Ashutosh Dixit Signed-off-by: Mitko Haralanov Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/Makefile | 1 + drivers/infiniband/hw/hfi1/hfi.h | 2 + drivers/infiniband/hw/hfi1/init.c | 1 - drivers/infiniband/hw/hfi1/opfn.c | 304 ++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/opfn.h | 33 ++- 5 files changed, 337 insertions(+), 4 deletions(-) create mode 100644 drivers/infiniband/hw/hfi1/opfn.c diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 3ce9dc8c3463..4044a8c8dbf4 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -24,6 +24,7 @@ hfi1-y := \ mad.o \ mmu_rb.o \ msix.o \ + opfn.o \ pcie.o \ pio.o \ pio_copy.o \ diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index ddfcf2fe40ca..9aa0357e17b7 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -99,6 +99,8 @@ #define NEIGHBOR_TYPE_HFI 0 #define NEIGHBOR_TYPE_SWITCH 1 +#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5 + extern unsigned long hfi1_cap_mask; #define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap) #define HFI1_CAP_UGET_MASK(mask, cap) \ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 2ba5a2a8b68f..09c898d0975c 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -72,7 +72,6 @@ #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt -#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5 /* * min buffers we want to have per context, after driver */ diff --git a/drivers/infiniband/hw/hfi1/opfn.c b/drivers/infiniband/hw/hfi1/opfn.c new file mode 100644 index 000000000000..2d46c91eb129 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/opfn.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2018 Intel Corporation. 
+ * + */ +#include "hfi.h" +#include "trace.h" +#include "qp.h" +#include "opfn.h" + +#define IB_BTHE_E BIT(IB_BTHE_E_SHIFT) + +#define OPFN_CODE(code) BIT((code) - 1) +#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code) + +struct hfi1_opfn_type { + bool (*request)(struct rvt_qp *qp, u64 *data); + bool (*response)(struct rvt_qp *qp, u64 *data); + bool (*reply)(struct rvt_qp *qp, u64 data); + void (*error)(struct rvt_qp *qp); +}; + +static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = { + [STL_VERBS_EXTD_TID_RDMA] = { + .request = tid_rdma_conn_req, + .response = tid_rdma_conn_resp, + .reply = tid_rdma_conn_reply, + .error = tid_rdma_conn_error, + }, +}; + +static struct workqueue_struct *opfn_wq; + +static void opfn_schedule_conn_request(struct rvt_qp *qp); + +static bool hfi1_opfn_extended(u32 bth1) +{ + return !!(bth1 & IB_BTHE_E); +} + +static void opfn_conn_request(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct ib_atomic_wr wr; + u16 mask, capcode; + struct hfi1_opfn_type *extd; + u64 data; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&priv->opfn.lock, flags); + /* + * Exit if the extended bit is not set, or if nothing is requested, or + * if we have completed all requests, or if a previous request is in + * progress + */ + if (!priv->opfn.extended || !priv->opfn.requested || + priv->opfn.requested == priv->opfn.completed || priv->opfn.curr) + goto done; + + mask = priv->opfn.requested & ~priv->opfn.completed; + capcode = ilog2(mask & ~(mask - 1)) + 1; + if (capcode >= STL_VERBS_EXTD_MAX) { + priv->opfn.completed |= OPFN_CODE(capcode); + goto done; + } + + extd = &hfi1_opfn_handlers[capcode]; + if (!extd || !extd->request || !extd->request(qp, &data)) { + /* + * Either there is no handler for this capability or the request + * packet could not be generated. Either way, mark it as done so + * we don't keep attempting to complete it. 
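+		 * (Note: "mask & ~(mask - 1)" above isolates the lowest
+		 * outstanding bit, so pending capabilities are negotiated
+		 * one at a time, in ascending capcode order.)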
+ */ + priv->opfn.completed |= OPFN_CODE(capcode); + goto done; + } + + data = (data & ~0xf) | capcode; + + memset(&wr, 0, sizeof(wr)); + wr.wr.opcode = IB_WR_OPFN; + wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR; + wr.compare_add = data; + + priv->opfn.curr = capcode; /* A new request is now in progress */ + /* Drop opfn.lock before calling ib_post_send() */ + spin_unlock_irqrestore(&priv->opfn.lock, flags); + + ret = ib_post_send(&qp->ibqp, &wr.wr, NULL); + if (ret) + goto err; + return; +err: + spin_lock_irqsave(&priv->opfn.lock, flags); + /* + * In case of an unexpected error return from ib_post_send + * clear opfn.curr and reschedule to try again + */ + priv->opfn.curr = STL_VERBS_EXTD_NONE; + opfn_schedule_conn_request(qp); +done: + spin_unlock_irqrestore(&priv->opfn.lock, flags); +} + +void opfn_send_conn_request(struct work_struct *work) +{ + struct hfi1_opfn_data *od; + struct hfi1_qp_priv *qpriv; + + od = container_of(work, struct hfi1_opfn_data, opfn_work); + qpriv = container_of(od, struct hfi1_qp_priv, opfn); + + opfn_conn_request(qpriv->owner); +} + +/* + * When QP s_lock is held in the caller, the OPFN request must be scheduled + * to a different workqueue to avoid double locking QP s_lock in call to + * ib_post_send in opfn_conn_request + */ +static void opfn_schedule_conn_request(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + queue_work(opfn_wq, &priv->opfn.opfn_work); +} + +void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e, + struct ib_atomic_eth *ateth) +{ + struct hfi1_qp_priv *priv = qp->priv; + u64 data = be64_to_cpu(ateth->compare_data); + struct hfi1_opfn_type *extd; + u8 capcode; + unsigned long flags; + + capcode = data & 0xf; + if (!capcode || capcode >= STL_VERBS_EXTD_MAX) + return; + + extd = &hfi1_opfn_handlers[capcode]; + + if (!extd || !extd->response) { + e->atomic_data = capcode; + return; + } + + spin_lock_irqsave(&priv->opfn.lock, flags); + if (priv->opfn.completed & OPFN_CODE(capcode)) { + /* + * We are receiving a request for a feature that has already + * been negotiated. This may mean that the other side has reset + */ + priv->opfn.completed &= ~OPFN_CODE(capcode); + if (extd->error) + extd->error(qp); + } + + if (extd->response(qp, &data)) + priv->opfn.completed |= OPFN_CODE(capcode); + e->atomic_data = (data & ~0xf) | capcode; + spin_unlock_irqrestore(&priv->opfn.lock, flags); +} + +void opfn_conn_reply(struct rvt_qp *qp, u64 data) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct hfi1_opfn_type *extd; + u8 capcode; + unsigned long flags; + + capcode = data & 0xf; + if (!capcode || capcode >= STL_VERBS_EXTD_MAX) + return; + + spin_lock_irqsave(&priv->opfn.lock, flags); + /* + * Either there is no previous request or the reply is not for the + * current request + */ + if (!priv->opfn.curr || capcode != priv->opfn.curr) + goto done; + + extd = &hfi1_opfn_handlers[capcode]; + + if (!extd || !extd->reply) + goto clear; + + if (extd->reply(qp, data)) + priv->opfn.completed |= OPFN_CODE(capcode); +clear: + /* + * Clear opfn.curr to indicate that the previous request is no longer in + * progress + */ + priv->opfn.curr = STL_VERBS_EXTD_NONE; +done: + spin_unlock_irqrestore(&priv->opfn.lock, flags); +} + +void opfn_conn_error(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct hfi1_opfn_type *extd = NULL; + unsigned long flags; + u16 capcode; + + /* + * The QP has gone into the Error state. We have to invalidate all + * negotiated feature, including the one in progress (if any). 
The RC + * QP handling will clean the WQE for the connection request. + */ + spin_lock_irqsave(&priv->opfn.lock, flags); + while (priv->opfn.completed) { + capcode = priv->opfn.completed & ~(priv->opfn.completed - 1); + extd = &hfi1_opfn_handlers[ilog2(capcode) + 1]; + if (extd->error) + extd->error(qp); + priv->opfn.completed &= ~OPFN_CODE(capcode); + } + priv->opfn.extended = 0; + priv->opfn.requested = 0; + priv->opfn.curr = STL_VERBS_EXTD_NONE; + spin_unlock_irqrestore(&priv->opfn.lock, flags); +} + +void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask) +{ + struct ib_qp *ibqp = &qp->ibqp; + struct hfi1_qp_priv *priv = qp->priv; + unsigned long flags; + + spin_lock_irqsave(&priv->opfn.lock, flags); + if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) { + struct tid_rdma_params *local = &priv->tid_rdma.local; + + if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) || + qp->pmtu == enum_to_mtu(OPA_MTU_8192)) { + tid_rdma_opfn_init(qp, local); + /* + * We only want to set the OPFN requested bit when the + * QP transitions to RTS. + */ + if (attr_mask & IB_QP_STATE && + attr->qp_state == IB_QPS_RTS) { + priv->opfn.requested |= OPFN_MASK(TID_RDMA); + /* + * If the QP is transitioning to RTS and the + * opfn.completed for TID RDMA has already been + * set, the QP is being moved *back* into RTS. + * We can now renegotiate the TID RDMA + * parameters. + */ + if (priv->opfn.completed & + OPFN_MASK(TID_RDMA)) { + priv->opfn.completed &= + ~OPFN_MASK(TID_RDMA); + /* + * Since the opfn.completed bit was + * already set, it is safe to assume + * that the opfn.extended is also set. + */ + opfn_schedule_conn_request(qp); + } + } + } else { + memset(local, 0, sizeof(*local)); + } + } + spin_unlock_irqrestore(&priv->opfn.lock, flags); +} + +void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1) +{ + struct hfi1_qp_priv *priv = qp->priv; + + if (!priv->opfn.extended && hfi1_opfn_extended(bth1) && + HFI1_CAP_IS_KSET(OPFN)) { + priv->opfn.extended = 1; + if (qp->state == IB_QPS_RTS) + opfn_conn_request(qp); + } +} + +int opfn_init(void) +{ + opfn_wq = alloc_workqueue("hfi_opfn", + WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | + WQ_MEM_RECLAIM, + HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES); + if (!opfn_wq) + return -ENOMEM; + + return 0; +} + +void opfn_exit(void) +{ + if (opfn_wq) { + destroy_workqueue(opfn_wq); + opfn_wq = NULL; + } +} diff --git a/drivers/infiniband/hw/hfi1/opfn.h b/drivers/infiniband/hw/hfi1/opfn.h index 1927c9862b8f..5f2011cabc25 100644 --- a/drivers/infiniband/hw/hfi1/opfn.h +++ b/drivers/infiniband/hw/hfi1/opfn.h @@ -49,10 +49,37 @@ /* STL Verbs Extended */ #define IB_BTHE_E_SHIFT 24 +#define HFI1_VERBS_E_ATOMIC_VADDR U64_MAX -struct hfi1_opfn_data { - /* serialize opfn function calls */ - spinlock_t lock; +struct ib_atomic_eth; + +enum hfi1_opfn_codes { + STL_VERBS_EXTD_NONE = 0, + STL_VERBS_EXTD_TID_RDMA, + STL_VERBS_EXTD_MAX }; +struct hfi1_opfn_data { + u8 extended; + u16 requested; + u16 completed; + enum hfi1_opfn_codes curr; + /* serialize opfn function calls */ + spinlock_t lock; + struct work_struct opfn_work; +}; + +/* WR opcode for OPFN */ +#define IB_WR_OPFN IB_WR_RESERVED3 + +void opfn_send_conn_request(struct work_struct *work); +void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e, + struct ib_atomic_eth *ateth); +void opfn_conn_reply(struct rvt_qp *qp, u64 data); +void opfn_conn_error(struct rvt_qp *qp); +void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask); +void opfn_trigger_conn_request(struct rvt_qp *qp, u32 
bth1); +int opfn_init(void); +void opfn_exit(void); + #endif /* _HFI1_OPFN_H */ From ddf922c31fedd19c5b89a269c35e5c8b68c64327 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 23 Jan 2019 19:21:01 -0800 Subject: [PATCH 4/6] IB/hfi1, IB/rdmavt: Allow for extending of QP's s_ack_queue The OPFN protocol uses the COMPARE_SWAP request to exchange data between the requester and the responder and therefore needs to be stored in the QP's s_ack_queue when the request is received on the responder side. However, because the user does not know anything about the OPFN protocol, this extra entry in the queue cannot be advertised to the user. This patch adds an extra entry in a QP's s_ack_queue. Reviewed-by: Mike Marciniszyn Signed-off-by: Mitko Haralanov Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 13 +++++++------ drivers/infiniband/hw/hfi1/verbs.c | 1 + include/rdma/rdma_vt.h | 10 +++++++++- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 940e9236c328..8970fc7ffd4b 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -122,7 +122,8 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, * response has been sent instead of only being * constructed. */ - if (++qp->s_tail_ack_queue > HFI1_MAX_RDMA_ATOMIC) + if (++qp->s_tail_ack_queue > + rvt_size_atomic(ib_to_rvt(qp->ibqp.device))) qp->s_tail_ack_queue = 0; /* FALLTHROUGH */ case OP(SEND_ONLY): @@ -1818,7 +1819,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, if (i) prev = i - 1; else - prev = HFI1_MAX_RDMA_ATOMIC; + prev = rvt_size_atomic(ib_to_rvt(qp->ibqp.device)); if (prev == qp->r_head_ack_queue) { e = NULL; break; @@ -1942,7 +1943,7 @@ static inline void update_ack_queue(struct rvt_qp *qp, unsigned n) unsigned next; next = n + 1; - if (next > HFI1_MAX_RDMA_ATOMIC) + if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device))) next = 0; qp->s_tail_ack_queue = next; qp->s_ack_state = OP(ACKNOWLEDGE); @@ -2298,8 +2299,8 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto nack_inv; next = qp->r_head_ack_queue + 1; - /* s_ack_queue is size HFI1_MAX_RDMA_ATOMIC+1 so use > not >= */ - if (next > HFI1_MAX_RDMA_ATOMIC) + /* s_ack_queue is size rvt_size_atomic()+1 so use > not >= */ + if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device))) next = 0; spin_lock_irqsave(&qp->s_lock, flags); if (unlikely(next == qp->s_tail_ack_queue)) { @@ -2373,7 +2374,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto nack_inv; next = qp->r_head_ack_queue + 1; - if (next > HFI1_MAX_RDMA_ATOMIC) + if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device))) next = 0; spin_lock_irqsave(&qp->s_lock, flags); if (unlikely(next == qp->s_tail_ack_queue)) { diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index c980345cf1e1..ec3899c0874c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1735,6 +1735,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode; dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold; dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period; + dd->verbs_dev.rdi.dparms.extra_rdma_atomic = 1; /* post send table */ dd->verbs_dev.rdi.post_parms = hfi1_post_parms; 
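The user-visible QP geometry is unchanged by this: hfi1 still advertises
HFI1_MAX_RDMA_ATOMIC (16) responder resources, and dparms.extra_rdma_atomic
contributes one additional, un-advertised s_ack_queue slot for the OPFN
exchange. A stand-alone sketch of the sizing helpers added to rdma_vt.h
below, and of the ">" (rather than ">=") wrap test this enables in rc.c:

#include <assert.h>

#define MAX_RDMA_ATOMIC    16	/* user-visible responder resources */
#define EXTRA_RDMA_ATOMIC   1	/* hidden slot for OPFN */

/* mirrors rvt_size_atomic(): the highest valid s_ack_queue index */
static unsigned int size_atomic(void)
{
	return MAX_RDMA_ATOMIC + EXTRA_RDMA_ATOMIC;	/* 17 */
}

int main(void)
{
	/*
	 * The queue itself holds size_atomic() + 1 == 18 entries
	 * (indices 0..17), which is why the ring wraps with ">":
	 */
	unsigned int next = size_atomic() + 1;

	if (next > size_atomic())
		next = 0;
	assert(next == 0);
	return 0;
}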
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index acb3bc96dfa7..168e40be183c 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -182,6 +182,7 @@ struct rvt_driver_params { u32 max_mad_size; u8 qos_shift; u8 max_rdma_atomic; + u8 extra_rdma_atomic; u8 reserved_operations; }; @@ -519,7 +520,14 @@ static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) */ static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi) { - return rdi->dparms.max_rdma_atomic + 1; + return rdi->dparms.max_rdma_atomic + + rdi->dparms.extra_rdma_atomic + 1; +} + +static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi) +{ + return rdi->dparms.max_rdma_atomic + + rdi->dparms.extra_rdma_atomic; } /* From 48a615dc00aed68d58244b835b10eb3244aae31d Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 23 Jan 2019 19:21:11 -0800 Subject: [PATCH 5/6] IB/hfi1: Integrate OPFN into RC transactions OPFN parameter negotiation allows a pair of connected RC QPs to exchange a set of parameters in succession. This negotiation does not commence till the first ULP request. Because OPFN operations are operations private to the driver, they do not generate user completions or put the QP into error when they run out of retries. This patch integrates the OPFN protocol into the transactions of an RC QP. Reviewed-by: Mike Marciniszyn Signed-off-by: Ashutosh Dixit Signed-off-by: Mitko Haralanov Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 7 ++++ drivers/infiniband/hw/hfi1/qp.c | 13 ++++++ drivers/infiniband/hw/hfi1/rc.c | 58 ++++++++++++++++++++++----- drivers/infiniband/hw/hfi1/tid_rdma.c | 11 +++++ drivers/infiniband/hw/hfi1/tid_rdma.h | 1 + drivers/infiniband/hw/hfi1/verbs.c | 1 + 6 files changed, 81 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 09c898d0975c..a8dbd0f191f5 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1498,6 +1498,12 @@ static int __init hfi1_mod_init(void) /* sanitize link CRC options */ link_crc_mask &= SUPPORTED_CRCS; + ret = opfn_init(); + if (ret < 0) { + pr_err("Failed to allocate opfn_wq"); + goto bail_dev; + } + /* * These must be called before the driver is registered with * the PCI subsystem. 
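Discovery stays cheap on the receive side: patch 1 taught every RC sender
to OR the OPFN capability into bit 24 of BTH1, and the hook added to
hfi1_rc_rcv() below does nothing more than test that bit against the local
capability and QP state before any request is scheduled. A minimal sketch
of the test, mirroring IB_BTHE_E and hfi1_opfn_extended() from opfn.c:

#include <assert.h>
#include <stdint.h>

#define IB_BTHE_E_SHIFT 24
#define IB_BTHE_E       ((uint32_t)1 << IB_BTHE_E_SHIFT)

/* does the incoming BTH1 advertise OPFN support? */
static int opfn_extended(uint32_t bth1)
{
	return !!(bth1 & IB_BTHE_E);
}

int main(void)
{
	uint32_t bth1 = 0x10;	/* hypothetical: remote QPN only */

	assert(!opfn_extended(bth1));
	assert(opfn_extended(bth1 | IB_BTHE_E));
	return 0;
}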
@@ -1528,6 +1534,7 @@ module_init(hfi1_mod_init); static void __exit hfi1_mod_cleanup(void) { pci_unregister_driver(&hfi1_pci_driver); + opfn_exit(); node_affinity_destroy_all(); hfi1_dbg_exit(); diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 5344e8993b28..f822f92b415f 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -132,6 +132,12 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = { .qpt_support = BIT(IB_QPT_RC), }, +[IB_WR_OPFN] = { + .length = sizeof(struct ib_atomic_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_USE_RESERVE, +}, + }; static void flush_list_head(struct list_head *l) @@ -285,6 +291,8 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); qp_set_16b(qp); } + + opfn_qp_init(qp, attr, attr_mask); } /** @@ -696,6 +704,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; + hfi1_qp_priv_tid_free(rdi, qp); kfree(priv->s_ahg); kfree(priv); } @@ -751,6 +760,10 @@ void notify_qp_reset(struct rvt_qp *qp) { qp->r_adefered = 0; clear_ahg(qp); + + /* Clear any OPFN state */ + if (qp->ibqp.qp_type == IB_QPT_RC) + opfn_conn_error(qp); } /* diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 8970fc7ffd4b..092d5eba980f 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -57,6 +57,10 @@ /* cut down ridiculously long IB macro names */ #define OP(x) RC_OP(x) +static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, + struct rvt_swqe *wqe, + struct hfi1_ibport *ibp); + static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 psn, u32 pmtu) { @@ -517,10 +521,14 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto bail; } qp->s_num_rd_atomic++; - if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) - qp->s_lsn++; } - if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + + /* FALLTHROUGH */ + case IB_WR_OPFN: + if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) + qp->s_lsn++; + if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_OPFN) { qp->s_state = OP(COMPARE_SWAP); put_ib_ateth_swap(wqe->atomic_wr.swap, &ohdr->u.atomic_eth); @@ -1040,6 +1048,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn) */ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait) { + struct hfi1_qp_priv *priv = qp->priv; struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked); struct hfi1_ibport *ibp; @@ -1050,8 +1059,26 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait) hfi1_migrate_qp(qp); qp->s_retry = qp->s_retry_cnt; } else if (qp->s_last == qp->s_acked) { - rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); - rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); + /* + * We need special handling for the OPFN request WQEs as + * they are not allowed to generate real user errors + */ + if (wqe->wr.opcode == IB_WR_OPFN) { + struct hfi1_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + /* + * Call opfn_conn_reply() with capcode and + * remaining data as 0 to close out the + * current request + */ + opfn_conn_reply(qp, priv->opfn.curr); + wqe = do_rc_completion(qp, wqe, ibp); + qp->s_flags &= ~RVT_S_WAIT_ACK; + } else { + rvt_send_complete(qp, wqe, + IB_WC_RETRY_EXC_ERR); + rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); + } return; } else { /* need to handle delayed completion */ return; @@ -1363,6 +1390,9 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, 
u32 psn, int opcode, u64 *vaddr = wqe->sg_list[0].vaddr; *vaddr = val; } + if (wqe->wr.opcode == IB_WR_OPFN) + opfn_conn_reply(qp, val); + if (qp->s_num_rd_atomic && (wqe->wr.opcode == IB_WR_RDMA_READ || wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || @@ -2068,6 +2098,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) return; fecn = process_ecn(qp, packet); + opfn_trigger_conn_request(qp, be32_to_cpu(ohdr->bth[1])); /* * Process responses (ACKs) before anything else. Note that the @@ -2363,15 +2394,18 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) case OP(COMPARE_SWAP): case OP(FETCH_ADD): { - struct ib_atomic_eth *ateth; + struct ib_atomic_eth *ateth = &ohdr->u.atomic_eth; + u64 vaddr = get_ib_ateth_vaddr(ateth); + bool opfn = opcode == OP(COMPARE_SWAP) && + vaddr == HFI1_VERBS_E_ATOMIC_VADDR; struct rvt_ack_entry *e; - u64 vaddr; atomic64_t *maddr; u64 sdata; u32 rkey; u8 next; - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !opfn)) goto nack_inv; next = qp->r_head_ack_queue + 1; if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device))) @@ -2387,8 +2421,11 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } - ateth = &ohdr->u.atomic_eth; - vaddr = get_ib_ateth_vaddr(ateth); + /* Process OPFN special virtual address */ + if (opfn) { + opfn_conn_response(qp, e, ateth); + goto ack; + } if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); @@ -2407,6 +2444,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) sdata); rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; +ack: e->opcode = opcode; e->sent = 0; e->psn = psn; diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index a8fd66f31fee..0c9f313d6229 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -246,5 +246,16 @@ int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, qpriv->rcd = qp_to_rcd(rdi, qp); + spin_lock_init(&qpriv->opfn.lock); + INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request); + return 0; } + +void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) + cancel_work_sync(&priv->opfn.opfn_work); +} diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h index 18c6d4333f1e..ee8151558e3f 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.h +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -35,5 +35,6 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit); int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_init_attr *init_attr); +void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); #endif /* HFI1_TID_RDMA_H */ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index ec3899c0874c..571bfd549c2a 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1735,6 +1735,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode; dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold; dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period; + dd->verbs_dev.rdi.dparms.reserved_operations = 1; dd->verbs_dev.rdi.dparms.extra_rdma_atomic = 1; /* post send table */ From a131d16460971353e7dd6916d9fd34c1c946a782 Mon Sep 17 00:00:00 
2001 From: Kaike Wan Date: Thu, 24 Jan 2019 06:10:09 -0800 Subject: [PATCH 6/6] IB/hfi1: Add static trace for OPFN This patch adds the static trace to the OPFN code and moves tid related static trace code into a new header file. Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/opfn.c | 14 ++ drivers/infiniband/hw/hfi1/tid_rdma.c | 3 + drivers/infiniband/hw/hfi1/trace.h | 1 + drivers/infiniband/hw/hfi1/trace_rx.h | 107 +------- drivers/infiniband/hw/hfi1/trace_tid.h | 332 +++++++++++++++++++++++++ 5 files changed, 351 insertions(+), 106 deletions(-) create mode 100644 drivers/infiniband/hw/hfi1/trace_tid.h diff --git a/drivers/infiniband/hw/hfi1/opfn.c b/drivers/infiniband/hw/hfi1/opfn.c index 2d46c91eb129..2ca070690b2f 100644 --- a/drivers/infiniband/hw/hfi1/opfn.c +++ b/drivers/infiniband/hw/hfi1/opfn.c @@ -48,6 +48,7 @@ static void opfn_conn_request(struct rvt_qp *qp) unsigned long flags; int ret = 0; + trace_hfi1_opfn_state_conn_request(qp); spin_lock_irqsave(&priv->opfn.lock, flags); /* * Exit if the extended bit is not set, or if nothing is requested, or @@ -76,6 +77,7 @@ static void opfn_conn_request(struct rvt_qp *qp) goto done; } + trace_hfi1_opfn_data_conn_request(qp, capcode, data); data = (data & ~0xf) | capcode; memset(&wr, 0, sizeof(wr)); @@ -90,8 +92,11 @@ static void opfn_conn_request(struct rvt_qp *qp) ret = ib_post_send(&qp->ibqp, &wr.wr, NULL); if (ret) goto err; + trace_hfi1_opfn_state_conn_request(qp); return; err: + trace_hfi1_msg_opfn_conn_request(qp, "ib_ost_send failed: ret = ", + (u64)ret); spin_lock_irqsave(&priv->opfn.lock, flags); /* * In case of an unexpected error return from ib_post_send @@ -123,6 +128,7 @@ static void opfn_schedule_conn_request(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; + trace_hfi1_opfn_state_sched_conn_request(qp); queue_work(opfn_wq, &priv->opfn.opfn_work); } @@ -135,7 +141,9 @@ void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e, u8 capcode; unsigned long flags; + trace_hfi1_opfn_state_conn_response(qp); capcode = data & 0xf; + trace_hfi1_opfn_data_conn_response(qp, capcode, data); if (!capcode || capcode >= STL_VERBS_EXTD_MAX) return; @@ -160,6 +168,7 @@ void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e, if (extd->response(qp, &data)) priv->opfn.completed |= OPFN_CODE(capcode); e->atomic_data = (data & ~0xf) | capcode; + trace_hfi1_opfn_state_conn_response(qp); spin_unlock_irqrestore(&priv->opfn.lock, flags); } @@ -170,7 +179,9 @@ void opfn_conn_reply(struct rvt_qp *qp, u64 data) u8 capcode; unsigned long flags; + trace_hfi1_opfn_state_conn_reply(qp); capcode = data & 0xf; + trace_hfi1_opfn_data_conn_reply(qp, capcode, data); if (!capcode || capcode >= STL_VERBS_EXTD_MAX) return; @@ -195,6 +206,7 @@ void opfn_conn_reply(struct rvt_qp *qp, u64 data) * progress */ priv->opfn.curr = STL_VERBS_EXTD_NONE; + trace_hfi1_opfn_state_conn_reply(qp); done: spin_unlock_irqrestore(&priv->opfn.lock, flags); } @@ -206,6 +218,8 @@ void opfn_conn_error(struct rvt_qp *qp) unsigned long flags; u16 capcode; + trace_hfi1_opfn_state_conn_error(qp); + trace_hfi1_msg_opfn_conn_error(qp, "error. qp state ", (u64)qp->state); /* * The QP has gone into the Error state. We have to invalidate all * negotiated feature, including the one in progress (if any). 
The RC diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 0c9f313d6229..e8f57c0cd8bc 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -7,6 +7,7 @@ #include "hfi.h" #include "verbs.h" #include "tid_rdma.h" +#include "trace.h" /* * J_KEY for kernel contexts when TID RDMA is used. @@ -148,6 +149,8 @@ bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data) priv->tid_timer_timeout_jiffies = usecs_to_jiffies((((4096UL * (1UL << remote->timeout)) / 1000UL) << 3) * 7); + trace_hfi1_opfn_param(qp, 0, &priv->tid_rdma.local); + trace_hfi1_opfn_param(qp, 1, remote); rcu_assign_pointer(priv->tid_rdma.remote, remote); /* * A TID RDMA READ request's segment size is not equal to diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h index 84458f1325e1..1ce551864118 100644 --- a/drivers/infiniband/hw/hfi1/trace.h +++ b/drivers/infiniband/hw/hfi1/trace.h @@ -63,3 +63,4 @@ __print_symbolic(etype, \ #include "trace_tx.h" #include "trace_mmu.h" #include "trace_iowait.h" +#include "trace_tid.h" diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 7eceb57e0415..3cec960e9674 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -128,111 +128,6 @@ TRACE_EVENT(hfi1_receive_interrupt, ) ); -DECLARE_EVENT_CLASS( - hfi1_exp_tid_reg_unreg, - TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, - u32 npages, unsigned long va, unsigned long pa, - dma_addr_t dma), - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), - TP_STRUCT__entry( - __field(unsigned int, ctxt) - __field(u16, subctxt) - __field(u32, rarr) - __field(u32, npages) - __field(unsigned long, va) - __field(unsigned long, pa) - __field(dma_addr_t, dma) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->rarr = rarr; - __entry->npages = npages; - __entry->va = va; - __entry->pa = pa; - __entry->dma = dma; - ), - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", - __entry->ctxt, - __entry->subctxt, - __entry->rarr, - __entry->npages, - __entry->pa, - __entry->va, - __entry->dma - ) - ); - -DEFINE_EVENT( - hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg, - TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, - unsigned long va, unsigned long pa, dma_addr_t dma), - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); - -DEFINE_EVENT( - hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg, - TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, - unsigned long va, unsigned long pa, dma_addr_t dma), - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); - -TRACE_EVENT( - hfi1_put_tid, - TP_PROTO(struct hfi1_devdata *dd, - u32 index, u32 type, unsigned long pa, u16 order), - TP_ARGS(dd, index, type, pa, order), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(unsigned long, pa); - __field(u32, index); - __field(u32, type); - __field(u16, order); - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - __entry->pa = pa; - __entry->index = index; - __entry->type = type; - __entry->order = order; - ), - TP_printk("[%s] type %s pa %lx index %u order %u", - __get_str(dev), - show_tidtype(__entry->type), - __entry->pa, - __entry->index, - __entry->order - ) -); - 
-TRACE_EVENT(hfi1_exp_tid_inval,
-	    TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
-		     u32 npages, dma_addr_t dma),
-	    TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
-	    TP_STRUCT__entry(
-		    __field(unsigned int, ctxt)
-		    __field(u16, subctxt)
-		    __field(unsigned long, va)
-		    __field(u32, rarr)
-		    __field(u32, npages)
-		    __field(dma_addr_t, dma)
-		    ),
-	    TP_fast_assign(
-		    __entry->ctxt = ctxt;
-		    __entry->subctxt = subctxt;
-		    __entry->va = va;
-		    __entry->rarr = rarr;
-		    __entry->npages = npages;
-		    __entry->dma = dma;
-		    ),
-	    TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
-		      __entry->ctxt,
-		      __entry->subctxt,
-		      __entry->rarr,
-		      __entry->npages,
-		      __entry->va,
-		      __entry->dma
-		      )
-	    );
-
 TRACE_EVENT(hfi1_mmu_invalidate,
 	    TP_PROTO(unsigned int ctxt, u16 subctxt, const char *type,
 		     unsigned long start, unsigned long end),
diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h
new file mode 100644
index 000000000000..57a973c97cde
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_tid.h
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#if !defined(__HFI1_TRACE_TID_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_TID_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#define tidtype_name(type) { PT_##type, #type }
+#define show_tidtype(type)                   \
+__print_symbolic(type,                       \
+	tidtype_name(EXPECTED),              \
+	tidtype_name(EAGER),                 \
+	tidtype_name(INVALID))               \
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_tid
+
+#define OPFN_PARAM_PRN "[%s] qpn 0x%x %s OPFN: qp 0x%x, max read %u, " \
+		       "max write %u, max length %u, jkey 0x%x timeout %u " \
+		       "urg %u"
+
+DECLARE_EVENT_CLASS(/* class */
+	hfi1_exp_tid_reg_unreg,
+	TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+		 unsigned long va, unsigned long pa, dma_addr_t dma),
+	TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
+	TP_STRUCT__entry(/* entry */
+		__field(unsigned int, ctxt)
+		__field(u16, subctxt)
+		__field(u32, rarr)
+		__field(u32, npages)
+		__field(unsigned long, va)
+		__field(unsigned long, pa)
+		__field(dma_addr_t, dma)
+	),
+	TP_fast_assign(/* assign */
+		__entry->ctxt = ctxt;
+		__entry->subctxt = subctxt;
+		__entry->rarr = rarr;
+		__entry->npages = npages;
+		__entry->va = va;
+		__entry->pa = pa;
+		__entry->dma = dma;
+	),
+	TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
+		  __entry->ctxt,
+		  __entry->subctxt,
+		  __entry->rarr,
+		  __entry->npages,
+		  __entry->pa,
+		  __entry->va,
+		  __entry->dma
+	)
+);
+
+DEFINE_EVENT(/* exp_tid_unreg */
+	hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg,
+	TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+		 unsigned long va, unsigned long pa, dma_addr_t dma),
+	TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)
+);
+
+DEFINE_EVENT(/* exp_tid_reg */
+	hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg,
+	TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+		 unsigned long va, unsigned long pa, dma_addr_t dma),
+	TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)
+);
+
+TRACE_EVENT(/* put_tid */
+	hfi1_put_tid,
+	TP_PROTO(struct hfi1_devdata *dd,
+		 u32 index, u32 type, unsigned long pa, u16 order),
+	TP_ARGS(dd, index, type, pa, order),
+	TP_STRUCT__entry(/* entry */
+		DD_DEV_ENTRY(dd)
+		__field(unsigned long, pa);
+		__field(u32, index);
+		__field(u32, type);
+		__field(u16, order);
+	),
+	TP_fast_assign(/* assign */
+		DD_DEV_ASSIGN(dd);
+		__entry->pa = pa;
+		__entry->index = index;
+		__entry->type = type;
+		__entry->order = order;
+	),
+	TP_printk("[%s] type %s pa %lx index %u order %u",
+		  __get_str(dev),
+		  show_tidtype(__entry->type),
+		  __entry->pa,
+		  __entry->index,
+		  __entry->order
+	)
+);
+
+TRACE_EVENT(/* exp_tid_inval */
+	hfi1_exp_tid_inval,
+	TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
+		 u32 npages, dma_addr_t dma),
+	TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
+	TP_STRUCT__entry(/* entry */
+		__field(unsigned int, ctxt)
+		__field(u16, subctxt)
+		__field(unsigned long, va)
+		__field(u32, rarr)
+		__field(u32, npages)
+		__field(dma_addr_t, dma)
+	),
+	TP_fast_assign(/* assign */
+		__entry->ctxt = ctxt;
+		__entry->subctxt = subctxt;
+		__entry->va = va;
+		__entry->rarr = rarr;
+		__entry->npages = npages;
+		__entry->dma = dma;
+	),
+	TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
+		  __entry->ctxt,
+		  __entry->subctxt,
+		  __entry->rarr,
+		  __entry->npages,
+		  __entry->va,
+		  __entry->dma
+	)
+);
+
+DECLARE_EVENT_CLASS(/* opfn_state */
+	hfi1_opfn_state_template,
+	TP_PROTO(struct rvt_qp *qp),
+	TP_ARGS(qp),
+	TP_STRUCT__entry(/* entry */
+		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+		__field(u32, qpn)
+		__field(u16, requested)
+		__field(u16, completed)
+		__field(u8, curr)
+	),
+	TP_fast_assign(/* assign */
+		struct hfi1_qp_priv *priv = qp->priv;
+
+		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+		__entry->qpn = qp->ibqp.qp_num;
+		__entry->requested = priv->opfn.requested;
+		__entry->completed = priv->opfn.completed;
+		__entry->curr = priv->opfn.curr;
+	),
+	TP_printk(/* print */
+		"[%s] qpn 0x%x requested 0x%x completed 0x%x curr 0x%x",
+		__get_str(dev),
+		__entry->qpn,
+		__entry->requested,
+		__entry->completed,
+		__entry->curr
+	)
+);
+
+DEFINE_EVENT(/* event */
+	hfi1_opfn_state_template, hfi1_opfn_state_conn_request,
+	TP_PROTO(struct rvt_qp *qp),
+	TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+	hfi1_opfn_state_template, hfi1_opfn_state_sched_conn_request,
+	TP_PROTO(struct rvt_qp *qp),
+	TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+	hfi1_opfn_state_template, hfi1_opfn_state_conn_response,
+	TP_PROTO(struct rvt_qp *qp),
+	TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+	hfi1_opfn_state_template, hfi1_opfn_state_conn_reply,
+	TP_PROTO(struct rvt_qp *qp),
+	TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+	hfi1_opfn_state_template, hfi1_opfn_state_conn_error,
+	TP_PROTO(struct rvt_qp *qp),
+	TP_ARGS(qp)
+);
+
+DECLARE_EVENT_CLASS(/* opfn_data */
+	hfi1_opfn_data_template,
+	TP_PROTO(struct rvt_qp *qp, u8 capcode, u64 data),
+	TP_ARGS(qp, capcode, data),
+	TP_STRUCT__entry(/* entry */
+		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+		__field(u32, qpn)
+		__field(u32, state)
+		__field(u8, capcode)
+		__field(u64, data)
+	),
+	TP_fast_assign(/* assign */
+		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+		__entry->qpn = qp->ibqp.qp_num;
+		__entry->state = qp->state;
+		__entry->capcode = capcode;
+		__entry->data = data;
+	),
+	TP_printk(/* printk */
+		"[%s] qpn 0x%x (state 0x%x) Capcode %u data 0x%llx",
+		__get_str(dev),
+		__entry->qpn,
+		__entry->state,
+		__entry->capcode,
+		__entry->data
+	)
+);
+
+DEFINE_EVENT(/* event */
+	hfi1_opfn_data_template, hfi1_opfn_data_conn_request,
+	TP_PROTO(struct rvt_qp *qp, u8 capcode, u64 data),
+	TP_ARGS(qp, capcode, data)
+);
+
+DEFINE_EVENT(/* event */
+	hfi1_opfn_data_template, hfi1_opfn_data_conn_response,
+	TP_PROTO(struct rvt_qp *qp, u8 capcode, u64 data),
+	TP_ARGS(qp, capcode, data)
+);
+
+DEFINE_EVENT(/* event */
+	hfi1_opfn_data_template, hfi1_opfn_data_conn_reply,
+	TP_PROTO(struct rvt_qp *qp, u8 capcode, u64 data),
+	TP_ARGS(qp, capcode, data)
+);
+
+DECLARE_EVENT_CLASS(/* opfn_param */
+	hfi1_opfn_param_template,
+	TP_PROTO(struct rvt_qp *qp, char remote,
+		 struct tid_rdma_params *param),
+	TP_ARGS(qp, remote, param),
+	TP_STRUCT__entry(/* entry */
+		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+		__field(u32, qpn)
+		__field(char, remote)
+		__field(u32, param_qp)
+		__field(u32, max_len)
+		__field(u16, jkey)
+		__field(u8, max_read)
+		__field(u8, max_write)
+		__field(u8, timeout)
+		__field(u8, urg)
+	),
+	TP_fast_assign(/* assign */
+		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+		__entry->qpn = qp->ibqp.qp_num;
+		__entry->remote = remote;
+		__entry->param_qp = param->qp;
+		__entry->max_len = param->max_len;
+		__entry->jkey = param->jkey;
+		__entry->max_read = param->max_read;
+		__entry->max_write = param->max_write;
+		__entry->timeout = param->timeout;
+		__entry->urg = param->urg;
+	),
+	TP_printk(/* print */
+		OPFN_PARAM_PRN,
+		__get_str(dev),
+		__entry->qpn,
+		__entry->remote ? "remote" : "local",
+		__entry->param_qp,
+		__entry->max_read,
+		__entry->max_write,
+		__entry->max_len,
+		__entry->jkey,
+		__entry->timeout,
+		__entry->urg
+	)
+);
+
+DEFINE_EVENT(/* event */
+	hfi1_opfn_param_template, hfi1_opfn_param,
+	TP_PROTO(struct rvt_qp *qp, char remote,
+		 struct tid_rdma_params *param),
+	TP_ARGS(qp, remote, param)
+);
+
+DECLARE_EVENT_CLASS(/* msg */
+	hfi1_msg_template,
+	TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
+	TP_ARGS(qp, msg, more),
+	TP_STRUCT__entry(/* entry */
+		__field(u32, qpn)
+		__string(msg, msg)
+		__field(u64, more)
+	),
+	TP_fast_assign(/* assign */
+		__entry->qpn = qp ? qp->ibqp.qp_num : 0;
+		__assign_str(msg, msg);
+		__entry->more = more;
+	),
+	TP_printk(/* print */
+		"qpn 0x%x %s 0x%llx",
+		__entry->qpn,
+		__get_str(msg),
+		__entry->more
+	)
+);
+
+DEFINE_EVENT(/* event */
+	hfi1_msg_template, hfi1_msg_opfn_conn_request,
+	TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
+	TP_ARGS(qp, msg, more)
+);
+
+DEFINE_EVENT(/* event */
+	hfi1_msg_template, hfi1_msg_opfn_conn_error,
+	TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
+	TP_ARGS(qp, msg, more)
+);
+
+#endif /* __HFI1_TRACE_TID_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_tid
+#include <trace/define_trace.h>
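A closing note on the arithmetic: the tid_timer_timeout_jiffies value
computed in tid_rdma_conn_reply() (patch 2) follows the IB local ACK
timeout encoding, 4.096 us * 2^timeout, scaled by 8 (<< 3) and then by 7.
A stand-alone sketch with a hypothetical timeout code of 14:

#include <stdio.h>

int main(void)
{
	unsigned long timeout = 14;	/* hypothetical IB timeout code */
	unsigned long usecs =
		(((4096UL * (1UL << timeout)) / 1000UL) << 3) * 7;

	/*
	 * 4096 * 2^14 ns is roughly 67 ms per packet lifetime; * 8 * 7
	 * gives about 3.76 seconds before the TID timer may fire.
	 */
	printf("%lu usecs\n", usecs);	/* prints 3758048 */
	return 0;
}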