linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH resending 0/2] Providers/rxe: Replace AV by AH for UD sends
@ 2021-06-29 14:16 Bob Pearson
  2021-06-29 14:16 ` [PATCH resending 1/2] Update kernel headers Bob Pearson
  2021-06-29 14:16 ` [PATCH resending 2/2] Providers/rxe: Replace AV by AH for UD sends Bob Pearson
  0 siblings, 2 replies; 3+ messages in thread
From: Bob Pearson @ 2021-06-29 14:16 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-kernel, leon, galpress; +Cc: Bob Pearson

These patches contains the changes to the rxe provider to match the
kernel patch series with a similar name.

Bob Pearson (2):
  Update kernel headers
  Providers/rxe: Replace AV by AH for UD sends

 kernel-headers/rdma/rdma_user_rxe.h |  16 +++-
 providers/rxe/rxe-abi.h             |   2 +
 providers/rxe/rxe.c                 | 117 ++++++++++++++++++----------
 providers/rxe/rxe.h                 |   8 +-
 4 files changed, 100 insertions(+), 43 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH resending 1/2] Update kernel headers
  2021-06-29 14:16 [PATCH resending 0/2] Providers/rxe: Replace AV by AH for UD sends Bob Pearson
@ 2021-06-29 14:16 ` Bob Pearson
  2021-06-29 14:16 ` [PATCH resending 2/2] Providers/rxe: Replace AV by AH for UD sends Bob Pearson
  1 sibling, 0 replies; 3+ messages in thread
From: Bob Pearson @ 2021-06-29 14:16 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-kernel, leon, galpress; +Cc: Bob Pearson

To commit ?? ("RDMA/rxe: Convert kernel UD post send to use ah_num").

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 kernel-headers/rdma/rdma_user_rxe.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/kernel-headers/rdma/rdma_user_rxe.h b/kernel-headers/rdma/rdma_user_rxe.h
index e283c222..e544832e 100644
--- a/kernel-headers/rdma/rdma_user_rxe.h
+++ b/kernel-headers/rdma/rdma_user_rxe.h
@@ -98,6 +98,8 @@ struct rxe_send_wr {
 			__u32	remote_qpn;
 			__u32	remote_qkey;
 			__u16	pkey_index;
+			__u16	reserved;
+			__u32	ah_num;
 		} ud;
 		struct {
 			__aligned_u64	addr;
@@ -148,7 +150,12 @@ struct rxe_dma_info {
 
 struct rxe_send_wqe {
 	struct rxe_send_wr	wr;
-	struct rxe_av		av;
+	union {
+		struct rxe_av av;
+		struct {
+			__u32		reserved[0];
+		} ex;
+	};
 	__u32			status;
 	__u32			state;
 	__aligned_u64		iova;
@@ -168,6 +175,11 @@ struct rxe_recv_wqe {
 	struct rxe_dma_info	dma;
 };
 
+struct rxe_create_ah_resp {
+	__u32 ah_num;
+	__u32 reserved;
+};
+
 struct rxe_create_cq_resp {
 	struct mminfo mi;
 };
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH resending 2/2] Providers/rxe: Replace AV by AH for UD sends
  2021-06-29 14:16 [PATCH resending 0/2] Providers/rxe: Replace AV by AH for UD sends Bob Pearson
  2021-06-29 14:16 ` [PATCH resending 1/2] Update kernel headers Bob Pearson
@ 2021-06-29 14:16 ` Bob Pearson
  1 sibling, 0 replies; 3+ messages in thread
From: Bob Pearson @ 2021-06-29 14:16 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-kernel, leon, galpress; +Cc: Bob Pearson

Currently the rxe provider computes an AV for each AH created. These
are then passed to the kernel in each UD send WQE. This rxe provider
patch matches a set of kernel patches that creates an index for the
pool of AHs which are passed back to the rxe provider which in turn
returns them in the UD send WQEs instead of AVs. This falls back to
the older AV based WQE if either the rxe provider or the kernel does
not have these patches applied.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 providers/rxe/rxe-abi.h |   2 +
 providers/rxe/rxe.c     | 117 ++++++++++++++++++++++++++--------------
 providers/rxe/rxe.h     |   8 ++-
 3 files changed, 85 insertions(+), 42 deletions(-)

diff --git a/providers/rxe/rxe-abi.h b/providers/rxe/rxe-abi.h
index 0689c3f4..020201a9 100644
--- a/providers/rxe/rxe-abi.h
+++ b/providers/rxe/rxe-abi.h
@@ -39,6 +39,8 @@
 #include <rdma/rdma_user_rxe.h>
 #include <kernel-abi/rdma_user_rxe.h>
 
+DECLARE_DRV_CMD(urxe_create_ah, IB_USER_VERBS_CMD_CREATE_AH,
+		empty, rxe_create_ah_resp);
 DECLARE_DRV_CMD(urxe_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
 		empty, rxe_create_cq_resp);
 DECLARE_DRV_CMD(urxe_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index 3c3ea8bb..12ffe779 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -975,16 +975,20 @@ static void wr_set_ud_addr(struct ibv_qp_ex *ibqp, struct ibv_ah *ibah,
 			   uint32_t remote_qpn, uint32_t remote_qkey)
 {
 	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
-	struct rxe_ah *ah = container_of(ibah, struct rxe_ah, ibv_ah);
+	struct rxe_ah *ah = to_rah(ibah);
 	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue,
 						   qp->cur_index - 1);
 
 	if (qp->err)
 		return;
 
-	memcpy(&wqe->av, &ah->av, sizeof(ah->av));
 	wqe->wr.wr.ud.remote_qpn = remote_qpn;
 	wqe->wr.wr.ud.remote_qkey = remote_qkey;
+	wqe->wr.wr.ud.ah_num = ah->ah_num;
+
+	if (!ah->ah_num)
+		/* old kernels only */
+		memcpy(&wqe->av, &ah->av, sizeof(ah->av));
 }
 
 static void wr_set_inline_data(struct ibv_qp_ex *ibqp, void *addr,
@@ -1404,7 +1408,8 @@ static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
 	return 0;
 }
 
-static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
+static void convert_send_wr(struct rxe_qp *qp, struct rxe_send_wr *kwr,
+					struct ibv_send_wr *uwr)
 {
 	struct ibv_mw *ibmw;
 	struct ibv_mr *ibmr;
@@ -1427,8 +1432,13 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
 
 	case IBV_WR_SEND:
 	case IBV_WR_SEND_WITH_IMM:
-		kwr->wr.ud.remote_qpn		= uwr->wr.ud.remote_qpn;
-		kwr->wr.ud.remote_qkey		= uwr->wr.ud.remote_qkey;
+		if (qp_type(qp) == IBV_QPT_UD) {
+			struct rxe_ah *ah = to_rah(uwr->wr.ud.ah);
+
+			kwr->wr.ud.remote_qpn	= uwr->wr.ud.remote_qpn;
+			kwr->wr.ud.remote_qkey	= uwr->wr.ud.remote_qkey;
+			kwr->wr.ud.ah_num	= ah->ah_num;
+		}
 		break;
 
 	case IBV_WR_ATOMIC_CMP_AND_SWP:
@@ -1464,11 +1474,15 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
 	int i;
 	unsigned int opcode = ibwr->opcode;
 
-	convert_send_wr(&wqe->wr, ibwr);
+	convert_send_wr(qp, &wqe->wr, ibwr);
 
-	if (qp_type(qp) == IBV_QPT_UD)
-		memcpy(&wqe->av, &to_rah(ibwr->wr.ud.ah)->av,
-		       sizeof(struct rxe_av));
+	if (qp_type(qp) == IBV_QPT_UD) {
+		struct rxe_ah *ah = to_rah(ibwr->wr.ud.ah);
+
+		if (!ah->ah_num)
+			/* old kernels only */
+			memcpy(&wqe->av, &ah->av, sizeof(struct rxe_av));
+	}
 
 	if (ibwr->send_flags & IBV_SEND_INLINE) {
 		uint8_t *inline_data = wqe->dma.inline_data;
@@ -1645,59 +1659,82 @@ static inline int rdma_gid2ip(sockaddr_union_t *out, union ibv_gid *gid)
 	return 0;
 }
 
-static struct ibv_ah *rxe_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
+static int rxe_create_av(struct rxe_ah *ah, struct ibv_pd *pd,
+					struct ibv_ah_attr *attr)
 {
-	int err;
-	struct rxe_ah *ah;
-	struct rxe_av *av;
+	struct rxe_av *av = &ah->av;
 	union ibv_gid sgid;
-	struct ib_uverbs_create_ah_resp resp;
-
-	err = ibv_query_gid(pd->context, attr->port_num, attr->grh.sgid_index,
-			    &sgid);
-	if (err) {
-		fprintf(stderr, "rxe: Failed to query sgid.\n");
-		return NULL;
-	}
+	int ret;
 
-	ah = malloc(sizeof(*ah));
-	if (ah == NULL)
-		return NULL;
+	ret = ibv_query_gid(pd->context, attr->port_num,
+			attr->grh.sgid_index, &sgid);
+	if (ret)
+		return ret;
 
-	av = &ah->av;
 	av->port_num = attr->port_num;
 	memcpy(&av->grh, &attr->grh, sizeof(attr->grh));
-	av->network_type =
-		ipv6_addr_v4mapped((struct in6_addr *)attr->grh.dgid.raw) ?
-		RXE_NETWORK_TYPE_IPV4 : RXE_NETWORK_TYPE_IPV6;
+
+	ret = ipv6_addr_v4mapped((struct in6_addr *)attr->grh.dgid.raw);
+	av->network_type = ret ? RXE_NETWORK_TYPE_IPV4 :
+				 RXE_NETWORK_TYPE_IPV6;
 
 	rdma_gid2ip(&av->sgid_addr, &sgid);
 	rdma_gid2ip(&av->dgid_addr, &attr->grh.dgid);
-	if (ibv_resolve_eth_l2_from_gid(pd->context, attr, av->dmac, NULL)) {
-		free(ah);
-		return NULL;
-	}
 
-	memset(&resp, 0, sizeof(resp));
-	if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp, sizeof(resp))) {
-		free(ah);
+	ret = ibv_resolve_eth_l2_from_gid(pd->context, attr,
+			av->dmac, NULL);
+
+	return ret;
+}
+
+/*
+ * Newer kernels will return a non-zero AH index in resp.ah_num
+ * which can be returned in UD send WQEs.
+ * Older kernels will leave ah_num == 0. For these create an AV and use
+ * in UD send WQEs.
+ */
+static struct ibv_ah *rxe_create_ah(struct ibv_pd *pd,
+					struct ibv_ah_attr *attr)
+{
+	struct rxe_ah *ah;
+	struct urxe_create_ah_resp resp = {};
+	int ret;
+
+	ah = calloc(1, sizeof(*ah));
+	if (!ah)
 		return NULL;
+
+	ret = ibv_cmd_create_ah(pd, &ah->ibv_ah, attr,
+					&resp.ibv_resp, sizeof(resp));
+	if (ret)
+		goto err_free;
+
+	ah->ah_num = resp.ah_num;
+
+	if (!ah->ah_num) {
+		/* old kernels only */
+		ret = rxe_create_av(ah, pd, attr);
+		if (ret)
+			goto err_free;
 	}
 
 	return &ah->ibv_ah;
+
+err_free:
+	free(ah);
+	return NULL;
 }
 
 static int rxe_destroy_ah(struct ibv_ah *ibah)
 {
-	int ret;
 	struct rxe_ah *ah = to_rah(ibah);
+	int ret;
 
 	ret = ibv_cmd_destroy_ah(&ah->ibv_ah);
-	if (ret)
-		return ret;
+	if (!ret)
+		free(ah);
 
-	free(ah);
-	return 0;
+	return ret;
 }
 
 static const struct verbs_context_ops rxe_ctx_ops = {
diff --git a/providers/rxe/rxe.h b/providers/rxe/rxe.h
index ddb8fe89..6882d9c7 100644
--- a/providers/rxe/rxe.h
+++ b/providers/rxe/rxe.h
@@ -67,6 +67,7 @@ struct rxe_cq {
 struct rxe_ah {
 	struct ibv_ah		ibv_ah;
 	struct rxe_av		av;
+	int			ah_num;
 };
 
 struct rxe_wq {
@@ -89,8 +90,6 @@ struct rxe_qp {
 	int			err;
 };
 
-#define qp_type(qp)		((qp)->vqp.qp.qp_type)
-
 struct rxe_srq {
 	struct ibv_srq		ibv_srq;
 	struct mminfo		mmap_info;
@@ -130,4 +129,9 @@ static inline struct rxe_ah *to_rah(struct ibv_ah *ibah)
 	return to_rxxx(ah, ah);
 }
 
+static inline enum ibv_qp_type qp_type(struct rxe_qp *qp)
+{
+	return qp->vqp.qp.qp_type;
+}
+
 #endif /* RXE_H */
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-06-29 14:17 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-29 14:16 [PATCH resending 0/2] Providers/rxe: Replace AV by AH for UD sends Bob Pearson
2021-06-29 14:16 ` [PATCH resending 1/2] Update kernel headers Bob Pearson
2021-06-29 14:16 ` [PATCH resending 2/2] Providers/rxe: Replace AV by AH for UD sends Bob Pearson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).