* [PATCH for-next 00/13] RDMA/rxe: Implement XRC for rxe
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

This series of patches implements the XRC transport for the rxe driver.

These patches apply to the current for-next branch on top of the (v2)
"Three rxe bug fixes", (v3) "Replace AV by AH in UD sends", and
"Let RDMA-core manage certain objects" series.

Bob Pearson (13):
  RDMA/rxe: Decouple rxe_pkt_info from sk_buff
  IB/core: Add xrc opcodes to ib_pack.h
  RDMA/rxe: Extend rxe_send_wr to support xrceth
  RDMA/rxe: Extend rxe_opcode.h to support xrc
  RDMA/rxe: Add XRC ETH to rxe_hdr.h
  RDMA/rxe: Add XRC QP type to rxe_wr_opcode_info
  RDMA/rxe: Add XRC opcodes to rxe_opcode_info
  RDMA/rxe: Support alloc/dealloc xrcd
  RDMA/rxe: Extend SRQs to support extensions
  RDMA/rxe: Compute next opcode for XRC
  RDMA/rxe: Extend rxe_verbs and rxe_qp to support XRC
  RDMA/rxe: Extend rxe send XRC packets
  RDMA/rxe: Enable receiving XRC packets

 drivers/infiniband/sw/rxe/rxe.c        |  39 ++-
 drivers/infiniband/sw/rxe/rxe_av.c     |   6 +-
 drivers/infiniband/sw/rxe/rxe_comp.c   |  47 +--
 drivers/infiniband/sw/rxe/rxe_hdr.h    |  58 +++-
 drivers/infiniband/sw/rxe/rxe_loc.h    |  15 +-
 drivers/infiniband/sw/rxe/rxe_mw.c     |   6 +-
 drivers/infiniband/sw/rxe/rxe_net.c    |  36 ++-
 drivers/infiniband/sw/rxe/rxe_opcode.c | 388 +++++++++++++++++++++++--
 drivers/infiniband/sw/rxe/rxe_opcode.h |  38 +--
 drivers/infiniband/sw/rxe/rxe_param.h  |   1 +
 drivers/infiniband/sw/rxe/rxe_pool.c   |   6 +
 drivers/infiniband/sw/rxe/rxe_pool.h   |   1 +
 drivers/infiniband/sw/rxe/rxe_qp.c     | 235 ++++++++-------
 drivers/infiniband/sw/rxe/rxe_recv.c   |  51 +++-
 drivers/infiniband/sw/rxe/rxe_req.c    |  91 +++++-
 drivers/infiniband/sw/rxe/rxe_resp.c   | 209 +++++++++----
 drivers/infiniband/sw/rxe/rxe_srq.c    |  71 ++---
 drivers/infiniband/sw/rxe/rxe_verbs.c  |  37 ++-
 drivers/infiniband/sw/rxe/rxe_verbs.h  |  42 ++-
 include/rdma/ib_pack.h                 |  28 +-
 include/uapi/rdma/rdma_user_rxe.h      |   4 +
 21 files changed, 1098 insertions(+), 311 deletions(-)

-- 
2.30.2



* [PATCH for-next 01/13] RDMA/rxe: Decouple rxe_pkt_info from sk_buff
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Currently, on the receive path, the rxe_pkt_info struct is stored in the
skb->cb array. But this patch series requires extending it beyond 48 bytes,
and it is already at that limit. This patch places a pointer to the pkt info
struct in skb->cb instead and allocates the struct separately. All instances
of freeing the skb on the receive path are collected into calls to
rxe_free_pkt(), which is extended to also free the pkt info struct. In
rxe_rcv_mcast_pkt(), if skb_clone() fails, continue is replaced by break,
since we are out of memory and there is no point in going on to the other
mcast QPs.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
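Note for reviewers: a minimal sketch of the old and new layouts, assuming
the usual 48-byte skb->cb scratch area:

	/* before: the whole struct had to fit in skb->cb (<= 48 bytes) */
	pkt = (struct rxe_pkt_info *)skb->cb;

	/* after: only an 8-byte pointer lives in skb->cb, so the struct
	 * itself is free to grow; the back pointer replaces the old
	 * container_of() in PKT_TO_SKB()
	 */
	pkt = kzalloc(sizeof(*pkt), GFP_ATOMIC);
	if (unlikely(!pkt))
		goto drop;
	RXE_CB(skb)->pkt = pkt;		/* forward link for SKB_TO_PKT() */
	pkt->skb = skb;			/* back link for PKT_TO_SKB() */
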
 drivers/infiniband/sw/rxe/rxe_comp.c | 20 +++-------------
 drivers/infiniband/sw/rxe/rxe_hdr.h  | 13 ++++++++---
 drivers/infiniband/sw/rxe/rxe_loc.h  |  3 +++
 drivers/infiniband/sw/rxe/rxe_net.c  | 14 +++++++++--
 drivers/infiniband/sw/rxe/rxe_recv.c | 35 +++++++++++++++++++++-------
 drivers/infiniband/sw/rxe/rxe_resp.c | 18 ++++----------
 6 files changed, 59 insertions(+), 44 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 1ccd2deff835..4d62e5bdf820 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -522,11 +522,8 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
 	struct rxe_send_wqe *wqe;
 	struct rxe_queue *q = qp->sq.queue;
 
-	while ((skb = skb_dequeue(&qp->resp_pkts))) {
-		rxe_drop_ref(qp);
-		kfree_skb(skb);
-		ib_device_put(qp->ibqp.device);
-	}
+	while ((skb = skb_dequeue(&qp->resp_pkts)))
+		rxe_free_pkt(SKB_TO_PKT(skb));
 
 	while ((wqe = queue_head(q, q->type))) {
 		if (notify) {
@@ -538,17 +535,6 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
 	}
 }
 
-static void free_pkt(struct rxe_pkt_info *pkt)
-{
-	struct sk_buff *skb = PKT_TO_SKB(pkt);
-	struct rxe_qp *qp = pkt->qp;
-	struct ib_device *dev = qp->ibqp.device;
-
-	kfree_skb(skb);
-	rxe_drop_ref(qp);
-	ib_device_put(dev);
-}
-
 int rxe_completer(void *arg)
 {
 	struct rxe_qp *qp = (struct rxe_qp *)arg;
@@ -757,7 +743,7 @@ int rxe_completer(void *arg)
 
 done:
 	if (pkt)
-		free_pkt(pkt);
+		rxe_free_pkt(pkt);
 	rxe_drop_ref(qp);
 
 	return ret;
diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
index e432f9e37795..d9d15c672f86 100644
--- a/drivers/infiniband/sw/rxe/rxe_hdr.h
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -12,6 +12,7 @@
  * sk_buff for received packets.
  */
 struct rxe_pkt_info {
+	struct sk_buff		*skb;		/* back pointer to skb */
 	struct rxe_dev		*rxe;		/* device that owns packet */
 	struct rxe_qp		*qp;		/* qp that owns packet */
 	struct rxe_send_wqe	*wqe;		/* send wqe */
@@ -24,16 +25,22 @@ struct rxe_pkt_info {
 	u8			opcode;		/* bth opcode of packet */
 };
 
+/* rxe info in skb->cb */
+struct rxe_cb {
+	struct rxe_pkt_info	*pkt;		/* pointer to pkt info */
+};
+
+#define RXE_CB(skb) ((struct rxe_cb *)(skb)->cb)
+
 /* Macros should be used only for received skb */
 static inline struct rxe_pkt_info *SKB_TO_PKT(struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(struct rxe_pkt_info) > sizeof(skb->cb));
-	return (void *)skb->cb;
+	return RXE_CB(skb)->pkt;
 }
 
 static inline struct sk_buff *PKT_TO_SKB(struct rxe_pkt_info *pkt)
 {
-	return container_of((void *)pkt, struct sk_buff, cb);
+	return pkt->skb;
 }
 
 /*
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index de75413fb4d9..b4d45c592bd7 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -139,6 +139,9 @@ static inline int qp_mtu(struct rxe_qp *qp)
 		return IB_MTU_4096;
 }
 
+/* rxe_recv.c */
+void rxe_free_pkt(struct rxe_pkt_info *pkt);
+
 static inline int rcv_wqe_size(int max_sge)
 {
 	return sizeof(struct rxe_recv_wqe) +
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 4f96437a2a8e..6212e61d267b 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -155,7 +155,7 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	struct udphdr *udph;
 	struct rxe_dev *rxe;
 	struct net_device *ndev = skb->dev;
-	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+	struct rxe_pkt_info *pkt;
 
 	/* takes a reference on rxe->ib_dev
 	 * drop when skb is freed
@@ -172,6 +172,14 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		goto drop;
 	}
 
+	pkt = kzalloc(sizeof(*pkt), GFP_ATOMIC);
+	if (unlikely(!pkt)) {
+		ib_device_put(&rxe->ib_dev);
+		goto drop;
+	}
+	RXE_CB(skb)->pkt = pkt;
+	pkt->skb = skb;
+
 	udph = udp_hdr(skb);
 	pkt->rxe = rxe;
 	pkt->port_num = 1;
@@ -407,15 +411,25 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
  */
 static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 {
-	memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
+	struct rxe_pkt_info *new_pkt;
 
 	if (skb->protocol == htons(ETH_P_IP))
 		skb_pull(skb, sizeof(struct iphdr));
 	else
 		skb_pull(skb, sizeof(struct ipv6hdr));
 
+	new_pkt = kzalloc(sizeof(*new_pkt), GFP_ATOMIC);
+	if (unlikely(!new_pkt)) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+	memcpy(new_pkt, pkt, sizeof(*pkt));
+	RXE_CB(skb)->pkt = new_pkt;
+	new_pkt->skb = skb;
+
 	if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev))) {
 		kfree_skb(skb);
+		kfree(new_pkt);
 		return -EIO;
 	}
 
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 8ed4f3bcc779..cf5ac6bba59c 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -9,6 +9,20 @@
 #include "rxe.h"
 #include "rxe_loc.h"
 
+void rxe_free_pkt(struct rxe_pkt_info *pkt)
+{
+	struct sk_buff *skb = PKT_TO_SKB(pkt);
+	struct rxe_qp *qp = pkt->qp;
+
+	if (qp)
+		rxe_drop_ref(qp);
+
+	ib_device_put(&pkt->rxe->ib_dev);
+
+	kfree_skb(skb);
+	kfree(pkt);
+}
+
 /* check that QP matches packet opcode type and is in a valid state */
 static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 			    struct rxe_qp *qp)
@@ -279,14 +293,22 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 
 			cskb = skb_clone(skb, GFP_ATOMIC);
 			if (unlikely(!cskb))
-				continue;
+				break;
+
+			cpkt = kzalloc(sizeof(*cpkt), GFP_ATOMIC);
+			if (unlikely(!cpkt)) {
+				kfree_skb(cskb);
+				break;
+			}
+			RXE_CB(cskb)->pkt = cpkt;
+			cpkt->skb = cskb;
 
 			if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
 				kfree_skb(cskb);
+				kfree(cpkt);
 				break;
 			}
 
-			cpkt = SKB_TO_PKT(cskb);
 			cpkt->qp = qp;
 			rxe_add_ref(qp);
 			rxe_rcv_pkt(cpkt, cskb);
@@ -310,8 +332,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 	 */
 
 drop:
-	kfree_skb(skb);
-	ib_device_put(&rxe->ib_dev);
+	rxe_free_pkt(SKB_TO_PKT(skb));
 }
 
 /**
@@ -396,9 +417,5 @@ void rxe_rcv(struct sk_buff *skb)
 	return;
 
 drop:
-	if (pkt->qp)
-		rxe_drop_ref(pkt->qp);
-
-	kfree_skb(skb);
-	ib_device_put(&rxe->ib_dev);
+	rxe_free_pkt(pkt);
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c6a6257a299f..ac8d823eb416 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -98,11 +98,8 @@ static inline enum resp_states get_req(struct rxe_qp *qp,
 	struct sk_buff *skb;
 
 	if (qp->resp.state == QP_STATE_ERROR) {
-		while ((skb = skb_dequeue(&qp->req_pkts))) {
-			rxe_drop_ref(qp);
-			kfree_skb(skb);
-			ib_device_put(qp->ibqp.device);
-		}
+		while ((skb = skb_dequeue(&qp->req_pkts)))
+			rxe_free_pkt(SKB_TO_PKT(skb));
 
 		/* go drain recv wr queue */
 		return RESPST_CHK_RESOURCE;
@@ -1020,9 +1017,7 @@ static enum resp_states cleanup(struct rxe_qp *qp,
 
 	if (pkt) {
 		skb = skb_dequeue(&qp->req_pkts);
-		rxe_drop_ref(qp);
-		kfree_skb(skb);
-		ib_device_put(qp->ibqp.device);
+		rxe_free_pkt(SKB_TO_PKT(skb));
 	}
 
 	if (qp->resp.mr) {
@@ -1183,11 +1178,8 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
 	struct sk_buff *skb;
 	struct rxe_queue *q = qp->rq.queue;
 
-	while ((skb = skb_dequeue(&qp->req_pkts))) {
-		rxe_drop_ref(qp);
-		kfree_skb(skb);
-		ib_device_put(qp->ibqp.device);
-	}
+	while ((skb = skb_dequeue(&qp->req_pkts)))
+		rxe_free_pkt(SKB_TO_PKT(skb));
 
 	if (notify)
 		return;
-- 
2.30.2



* [PATCH for-next 02/13] IB/core: Add xrc opcodes to ib_pack.h
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

ib_pack.h defines enums for all the RDMA opcodes except for the XRC
opcodes. This patch adds those opcodes.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
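Note: ib_pack.h builds each enum value with the IB_OPCODE() macro, which
adds the transport base (the high three bits of the BTH opcode) to the
per-operation code (the low five bits), e.g.:

	IB_OPCODE(XRC, SEND_ONLY)
	/* expands to */
	IB_OPCODE_XRC_SEND_ONLY = IB_OPCODE_XRC + IB_OPCODE_SEND_ONLY
				= 0xa0 + 0x04 = 0xa4
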
 include/rdma/ib_pack.h | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index a9162f25beaf..afbf78a6669e 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -56,6 +56,7 @@ enum {
 	IB_OPCODE_UD                                = 0x60,
 	/* per IBTA 1.3 vol 1 Table 38, A10.3.2 */
 	IB_OPCODE_CNP                               = 0x80,
+	IB_OPCODE_XRC                               = 0xa0,
 	/* Manufacturer specific */
 	IB_OPCODE_MSP                               = 0xe0,
 
@@ -152,7 +153,32 @@ enum {
 
 	/* UD */
 	IB_OPCODE(UD, SEND_ONLY),
-	IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE)
+	IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE),
+
+	/* XRC */
+	IB_OPCODE(XRC, SEND_FIRST),
+	IB_OPCODE(XRC, SEND_MIDDLE),
+	IB_OPCODE(XRC, SEND_LAST),
+	IB_OPCODE(XRC, SEND_LAST_WITH_IMMEDIATE),
+	IB_OPCODE(XRC, SEND_ONLY),
+	IB_OPCODE(XRC, SEND_ONLY_WITH_IMMEDIATE),
+	IB_OPCODE(XRC, RDMA_WRITE_FIRST),
+	IB_OPCODE(XRC, RDMA_WRITE_MIDDLE),
+	IB_OPCODE(XRC, RDMA_WRITE_LAST),
+	IB_OPCODE(XRC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+	IB_OPCODE(XRC, RDMA_WRITE_ONLY),
+	IB_OPCODE(XRC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+	IB_OPCODE(XRC, RDMA_READ_REQUEST),
+	IB_OPCODE(XRC, RDMA_READ_RESPONSE_FIRST),
+	IB_OPCODE(XRC, RDMA_READ_RESPONSE_MIDDLE),
+	IB_OPCODE(XRC, RDMA_READ_RESPONSE_LAST),
+	IB_OPCODE(XRC, RDMA_READ_RESPONSE_ONLY),
+	IB_OPCODE(XRC, ACKNOWLEDGE),
+	IB_OPCODE(XRC, ATOMIC_ACKNOWLEDGE),
+	IB_OPCODE(XRC, COMPARE_SWAP),
+	IB_OPCODE(XRC, FETCH_ADD),
+	IB_OPCODE(XRC, SEND_LAST_WITH_INVALIDATE),
+	IB_OPCODE(XRC, SEND_ONLY_WITH_INVALIDATE)
 };
 
 enum {
-- 
2.30.2



* [PATCH for-next 03/13] RDMA/rxe: Extend rxe_send_wr to support xrceth
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Add an srq_num field to the wr union in rxe_send_wr, aligned to lie in
the space above the rdma and atomic fields so that it can be used in
parallel with them.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
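Note: with pad[4] the srq_num lands at byte 32 of the union, past the end
of the largest overlapping members (wr.atomic ends at byte 28, wr.rdma at
byte 16). A sketch of a compile-time check (not part of the patch):

	/* srq_num must lie above the rdma and atomic fields */
	BUILD_BUG_ON(offsetof(struct rxe_send_wr, wr.xrc.srq_num) <
		     offsetof(struct rxe_send_wr, wr.atomic.rkey) +
		     sizeof(__u32));
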
 include/uapi/rdma/rdma_user_rxe.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index dc9f7a5e203a..1d80586c731f 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -82,6 +82,10 @@ struct rxe_send_wr {
 		__u32		invalidate_rkey;
 	} ex;
 	union {
+		struct {
+			__aligned_u64 pad[4];
+			__u32	srq_num;
+		} xrc;
 		struct {
 			__aligned_u64 remote_addr;
 			__u32	rkey;
-- 
2.30.2



* [PATCH for-next 04/13] RDMA/rxe: Extend rxe_opcode.h to support xrc
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Extend rxe_wr_opcode_info to support more QP types.

Extend rxe_hdr_type and rxe_hdr_mask enums to support XRCETH headers.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_opcode.h | 38 +++++++++++++-------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index e02f039b8c44..a5349eecc9c0 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -27,28 +27,27 @@ enum rxe_wr_mask {
 	WR_ATOMIC_OR_READ_MASK		= WR_ATOMIC_MASK | WR_READ_MASK,
 };
 
-#define WR_MAX_QPT		(8)
-
 struct rxe_wr_opcode_info {
 	char			*name;
-	enum rxe_wr_mask	mask[WR_MAX_QPT];
+	enum rxe_wr_mask	mask[IB_QPT_MAX];
 };
 
 extern struct rxe_wr_opcode_info rxe_wr_opcode_info[];
 
 enum rxe_hdr_type {
-	RXE_LRH,
-	RXE_GRH,
-	RXE_BTH,
-	RXE_RETH,
-	RXE_AETH,
-	RXE_ATMETH,
-	RXE_ATMACK,
-	RXE_IETH,
-	RXE_RDETH,
-	RXE_DETH,
-	RXE_IMMDT,
-	RXE_PAYLOAD,
+	RXE_LRH,		/* IBA 5.2.1 not used by rxe */
+	RXE_GRH,		/* IBA 5.2.2 */
+	RXE_BTH,		/* IBA 5.2.3 */
+	RXE_RDETH,		/* IBA 5.2.4 not supported by rxe */
+	RXE_DETH,		/* IBA 5.2.5 */
+	RXE_RETH,		/* IBA 5.2.6 */
+	RXE_ATMETH,		/* IBA 5.2.7 */
+	RXE_XRCETH,		/* IBA 5.2.8 */
+	RXE_AETH,		/* IBA 5.2.9 */
+	RXE_ATMACK,		/* IBA 5.2.10 */
+	RXE_IMMDT,		/* IBA 5.2.11 */
+	RXE_IETH,		/* IBA 5.2.12 */
+	RXE_PAYLOAD,		/* IBA 5.2.13 */
 	NUM_HDR_TYPES
 };
 
@@ -56,14 +55,15 @@ enum rxe_hdr_mask {
 	RXE_LRH_MASK		= BIT(RXE_LRH),
 	RXE_GRH_MASK		= BIT(RXE_GRH),
 	RXE_BTH_MASK		= BIT(RXE_BTH),
-	RXE_IMMDT_MASK		= BIT(RXE_IMMDT),
+	RXE_RDETH_MASK		= BIT(RXE_RDETH),
+	RXE_DETH_MASK		= BIT(RXE_DETH),
 	RXE_RETH_MASK		= BIT(RXE_RETH),
-	RXE_AETH_MASK		= BIT(RXE_AETH),
 	RXE_ATMETH_MASK		= BIT(RXE_ATMETH),
+	RXE_XRCETH_MASK		= BIT(RXE_XRCETH),
+	RXE_AETH_MASK		= BIT(RXE_AETH),
 	RXE_ATMACK_MASK		= BIT(RXE_ATMACK),
+	RXE_IMMDT_MASK		= BIT(RXE_IMMDT),
 	RXE_IETH_MASK		= BIT(RXE_IETH),
-	RXE_RDETH_MASK		= BIT(RXE_RDETH),
-	RXE_DETH_MASK		= BIT(RXE_DETH),
 	RXE_PAYLOAD_MASK	= BIT(RXE_PAYLOAD),
 
 	RXE_REQ_MASK		= BIT(NUM_HDR_TYPES + 0),
-- 
2.30.2



* [PATCH for-next 05/13] RDMA/rxe: Add XRC ETH to rxe_hdr.h
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Extend rxe_hdr.h to support the XRC extended transport header (XRCETH).

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
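Note: the XRCETH is a single 4-byte word carrying the 24-bit target XRC
SRQ number (the top byte is reserved). A sketch of the intended use,
assuming the srq_num field added to rxe_send_wr earlier in this series:

	/* requester: stamp the destination SRQ number into the header */
	xrceth_set_xrcsrq(pkt, wqe->wr.wr.xrc.srq_num);

	/* responder: recover it to select the target XRC SRQ */
	srq_num = xrceth_xrcsrq(pkt);
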
 drivers/infiniband/sw/rxe/rxe_hdr.h | 36 +++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
index d9d15c672f86..499807b11405 100644
--- a/drivers/infiniband/sw/rxe/rxe_hdr.h
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -907,11 +907,47 @@ static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
 		rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey);
 }
 
+/******************************************************************************
+ * XRC Extended Transport Header
+ ******************************************************************************/
+struct rxe_xrceth {
+	__be32			rxrcsrq;
+};
+
+#define XRCETH_XRCSRQ_MASK	(0x00ffffff)
+
+static inline u32 __xrceth_xrcsrq(void *arg)
+{
+	struct rxe_xrceth *xrceth = arg;
+
+	return XRCETH_XRCSRQ_MASK & be32_to_cpu(xrceth->rxrcsrq);
+}
+
+static inline void __xrceth_set_xrcsrq(void *arg, u32 xrcsrq)
+{
+	struct rxe_xrceth *xrceth = arg;
+
+	xrceth->rxrcsrq = cpu_to_be32(XRCETH_XRCSRQ_MASK & xrcsrq);
+}
+
+static inline u32 xrceth_xrcsrq(struct rxe_pkt_info *pkt)
+{
+	return __xrceth_xrcsrq(pkt->hdr +
+		rxe_opcode[pkt->opcode].offset[RXE_XRCETH]);
+}
+
+static inline void xrceth_set_xrcsrq(struct rxe_pkt_info *pkt, u32 xrcsrq)
+{
+	__xrceth_set_xrcsrq(pkt->hdr +
+		rxe_opcode[pkt->opcode].offset[RXE_XRCETH], xrcsrq);
+}
+
 enum rxe_hdr_length {
 	RXE_BTH_BYTES		= sizeof(struct rxe_bth),
 	RXE_DETH_BYTES		= sizeof(struct rxe_deth),
 	RXE_IMMDT_BYTES		= sizeof(struct rxe_immdt),
 	RXE_RETH_BYTES		= sizeof(struct rxe_reth),
+	RXE_XRCETH_BYTES	= sizeof(struct rxe_xrceth),
 	RXE_AETH_BYTES		= sizeof(struct rxe_aeth),
 	RXE_ATMACK_BYTES	= sizeof(struct rxe_atmack),
 	RXE_ATMETH_BYTES	= sizeof(struct rxe_atmeth),
-- 
2.30.2



* [PATCH for-next 06/13] RDMA/rxe: Add XRC QP type to rxe_wr_opcode_info
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Add the IB_QPT_XRC_INI QP type to rxe_wr_opcode_info.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
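Note: the table is consulted when a send WR is posted; a zero mask rejects
the opcode for that QP type. Roughly (cf. wr_opcode_mask() in rxe_verbs.h):

	unsigned int mask = rxe_wr_opcode_info[ibwr->opcode].mask[qp_type];

	if (!mask)
		return -EINVAL;	/* e.g. IB_WR_RDMA_READ on a UC QP */
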
 drivers/infiniband/sw/rxe/rxe_opcode.c | 61 +++++++++++++++-----------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index 3ef5a10a6efd..da719abc1846 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -15,53 +15,60 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
 	[IB_WR_RDMA_WRITE]				= {
 		.name	= "IB_WR_RDMA_WRITE",
 		.mask	= {
-			[IB_QPT_RC]	= WR_INLINE_MASK | WR_WRITE_MASK,
-			[IB_QPT_UC]	= WR_INLINE_MASK | WR_WRITE_MASK,
+			[IB_QPT_RC]	 = WR_INLINE_MASK | WR_WRITE_MASK,
+			[IB_QPT_UC]	 = WR_INLINE_MASK | WR_WRITE_MASK,
+			[IB_QPT_XRC_INI] = WR_INLINE_MASK | WR_WRITE_MASK,
 		},
 	},
 	[IB_WR_RDMA_WRITE_WITH_IMM]			= {
 		.name	= "IB_WR_RDMA_WRITE_WITH_IMM",
 		.mask	= {
-			[IB_QPT_RC]	= WR_INLINE_MASK | WR_WRITE_MASK,
-			[IB_QPT_UC]	= WR_INLINE_MASK | WR_WRITE_MASK,
+			[IB_QPT_RC]	 = WR_INLINE_MASK | WR_WRITE_MASK,
+			[IB_QPT_UC]	 = WR_INLINE_MASK | WR_WRITE_MASK,
+			[IB_QPT_XRC_INI] = WR_INLINE_MASK | WR_WRITE_MASK,
 		},
 	},
 	[IB_WR_SEND]					= {
 		.name	= "IB_WR_SEND",
 		.mask	= {
-			[IB_QPT_SMI]	= WR_INLINE_MASK | WR_SEND_MASK,
-			[IB_QPT_GSI]	= WR_INLINE_MASK | WR_SEND_MASK,
-			[IB_QPT_RC]	= WR_INLINE_MASK | WR_SEND_MASK,
-			[IB_QPT_UC]	= WR_INLINE_MASK | WR_SEND_MASK,
-			[IB_QPT_UD]	= WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_SMI]	 = WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_GSI]	 = WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_RC]	 = WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_UC]	 = WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_UD]	 = WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_XRC_INI] = WR_INLINE_MASK | WR_SEND_MASK,
 		},
 	},
 	[IB_WR_SEND_WITH_IMM]				= {
 		.name	= "IB_WR_SEND_WITH_IMM",
 		.mask	= {
-			[IB_QPT_SMI]	= WR_INLINE_MASK | WR_SEND_MASK,
-			[IB_QPT_GSI]	= WR_INLINE_MASK | WR_SEND_MASK,
-			[IB_QPT_RC]	= WR_INLINE_MASK | WR_SEND_MASK,
-			[IB_QPT_UC]	= WR_INLINE_MASK | WR_SEND_MASK,
-			[IB_QPT_UD]	= WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_SMI]	 = WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_GSI]	 = WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_RC]	 = WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_UC]	 = WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_UD]	 = WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_XRC_INI] = WR_INLINE_MASK | WR_SEND_MASK,
 		},
 	},
 	[IB_WR_RDMA_READ]				= {
 		.name	= "IB_WR_RDMA_READ",
 		.mask	= {
-			[IB_QPT_RC]	= WR_READ_MASK,
+			[IB_QPT_RC]	 = WR_READ_MASK,
+			[IB_QPT_XRC_INI] = WR_READ_MASK,
 		},
 	},
 	[IB_WR_ATOMIC_CMP_AND_SWP]			= {
 		.name	= "IB_WR_ATOMIC_CMP_AND_SWP",
 		.mask	= {
-			[IB_QPT_RC]	= WR_ATOMIC_MASK,
+			[IB_QPT_RC]	 = WR_ATOMIC_MASK,
+			[IB_QPT_XRC_INI] = WR_ATOMIC_MASK,
 		},
 	},
 	[IB_WR_ATOMIC_FETCH_AND_ADD]			= {
 		.name	= "IB_WR_ATOMIC_FETCH_AND_ADD",
 		.mask	= {
-			[IB_QPT_RC]	= WR_ATOMIC_MASK,
+			[IB_QPT_RC]	 = WR_ATOMIC_MASK,
+			[IB_QPT_XRC_INI] = WR_ATOMIC_MASK,
 		},
 	},
 	[IB_WR_LSO]					= {
@@ -73,34 +80,38 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
 	[IB_WR_SEND_WITH_INV]				= {
 		.name	= "IB_WR_SEND_WITH_INV",
 		.mask	= {
-			[IB_QPT_RC]	= WR_INLINE_MASK | WR_SEND_MASK,
-			[IB_QPT_UC]	= WR_INLINE_MASK | WR_SEND_MASK,
-			[IB_QPT_UD]	= WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_RC]	 = WR_INLINE_MASK | WR_SEND_MASK,
+			[IB_QPT_XRC_INI] = WR_INLINE_MASK | WR_SEND_MASK,
 		},
 	},
 	[IB_WR_RDMA_READ_WITH_INV]			= {
 		.name	= "IB_WR_RDMA_READ_WITH_INV",
 		.mask	= {
-			[IB_QPT_RC]	= WR_READ_MASK,
+			[IB_QPT_RC]	 = WR_READ_MASK,
+			/* TODO: drop this entry - no such operation for RoCE */
 		},
 	},
 	[IB_WR_LOCAL_INV]				= {
 		.name	= "IB_WR_LOCAL_INV",
 		.mask	= {
-			[IB_QPT_RC]	= WR_LOCAL_OP_MASK,
+			[IB_QPT_RC]	 = WR_LOCAL_OP_MASK,
+			[IB_QPT_UC]	 = WR_LOCAL_OP_MASK,
+			[IB_QPT_XRC_INI] = WR_LOCAL_OP_MASK,
 		},
 	},
 	[IB_WR_REG_MR]					= {
 		.name	= "IB_WR_REG_MR",
 		.mask	= {
-			[IB_QPT_RC]	= WR_LOCAL_OP_MASK,
+			[IB_QPT_RC]	 = WR_LOCAL_OP_MASK,
+			[IB_QPT_UC]	 = WR_LOCAL_OP_MASK,
+			[IB_QPT_XRC_INI] = WR_LOCAL_OP_MASK,
 		},
 	},
 	[IB_WR_BIND_MW]					= {
 		.name	= "IB_WR_BIND_MW",
 		.mask	= {
-			[IB_QPT_RC]	= WR_LOCAL_OP_MASK,
-			[IB_QPT_UC]	= WR_LOCAL_OP_MASK,
+			[IB_QPT_RC]	 = WR_LOCAL_OP_MASK,
+			[IB_QPT_UC]	 = WR_LOCAL_OP_MASK,
 		},
 	},
 };
-- 
2.30.2



* [PATCH for-next 07/13] RDMA/rxe: Add XRC opcodes to rxe_opcode_info
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Add XRC opcodes to rxe_opcode_info.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
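Note: the per-opcode offset[] tables drive every header accessor, so
inserting the XRCETH between the BTH and the remaining headers needs no
changes elsewhere. E.g. for IB_OPCODE_XRC_RDMA_WRITE_ONLY (mirroring
payload_addr() in rxe_hdr.h):

	void *reth    = pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_RETH];
	void *payload = pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD];
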
 drivers/infiniband/sw/rxe/rxe_opcode.c | 328 +++++++++++++++++++++++++
 1 file changed, 328 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index da719abc1846..af8e05bc63b2 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -949,4 +949,330 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
 		}
 	},
 
+	/* XRC */
+	[IB_OPCODE_XRC_SEND_FIRST]			= {
+		.name	= "IB_OPCODE_XRC_SEND_FIRST",
+		.mask	= RXE_XRCETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+				| RXE_RWR_MASK | RXE_SEND_MASK | RXE_START_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_SEND_MIDDLE]		= {
+		.name	= "IB_OPCODE_XRC_SEND_MIDDLE]",
+		.mask	= RXE_XRCETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+				| RXE_SEND_MASK | RXE_MIDDLE_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_SEND_LAST]			= {
+		.name	= "IB_OPCODE_XRC_SEND_LAST",
+		.mask	= RXE_XRCETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+				| RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_SEND_LAST_WITH_IMMEDIATE]		= {
+		.name	= "IB_OPCODE_XRC_SEND_LAST_WITH_IMMEDIATE",
+		.mask	= RXE_XRCETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
+				| RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK
+				| RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES + RXE_IMMDT_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_IMMDT]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES
+						+ RXE_IMMDT_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_SEND_ONLY]			= {
+		.name	= "IB_OPCODE_XRC_SEND_ONLY",
+		.mask	= RXE_XRCETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+				| RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
+				| RXE_START_MASK | RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_SEND_ONLY_WITH_IMMEDIATE]		= {
+		.name	= "IB_OPCODE_XRC_SEND_ONLY_WITH_IMMEDIATE",
+		.mask	= RXE_XRCETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
+				| RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK
+				| RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES + RXE_IMMDT_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_IMMDT]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES
+						+ RXE_IMMDT_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_RDMA_WRITE_FIRST]		= {
+		.name	= "IB_OPCODE_XRC_RDMA_WRITE_FIRST",
+		.mask	= RXE_XRCETH_MASK | RXE_RETH_MASK | RXE_PAYLOAD_MASK
+				| RXE_REQ_MASK | RXE_WRITE_MASK
+				| RXE_START_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES + RXE_RETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_RETH]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES
+						+ RXE_RETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_RDMA_WRITE_MIDDLE]		= {
+		.name	= "IB_OPCODE_XRC_RDMA_WRITE_MIDDLE",
+		.mask	= RXE_XRCETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+				| RXE_WRITE_MASK | RXE_MIDDLE_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_RDMA_WRITE_LAST]			= {
+		.name	= "IB_OPCODE_XRC_RDMA_WRITE_LAST",
+		.mask	= RXE_XRCETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+				| RXE_WRITE_MASK | RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_RDMA_WRITE_LAST_WITH_IMMEDIATE]		= {
+		.name	= "IB_OPCODE_XRC_RDMA_WRITE_LAST_WITH_IMMEDIATE",
+		.mask	= RXE_XRCETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
+				| RXE_REQ_MASK | RXE_WRITE_MASK | RXE_COMP_MASK
+				| RXE_RWR_MASK | RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES + RXE_IMMDT_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_IMMDT]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES
+						+ RXE_IMMDT_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_RDMA_WRITE_ONLY]			= {
+		.name	= "IB_OPCODE_XRC_RDMA_WRITE_ONLY",
+		.mask	= RXE_XRCETH_MASK | RXE_RETH_MASK | RXE_PAYLOAD_MASK
+				| RXE_REQ_MASK | RXE_WRITE_MASK | RXE_START_MASK
+				| RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES + RXE_RETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_RETH]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES
+						+ RXE_RETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_RDMA_WRITE_ONLY_WITH_IMMEDIATE]		= {
+		.name	= "IB_OPCODE_XRC_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
+		.mask	= RXE_XRCETH_MASK | RXE_RETH_MASK | RXE_IMMDT_MASK
+				| RXE_PAYLOAD_MASK | RXE_REQ_MASK
+				| RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
+				| RXE_START_MASK | RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES + RXE_RETH_BYTES
+						+ RXE_IMMDT_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_RETH]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+			[RXE_IMMDT]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES
+						+ RXE_RETH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES
+						+ RXE_RETH_BYTES
+						+ RXE_IMMDT_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_RDMA_READ_REQUEST]			= {
+		.name	= "IB_OPCODE_XRC_RDMA_READ_REQUEST",
+		.mask	= RXE_XRCETH_MASK | RXE_RETH_MASK | RXE_REQ_MASK
+				| RXE_READ_MASK | RXE_START_MASK | RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES + RXE_RETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_RETH]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES
+						+ RXE_RETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_RDMA_READ_RESPONSE_FIRST]		= {
+		.name	= "IB_OPCODE_XRC_RDMA_READ_RESPONSE_FIRST",
+		.mask	= RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
+				| RXE_START_MASK,
+		.length = RXE_BTH_BYTES + RXE_AETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_AETH]	= RXE_BTH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_AETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_RDMA_READ_RESPONSE_MIDDLE]		= {
+		.name	= "IB_OPCODE_XRC_RDMA_READ_RESPONSE_MIDDLE",
+		.mask	= RXE_PAYLOAD_MASK | RXE_ACK_MASK | RXE_MIDDLE_MASK,
+		.length = RXE_BTH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_RDMA_READ_RESPONSE_LAST]		= {
+		.name	= "IB_OPCODE_XRC_RDMA_READ_RESPONSE_LAST",
+		.mask	= RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
+				| RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_AETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_AETH]	= RXE_BTH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_AETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_RDMA_READ_RESPONSE_ONLY]		= {
+		.name	= "IB_OPCODE_XRC_RDMA_READ_RESPONSE_ONLY",
+		.mask	= RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
+				| RXE_START_MASK | RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_AETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_AETH]	= RXE_BTH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_AETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_ACKNOWLEDGE]			= {
+		.name	= "IB_OPCODE_XRC_ACKNOWLEDGE",
+		.mask	= RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK
+				| RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_AETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_AETH]	= RXE_BTH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_AETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_ATOMIC_ACKNOWLEDGE]			= {
+		.name	= "IB_OPCODE_XRC_ATOMIC_ACKNOWLEDGE",
+		.mask	= RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK
+				| RXE_START_MASK | RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_AETH]	= RXE_BTH_BYTES,
+			[RXE_ATMACK]	= RXE_BTH_BYTES
+						+ RXE_AETH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+					+ RXE_ATMACK_BYTES + RXE_AETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_COMPARE_SWAP]			= {
+		.name	= "IB_OPCODE_XRC_COMPARE_SWAP",
+		.mask	= RXE_XRCETH_MASK | RXE_ATMETH_MASK | RXE_REQ_MASK
+				| RXE_ATOMIC_MASK | RXE_START_MASK
+				| RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES + RXE_ATMETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_ATMETH]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES
+						+ RXE_ATMETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_FETCH_ADD]			= {
+		.name	= "IB_OPCODE_XRC_FETCH_ADD",
+		.mask	= RXE_XRCETH_MASK | RXE_ATMETH_MASK | RXE_REQ_MASK
+				| RXE_ATOMIC_MASK | RXE_START_MASK
+				| RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES + RXE_ATMETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_ATMETH]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES
+						+ RXE_ATMETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_SEND_LAST_WITH_INVALIDATE]		= {
+		.name	= "IB_OPCODE_XRC_SEND_LAST_WITH_INVALIDATE",
+		.mask	= RXE_XRCETH_MASK | RXE_IETH_MASK | RXE_PAYLOAD_MASK
+				| RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK
+				| RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES + RXE_IETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_IETH]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES
+						+ RXE_IETH_BYTES,
+		}
+	},
+	[IB_OPCODE_XRC_SEND_ONLY_WITH_INVALIDATE]		= {
+		.name	= "IB_OPCODE_XRC_SEND_ONLY_INV",
+		.mask	= RXE_XRCETH_MASK | RXE_IETH_MASK | RXE_PAYLOAD_MASK
+				| RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK
+				| RXE_SEND_MASK | RXE_END_MASK | RXE_START_MASK,
+		.length = RXE_BTH_BYTES + RXE_XRCETH_BYTES + RXE_IETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_XRCETH]	= RXE_BTH_BYTES,
+			[RXE_IETH]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES,
+			[RXE_PAYLOAD]	= RXE_BTH_BYTES
+						+ RXE_XRCETH_BYTES
+						+ RXE_IETH_BYTES,
+		}
+	},
 };
-- 
2.30.2



* [PATCH for-next 08/13] RDMA/rxe: Support alloc/dealloc xrcd
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Implement code to support ibv_alloc_xrcd and ibv_dealloc_xrcd verbs.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
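Note for reviewers: with this patch an XRCD can be exercised from user
space through the usual libibverbs path; an illustrative, untested sketch
(ctx is an open device context):

	struct ibv_xrcd_init_attr attr = {
		.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS,
		.fd = -1,		/* -1: XRCD not shared through a file */
		.oflags = O_CREAT,
	};
	struct ibv_xrcd *xrcd = ibv_open_xrcd(ctx, &attr);

	/* ... create XRC SRQs/QPs against xrcd ... */

	ibv_close_xrcd(xrcd);
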
 drivers/infiniband/sw/rxe/rxe.c       | 39 ++++++++++++++++-----------
 drivers/infiniband/sw/rxe/rxe_param.h |  1 +
 drivers/infiniband/sw/rxe/rxe_pool.c  |  6 +++++
 drivers/infiniband/sw/rxe/rxe_pool.h  |  1 +
 drivers/infiniband/sw/rxe/rxe_verbs.c | 19 +++++++++++++
 drivers/infiniband/sw/rxe/rxe_verbs.h | 12 +++++++++
 6 files changed, 63 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 8e0f9c489cab..fbbb3d6f172b 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -24,6 +24,7 @@ void rxe_dealloc(struct ib_device *ib_dev)
 
 	rxe_pool_cleanup(&rxe->uc_pool);
 	rxe_pool_cleanup(&rxe->pd_pool);
+	rxe_pool_cleanup(&rxe->xrcd_pool);
 	rxe_pool_cleanup(&rxe->ah_pool);
 	rxe_pool_cleanup(&rxe->srq_pool);
 	rxe_pool_cleanup(&rxe->qp_pool);
@@ -74,6 +75,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
 			rxe->ndev->dev_addr);
 
 	rxe->max_ucontext			= RXE_MAX_UCONTEXT;
+	rxe->max_xrcd				= RXE_MAX_XRCD;
 }
 
 /* initialize port attributes */
@@ -130,62 +132,69 @@ static int rxe_init_pools(struct rxe_dev *rxe)
 	if (err)
 		goto err2;
 
+	err = rxe_pool_init(rxe, &rxe->xrcd_pool, RXE_TYPE_XRCD,
+			    rxe->max_xrcd);
+	if (err)
+		goto err3;
+
 	err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH,
 			    rxe->attr.max_ah);
 	if (err)
-		goto err3;
+		goto err4;
 
 	err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ,
 			    rxe->attr.max_srq);
 	if (err)
-		goto err4;
+		goto err5;
 
 	err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP,
 			    rxe->attr.max_qp);
 	if (err)
-		goto err5;
+		goto err6;
 
 	err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ,
 			    rxe->attr.max_cq);
 	if (err)
-		goto err6;
+		goto err7;
 
 	err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR,
 			    rxe->attr.max_mr);
 	if (err)
-		goto err7;
+		goto err8;
 
 	err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW,
 			    rxe->attr.max_mw);
 	if (err)
-		goto err8;
+		goto err9;
 
 	err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP,
 			    rxe->attr.max_mcast_grp);
 	if (err)
-		goto err9;
+		goto err10;
 
 	err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM,
 			    rxe->attr.max_total_mcast_qp_attach);
 	if (err)
-		goto err10;
+		goto err11;
 
 	return 0;
 
-err10:
+err11:
 	rxe_pool_cleanup(&rxe->mc_grp_pool);
-err9:
+err10:
 	rxe_pool_cleanup(&rxe->mw_pool);
-err8:
+err9:
 	rxe_pool_cleanup(&rxe->mr_pool);
-err7:
+err8:
 	rxe_pool_cleanup(&rxe->cq_pool);
-err6:
+err7:
 	rxe_pool_cleanup(&rxe->qp_pool);
-err5:
+err6:
 	rxe_pool_cleanup(&rxe->srq_pool);
-err4:
+err5:
 	rxe_pool_cleanup(&rxe->ah_pool);
+err4:
+	rxe_pool_cleanup(&rxe->xrcd_pool);
 err3:
 	rxe_pool_cleanup(&rxe->pd_pool);
 err2:
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index ec5c6331bee8..b843be4cc25a 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -79,6 +79,7 @@ enum rxe_device_param {
 	RXE_LOCAL_CA_ACK_DELAY		= 15,
 
 	RXE_MAX_UCONTEXT		= 512,
+	RXE_MAX_XRCD			= 512,
 
 	RXE_NUM_PORT			= 1,
 
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 342f090152d1..76caef5790b0 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -22,6 +22,12 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 		.elem_offset	= offsetof(struct rxe_pd, pelem),
 		.flags		= RXE_POOL_NO_ALLOC,
 	},
+	[RXE_TYPE_XRCD] = {
+		.name		= "rxe-xrcd",
+		.size		= sizeof(struct rxe_xrcd),
+		.elem_offset	= offsetof(struct rxe_xrcd, pelem),
+		.flags		= RXE_POOL_NO_ALLOC,
+	},
 	[RXE_TYPE_AH] = {
 		.name		= "rxe-ah",
 		.size		= sizeof(struct rxe_ah),
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 1feca1bffced..1475b9374315 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -19,6 +19,7 @@ enum rxe_pool_flags {
 enum rxe_elem_type {
 	RXE_TYPE_UC,
 	RXE_TYPE_PD,
+	RXE_TYPE_XRCD,
 	RXE_TYPE_AH,
 	RXE_TYPE_SRQ,
 	RXE_TYPE_QP,
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 7ff98a60ddcd..b4b993f1ce92 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -156,6 +156,22 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	return 0;
 }
 
+static int rxe_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
+{
+	struct rxe_dev *rxe = to_rdev(ibxrcd->device);
+	struct rxe_xrcd *xrcd = to_rxrcd(ibxrcd);
+
+	return rxe_add_to_pool(&rxe->xrcd_pool, xrcd);
+}
+
+static int rxe_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
+{
+	struct rxe_xrcd *xrcd = to_rxrcd(ibxrcd);
+
+	rxe_drop_ref(xrcd);
+	return 0;
+}
+
 static int rxe_create_ah(struct ib_ah *ibah,
 			 struct rdma_ah_init_attr *init_attr,
 			 struct ib_udata *udata)
@@ -1078,6 +1094,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.alloc_mw = rxe_alloc_mw,
 	.alloc_pd = rxe_alloc_pd,
 	.alloc_ucontext = rxe_alloc_ucontext,
+	.alloc_xrcd = rxe_alloc_xrcd,
 	.attach_mcast = rxe_attach_mcast,
 	.create_ah = rxe_create_ah,
 	.create_cq = rxe_create_cq,
@@ -1088,6 +1105,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.dealloc_mw = rxe_dealloc_mw,
 	.dealloc_pd = rxe_dealloc_pd,
 	.dealloc_ucontext = rxe_dealloc_ucontext,
+	.dealloc_xrcd = rxe_dealloc_xrcd,
 	.dereg_mr = rxe_dereg_mr,
 	.destroy_ah = rxe_destroy_ah,
 	.destroy_cq = rxe_destroy_cq,
@@ -1128,6 +1146,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
 	INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw),
+	INIT_RDMA_OBJ_SIZE(ib_xrcd, rxe_xrcd, ibxrcd),
 };
 
 int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 0a433f4c0222..5b75de74a992 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -43,6 +43,11 @@ struct rxe_pd {
 	struct rxe_pool_entry	pelem;
 };
 
+struct rxe_xrcd {
+	struct ib_xrcd		ibxrcd;
+	struct rxe_pool_entry	pelem;
+};
+
 struct rxe_ah {
 	struct ib_ah		ibah;
 	struct rxe_pool_entry	pelem;
@@ -384,6 +389,7 @@ struct rxe_dev {
 	struct ib_device	ib_dev;
 	struct ib_device_attr	attr;
 	int			max_ucontext;
+	int			max_xrcd;
 	int			max_inline_data;
 	struct mutex	usdev_lock;
 
@@ -393,6 +399,7 @@ struct rxe_dev {
 
 	struct rxe_pool		uc_pool;
 	struct rxe_pool		pd_pool;
+	struct rxe_pool		xrcd_pool;
 	struct rxe_pool		ah_pool;
 	struct rxe_pool		srq_pool;
 	struct rxe_pool		qp_pool;
@@ -434,6 +441,11 @@ static inline struct rxe_pd *to_rpd(struct ib_pd *pd)
 	return pd ? container_of(pd, struct rxe_pd, ibpd) : NULL;
 }
 
+static inline struct rxe_xrcd *to_rxrcd(struct ib_xrcd *xrcd)
+{
+	return xrcd ? container_of(xrcd, struct rxe_xrcd, ibxrcd) : NULL;
+}
+
 static inline struct rxe_ah *to_rah(struct ib_ah *ah)
 {
 	return ah ? container_of(ah, struct rxe_ah, ibah) : NULL;
-- 
2.30.2



* [PATCH for-next 09/13] RDMA/rxe: Extend SRQs to support extensions
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Extend rxe_create_srq() to support basic and XRC SRQs. Drop srq->srq_num
in favor of the srq_num field maintained by rdma-core in
srq->ibsrq.ext.xrc, and add extractors for the SRQ type, CQ and XRCD.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
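Note for reviewers: an illustrative, untested user space sketch of the XRC
case (pd, xrcd and cq assumed to exist):

	struct ibv_srq_init_attr_ex attr = {
		.attr		= { .max_wr = 64, .max_sge = 1 },
		.srq_type	= IBV_SRQT_XRC,
		.pd		= pd,
		.xrcd		= xrcd,
		.cq		= cq,
		.comp_mask	= IBV_SRQ_INIT_ATTR_TYPE |
				  IBV_SRQ_INIT_ATTR_PD |
				  IBV_SRQ_INIT_ATTR_XRCD |
				  IBV_SRQ_INIT_ATTR_CQ,
	};
	struct ibv_srq *srq = ibv_create_srq_ex(ctx, &attr);
	uint32_t srq_num;

	/* the number remote requesters place in the XRCETH */
	ibv_get_srq_num(srq, &srq_num);
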
 drivers/infiniband/sw/rxe/rxe_loc.h   |  5 +-
 drivers/infiniband/sw/rxe/rxe_srq.c   | 71 ++++++++++++++-------------
 drivers/infiniband/sw/rxe/rxe_verbs.c | 10 ++--
 drivers/infiniband/sw/rxe/rxe_verbs.h | 22 ++++++++-
 4 files changed, 66 insertions(+), 42 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index b4d45c592bd7..eac56e0c64ba 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -161,15 +161,12 @@ void retransmit_timer(struct timer_list *t);
 void rnr_nak_timer(struct timer_list *t);
 
 /* rxe_srq.c */
-#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT)
-
+int rxe_srq_chk_init_attr(struct rxe_dev *rxe, struct ib_srq_init_attr *init);
 int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
 		     struct ib_srq_attr *attr, enum ib_srq_attr_mask mask);
-
 int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
 		      struct ib_srq_init_attr *init, struct ib_udata *udata,
 		      struct rxe_create_srq_resp __user *uresp);
-
 int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
 		      struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
 		      struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata);
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index a9e7817e2732..edbfda0cc242 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -9,6 +9,32 @@
 #include "rxe_loc.h"
 #include "rxe_queue.h"
 
+int rxe_srq_chk_init_attr(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
+{
+	switch (init->srq_type) {
+	case IB_SRQT_BASIC:
+	case IB_SRQT_XRC:
+		break;
+	case IB_SRQT_TM:
+		pr_warn("Tag matching SRQ not supported\n");
+		return -EOPNOTSUPP;
+	default:
+		pr_warn("Unexpected SRQ type (%d)\n", init->srq_type);
+		return -EINVAL;
+	}
+
+	if (init->attr.max_sge > rxe->attr.max_srq_sge) {
+		pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
+			init->attr.max_sge, rxe->attr.max_srq_sge);
+		return -EINVAL;
+	}
+
+	if (init->attr.max_sge < RXE_MIN_SRQ_SGE)
+		init->attr.max_sge = RXE_MIN_SRQ_SGE;
+
+	return rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_MAX_WR);
+}
+
 int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
 		     struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
 {
@@ -48,23 +74,12 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
 
 		if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) {
 			pr_warn("srq_limit (%d) > cur limit(%d)\n",
-				attr->srq_limit,
-				 srq->rq.queue->buf->index_mask);
+					attr->srq_limit,
+					srq->rq.queue->buf->index_mask);
 			goto err1;
 		}
 	}
 
-	if (mask == IB_SRQ_INIT_MASK) {
-		if (attr->max_sge > rxe->attr.max_srq_sge) {
-			pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
-				attr->max_sge, rxe->attr.max_srq_sge);
-			goto err1;
-		}
-
-		if (attr->max_sge < RXE_MIN_SRQ_SGE)
-			attr->max_sge = RXE_MIN_SRQ_SGE;
-	}
-
 	return 0;
 
 err1:
@@ -78,24 +93,22 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
 	int err;
 	int srq_wqe_size;
 	struct rxe_queue *q;
-	enum queue_type type;
 
-	srq->ibsrq.event_handler	= init->event_handler;
-	srq->ibsrq.srq_context		= init->srq_context;
-	srq->limit		= init->attr.srq_limit;
-	srq->srq_num		= srq->pelem.index;
-	srq->rq.max_wr		= init->attr.max_wr;
-	srq->rq.max_sge		= init->attr.max_sge;
-	srq->rq.is_user		= srq->is_user;
+	srq->limit = init->attr.srq_limit;
+	srq->rq.max_wr = init->attr.max_wr;
+	srq->rq.max_sge = init->attr.max_sge;
+	srq->rq.is_user = srq->is_user;
 
-	srq_wqe_size		= rcv_wqe_size(srq->rq.max_sge);
+	if (init->srq_type == IB_SRQT_XRC)
+		srq->ibsrq.ext.xrc.srq_num = srq->pelem.index;
+
+	srq_wqe_size = rcv_wqe_size(srq->rq.max_sge);
 
 	spin_lock_init(&srq->rq.producer_lock);
 	spin_lock_init(&srq->rq.consumer_lock);
 
-	type = QUEUE_TYPE_FROM_CLIENT;
-	q = rxe_queue_init(rxe, &srq->rq.max_wr,
-			srq_wqe_size, type);
+	q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size,
+			   QUEUE_TYPE_FROM_CLIENT);
 	if (!q) {
 		pr_warn("unable to allocate queue for srq\n");
 		return -ENOMEM;
@@ -111,14 +124,6 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
 		return err;
 	}
 
-	if (uresp) {
-		if (copy_to_user(&uresp->srq_num, &srq->srq_num,
-				 sizeof(uresp->srq_num))) {
-			rxe_queue_cleanup(q);
-			return -EFAULT;
-		}
-	}
-
 	return 0;
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index b4b993f1ce92..fbd1e2d70682 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -307,9 +307,6 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
 	struct rxe_srq *srq = to_rsrq(ibsrq);
 	struct rxe_create_srq_resp __user *uresp = NULL;
 
-	if (init->srq_type != IB_SRQT_BASIC)
-		return -EOPNOTSUPP;
-
 	if (udata) {
 		if (udata->outlen < sizeof(*uresp))
 			return -EINVAL;
@@ -319,7 +316,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
 		srq->is_user = false;
 	}
 
-	err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
+	err = rxe_srq_chk_init_attr(rxe, init);
 	if (err)
 		goto err_out;
 
@@ -327,6 +324,8 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
 	if (err)
 		goto err_out;
 
+	rxe_add_index(srq);
+
 	err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
 	if (err)
 		goto err_drop_srq_ref;
@@ -334,6 +333,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
 	return 0;
 
 err_drop_srq_ref:
+	rxe_drop_index(srq);
 	rxe_drop_ref(srq);
 err_out:
 	return err;
@@ -391,7 +391,9 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 	if (srq->rq.queue)
 		rxe_queue_cleanup(srq->rq.queue);
 
+	rxe_drop_index(srq);
 	rxe_drop_ref(srq);
+
 	return 0;
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 5b75de74a992..52599f398ddd 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -104,7 +104,6 @@ struct rxe_srq {
 	struct ib_srq		ibsrq;
 	struct rxe_pool_entry	pelem;
 	struct rxe_rq		rq;
-	u32			srq_num;
 	bool			is_user;
 
 	int			limit;
@@ -542,11 +541,32 @@ static inline enum ib_qp_type rxe_qp_type(struct rxe_qp *qp)
 	return qp->ibqp.qp_type;
 }
 
+/* SRQ extractors */
+static inline struct rxe_cq *rxe_srq_cq(struct rxe_srq *srq)
+{
+	return to_rcq(srq->ibsrq.ext.cq);
+}
+
+static inline int rxe_srq_num(struct rxe_srq *srq)
+{
+	return srq->ibsrq.ext.xrc.srq_num;
+}
+
 static inline struct rxe_pd *rxe_srq_pd(struct rxe_srq *srq)
 {
 	return to_rpd(srq->ibsrq.pd);
 }
 
+static inline enum ib_srq_type rxe_srq_type(struct rxe_srq *srq)
+{
+	return srq->ibsrq.srq_type;
+}
+
+static inline struct rxe_xrcd *rxe_srq_xrcd(struct rxe_srq *srq)
+{
+	return to_rxrcd(srq->ibsrq.ext.xrc.xrcd);
+}
+
 int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
 
 void rxe_mc_cleanup(struct rxe_pool_entry *arg);
-- 
2.30.2



* [PATCH for-next 10/13] RDMA/rxe: Compute next opcode for XRC
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Extend rxe_req.c to compute next opcodes for XRC work requests.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
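Note: fits means the remaining payload fits in one MTU. For a message
longer than one MTU the requester walks the usual FIRST/MIDDLE/LAST
chain; e.g. an IB_WR_SEND_WITH_IMM a bit over two MTUs long produces:

	packet 1: fits = 0 -> IB_OPCODE_XRC_SEND_FIRST
	packet 2: fits = 0 -> IB_OPCODE_XRC_SEND_MIDDLE
	packet 3: fits = 1 -> IB_OPCODE_XRC_SEND_LAST_WITH_IMMEDIATE
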
 drivers/infiniband/sw/rxe/rxe_req.c | 75 +++++++++++++++++++++++++++--
 1 file changed, 72 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 3eee4d8dbe48..b6f6614a3f32 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -240,9 +240,75 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
 		else
 			return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
 				IB_OPCODE_RC_SEND_FIRST;
-	case IB_WR_REG_MR:
-	case IB_WR_LOCAL_INV:
-		return opcode;
+	}
+
+	return -EINVAL;
+}
+
+static int next_opcode_xrc(struct rxe_qp *qp, u32 opcode, int fits)
+{
+	switch (opcode) {
+	case IB_WR_RDMA_WRITE:
+		if (qp->req.opcode == IB_OPCODE_XRC_RDMA_WRITE_FIRST ||
+		    qp->req.opcode == IB_OPCODE_XRC_RDMA_WRITE_MIDDLE)
+			return fits ?
+				IB_OPCODE_XRC_RDMA_WRITE_LAST :
+				IB_OPCODE_XRC_RDMA_WRITE_MIDDLE;
+		else
+			return fits ?
+				IB_OPCODE_XRC_RDMA_WRITE_ONLY :
+				IB_OPCODE_XRC_RDMA_WRITE_FIRST;
+
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		if (qp->req.opcode == IB_OPCODE_XRC_RDMA_WRITE_FIRST ||
+		    qp->req.opcode == IB_OPCODE_XRC_RDMA_WRITE_MIDDLE)
+			return fits ?
+				IB_OPCODE_XRC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
+				IB_OPCODE_XRC_RDMA_WRITE_MIDDLE;
+		else
+			return fits ?
+				IB_OPCODE_XRC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
+				IB_OPCODE_XRC_RDMA_WRITE_FIRST;
+
+	case IB_WR_SEND:
+		if (qp->req.opcode == IB_OPCODE_XRC_SEND_FIRST ||
+		    qp->req.opcode == IB_OPCODE_XRC_SEND_MIDDLE)
+			return fits ?
+				IB_OPCODE_XRC_SEND_LAST :
+				IB_OPCODE_XRC_SEND_MIDDLE;
+		else
+			return fits ?
+				IB_OPCODE_XRC_SEND_ONLY :
+				IB_OPCODE_XRC_SEND_FIRST;
+
+	case IB_WR_SEND_WITH_IMM:
+		if (qp->req.opcode == IB_OPCODE_XRC_SEND_FIRST ||
+		    qp->req.opcode == IB_OPCODE_XRC_SEND_MIDDLE)
+			return fits ?
+				IB_OPCODE_XRC_SEND_LAST_WITH_IMMEDIATE :
+				IB_OPCODE_XRC_SEND_MIDDLE;
+		else
+			return fits ?
+				IB_OPCODE_XRC_SEND_ONLY_WITH_IMMEDIATE :
+				IB_OPCODE_XRC_SEND_FIRST;
+
+	case IB_WR_RDMA_READ:
+		return IB_OPCODE_XRC_RDMA_READ_REQUEST;
+
+	case IB_WR_ATOMIC_CMP_AND_SWP:
+		return IB_OPCODE_XRC_COMPARE_SWAP;
+
+	case IB_WR_ATOMIC_FETCH_AND_ADD:
+		return IB_OPCODE_XRC_FETCH_ADD;
+
+	case IB_WR_SEND_WITH_INV:
+		if (qp->req.opcode == IB_OPCODE_XRC_SEND_FIRST ||
+		    qp->req.opcode == IB_OPCODE_XRC_SEND_MIDDLE)
+			return fits ? IB_OPCODE_XRC_SEND_LAST_WITH_INVALIDATE :
+				IB_OPCODE_XRC_SEND_MIDDLE;
+		else
+			return fits ? IB_OPCODE_XRC_SEND_ONLY_WITH_INVALIDATE :
+				IB_OPCODE_XRC_SEND_FIRST;
 	}
 
 	return -EINVAL;
@@ -323,6 +389,9 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 		}
 		break;
 
+	case IB_QPT_XRC_INI:
+		return next_opcode_xrc(qp, opcode, fits);
+
 	default:
 		break;
 	}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH for-next 11/13] RDMA/rxe: Extend rxe_verbs and rxe_qp to support XRC
  2021-07-29 22:49 [PATCH for-next 00/13] RDMA:rxe: Implement XRC for rxe Bob Pearson
                   ` (9 preceding siblings ...)
  2021-07-29 22:49 ` [PATCH for-next 10/13] RDMA/rxe: Compute next opcode for XRC Bob Pearson
@ 2021-07-29 22:49 ` Bob Pearson
  2021-07-29 22:49 ` [PATCH for-next 12/13] RDMA/rxe: Extend rxe send XRC packets Bob Pearson
  2021-07-29 22:49 ` [PATCH for-next 13/13] RDMA/rxe: Enable receiving " Bob Pearson
  12 siblings, 0 replies; 15+ messages in thread
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Extend rxe_verbs.c and rxe_qp.c to support XRC QP types.
This patch supports ib_create_qp, ib_query_qp, ib_modify_qp and
ib_destroy_qp for IB_QPT_XRC_INI and IB_QPT_XRC_TGT QP types.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
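The split below: an XRC initiator QP has only a send queue, so only
send_cq is required, while an XRC target QP has neither a send nor a
receive queue and receives arrive through XRC SRQs instead. A sketch
of the ib_qp_init_attr each type would need under this patch (scq and
xrcd are placeholder objects, assumed to already exist):

	struct ib_qp_init_attr ini = {
		.qp_type	= IB_QPT_XRC_INI,
		.send_cq	= scq,	/* required; no recv_cq */
		.sq_sig_type	= IB_SIGNAL_REQ_WR,
		.cap		= { .max_send_wr = 16, .max_send_sge = 1 },
	};

	struct ib_qp_init_attr tgt = {
		.qp_type	= IB_QPT_XRC_TGT,
		.xrcd		= xrcd,	/* no SQ, RQ or CQs needed */
	};
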
 drivers/infiniband/sw/rxe/rxe_loc.h   |   4 +-
 drivers/infiniband/sw/rxe/rxe_qp.c    | 235 +++++++++++++++-----------
 drivers/infiniband/sw/rxe/rxe_verbs.c |   8 +-
 3 files changed, 141 insertions(+), 106 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index eac56e0c64ba..790884a5e9d5 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -115,7 +115,7 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp,
 		     struct rxe_create_qp_resp __user *uresp,
 		     struct ib_udata *udata);
 
-int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init);
+void rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init);
 
 int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
 		    struct ib_qp_attr *attr, int mask);
@@ -123,7 +123,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
 int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr,
 		     int mask, struct ib_udata *udata);
 
-int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask);
+void rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask);
 
 void rxe_qp_error(struct rxe_qp *qp);
 
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 177edf684f2a..3ff9c832047c 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -68,16 +68,23 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 	case IB_QPT_UD:
+		if (!init->recv_cq || !init->send_cq) {
+			pr_warn("missing cq\n");
+			goto err1;
+		}
+		break;
+	case IB_QPT_XRC_INI:
+		if (!init->send_cq) {
+			pr_warn("missing send cq\n");
+			goto err1;
+		}
+		break;
+	case IB_QPT_XRC_TGT:
 		break;
 	default:
 		return -EOPNOTSUPP;
 	}
 
-	if (!init->recv_cq || !init->send_cq) {
-		pr_warn("missing cq\n");
-		goto err1;
-	}
-
 	if (rxe_qp_chk_cap(rxe, cap, !!init->srq))
 		goto err1;
 
@@ -199,48 +206,30 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
 	atomic_set(&qp->skb_out, 0);
 }
 
-static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
-			   struct ib_qp_init_attr *init, struct ib_udata *udata,
-			   struct rxe_create_qp_resp __user *uresp)
+static int rxe_qp_init_sq(struct rxe_dev *rxe, struct rxe_qp *qp,
+			  struct ib_qp_init_attr *init, struct ib_udata *udata,
+			  struct rxe_create_qp_resp __user *uresp)
 {
-	int err;
 	int wqe_size;
-	enum queue_type type;
-
-	err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
-	if (err < 0)
-		return err;
-	qp->sk->sk->sk_user_data = qp;
-
-	/* pick a source UDP port number for this QP based on
-	 * the source QPN. this spreads traffic for different QPs
-	 * across different NIC RX queues (while using a single
-	 * flow for a given QP to maintain packet order).
-	 * the port number must be in the Dynamic Ports range
-	 * (0xc000 - 0xffff).
-	 */
-	qp->src_port = RXE_ROCE_V2_SPORT +
-		(hash_32_generic(rxe_qp_num(qp), 14) & 0x3fff);
-	qp->sq.max_wr		= init->cap.max_send_wr;
+	int err;
 
-	/* These caps are limited by rxe_qp_chk_cap() done by the caller */
-	wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge),
+	qp->sq.max_wr = init->cap.max_send_wr;
+	wqe_size = max_t(int, init->cap.max_send_sge*sizeof(struct ib_sge),
 			 init->cap.max_inline_data);
-	qp->sq.max_sge = init->cap.max_send_sge =
-		wqe_size / sizeof(struct ib_sge);
+	qp->sq.max_sge = init->cap.max_send_sge = wqe_size/
+						sizeof(struct ib_sge);
 	qp->sq.max_inline = init->cap.max_inline_data = wqe_size;
+
 	wqe_size += sizeof(struct rxe_send_wqe);
 
-	type = QUEUE_TYPE_FROM_CLIENT;
 	qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr,
-				wqe_size, type);
+				      wqe_size, QUEUE_TYPE_FROM_CLIENT);
 	if (!qp->sq.queue)
 		return -ENOMEM;
 
 	err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata,
 			   qp->sq.queue->buf, qp->sq.queue->buf_size,
 			   &qp->sq.queue->ip);
-
 	if (err) {
 		vfree(qp->sq.queue->buf);
 		kfree(qp->sq.queue);
@@ -248,14 +237,38 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
 		return err;
 	}
 
-		qp->req.wqe_index = producer_index(qp->sq.queue,
-					QUEUE_TYPE_FROM_CLIENT);
+	qp->req.wqe_index = producer_index(qp->sq.queue,
+					   QUEUE_TYPE_FROM_CLIENT);
 
-	qp->req.state		= QP_STATE_RESET;
-	qp->req.opcode		= -1;
-	qp->comp.opcode		= -1;
+	qp->req.state = QP_STATE_RESET;
+	qp->req.opcode = -1;
+	qp->comp.opcode = -1;
 
 	spin_lock_init(&qp->sq.sq_lock);
+
+	return 0;
+}
+
+static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
+			   struct ib_qp_init_attr *init, struct ib_udata *udata,
+			   struct rxe_create_qp_resp __user *uresp)
+{
+	int err;
+
+	/* pick a source UDP port number for this QP based on
+	 * the source QPN. this spreads traffic for different QPs
+	 * across different NIC RX queues (while using a single
+	 * flow for a given QP to maintain packet order).
+	 * the port number must be in the Dynamic Ports range
+	 * (0xc000 - 0xffff).
+	 */
+	qp->src_port = RXE_ROCE_V2_SPORT +
+		(hash_32_generic(rxe_qp_num(qp), 14) & 0x3fff);
+
+	err = rxe_qp_init_sq(rxe, qp, init, udata, uresp);
+	if (err)
+		return err;
+
 	skb_queue_head_init(&qp->req_pkts);
 
 	rxe_init_task(rxe, &qp->req.task, qp,
@@ -264,10 +277,51 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
 		      rxe_completer, "comp");
 
 	qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
-	if (init->qp_type == IB_QPT_RC) {
+
+	if (init->qp_type == IB_QPT_RC ||
+	    init->qp_type == IB_QPT_XRC_INI) {
 		timer_setup(&qp->rnr_nak_timer, rnr_nak_timer, 0);
 		timer_setup(&qp->retrans_timer, retransmit_timer, 0);
 	}
+
+	return 0;
+}
+
+static int rxe_qp_init_rq(struct rxe_dev *rxe, struct rxe_qp *qp,
+			  struct ib_qp_init_attr *init,
+			  struct ib_udata *udata,
+			  struct rxe_create_qp_resp __user *uresp)
+{
+	int wqe_size;
+	int err;
+
+	qp->rq.max_wr		= init->cap.max_recv_wr;
+	qp->rq.max_sge		= init->cap.max_recv_sge;
+
+	wqe_size = rcv_wqe_size(qp->rq.max_sge);
+
+	pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n",
+		 rxe_qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size);
+
+	qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
+				      wqe_size, QUEUE_TYPE_FROM_CLIENT);
+	if (!qp->rq.queue)
+		return -ENOMEM;
+
+	err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata,
+			   qp->rq.queue->buf, qp->rq.queue->buf_size,
+			   &qp->rq.queue->ip);
+	if (err) {
+		vfree(qp->rq.queue->buf);
+		kfree(qp->rq.queue);
+		qp->rq.queue = NULL;
+		return err;
+	}
+
+	spin_lock_init(&qp->rq.producer_lock);
+	spin_lock_init(&qp->rq.consumer_lock);
+	qp->rq.is_user = qp->is_user;
+
 	return 0;
 }
 
@@ -277,48 +331,21 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
 			    struct rxe_create_qp_resp __user *uresp)
 {
 	int err;
-	int wqe_size;
-	enum queue_type type;
 
-	if (!rxe_qp_srq(qp)) {
-		qp->rq.max_wr		= init->cap.max_recv_wr;
-		qp->rq.max_sge		= init->cap.max_recv_sge;
-
-		wqe_size = rcv_wqe_size(qp->rq.max_sge);
-
-		pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n",
-			 rxe_qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size);
-
-		type = QUEUE_TYPE_FROM_CLIENT;
-		qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
-					wqe_size, type);
-		if (!qp->rq.queue)
-			return -ENOMEM;
-
-		err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata,
-				   qp->rq.queue->buf, qp->rq.queue->buf_size,
-				   &qp->rq.queue->ip);
-		if (err) {
-			vfree(qp->rq.queue->buf);
-			kfree(qp->rq.queue);
-			qp->rq.queue = NULL;
+	if (!rxe_qp_srq(qp) && rxe_qp_type(qp) != IB_QPT_XRC_TGT) {
+		err = rxe_qp_init_rq(rxe, qp, init, udata, uresp);
+		if (err)
 			return err;
-		}
 	}
 
-	spin_lock_init(&qp->rq.producer_lock);
-	spin_lock_init(&qp->rq.consumer_lock);
-
-	qp->rq.is_user = qp->is_user;
-
 	skb_queue_head_init(&qp->resp_pkts);
 
 	rxe_init_task(rxe, &qp->resp.task, qp,
 		      rxe_responder, "resp");
 
-	qp->resp.opcode		= OPCODE_NONE;
-	qp->resp.msn		= 0;
-	qp->resp.state		= QP_STATE_RESET;
+	qp->resp.opcode = OPCODE_NONE;
+	qp->resp.msn = 0;
+	qp->resp.state = QP_STATE_RESET;
 
 	return 0;
 }
@@ -331,15 +358,24 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp,
 {
 	int err;
 
+	err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
+	if (err < 0)
+		return err;
+	qp->sk->sk->sk_user_data = qp;
+
 	rxe_qp_init_misc(rxe, qp, init);
 
-	err = rxe_qp_init_req(rxe, qp, init, udata, uresp);
-	if (err)
-		goto err1;
+	if (rxe_qp_type(qp) != IB_QPT_XRC_TGT) {
+		err = rxe_qp_init_req(rxe, qp, init, udata, uresp);
+		if (err)
+			goto err1;
+	}
 
-	err = rxe_qp_init_resp(rxe, qp, init, udata, uresp);
-	if (err)
-		goto err2;
+	if (rxe_qp_type(qp) != IB_QPT_XRC_INI) {
+		err = rxe_qp_init_resp(rxe, qp, init, udata, uresp);
+		if (err)
+			goto err2;
+	}
 
 	qp->attr.qp_state = IB_QPS_RESET;
 	qp->valid = 1;
@@ -352,30 +388,21 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp,
 	return err;
 }
 
-/* called by the query qp verb */
-int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init)
+void rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init)
 {
-	init->event_handler		= qp->ibqp.event_handler;
-	init->qp_context		= qp->ibqp.qp_context;
-	init->send_cq			= qp->ibqp.send_cq;
-	init->recv_cq			= qp->ibqp.recv_cq;
-	init->srq			= qp->ibqp.srq;
-
 	init->cap.max_send_wr		= qp->sq.max_wr;
 	init->cap.max_send_sge		= qp->sq.max_sge;
 	init->cap.max_inline_data	= qp->sq.max_inline;
 
 	if (!rxe_qp_srq(qp)) {
-		init->cap.max_recv_wr		= qp->rq.max_wr;
-		init->cap.max_recv_sge		= qp->rq.max_sge;
+		init->cap.max_recv_wr = qp->rq.max_wr;
+		init->cap.max_recv_sge = qp->rq.max_sge;
+	} else {
+		init->cap.max_recv_wr = 0;
+		init->cap.max_recv_sge = 0;
 	}
 
 	init->sq_sig_type		= qp->sq_sig_type;
-
-	init->qp_type			= rxe_qp_type(qp);
-	init->port_num			= 1;
-
-	return 0;
 }
 
 /* called by the modify qp verb, this routine checks all the parameters before
@@ -517,7 +544,8 @@ static void rxe_qp_reset(struct rxe_qp *qp)
 	rxe_enable_task(&qp->resp.task);
 
 	if (qp->sq.queue) {
-		if (rxe_qp_type(qp) == IB_QPT_RC)
+		if (rxe_qp_type(qp) == IB_QPT_RC ||
+		    rxe_qp_type(qp) == IB_QPT_XRC_INI)
 			rxe_enable_task(&qp->comp.task);
 
 		rxe_enable_task(&qp->req.task);
@@ -530,7 +558,8 @@ static void rxe_qp_drain(struct rxe_qp *qp)
 	if (qp->sq.queue) {
 		if (qp->req.state != QP_STATE_DRAINED) {
 			qp->req.state = QP_STATE_DRAIN;
-			if (rxe_qp_type(qp) == IB_QPT_RC)
+			if (rxe_qp_type(qp) == IB_QPT_RC ||
+			    rxe_qp_type(qp) == IB_QPT_XRC_INI)
 				rxe_run_task(&qp->comp.task, 1);
 			else
 				__rxe_do_task(&qp->comp.task);
@@ -549,7 +578,8 @@ void rxe_qp_error(struct rxe_qp *qp)
 	/* drain work and packet queues */
 	rxe_run_task(&qp->resp.task, 1);
 
-	if (rxe_qp_type(qp) == IB_QPT_RC)
+	if (rxe_qp_type(qp) == IB_QPT_RC ||
+	    rxe_qp_type(qp) == IB_QPT_XRC_INI)
 		rxe_run_task(&qp->comp.task, 1);
 	else
 		__rxe_do_task(&qp->comp.task);
@@ -715,7 +745,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 }
 
 /* called by the query qp verb */
-int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
+void rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
 {
 	*attr = qp->attr;
 
@@ -744,10 +774,6 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
 	} else {
 		attr->sq_draining = 0;
 	}
-
-	pr_debug("attr->sq_draining = %d\n", attr->sq_draining);
-
-	return 0;
 }
 
 /* called by the destroy qp verb */
@@ -757,7 +783,8 @@ void rxe_qp_destroy(struct rxe_qp *qp)
 	qp->qp_timeout_jiffies = 0;
 	rxe_cleanup_task(&qp->resp.task);
 
-	if (rxe_qp_type(qp) == IB_QPT_RC) {
+	if (rxe_qp_type(qp) == IB_QPT_RC ||
+	    rxe_qp_type(qp) == IB_QPT_XRC_INI) {
 		del_timer_sync(&qp->retrans_timer);
 		del_timer_sync(&qp->rnr_nak_timer);
 	}
@@ -791,7 +818,9 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
 		qp->resp.mr = NULL;
 	}
 
-	if (rxe_qp_type(qp) == IB_QPT_RC)
+	if (rxe_qp_type(qp) == IB_QPT_RC ||
+	    rxe_qp_type(qp) == IB_QPT_XRC_INI ||
+	    rxe_qp_type(qp) == IB_QPT_XRC_TGT)
 		sk_dst_reset(qp->sk->sk);
 
 	free_rd_atomic_resources(qp);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index fbd1e2d70682..f5014a187411 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -426,7 +426,7 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
 				   struct ib_udata *udata)
 {
 	int err;
-	struct rxe_dev *rxe = to_rdev(ibpd->device);
+	struct rxe_dev *rxe;
 	struct rxe_qp *qp;
 	struct rxe_create_qp_resp __user *uresp = NULL;
 
@@ -436,6 +436,12 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
 		uresp = udata->outbuf;
 	}
 
+	/* at least one of PD or XRCD is valid */
+	if (ibpd)
+		rxe = to_rdev(ibpd->device);
+	else
+		rxe = to_rdev(init->xrcd->device);
+
 	if (init->create_flags)
 		return ERR_PTR(-EOPNOTSUPP);
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH for-next 12/13] RDMA/rxe: Extend rxe send XRC packets
  2021-07-29 22:49 [PATCH for-next 00/13] RDMA:rxe: Implement XRC for rxe Bob Pearson
                   ` (10 preceding siblings ...)
  2021-07-29 22:49 ` [PATCH for-next 11/13] RDMA/rxe: Extend rxe_verbs and rxe_qp to support XRC Bob Pearson
@ 2021-07-29 22:49 ` Bob Pearson
  2021-07-29 22:49 ` [PATCH for-next 13/13] RDMA/rxe: Enable receiving " Bob Pearson
  12 siblings, 0 replies; 15+ messages in thread
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Extend the rxe driver to support sending XRC packets. This patch
  - Expands lists of QP types to include IB_QPT_XRC_INI and
    IB_QPT_XRC_TGT as appropriate.
  - Fills in the XRCETH header in XRC packets.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
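For context, XRCETH is the 4-byte extended transport header that
names the remote XRC SRQ (a 24-bit SRQ number, upper byte reserved).
The xrceth_set_xrcsrq() call added below relies on the definitions
from patch 05 of this series; a sketch of the shape it assumes,
following the accessor style used for the other headers in rxe_hdr.h
(the RXE_XRCETH offset index is an assumption here):

	struct rxe_xrceth {
		__be32		srqn;	/* 24-bit XRC SRQ number */
	};

	static inline void xrceth_set_xrcsrq(struct rxe_pkt_info *pkt,
					     u32 srqn)
	{
		struct rxe_xrceth *xrceth = (struct rxe_xrceth *)(pkt->hdr +
			rxe_opcode[pkt->opcode].offset[RXE_XRCETH]);

		xrceth->srqn = cpu_to_be32(srqn & 0x00ffffff);
	}
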
 drivers/infiniband/sw/rxe/rxe_av.c  |  4 +++-
 drivers/infiniband/sw/rxe/rxe_net.c | 14 ++++++++++----
 drivers/infiniband/sw/rxe/rxe_req.c | 13 ++++++++++---
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 46cd7e2d2806..05d4cb772dc6 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -108,7 +108,9 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
 		return NULL;
 
 	if (rxe_qp_type(pkt->qp) == IB_QPT_RC ||
-	    rxe_qp_type(pkt->qp) == IB_QPT_UC)
+	    rxe_qp_type(pkt->qp) == IB_QPT_UC ||
+	    rxe_qp_type(pkt->qp) == IB_QPT_XRC_INI ||
+	    rxe_qp_type(pkt->qp) == IB_QPT_XRC_TGT)
 		return &pkt->qp->pri_av;
 
 	if (!pkt->wqe)
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 6212e61d267b..b1353d456a4c 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -114,7 +114,9 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
 {
 	struct dst_entry *dst = NULL;
 
-	if (rxe_qp_type(qp) == IB_QPT_RC)
+	if (rxe_qp_type(qp) == IB_QPT_RC ||
+	    rxe_qp_type(qp) == IB_QPT_XRC_INI ||
+	    rxe_qp_type(qp) == IB_QPT_XRC_TGT)
 		dst = sk_dst_get(qp->sk->sk);
 
 	if (!dst || !dst_check(dst, qp->dst_cookie)) {
@@ -142,7 +144,9 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
 #endif
 		}
 
-		if (dst && (rxe_qp_type(qp) == IB_QPT_RC)) {
+		if (dst && (rxe_qp_type(qp) == IB_QPT_RC ||
+			    rxe_qp_type(qp) == IB_QPT_XRC_INI ||
+			    rxe_qp_type(qp) == IB_QPT_XRC_TGT)) {
 			dst_hold(dst);
 			sk_dst_set(qp->sk->sk, dst);
 		}
@@ -459,8 +463,10 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 		return err;
 	}
 
-	if ((rxe_qp_type(qp) != IB_QPT_RC) &&
-	    (pkt->mask & RXE_END_MASK)) {
+	if ((rxe_qp_type(qp) != IB_QPT_RC &&
+	     rxe_qp_type(qp) != IB_QPT_XRC_INI &&
+	     rxe_qp_type(qp) != IB_QPT_XRC_TGT) &&
+	     pkt->mask & RXE_END_MASK) {
 		pkt->wqe->state = wqe_state_done;
 		rxe_run_task(&qp->comp.task, 1);
 	}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index b6f6614a3f32..166d4aeef5e9 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -423,7 +423,9 @@ static inline int get_mtu(struct rxe_qp *qp)
 {
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 
-	if ((rxe_qp_type(qp) == IB_QPT_RC) || (rxe_qp_type(qp) == IB_QPT_UC))
+	if (rxe_qp_type(qp) == IB_QPT_RC ||
+	    rxe_qp_type(qp) == IB_QPT_UC ||
+	    rxe_qp_type(qp) == IB_QPT_XRC_INI)
 		return qp->mtu;
 
 	return rxe->port.mtu_cap;
@@ -487,6 +489,9 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
 		 ack_req, pkt->psn);
 
 	/* init optional headers */
+	if (pkt->mask & RXE_XRCETH_MASK)
+		xrceth_set_xrcsrq(pkt, ibwr->wr.xrc.srq_num);
+
 	if (pkt->mask & RXE_RETH_MASK) {
 		reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
 		reth_set_va(pkt, wqe->iova);
@@ -562,7 +567,8 @@ static void update_wqe_state(struct rxe_qp *qp,
 		struct rxe_pkt_info *pkt)
 {
 	if (pkt->mask & RXE_END_MASK) {
-		if (rxe_qp_type(qp) == IB_QPT_RC)
+		if (rxe_qp_type(qp) == IB_QPT_RC ||
+		    rxe_qp_type(qp) == IB_QPT_XRC_INI)
 			wqe->state = wqe_state_pending;
 	} else {
 		wqe->state = wqe_state_processing;
@@ -730,7 +736,8 @@ int rxe_requester(void *arg)
 			goto next_wqe;
 	}
 
-	if (unlikely(rxe_qp_type(qp) == IB_QPT_RC &&
+	if (unlikely((rxe_qp_type(qp) == IB_QPT_RC ||
+		      rxe_qp_type(qp) == IB_QPT_XRC_INI) &&
 		psn_compare(qp->req.psn, (qp->comp.psn +
 				RXE_MAX_UNACKED_PSNS)) > 0)) {
 		qp->req.wait_psn = 1;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH for-next 13/13] RDMA/rxe: Enable receiving XRC packets
  2021-07-29 22:49 [PATCH for-next 00/13] RDMA:rxe: Implement XRC for rxe Bob Pearson
                   ` (11 preceding siblings ...)
  2021-07-29 22:49 ` [PATCH for-next 12/13] RDMA/rxe: Extend rxe send XRC packets Bob Pearson
@ 2021-07-29 22:49 ` Bob Pearson
  12 siblings, 0 replies; 15+ messages in thread
From: Bob Pearson @ 2021-07-29 22:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Extend rxe to support receiving XRC traffic.
  - Extend lists of QP types where appropriate.
  - Add an error state for an invalid XRCETH header.
  - Add code to take the srq, pd and rcq from the XRC SRQ for XRC
    packets. Save pointers in the pkt info struct for later use and
    for dropping references.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
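Context for the check_resource() changes below: on an XRC target the
receive resources (SRQ, PD and receive CQ) belong to the XRC SRQ named
in the packet's XRCETH rather than to the QP, so they are looked up
per packet and cached in the pkt info struct. A sketch of how such an
SRQ would be created and its number obtained for senders to put in the
XRCETH (pd, cq and xrcd are placeholder objects, assumed to exist):

	struct ib_srq_init_attr attr = {
		.srq_type	= IB_SRQT_XRC,
		.ext.cq		= cq,		/* receive completions */
		.ext.xrc.xrcd	= xrcd,
		.attr		= { .max_wr = 64, .max_sge = 1 },
	};
	struct ib_srq *srq = ib_create_srq(pd, &attr);

	/* senders place this number in the XRCETH of each request */
	u32 srq_num = srq->ext.xrc.srq_num;
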
 drivers/infiniband/sw/rxe/rxe_av.c     |   3 +
 drivers/infiniband/sw/rxe/rxe_comp.c   |  27 +++-
 drivers/infiniband/sw/rxe/rxe_hdr.h    |   9 +-
 drivers/infiniband/sw/rxe/rxe_loc.h    |   3 +-
 drivers/infiniband/sw/rxe/rxe_mw.c     |   6 +-
 drivers/infiniband/sw/rxe/rxe_net.c    |   8 ++
 drivers/infiniband/sw/rxe/rxe_opcode.c |   1 -
 drivers/infiniband/sw/rxe/rxe_recv.c   |  22 ++-
 drivers/infiniband/sw/rxe/rxe_req.c    |   3 +-
 drivers/infiniband/sw/rxe/rxe_resp.c   | 191 ++++++++++++++++++++-----
 drivers/infiniband/sw/rxe/rxe_verbs.h  |  16 ++-
 11 files changed, 233 insertions(+), 56 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 05d4cb772dc6..673b65d415b8 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -122,9 +122,12 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
 		ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num);
 		if (!ah || ah->ah_num != ah_num ||
 		    rxe_ah_pd(ah) != rxe_qp_pd(pkt->qp)) {
+			if (ah)
+				rxe_drop_ref(ah);
 			pr_warn("Unable to find AH matching ah_num\n");
 			return NULL;
 		}
+		pkt->ah = ah;
 		return &ah->av;
 	}
 
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 4d62e5bdf820..987bd8e67fb6 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -242,6 +242,23 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 			return COMPST_ERROR;
 		}
 		break;
+
+	case IB_OPCODE_XRC_RDMA_READ_RESPONSE_FIRST:
+	case IB_OPCODE_XRC_RDMA_READ_RESPONSE_MIDDLE:
+		if (pkt->opcode != IB_OPCODE_XRC_RDMA_READ_RESPONSE_MIDDLE &&
+		    pkt->opcode != IB_OPCODE_XRC_RDMA_READ_RESPONSE_LAST) {
+			if ((pkt->psn == wqe->first_psn &&
+			     pkt->opcode ==
+			     IB_OPCODE_XRC_RDMA_READ_RESPONSE_FIRST) ||
+			    (wqe->first_psn == wqe->last_psn &&
+			     pkt->opcode ==
+			     IB_OPCODE_XRC_RDMA_READ_RESPONSE_ONLY))
+				break;
+
+			return COMPST_ERROR;
+		}
+		break;
+
 	default:
 		WARN_ON_ONCE(1);
 	}
@@ -251,6 +268,9 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST:
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY:
+	case IB_OPCODE_XRC_RDMA_READ_RESPONSE_FIRST:
+	case IB_OPCODE_XRC_RDMA_READ_RESPONSE_LAST:
+	case IB_OPCODE_XRC_RDMA_READ_RESPONSE_ONLY:
 		syn = aeth_syn(pkt);
 
 		if ((syn & AETH_TYPE_MASK) != AETH_ACK)
@@ -260,6 +280,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 		/* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH)
 		 */
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
+	case IB_OPCODE_XRC_RDMA_READ_RESPONSE_MIDDLE:
 		if (wqe->wr.opcode != IB_WR_RDMA_READ &&
 		    wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
 			wqe->status = IB_WC_FATAL_ERR;
@@ -269,6 +290,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 		return COMPST_READ;
 
 	case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE:
+	case IB_OPCODE_XRC_ATOMIC_ACKNOWLEDGE:
 		syn = aeth_syn(pkt);
 
 		if ((syn & AETH_TYPE_MASK) != AETH_ACK)
@@ -281,6 +303,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 		return COMPST_ATOMIC;
 
 	case IB_OPCODE_RC_ACKNOWLEDGE:
+	case IB_OPCODE_XRC_ACKNOWLEDGE:
 		syn = aeth_syn(pkt);
 		switch (syn & AETH_TYPE_MASK) {
 		case AETH_ACK:
@@ -570,8 +593,8 @@ int rxe_completer(void *arg)
 	state = COMPST_GET_ACK;
 
 	while (1) {
-		pr_debug("qp#%d state = %s\n", rxe_qp_num(qp),
-			 comp_state_name[state]);
+		pr_debug("qp#%d type %d state = %s\n", rxe_qp_num(qp),
+			 rxe_qp_type(qp), comp_state_name[state]);
 		switch (state) {
 		case COMPST_GET_ACK:
 			skb = skb_dequeue(&qp->resp_pkts);
diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
index 499807b11405..db6033f49697 100644
--- a/drivers/infiniband/sw/rxe/rxe_hdr.h
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -14,7 +14,14 @@
 struct rxe_pkt_info {
 	struct sk_buff		*skb;		/* back pointer to skb */
 	struct rxe_dev		*rxe;		/* device that owns packet */
-	struct rxe_qp		*qp;		/* qp that owns packet */
+
+	/* these are objects that need references dropped when pkt freed */
+	struct rxe_ah		*ah;
+	struct rxe_qp		*qp;
+	struct rxe_srq		*srq;
+
+	struct rxe_pd		*pd;
+	struct rxe_cq		*rcq;
 	struct rxe_send_wqe	*wqe;		/* send wqe */
 	u8			*hdr;		/* points to bth */
 	u32			mask;		/* useful info about pkt */
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 790884a5e9d5..2580273c1806 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -94,7 +94,8 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
 int rxe_dealloc_mw(struct ib_mw *ibmw);
 int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
 int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey);
-struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey);
+struct rxe_mw *rxe_lookup_mw(struct rxe_pd *pd, struct rxe_qp *qp,
+			     int access, u32 rkey);
 void rxe_mw_cleanup(struct rxe_pool_entry *arg);
 
 /* rxe_net.c */
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 17936a0b8320..0de65c89b7c5 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -305,10 +305,10 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey)
 	return ret;
 }
 
-struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
+struct rxe_mw *rxe_lookup_mw(struct rxe_pd *pd, struct rxe_qp *qp,
+			     int access, u32 rkey)
 {
-	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
-	struct rxe_pd *pd = rxe_qp_pd(qp);
+	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
 	struct rxe_mw *mw;
 	int index = rkey >> 8;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index b1353d456a4c..1597d5313af3 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -424,6 +424,14 @@ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 
 	new_pkt = kzalloc(sizeof(*new_pkt), GFP_ATOMIC);
 	memcpy(new_pkt, pkt, sizeof(*pkt));
+
+	/* don't keep the references */
+	new_pkt->ah = NULL;
+	new_pkt->qp = NULL;
+	new_pkt->srq = NULL;
+	new_pkt->pd = NULL;
+	new_pkt->rcq = NULL;
+
 	RXE_CB(skb)->pkt = new_pkt;
 	new_pkt->skb = skb;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index af8e05bc63b2..1685a29efaf7 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -88,7 +88,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
 		.name	= "IB_WR_RDMA_READ_WITH_INV",
 		.mask	= {
 			[IB_QPT_RC]	 = WR_READ_MASK,
-			/* TODO get rid of this no such thing for RoCE */
 		},
 	},
 	[IB_WR_LOCAL_INV]				= {
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index cf5ac6bba59c..7ac40857def3 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -12,10 +12,13 @@
 void rxe_free_pkt(struct rxe_pkt_info *pkt)
 {
 	struct sk_buff *skb = PKT_TO_SKB(pkt);
-	struct rxe_qp *qp = pkt->qp;
 
-	if (qp)
-		rxe_drop_ref(qp);
+	if (pkt->qp)
+		rxe_drop_ref(pkt->qp);
+	if (pkt->srq)
+		rxe_drop_ref(pkt->srq);
+	if (pkt->ah)
+		rxe_drop_ref(pkt->ah);
 
 	ib_device_put(&pkt->rxe->ib_dev);
 
@@ -37,13 +40,13 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 	switch (rxe_qp_type(qp)) {
 	case IB_QPT_RC:
 		if (unlikely(pkt_type != IB_OPCODE_RC)) {
-			pr_warn_ratelimited("bad qp type\n");
+			pr_warn_ratelimited("bad qp type for RC packet\n");
 			goto err1;
 		}
 		break;
 	case IB_QPT_UC:
 		if (unlikely(pkt_type != IB_OPCODE_UC)) {
-			pr_warn_ratelimited("bad qp type\n");
+			pr_warn_ratelimited("bad qp type for UC packet\n");
 			goto err1;
 		}
 		break;
@@ -51,7 +54,14 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
 		if (unlikely(pkt_type != IB_OPCODE_UD)) {
-			pr_warn_ratelimited("bad qp type\n");
+			pr_warn_ratelimited("bad qp type for UD packet\n");
+			goto err1;
+		}
+		break;
+	case IB_QPT_XRC_INI:
+	case IB_QPT_XRC_TGT:
+		if (unlikely(pkt_type != IB_OPCODE_XRC)) {
+			pr_warn_ratelimited("bad qp type for XRC packet\n");
 			goto err1;
 		}
 		break;
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 166d4aeef5e9..592101ea0461 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -451,6 +451,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
 	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
 
 	/* pkt->hdr, port_num and mask are initialized in ifc layer */
+	memset(pkt, 0, sizeof(*pkt));
 	pkt->rxe	= rxe;
 	pkt->opcode	= opcode;
 	pkt->qp		= qp;
@@ -507,7 +508,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
 	if (pkt->mask & RXE_ATMETH_MASK) {
 		atmeth_set_va(pkt, wqe->iova);
 		if (opcode == IB_OPCODE_RC_COMPARE_SWAP ||
-		    opcode == IB_OPCODE_RD_COMPARE_SWAP) {
+		    opcode == IB_OPCODE_XRC_COMPARE_SWAP) {
 			atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
 			atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
 		} else {
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index ac8d823eb416..4c57e5495d4c 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -35,6 +35,7 @@ enum resp_states {
 	RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
 	RESPST_ERR_RNR,
 	RESPST_ERR_RKEY_VIOLATION,
+	RESPST_ERR_INVALID_XRCETH,
 	RESPST_ERR_INVALIDATE_RKEY,
 	RESPST_ERR_LENGTH,
 	RESPST_ERR_CQ_OVERFLOW,
@@ -69,6 +70,7 @@ static char *resp_state_name[] = {
 	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ]	= "ERR_TOO_MANY_RDMA_ATM_REQ",
 	[RESPST_ERR_RNR]			= "ERR_RNR",
 	[RESPST_ERR_RKEY_VIOLATION]		= "ERR_RKEY_VIOLATION",
+	[RESPST_ERR_INVALID_XRCETH]		= "ERR_INVALID_XRCETH",
 	[RESPST_ERR_INVALIDATE_RKEY]		= "ERR_INVALIDATE_RKEY_VIOLATION",
 	[RESPST_ERR_LENGTH]			= "ERR_LENGTH",
 	[RESPST_ERR_CQ_OVERFLOW]		= "ERR_CQ_OVERFLOW",
@@ -122,6 +124,7 @@ static enum resp_states check_psn(struct rxe_qp *qp,
 
 	switch (rxe_qp_type(qp)) {
 	case IB_QPT_RC:
+	case IB_QPT_XRC_TGT:
 		if (diff > 0) {
 			if (qp->resp.sent_psn_nak)
 				return RESPST_CLEANUP;
@@ -243,6 +246,47 @@ static enum resp_states check_op_seq(struct rxe_qp *qp,
 		}
 		break;
 
+	case IB_QPT_XRC_TGT:
+		switch (qp->resp.opcode) {
+		case IB_OPCODE_XRC_SEND_FIRST:
+		case IB_OPCODE_XRC_SEND_MIDDLE:
+			switch (pkt->opcode) {
+			case IB_OPCODE_XRC_SEND_MIDDLE:
+			case IB_OPCODE_XRC_SEND_LAST:
+			case IB_OPCODE_XRC_SEND_LAST_WITH_IMMEDIATE:
+			case IB_OPCODE_XRC_SEND_LAST_WITH_INVALIDATE:
+				return RESPST_CHK_OP_VALID;
+			default:
+				return RESPST_ERR_MISSING_OPCODE_LAST_C;
+			}
+
+		case IB_OPCODE_XRC_RDMA_WRITE_FIRST:
+		case IB_OPCODE_XRC_RDMA_WRITE_MIDDLE:
+			switch (pkt->opcode) {
+			case IB_OPCODE_XRC_RDMA_WRITE_MIDDLE:
+			case IB_OPCODE_XRC_RDMA_WRITE_LAST:
+			case IB_OPCODE_XRC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
+				return RESPST_CHK_OP_VALID;
+			default:
+				return RESPST_ERR_MISSING_OPCODE_LAST_C;
+			}
+
+		default:
+			switch (pkt->opcode) {
+			case IB_OPCODE_XRC_SEND_MIDDLE:
+			case IB_OPCODE_XRC_SEND_LAST:
+			case IB_OPCODE_XRC_SEND_LAST_WITH_IMMEDIATE:
+			case IB_OPCODE_XRC_SEND_LAST_WITH_INVALIDATE:
+			case IB_OPCODE_XRC_RDMA_WRITE_MIDDLE:
+			case IB_OPCODE_XRC_RDMA_WRITE_LAST:
+			case IB_OPCODE_XRC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
+				return RESPST_ERR_MISSING_OPCODE_FIRST;
+			default:
+				return RESPST_CHK_OP_VALID;
+			}
+		}
+		break;
+
 	default:
 		return RESPST_CHK_OP_VALID;
 	}
@@ -253,6 +297,7 @@ static enum resp_states check_op_valid(struct rxe_qp *qp,
 {
 	switch (rxe_qp_type(qp)) {
 	case IB_QPT_RC:
+	case IB_QPT_XRC_TGT:
 		if (((pkt->mask & RXE_READ_MASK) &&
 		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
 		    ((pkt->mask & RXE_WRITE_MASK) &&
@@ -286,9 +331,8 @@ static enum resp_states check_op_valid(struct rxe_qp *qp,
 	return RESPST_CHK_RESOURCE;
 }
 
-static enum resp_states get_srq_wqe(struct rxe_qp *qp)
+static enum resp_states get_srq_wqe(struct rxe_qp *qp, struct rxe_srq *srq)
 {
-	struct rxe_srq *srq = rxe_qp_srq(qp);
 	struct rxe_queue *q = srq->rq.queue;
 	struct rxe_recv_wqe *wqe;
 	struct ib_event ev;
@@ -339,8 +383,11 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
 static enum resp_states check_resource(struct rxe_qp *qp,
 				       struct rxe_pkt_info *pkt)
 {
-	struct rxe_srq *srq = rxe_qp_srq(qp);
+	struct rxe_srq *srq = NULL;
 
+	/* can come here from get_req() with no packet
+	 * to drain/flush recv work request queue
+	 */
 	if (qp->resp.state == QP_STATE_ERROR) {
 		if (qp->resp.wqe) {
 			qp->resp.status = IB_WC_WR_FLUSH_ERR;
@@ -359,6 +406,37 @@ static enum resp_states check_resource(struct rxe_qp *qp,
 		}
 	}
 
+	/* not supposed to happen */
+	if (WARN_ON_ONCE(!pkt))
+		return RESPST_EXIT;
+
+	/*
+	 * srq, pd and rcq can come from XRCSRQ or QP
+	 * decide which and store in pkt
+	 */
+	if (pkt->mask & RXE_XRCETH_MASK) {
+		int srq_num = xrceth_xrcsrq(pkt);
+
+		srq = rxe_pool_get_index(&pkt->rxe->srq_pool, srq_num);
+		if (!srq || rxe_qp_xrcd(qp) != rxe_srq_xrcd(srq)) {
+			pr_warn("Unable to get srq from xrceth\n");
+			return RESPST_ERR_INVALID_XRCETH;
+		}
+
+		pkt->srq = srq;
+		pkt->pd = rxe_srq_pd(srq);
+		pkt->rcq = rxe_srq_cq(srq);
+	} else {
+		srq = rxe_qp_srq(qp);
+		pkt->pd = rxe_qp_pd(qp);
+		pkt->rcq = rxe_qp_rcq(qp);
+	}
+
+	if (!pkt->pd)
+		pr_info("%s: no PD for pkt\n", __func__);
+	if (!pkt->rcq)
+		pr_info("%s: no RCQ for pkt\n", __func__);
+
 	if (pkt->mask & RXE_READ_OR_ATOMIC) {
 		/* it is the requesters job to not send
 		 * too many read/atomic ops, we just
@@ -372,7 +450,7 @@ static enum resp_states check_resource(struct rxe_qp *qp,
 
 	if (pkt->mask & RXE_RWR_MASK) {
 		if (srq)
-			return get_srq_wqe(qp);
+			return get_srq_wqe(qp, srq);
 
 		qp->resp.wqe = queue_head(qp->rq.queue,
 				QUEUE_TYPE_FROM_CLIENT);
@@ -387,6 +465,7 @@ static enum resp_states check_length(struct rxe_qp *qp,
 {
 	switch (rxe_qp_type(qp)) {
 	case IB_QPT_RC:
+	case IB_QPT_XRC_TGT:
 		return RESPST_CHK_RKEY;
 
 	case IB_QPT_UC:
@@ -443,16 +522,16 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 	pktlen	= payload_size(pkt);
 
 	if (rkey_is_mw(rkey)) {
-		mw = rxe_lookup_mw(qp, access, rkey);
+		mw = rxe_lookup_mw(pkt->pd, qp, access, rkey);
 		if (!mw) {
-			pr_err("%s: no MW matches rkey %#x\n", __func__, rkey);
+			pr_warn("%s: no MW matches rkey %#x\n", __func__, rkey);
 			state = RESPST_ERR_RKEY_VIOLATION;
 			goto err;
 		}
 
 		mr = mw->mr;
 		if (!mr) {
-			pr_err("%s: MW doesn't have an MR\n", __func__);
+			pr_warn("%s: MW doesn't have an MR\n", __func__);
 			state = RESPST_ERR_RKEY_VIOLATION;
 			goto err;
 		}
@@ -463,9 +542,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 		rxe_drop_ref(mw);
 		rxe_add_ref(mr);
 	} else {
-		mr = lookup_mr(rxe_qp_pd(qp), access, rkey, RXE_LOOKUP_REMOTE);
+		mr = lookup_mr(pkt->pd, access, rkey, RXE_LOOKUP_REMOTE);
 		if (!mr) {
-			pr_err("%s: no MR matches rkey %#x\n", __func__, rkey);
+			pr_warn("%s: no MR matches rkey %#x\n", __func__, rkey);
 			state = RESPST_ERR_RKEY_VIOLATION;
 			goto err;
 		}
@@ -511,12 +590,12 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 	return state;
 }
 
-static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
-				     int data_len)
+static enum resp_states send_data_in(struct rxe_pd *pd, struct rxe_qp *qp,
+				     void *data_addr, int data_len)
 {
 	int err;
 
-	err = copy_data(rxe_qp_pd(qp), IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
+	err = copy_data(pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
 			data_addr, data_len, RXE_TO_MR_OBJ);
 	if (unlikely(err))
 		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
@@ -574,7 +653,7 @@ static enum resp_states process_atomic(struct rxe_qp *qp,
 	qp->resp.atomic_orig = *vaddr;
 
 	if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP ||
-	    pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) {
+	    pkt->opcode == IB_OPCODE_XRC_COMPARE_SWAP) {
 		if (*vaddr == atmeth_comp(pkt))
 			*vaddr = atmeth_swap_add(pkt);
 	} else {
@@ -705,6 +784,10 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
 	}
 
+	/* fix up the opcode: XRC opcodes are the RC ones + IB_OPCODE_XRC (0xa0) */
+	if (rxe_qp_type(qp) == IB_QPT_XRC_TGT)
+		opcode += IB_OPCODE_XRC;
+
 	res->state = rdatm_res_state_next;
 
 	payload = min_t(int, res->read.resid, mtu);
@@ -717,7 +800,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 	err = rxe_mr_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),
 			  payload, RXE_FROM_MR_OBJ);
 	if (err)
-		pr_err("Failed copying memory\n");
+		pr_warn("Failed copying memory\n");
 
 	if (bth_pad(&ack_pkt)) {
 		u8 *pad = payload_addr(&ack_pkt) + payload;
@@ -727,7 +810,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 
 	err = rxe_xmit_packet(qp, &ack_pkt, skb);
 	if (err) {
-		pr_err("Failed sending RDMA reply.\n");
+		pr_warn("Failed sending RDMA reply.\n");
 		return RESPST_ERR_RNR;
 	}
 
@@ -775,15 +858,17 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 						sizeof(hdr.reserved));
 				memcpy(&hdr.roce4grh, ip_hdr(skb),
 						sizeof(hdr.roce4grh));
-				err = send_data_in(qp, &hdr, sizeof(hdr));
+				err = send_data_in(pkt->pd, qp, &hdr,
+						   sizeof(hdr));
 			} else {
-				err = send_data_in(qp, ipv6_hdr(skb),
-						sizeof(hdr));
+				err = send_data_in(pkt->pd, qp, ipv6_hdr(skb),
+						   sizeof(hdr));
 			}
 			if (err)
 				return err;
 		}
-		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
+		err = send_data_in(pkt->pd, qp, payload_addr(pkt),
+				   payload_size(pkt));
 		if (err)
 			return err;
 	} else if (pkt->mask & RXE_WRITE_MASK) {
@@ -822,7 +907,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 		/* We successfully processed this new request. */
 		qp->resp.msn++;
 		return RESPST_COMPLETE;
-	} else if (rxe_qp_type(qp) == IB_QPT_RC)
+	} else if (rxe_qp_type(qp) == IB_QPT_RC ||
+		   rxe_qp_type(qp) == IB_QPT_XRC_TGT)
 		return RESPST_ACKNOWLEDGE;
 	else
 		return RESPST_CLEANUP;
@@ -836,13 +922,19 @@ static enum resp_states do_complete(struct rxe_qp *qp,
 	struct ib_uverbs_wc *uwc = &cqe.uibwc;
 	struct rxe_recv_wqe *wqe = qp->resp.wqe;
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+	struct rxe_cq *rcq;
+
+	/* can come here from check_resources to flush the
+	 * recv work queue entries. Otherwise pkt will be there
+	 */
+	rcq = pkt ? pkt->rcq : rxe_qp_rcq(qp);
 
 	if (!wqe)
 		goto finish;
 
 	memset(&cqe, 0, sizeof(cqe));
 
-	if (rxe_qp_rcq(qp)->is_user) {
+	if (rcq->is_user) {
 		uwc->status		= qp->resp.status;
 		uwc->qp_num		= rxe_qp_num(qp);
 		uwc->wr_id		= wqe->wr_id;
@@ -865,7 +957,7 @@ static enum resp_states do_complete(struct rxe_qp *qp,
 		/* fields after byte_len are different between kernel and user
 		 * space
 		 */
-		if (rxe_qp_rcq(qp)->is_user) {
+		if (rcq->is_user) {
 			uwc->wc_flags = IB_WC_GRH;
 
 			if (pkt->mask & RXE_IMMDT_MASK) {
@@ -917,12 +1009,12 @@ static enum resp_states do_complete(struct rxe_qp *qp,
 	}
 
 	/* have copy for srq and reference for !srq */
-	if (!rxe_qp_srq(qp))
+	if (qp->rq.queue)
 		advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);
 
 	qp->resp.wqe = NULL;
 
-	if (rxe_cq_post(rxe_qp_rcq(qp), &cqe, pkt ? bth_se(pkt) : 1))
+	if (rxe_cq_post(rcq, &cqe, pkt ? bth_se(pkt) : 1))
 		return RESPST_ERR_CQ_OVERFLOW;
 
 finish:
@@ -930,7 +1022,8 @@ static enum resp_states do_complete(struct rxe_qp *qp,
 		return RESPST_CHK_RESOURCE;
 	if (unlikely(!pkt))
 		return RESPST_DONE;
-	if (rxe_qp_type(qp) == IB_QPT_RC)
+	if (rxe_qp_type(qp) == IB_QPT_RC ||
+	    rxe_qp_type(qp) == IB_QPT_XRC_TGT)
 		return RESPST_ACKNOWLEDGE;
 	else
 		return RESPST_CLEANUP;
@@ -942,8 +1035,13 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	int err = 0;
 	struct rxe_pkt_info ack_pkt;
 	struct sk_buff *skb;
+	int opcode;
+
+	opcode = (rxe_qp_type(qp) == IB_QPT_XRC_TGT) ?
+		IB_OPCODE_XRC_ACKNOWLEDGE :
+		IB_OPCODE_RC_ACKNOWLEDGE;
 
-	skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
+	skb = prepare_ack_packet(qp, pkt, &ack_pkt, opcode,
 				 0, psn, syndrome);
 	if (!skb) {
 		err = -ENOMEM;
@@ -952,7 +1050,7 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 
 	err = rxe_xmit_packet(qp, &ack_pkt, skb);
 	if (err)
-		pr_err_ratelimited("Failed sending ack\n");
+		pr_warn("Failed sending ack\n");
 
 err1:
 	return err;
@@ -987,7 +1085,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 
 	rc = rxe_xmit_packet(qp, &ack_pkt, skb);
 	if (rc) {
-		pr_err_ratelimited("Failed sending ack\n");
+		pr_warn("Failed sending ack\n");
 		rxe_drop_ref(qp);
 	}
 out:
@@ -997,7 +1095,8 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 static enum resp_states acknowledge(struct rxe_qp *qp,
 				    struct rxe_pkt_info *pkt)
 {
-	if (rxe_qp_type(qp) != IB_QPT_RC)
+	if (rxe_qp_type(qp) != IB_QPT_RC &&
+	    rxe_qp_type(qp) != IB_QPT_XRC_TGT)
 		return RESPST_CLEANUP;
 
 	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
@@ -1114,7 +1213,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
 			/* Resend the result. */
 			rc = rxe_xmit_packet(qp, pkt, res->atomic.skb);
 			if (rc) {
-				pr_err("Failed resending result. This flow is not handled - skb ignored\n");
+				pr_warn("Failed resending result. This flow is not handled - skb ignored\n");
 				rc = RESPST_CLEANUP;
 				goto out;
 			}
@@ -1176,7 +1275,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
 static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
 {
 	struct sk_buff *skb;
-	struct rxe_queue *q = qp->rq.queue;
+	struct rxe_queue *q;
 
 	while ((skb = skb_dequeue(&qp->req_pkts)))
 		rxe_free_pkt(SKB_TO_PKT(skb));
@@ -1184,7 +1283,11 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
 	if (notify)
 		return;
 
-	while (!rxe_qp_srq(qp) && q && queue_head(q, q->type))
+	q = qp->rq.queue;
+	if (!q)
+		return;
+
+	while (queue_head(q, q->type))
 		advance_consumer(q, q->type);
 }
 
@@ -1216,8 +1319,8 @@ int rxe_responder(void *arg)
 	}
 
 	while (1) {
-		pr_debug("qp#%d state = %s\n", rxe_qp_num(qp),
-			 resp_state_name[state]);
+		pr_debug("qp#%d type=%d state = %s\n", rxe_qp_num(qp),
+			 rxe_qp_type(qp), resp_state_name[state]);
 		switch (state) {
 		case RESPST_GET_REQ:
 			state = get_req(qp, &pkt);
@@ -1279,7 +1382,8 @@ int rxe_responder(void *arg)
 			state = do_class_d1e_error(qp);
 			break;
 		case RESPST_ERR_RNR:
-			if (rxe_qp_type(qp) == IB_QPT_RC) {
+			if (rxe_qp_type(qp) == IB_QPT_RC ||
+			    rxe_qp_type(qp) == IB_QPT_XRC_TGT) {
 				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
 				/* RC - class B */
 				send_ack(qp, pkt, AETH_RNR_NAK |
@@ -1294,7 +1398,8 @@ int rxe_responder(void *arg)
 			break;
 
 		case RESPST_ERR_RKEY_VIOLATION:
-			if (rxe_qp_type(qp) == IB_QPT_RC) {
+			if (rxe_qp_type(qp) == IB_QPT_RC ||
+			    rxe_qp_type(qp) == IB_QPT_XRC_TGT) {
 				/* Class C */
 				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
 						  IB_WC_REM_ACCESS_ERR);
@@ -1312,6 +1417,17 @@ int rxe_responder(void *arg)
 			}
 			break;
 
+		case RESPST_ERR_INVALID_XRCETH:
+			if (rxe_qp_type(qp) == IB_QPT_XRC_TGT) {
+				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
+						  IB_WC_REM_ACCESS_ERR);
+				state = RESPST_COMPLETE;
+			} else {
+				pr_info("can't happen\n");
+				state = RESPST_CLEANUP;
+			}
+			break;
+
 		case RESPST_ERR_INVALIDATE_RKEY:
 			/* RC - Class J. */
 			qp->resp.goto_error = 1;
@@ -1320,7 +1436,8 @@ int rxe_responder(void *arg)
 			break;
 
 		case RESPST_ERR_LENGTH:
-			if (rxe_qp_type(qp) == IB_QPT_RC) {
+			if (rxe_qp_type(qp) == IB_QPT_RC ||
+			    rxe_qp_type(qp) == IB_QPT_XRC_TGT) {
 				/* Class C */
 				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
 						  IB_WC_REM_INV_REQ_ERR);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 52599f398ddd..1fdd07d6a94d 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -475,11 +475,13 @@ static inline struct rxe_mw *to_rmw(struct ib_mw *mw)
 	return mw ? container_of(mw, struct rxe_mw, ibmw) : NULL;
 }
 
+/* AH extractors */
 static inline struct rxe_pd *rxe_ah_pd(struct rxe_ah *ah)
 {
 	return to_rpd(ah->ibah.pd);
 }
 
+/* MR extractors */
 static inline struct rxe_pd *rxe_mr_pd(struct rxe_mr *mr)
 {
 	return to_rpd(mr->ibmr.pd);
@@ -495,6 +497,7 @@ static inline u32 rxe_mr_rkey(struct rxe_mr *mr)
 	return mr->ibmr.rkey;
 }
 
+/* MW extractors */
 static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw)
 {
 	return to_rpd(mw->ibmw.pd);
@@ -531,6 +534,11 @@ static inline struct rxe_srq *rxe_qp_srq(struct rxe_qp *qp)
 	return to_rsrq(qp->ibqp.srq);
 }
 
+static inline struct rxe_xrcd *rxe_qp_xrcd(struct rxe_qp *qp)
+{
+	return to_rxrcd(qp->ibqp.xrcd);
+}
+
 static inline enum ib_qp_state rxe_qp_state(struct rxe_qp *qp)
 {
 	return qp->attr.qp_state;
@@ -557,14 +565,14 @@ static inline struct rxe_pd *rxe_srq_pd(struct rxe_srq *srq)
 	return to_rpd(srq->ibsrq.pd);
 }
 
-static inline enum ib_srq_type rxe_srq_type(struct rxe_srq *srq)
+static inline struct rxe_xrcd *rxe_srq_xrcd(struct rxe_srq *srq)
 {
-	return srq->ibsrq.srq_type;
+	return to_rxrcd(srq->ibsrq.ext.xrc.xrcd);
 }
 
-static inline struct rxe_xrcd *rxe_srq_xrcd(struct rxe_srq *srq)
+static inline enum ib_srq_type rxe_srq_type(struct rxe_srq *srq)
 {
-	return to_rxrcd(srq->ibsrq.ext.xrc.xrcd);
+	return srq->ibsrq.srq_type;
 }
 
 int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
-- 
2.30.2
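A note on the read_reply() change above: the BTH opcode space is
grouped by transport (RC at 0x00, XRC at 0xa0) with identical low
bits within each group, so adding IB_OPCODE_XRC converts an RC read
response opcode into its XRC counterpart. A compile-time check of
that assumption could look like this (sketch only):

	BUILD_BUG_ON(IB_OPCODE_XRC_RDMA_READ_RESPONSE_FIRST !=
		     IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST +
		     IB_OPCODE_XRC);
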


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH for-next 01/13] RDMA/rxe: Decouple rxe_pkt_info from sk_buff
  2021-07-29 22:49 ` [PATCH for-next 01/13] RDMA/rxe: Decouple rxe_pkt_info from sk_buff Bob Pearson
@ 2021-08-27 13:01   ` Jason Gunthorpe
  0 siblings, 0 replies; 15+ messages in thread
From: Jason Gunthorpe @ 2021-08-27 13:01 UTC (permalink / raw)
  To: Bob Pearson; +Cc: zyjzyj2000, linux-rdma

On Thu, Jul 29, 2021 at 05:49:04PM -0500, Bob Pearson wrote:
>  	return sizeof(struct rxe_recv_wqe) +
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index 4f96437a2a8e..6212e61d267b 100644
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -155,7 +155,7 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
>  	struct udphdr *udph;
>  	struct rxe_dev *rxe;
>  	struct net_device *ndev = skb->dev;
> -	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
> +	struct rxe_pkt_info *pkt;
>  
>  	/* takes a reference on rxe->ib_dev
>  	 * drop when skb is freed
> @@ -172,6 +172,10 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
>  		goto drop;
>  	}
>  
> +	pkt = kzalloc(sizeof(*pkt), GFP_ATOMIC);
> +	RXE_CB(skb)->pkt = pkt;
> +	pkt->skb = skb;
> +

Isn't this a huge performance cost? Not even a kmem cache or something?

Jason
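
For reference, the kind of allocator being suggested: a dedicated slab
cache so the per-packet allocation in rxe_udp_encap_recv() stays cheap
(a sketch only, not code from the series; cache lifetime and error
handling are elided):

	static struct kmem_cache *rxe_pkt_cache;

	/* once, at module init */
	rxe_pkt_cache = kmem_cache_create("rxe_pkt_info",
					  sizeof(struct rxe_pkt_info),
					  0, 0, NULL);

	/* per packet, replacing the kzalloc() above */
	pkt = kmem_cache_zalloc(rxe_pkt_cache, GFP_ATOMIC);

	/* and in rxe_free_pkt(), replacing kfree() */
	kmem_cache_free(rxe_pkt_cache, pkt);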

^ permalink raw reply	[flat|nested] 15+ messages in thread
