* [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers
@ 2022-10-31 20:27 Bob Pearson
  2022-10-31 20:27 ` [PATCH for-next v2 02/18] RDMA/rxe: Isolate request payload code in a subroutine Bob Pearson
                   ` (16 more replies)
  0 siblings, 17 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Isolate the code that fills in the RoCE headers of a request packet
into a subroutine named rxe_init_roce_hdrs().

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_req.c | 106 +++++++++++++++-------------
 1 file changed, 57 insertions(+), 49 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index f63771207970..bcfbc78c0b53 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -377,79 +377,87 @@ static inline int get_mtu(struct rxe_qp *qp)
 	return rxe->port.mtu_cap;
 }
 
-static struct sk_buff *init_req_packet(struct rxe_qp *qp,
-				       struct rxe_av *av,
-				       struct rxe_send_wqe *wqe,
-				       int opcode, u32 payload,
-				       struct rxe_pkt_info *pkt)
+static void rxe_init_roce_hdrs(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+			       struct rxe_pkt_info *pkt, int pad)
 {
-	struct rxe_dev		*rxe = to_rdev(qp->ibqp.device);
-	struct sk_buff		*skb;
-	struct rxe_send_wr	*ibwr = &wqe->wr;
-	int			pad = (-payload) & 0x3;
-	int			paylen;
-	int			solicited;
-	u32			qp_num;
-	int			ack_req;
-
-	/* length from start of bth to end of icrc */
-	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
-	pkt->paylen = paylen;
-
-	/* init skb */
-	skb = rxe_init_packet(rxe, av, paylen, pkt);
-	if (unlikely(!skb))
-		return NULL;
+	struct rxe_send_wr *wr = &wqe->wr;
+	int is_send;
+	int is_write_imm;
+	int is_end;
+	int solicited;
+	u32 dst_qpn;
+	u32 qkey;
+	int ack_req;
 
 	/* init bth */
-	solicited = (ibwr->send_flags & IB_SEND_SOLICITED) &&
-			(pkt->mask & RXE_END_MASK) &&
-			((pkt->mask & (RXE_SEND_MASK)) ||
-			(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
-			(RXE_WRITE_MASK | RXE_IMMDT_MASK));
-
-	qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
-					 qp->attr.dest_qp_num;
-
-	ack_req = ((pkt->mask & RXE_END_MASK) ||
-		(qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
+	is_send = pkt->mask & RXE_SEND_MASK;
+	is_write_imm = (pkt->mask & RXE_WRITE_MASK) &&
+		       (pkt->mask & RXE_IMMDT_MASK);
+	is_end = pkt->mask & RXE_END_MASK;
+	solicited = (wr->send_flags & IB_SEND_SOLICITED) && is_end &&
+		    (is_send || is_write_imm);
+	dst_qpn = (pkt->mask & RXE_DETH_MASK) ? wr->wr.ud.remote_qpn :
+					       qp->attr.dest_qp_num;
+	ack_req = is_end || (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK);
 	if (ack_req)
 		qp->req.noack_pkts = 0;
 
-	bth_init(pkt, pkt->opcode, solicited, 0, pad, IB_DEFAULT_PKEY_FULL, qp_num,
-		 ack_req, pkt->psn);
+	bth_init(pkt, pkt->opcode, solicited, 0, pad, IB_DEFAULT_PKEY_FULL,
+		 dst_qpn, ack_req, pkt->psn);
 
-	/* init optional headers */
+	/* init extended headers */
 	if (pkt->mask & RXE_RETH_MASK) {
-		reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
+		reth_set_rkey(pkt, wr->wr.rdma.rkey);
 		reth_set_va(pkt, wqe->iova);
 		reth_set_len(pkt, wqe->dma.resid);
 	}
 
 	if (pkt->mask & RXE_IMMDT_MASK)
-		immdt_set_imm(pkt, ibwr->ex.imm_data);
+		immdt_set_imm(pkt, wr->ex.imm_data);
 
 	if (pkt->mask & RXE_IETH_MASK)
-		ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey);
+		ieth_set_rkey(pkt, wr->ex.invalidate_rkey);
 
 	if (pkt->mask & RXE_ATMETH_MASK) {
 		atmeth_set_va(pkt, wqe->iova);
-		if (opcode == IB_OPCODE_RC_COMPARE_SWAP) {
-			atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
-			atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
+		if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
+			atmeth_set_swap_add(pkt, wr->wr.atomic.swap);
+			atmeth_set_comp(pkt, wr->wr.atomic.compare_add);
 		} else {
-			atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add);
+			atmeth_set_swap_add(pkt, wr->wr.atomic.compare_add);
 		}
-		atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey);
+		atmeth_set_rkey(pkt, wr->wr.atomic.rkey);
 	}
 
 	if (pkt->mask & RXE_DETH_MASK) {
-		if (qp->ibqp.qp_num == 1)
-			deth_set_qkey(pkt, GSI_QKEY);
-		else
-			deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey);
-		deth_set_sqp(pkt, qp->ibqp.qp_num);
+		qkey = (qp->ibqp.qp_num == 1) ? GSI_QKEY :
+						wr->wr.ud.remote_qkey;
+		deth_set_qkey(pkt, qkey);
+		deth_set_sqp(pkt, qp_num(qp));
 	}
+}
+
+static struct sk_buff *init_req_packet(struct rxe_qp *qp,
+				       struct rxe_av *av,
+				       struct rxe_send_wqe *wqe,
+				       int opcode, u32 payload,
+				       struct rxe_pkt_info *pkt)
+{
+	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+	struct sk_buff *skb;
+	int pad = (-payload) & 0x3;
+	int paylen;
+
+	/* length from start of bth to end of icrc */
+	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
+	pkt->paylen = paylen;
+
+	/* init skb */
+	skb = rxe_init_packet(rxe, av, paylen, pkt);
+	if (unlikely(!skb))
+		return NULL;
+
+	rxe_init_roce_hdrs(qp, wqe, pkt, pad);
 
 	return skb;
 }
-- 
2.34.1



* [PATCH for-next v2 02/18] RDMA/rxe: Isolate request payload code in a subroutine
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
@ 2022-10-31 20:27 ` Bob Pearson
  2022-10-31 20:27 ` [PATCH for-next v2 03/18] RDMA/rxe: Remove paylen parameter from rxe_init_packet Bob Pearson
                   ` (15 subsequent siblings)
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Isolate the code that fills the payload of a request packet into
a subroutine named rxe_init_payload().

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_req.c | 34 +++++++++++++++++------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index bcfbc78c0b53..10a75f4e3608 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -437,6 +437,25 @@ static void rxe_init_roce_hdrs(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 	}
 }
 
+static int rxe_init_payload(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+			    struct rxe_pkt_info *pkt, u32 payload)
+{
+	void *data;
+	int err = 0;
+
+	if (wqe->wr.send_flags & IB_SEND_INLINE) {
+		data = &wqe->dma.inline_data[wqe->dma.sge_offset];
+		memcpy(payload_addr(pkt), data, payload);
+		wqe->dma.resid -= payload;
+		wqe->dma.sge_offset += payload;
+	} else {
+		err = copy_data(qp->pd, 0, &wqe->dma, payload_addr(pkt),
+				payload, RXE_FROM_MR_OBJ);
+	}
+
+	return err;
+}
+
 static struct sk_buff *init_req_packet(struct rxe_qp *qp,
 				       struct rxe_av *av,
 				       struct rxe_send_wqe *wqe,
@@ -473,20 +492,7 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
 		return err;
 
 	if (pkt->mask & RXE_WRITE_OR_SEND_MASK) {
-		if (wqe->wr.send_flags & IB_SEND_INLINE) {
-			u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];
-
-			memcpy(payload_addr(pkt), tmp, payload);
-
-			wqe->dma.resid -= payload;
-			wqe->dma.sge_offset += payload;
-		} else {
-			err = copy_data(qp->pd, 0, &wqe->dma,
-					payload_addr(pkt), payload,
-					RXE_FROM_MR_OBJ);
-			if (err)
-				return err;
-		}
+		err = rxe_init_payload(qp, wqe, pkt, payload);
 		if (bth_pad(pkt)) {
 			u8 *pad = payload_addr(pkt) + payload;
 
-- 
2.34.1



* [PATCH for-next v2 03/18] RDMA/rxe: Remove paylen parameter from rxe_init_packet
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
  2022-10-31 20:27 ` [PATCH for-next v2 02/18] RDMA/rxe: Isolate request payload code in a subroutine Bob Pearson
@ 2022-10-31 20:27 ` Bob Pearson
  2022-10-31 20:27 ` [PATCH for-next v2 04/18] RDMA/rxe: Isolate code to build request packet Bob Pearson
                   ` (14 subsequent siblings)
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Clean up rxe_init_packet() by removing paylen as a parameter since it
is already available as pkt->paylen.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h  | 2 +-
 drivers/infiniband/sw/rxe/rxe_net.c  | 6 +++---
 drivers/infiniband/sw/rxe/rxe_req.c  | 2 +-
 drivers/infiniband/sw/rxe/rxe_resp.c | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index c2a5c8814a48..574a6afc1199 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -92,7 +92,7 @@ void rxe_mw_cleanup(struct rxe_pool_elem *elem);
 
 /* rxe_net.c */
 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
-				int paylen, struct rxe_pkt_info *pkt);
+				struct rxe_pkt_info *pkt);
 int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
 		struct sk_buff *skb);
 int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 35f327b9d4b8..1e4456f5cda2 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -443,7 +443,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 }
 
 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
-				int paylen, struct rxe_pkt_info *pkt)
+				struct rxe_pkt_info *pkt)
 {
 	unsigned int hdr_len;
 	struct sk_buff *skb = NULL;
@@ -468,7 +468,7 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 		rcu_read_unlock();
 		goto out;
 	}
-	skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev),
+	skb = alloc_skb(pkt->paylen + hdr_len + LL_RESERVED_SPACE(ndev),
 			GFP_ATOMIC);
 
 	if (unlikely(!skb)) {
@@ -489,7 +489,7 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 
 	pkt->rxe	= rxe;
 	pkt->port_num	= port_num;
-	pkt->hdr	= skb_put(skb, paylen);
+	pkt->hdr	= skb_put(skb, pkt->paylen);
 	pkt->mask	|= RXE_GRH_MASK;
 
 out:
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 10a75f4e3608..e9e865a5674f 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -472,7 +472,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
 	pkt->paylen = paylen;
 
 	/* init skb */
-	skb = rxe_init_packet(rxe, av, paylen, pkt);
+	skb = rxe_init_packet(rxe, av, pkt);
 	if (unlikely(!skb))
 		return NULL;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 95d372db934d..c7f60c7b361c 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -670,15 +670,15 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 	 */
 	pad = (-payload) & 0x3;
 	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
+	ack->paylen = paylen;
 
-	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
+	skb = rxe_init_packet(rxe, &qp->pri_av, ack);
 	if (!skb)
 		return NULL;
 
 	ack->qp = qp;
 	ack->opcode = opcode;
 	ack->mask = rxe_opcode[opcode].mask;
-	ack->paylen = paylen;
 	ack->psn = psn;
 
 	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
-- 
2.34.1



* [PATCH for-next v2 04/18] RDMA/rxe: Isolate code to build request packet
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
  2022-10-31 20:27 ` [PATCH for-next v2 02/18] RDMA/rxe: Isolate request payload code in a subroutine Bob Pearson
  2022-10-31 20:27 ` [PATCH for-next v2 03/18] RDMA/rxe: Remove paylen parameter from rxe_init_packet Bob Pearson
@ 2022-10-31 20:27 ` Bob Pearson
  2022-10-31 20:27 ` [PATCH for-next v2 05/18] RDMA/rxe: Add sg fragment ops Bob Pearson
                   ` (13 subsequent siblings)
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Isolate all the code to build a request packet into a single
subroutine called rxe_init_req_packet().

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_req.c  | 121 ++++++++++++---------------
 drivers/infiniband/sw/rxe/rxe_resp.c |  11 +--
 2 files changed, 58 insertions(+), 74 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index e9e865a5674f..6177c513e5b5 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -456,51 +456,76 @@ static int rxe_init_payload(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 	return err;
 }
 
-static struct sk_buff *init_req_packet(struct rxe_qp *qp,
-				       struct rxe_av *av,
-				       struct rxe_send_wqe *wqe,
-				       int opcode, u32 payload,
-				       struct rxe_pkt_info *pkt)
+static struct sk_buff *rxe_init_req_packet(struct rxe_qp *qp,
+					   struct rxe_send_wqe *wqe,
+					   int opcode, u32 payload,
+					   struct rxe_pkt_info *pkt)
 {
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
-	struct sk_buff *skb;
-	int pad = (-payload) & 0x3;
-	int paylen;
+	struct sk_buff *skb = NULL;
+	struct rxe_av *av;
+	struct rxe_ah *ah = NULL;
+	void *padp;
+	int pad;
+	int err = -EINVAL;
+
+	pkt->rxe = rxe;
+	pkt->opcode = opcode;
+	pkt->qp = qp;
+	pkt->psn = qp->req.psn;
+	pkt->mask = rxe_opcode[opcode].mask;
+	pkt->wqe = wqe;
+	pkt->port_num = 1;
+
+	/* get address vector and address handle for UD qps only */
+	av = rxe_get_av(pkt, &ah);
+	if (unlikely(!av))
+		goto err_out;
 
 	/* length from start of bth to end of icrc */
-	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
-	pkt->paylen = paylen;
+	pad = (-payload) & 0x3;
+	pkt->paylen = rxe_opcode[opcode].length + payload +
+						pad + RXE_ICRC_SIZE;
 
 	/* init skb */
 	skb = rxe_init_packet(rxe, av, pkt);
 	if (unlikely(!skb))
-		return NULL;
+		goto err_out;
 
 	rxe_init_roce_hdrs(qp, wqe, pkt, pad);
 
-	return skb;
-}
+	if (pkt->mask & RXE_WRITE_OR_SEND_MASK) {
+		err = rxe_init_payload(qp, wqe, pkt, payload);
+		if (err)
+			goto err_out;
+	}
 
-static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
-			 struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt,
-			 struct sk_buff *skb, u32 payload)
-{
-	int err;
+	if (pad) {
+		padp = payload_addr(pkt) + payload;
+		memset(padp, 0, pad);
+	}
 
+	/* IP and UDP network headers */
 	err = rxe_prepare(av, pkt, skb);
 	if (err)
-		return err;
+		goto err_out;
 
-	if (pkt->mask & RXE_WRITE_OR_SEND_MASK) {
-		err = rxe_init_payload(qp, wqe, pkt, payload);
-		if (bth_pad(pkt)) {
-			u8 *pad = payload_addr(pkt) + payload;
+	if (ah)
+		rxe_put(ah);
 
-			memset(pad, 0, bth_pad(pkt));
-		}
-	}
+	return skb;
 
-	return 0;
+err_out:
+	if (err == -EFAULT)
+		wqe->status = IB_WC_LOC_PROT_ERR;
+	else
+		wqe->status = IB_WC_LOC_QP_OP_ERR;
+	if (skb)
+		kfree_skb(skb);
+	if (ah)
+		rxe_put(ah);
+
+	return NULL;
 }
 
 static void update_wqe_state(struct rxe_qp *qp,
@@ -630,7 +655,6 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 int rxe_requester(void *arg)
 {
 	struct rxe_qp *qp = (struct rxe_qp *)arg;
-	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	struct rxe_pkt_info pkt;
 	struct sk_buff *skb;
 	struct rxe_send_wqe *wqe;
@@ -643,8 +667,6 @@ int rxe_requester(void *arg)
 	struct rxe_send_wqe rollback_wqe;
 	u32 rollback_psn;
 	struct rxe_queue *q = qp->sq.queue;
-	struct rxe_ah *ah;
-	struct rxe_av *av;
 
 	if (!rxe_get(qp))
 		return -EAGAIN;
@@ -753,44 +775,9 @@ int rxe_requester(void *arg)
 		payload = mtu;
 	}
 
-	pkt.rxe = rxe;
-	pkt.opcode = opcode;
-	pkt.qp = qp;
-	pkt.psn = qp->req.psn;
-	pkt.mask = rxe_opcode[opcode].mask;
-	pkt.wqe = wqe;
-
-	av = rxe_get_av(&pkt, &ah);
-	if (unlikely(!av)) {
-		pr_err("qp#%d Failed no address vector\n", qp_num(qp));
-		wqe->status = IB_WC_LOC_QP_OP_ERR;
-		goto err;
-	}
-
-	skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
-	if (unlikely(!skb)) {
-		pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
-		wqe->status = IB_WC_LOC_QP_OP_ERR;
-		if (ah)
-			rxe_put(ah);
-		goto err;
-	}
-
-	err = finish_packet(qp, av, wqe, &pkt, skb, payload);
-	if (unlikely(err)) {
-		pr_debug("qp#%d Error during finish packet\n", qp_num(qp));
-		if (err == -EFAULT)
-			wqe->status = IB_WC_LOC_PROT_ERR;
-		else
-			wqe->status = IB_WC_LOC_QP_OP_ERR;
-		kfree_skb(skb);
-		if (ah)
-			rxe_put(ah);
+	skb = rxe_init_req_packet(qp, wqe, opcode, payload, &pkt);
+	if (unlikely(!skb))
 		goto err;
-	}
-
-	if (ah)
-		rxe_put(ah);
 
 	/*
 	 * To prevent a race on wqe access between requester and completer,
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c7f60c7b361c..44b5c159cef9 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -665,22 +665,19 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 	int pad;
 	int err;
 
-	/*
-	 * allocate packet
-	 */
 	pad = (-payload) & 0x3;
 	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
 	ack->paylen = paylen;
 
-	skb = rxe_init_packet(rxe, &qp->pri_av, ack);
-	if (!skb)
-		return NULL;
-
 	ack->qp = qp;
 	ack->opcode = opcode;
 	ack->mask = rxe_opcode[opcode].mask;
 	ack->psn = psn;
 
+	skb = rxe_init_packet(rxe, &qp->pri_av, ack);
+	if (!skb)
+		return NULL;
+
 	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
 		 qp->attr.dest_qp_num, 0, psn);
 
-- 
2.34.1



* [PATCH for-next v2 05/18] RDMA/rxe: Add sg fragment ops
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (2 preceding siblings ...)
  2022-10-31 20:27 ` [PATCH for-next v2 04/18] RDMA/rxe: Isolate code to build request packet Bob Pearson
@ 2022-10-31 20:27 ` Bob Pearson
  2022-11-24 19:05   ` Jason Gunthorpe
  2022-10-31 20:27 ` [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c Bob Pearson
                   ` (12 subsequent siblings)
  16 siblings, 1 reply; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Rename rxe_mr_copy_dir to rxe_mr_copy_op and add new operations for
copying between an skb fragment list and an mr.

This is in preparation for supporting fragmented skbs.
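
As a rough illustration, a caller that must choose between the copy
path and the (future) frag path might select the op like this; the
use_frags flag here is hypothetical and not part of this patch:

	enum rxe_mr_copy_op op;

	/* frag path: reference the MR pages from the skb frag list;
	 * copy path: memcpy into the linear skb data as before
	 */
	op = use_frags ? RXE_FRAG_FROM_MR : RXE_COPY_FROM_MR;

	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
			  payload, op);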

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c  |  4 ++--
 drivers/infiniband/sw/rxe/rxe_loc.h   |  4 ++--
 drivers/infiniband/sw/rxe/rxe_mr.c    | 14 +++++++-------
 drivers/infiniband/sw/rxe/rxe_req.c   |  2 +-
 drivers/infiniband/sw/rxe/rxe_resp.c  |  9 +++++----
 drivers/infiniband/sw/rxe/rxe_verbs.h | 15 ++++++++++++---
 6 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index c9170dd99f3a..77640e35ae88 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -356,7 +356,7 @@ static inline enum comp_state do_read(struct rxe_qp *qp,
 
 	ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
 			&wqe->dma, payload_addr(pkt),
-			payload_size(pkt), RXE_TO_MR_OBJ);
+			payload_size(pkt), RXE_COPY_TO_MR);
 	if (ret) {
 		wqe->status = IB_WC_LOC_PROT_ERR;
 		return COMPST_ERROR;
@@ -378,7 +378,7 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp,
 
 	ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
 			&wqe->dma, &atomic_orig,
-			sizeof(u64), RXE_TO_MR_OBJ);
+			sizeof(u64), RXE_COPY_TO_MR);
 	if (ret) {
 		wqe->status = IB_WC_LOC_PROT_ERR;
 		return COMPST_ERROR;
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 574a6afc1199..ff803a957ac1 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -69,9 +69,9 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr);
 int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
-		enum rxe_mr_copy_dir dir);
+		enum rxe_mr_copy_op op);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
-	      void *addr, int length, enum rxe_mr_copy_dir dir);
+	      void *addr, int length, enum rxe_mr_copy_op op);
 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length);
 struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
 			 enum rxe_mr_lookup_type type);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index d4f10c2d1aa7..60a8034f1416 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -290,7 +290,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
  * a mr object starting at iova.
  */
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
-		enum rxe_mr_copy_dir dir)
+		enum rxe_mr_copy_op op)
 {
 	int			err;
 	int			bytes;
@@ -307,9 +307,9 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 	if (mr->ibmr.type == IB_MR_TYPE_DMA) {
 		u8 *src, *dest;
 
-		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
+		src = (op == RXE_COPY_TO_MR) ? addr : ((void *)(uintptr_t)iova);
 
-		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;
+		dest = (op == RXE_COPY_TO_MR) ? ((void *)(uintptr_t)iova) : addr;
 
 		memcpy(dest, src, length);
 
@@ -333,8 +333,8 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		u8 *src, *dest;
 
 		va	= (u8 *)(uintptr_t)buf->addr + offset;
-		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
-		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;
+		src = (op == RXE_COPY_TO_MR) ? addr : va;
+		dest = (op == RXE_COPY_TO_MR) ? va : addr;
 
 		bytes	= buf->size - offset;
 
@@ -372,7 +372,7 @@ int copy_data(
 	struct rxe_dma_info	*dma,
 	void			*addr,
 	int			length,
-	enum rxe_mr_copy_dir	dir)
+	enum rxe_mr_copy_op	op)
 {
 	int			bytes;
 	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
@@ -433,7 +433,7 @@ int copy_data(
 		if (bytes > 0) {
 			iova = sge->addr + offset;
 
-			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
+			err = rxe_mr_copy(mr, iova, addr, bytes, op);
 			if (err)
 				goto err2;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 6177c513e5b5..b111a6ddf66c 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -450,7 +450,7 @@ static int rxe_init_payload(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 		wqe->dma.sge_offset += payload;
 	} else {
 		err = copy_data(qp->pd, 0, &wqe->dma, payload_addr(pkt),
-				payload, RXE_FROM_MR_OBJ);
+				payload, RXE_COPY_FROM_MR);
 	}
 
 	return err;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 44b5c159cef9..023df0562258 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -524,7 +524,7 @@ static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
 	int err;
 
 	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
-			data_addr, data_len, RXE_TO_MR_OBJ);
+			data_addr, data_len, RXE_COPY_TO_MR);
 	if (unlikely(err))
 		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
 					: RESPST_ERR_MALFORMED_WQE;
@@ -540,7 +540,7 @@ static enum resp_states write_data_in(struct rxe_qp *qp,
 	int data_len = payload_size(pkt);
 
 	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
-			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
+			  payload_addr(pkt), data_len, RXE_COPY_TO_MR);
 	if (err) {
 		rc = RESPST_ERR_RKEY_VIOLATION;
 		goto out;
@@ -807,8 +807,9 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 		return RESPST_ERR_RNR;
 
 	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
-			  payload, RXE_FROM_MR_OBJ);
-	rxe_put(mr);
+			  payload, RXE_COPY_FROM_MR);
+	if (mr)
+		rxe_put(mr);
 	if (err) {
 		kfree_skb(skb);
 		return RESPST_ERR_RKEY_VIOLATION;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 22a299b0a9f0..08275b0c7a6e 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -267,9 +267,18 @@ enum rxe_mr_state {
 	RXE_MR_STATE_VALID,
 };
 
-enum rxe_mr_copy_dir {
-	RXE_TO_MR_OBJ,
-	RXE_FROM_MR_OBJ,
+/**
+ * enum rxe_mr_copy_op - Operations performed by rxe_copy_mr/dma_data()
+ * @RXE_COPY_TO_MR:	Copy data from packet to MR(s)
+ * @RXE_COPY_FROM_MR:	Copy data from MR(s) to packet
+ * @RXE_FRAG_TO_MR:	Copy data from frag list to MR(s)
+ * @RXE_FRAG_FROM_MR:	Copy data from MR(s) to frag list
+ */
+enum rxe_mr_copy_op {
+	RXE_COPY_TO_MR,
+	RXE_COPY_FROM_MR,
+	RXE_FRAG_TO_MR,
+	RXE_FRAG_FROM_MR,
 };
 
 enum rxe_mr_lookup_type {
-- 
2.34.1



* [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (3 preceding siblings ...)
  2022-10-31 20:27 ` [PATCH for-next v2 05/18] RDMA/rxe: Add sg fragment ops Bob Pearson
@ 2022-10-31 20:27 ` Bob Pearson
  2022-11-24 19:10   ` Jason Gunthorpe
  2022-10-31 20:27 ` [PATCH for-next v2 07/18] RDMA/rxe: Add routine to compute the number of frags Bob Pearson
                   ` (11 subsequent siblings)
  16 siblings, 1 reply; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Add the subroutine rxe_add_frag() to add a fragment to an skb.

This is in preparation for supporting fragmented skbs.
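
For illustration only, a caller might use it along these lines to turn
a run of MR buffers into skb frags; the surrounding loop and the
buf/offset/length bookkeeping are a hypothetical sketch, not code from
this series:

	/* add up to 'length' bytes from consecutive MR buffers as frags */
	while (length > 0) {
		int bytes = min_t(int, length, buf->size - offset);

		err = rxe_add_frag(skb, buf, bytes, offset);
		if (err)
			return err;

		length -= bytes;
		offset = 0;
		buf++;
	}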

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h |  2 ++
 drivers/infiniband/sw/rxe/rxe_mr.c  | 34 +++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index ff803a957ac1..81a611778d44 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -68,6 +68,8 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr);
 int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr);
 int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
+int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
+		 int length, int offset);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum rxe_mr_copy_op op);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 60a8034f1416..2dcf37f32330 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -286,6 +286,40 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 	return addr;
 }
 
+/**
+ * rxe_add_frag() - Add a frag to a nonlinear packet
+ * @skb: The packet buffer
+ * @buf: Kernel buffer info
+ * @length: Length of fragment
+ * @offset: Offset of fragment in buf
+ *
+ * Returns: 0 on success else a negative errno
+ */
+int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
+		 int length, int offset)
+{
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[nr_frags];
+
+	if (nr_frags >= MAX_SKB_FRAGS) {
+		pr_debug("%s: nr_frags (%d) >= MAX_SKB_FRAGS\n",
+			__func__, nr_frags);
+		return -EINVAL;
+	}
+
+	frag->bv_len = length;
+	frag->bv_offset = offset;
+	frag->bv_page = virt_to_page(buf->addr);
+	/* because kfree_skb will call put_page() */
+	get_page(frag->bv_page);
+	skb_shinfo(skb)->nr_frags++;
+
+	skb->data_len += length;
+	skb->len += length;
+
+	return 0;
+}
+
 /* copy data from a range (vaddr, vaddr+length-1) to or from
  * a mr object starting at iova.
  */
-- 
2.34.1



* [PATCH for-next v2 07/18] RDMA/rxe: Add routine to compute the number of frags
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (4 preceding siblings ...)
  2022-10-31 20:27 ` [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c Bob Pearson
@ 2022-10-31 20:27 ` Bob Pearson
  2022-11-24 19:15   ` Jason Gunthorpe
  2022-10-31 20:27 ` [PATCH for-next v2 08/18] RDMA/rxe: Extend rxe_mr_copy to support skb frags Bob Pearson
                   ` (10 subsequent siblings)
  16 siblings, 1 reply; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Add a subroutine named rxe_num_mr_frags() to compute the number of
skb frags needed to hold length bytes of data from an mr, starting at
iova, when building a fragmented skb.
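
A plausible use, sketched here only as an illustration (the use_frags
flag is hypothetical), is to check the count against MAX_SKB_FRAGS
before committing to a nonlinear skb:

	int nr_frags;

	nr_frags = rxe_num_mr_frags(mr, iova, length);
	if (nr_frags < 0 || nr_frags > MAX_SKB_FRAGS)
		use_frags = false;	/* fall back to a linear skb */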

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h |  1 +
 drivers/infiniband/sw/rxe/rxe_mr.c  | 68 +++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 81a611778d44..87fb052c1d0a 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -70,6 +70,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
 int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
 		 int length, int offset);
+int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum rxe_mr_copy_op op);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 2dcf37f32330..23abcf2a0198 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -320,6 +320,74 @@ int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
 	return 0;
 }
 
+/**
+ * rxe_num_mr_frags() - Compute the number of skb frags needed to copy
+ *			length bytes from an mr to an skb frag list.
+ * @mr: mr to copy data from
+ * @iova: iova in memory region as starting point
+ * @length: number of bytes to transfer
+ *
+ * Returns: the number of frags needed or a negative error
+ */
+int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length)
+{
+	struct rxe_phys_buf *buf;
+	struct rxe_map **map;
+	size_t buf_offset;
+	int bytes;
+	int m;
+	int i;
+	int num_frags = 0;
+	int err;
+
+	if (length == 0)
+		return 0;
+
+	if (mr->type == IB_MR_TYPE_DMA) {
+		while (length > 0) {
+			buf_offset = iova & ~PAGE_MASK;
+			bytes = PAGE_SIZE - buf_offset;
+			if (bytes > length)
+				bytes = length;
+			length -= bytes;
+			num_frags++;
+		}
+
+		return num_frags;
+	}
+
+	WARN_ON_ONCE(!mr->map);
+
+	err = mr_check_range(mr, iova, length);
+	if (err)
+		return err;
+
+	lookup_iova(mr, iova, &m, &i, &buf_offset);
+
+	map = mr->map + m;
+	buf = map[0]->buf + i;
+
+	while (length > 0) {
+		bytes = buf->size - buf_offset;
+		if (bytes > length)
+			bytes = length;
+		length -= bytes;
+		buf_offset = 0;
+		buf++;
+		i++;
+		num_frags++;
+
+		/* we won't overrun since we checked range above */
+		if (i == RXE_BUF_PER_MAP) {
+			i = 0;
+			map++;
+			buf = map[0]->buf;
+		}
+	}
+
+	return num_frags;
+}
+
 /* copy data from a range (vaddr, vaddr+length-1) to or from
  * a mr object starting at iova.
  */
-- 
2.34.1



* [PATCH for-next v2 08/18] RDMA/rxe: Extend rxe_mr_copy to support skb frags
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (5 preceding siblings ...)
  2022-10-31 20:27 ` [PATCH for-next v2 07/18] RDMA/rxe: Add routine to compute the number of frags Bob Pearson
@ 2022-10-31 20:27 ` Bob Pearson
  2022-10-31 20:27 ` [PATCH for-next v2 09/18] RDMA/rxe: Add routine to compute number of frags for dma Bob Pearson
                   ` (9 subsequent siblings)
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

rxe_mr_copy() currently supports copying between an mr and
a contiguous region of kernel memory.

Rename rxe_mr_copy() to rxe_copy_mr_data().
Extend the operations to support copying between an mr and an skb
fragment list. Fix up calls to rxe_mr_copy() to use the new API.

This is in preparation for supporting fragmented skbs.
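
For example, a read reply on the frag path could hand the MR pages to
the skb instead of copying them. The call below is only a sketch of
the intended use under that assumption, not code added by this patch:

	/* addr is unused and skb_offset is 0 when building frags from an MR */
	err = rxe_copy_mr_data(skb, mr, res->read.va, NULL, 0,
			       payload, RXE_FRAG_FROM_MR);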

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h  |   3 +
 drivers/infiniband/sw/rxe/rxe_mr.c   | 144 +++++++++++++++++++--------
 drivers/infiniband/sw/rxe/rxe_resp.c |  20 ++--
 3 files changed, 117 insertions(+), 50 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 87fb052c1d0a..c62fc2613a01 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -71,6 +71,9 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
 int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
 		 int length, int offset);
 int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length);
+int rxe_copy_mr_data(struct sk_buff *skb, struct rxe_mr *mr, u64 iova,
+		     void *addr, int skb_offset, int length,
+		     enum rxe_mr_copy_op op);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum rxe_mr_copy_op op);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 23abcf2a0198..37d35413da94 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -343,7 +343,7 @@ int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length)
 	if (length == 0)
 		return 0;
 
-	if (mr->type == IB_MR_TYPE_DMA) {
+	if (mr->ibmr.type == IB_MR_TYPE_DMA) {
 		while (length > 0) {
 			buf_offset = iova & ~PAGE_MASK;
 			bytes = PAGE_SIZE - buf_offset;
@@ -388,70 +388,130 @@ int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length)
 	return num_frags;
 }
 
-/* copy data from a range (vaddr, vaddr+length-1) to or from
- * a mr object starting at iova.
+/**
+ * rxe_copy_mr_data() - transfer data between an MR and a packet
+ * @skb: the packet buffer
+ * @mr: the MR
+ * @iova: the address in the MR
+ * @addr: the address in the packet (TO/FROM MR only)
+ * @length: the length to transfer
+ * @op: copy operation (TO MR, FROM MR or FRAG MR)
+ *
+ * Copy data from a range (addr, addr+length-1) in a packet
+ * to or from a range in an MR object at (iova, iova+length-1).
+ * Or, build a frag list referencing the MR range.
+ *
+ * Caller must verify that the access permissions support the
+ * operation.
+ *
+ * Returns: 0 on success or an error
  */
-int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
-		enum rxe_mr_copy_op op)
+int rxe_copy_mr_data(struct sk_buff *skb, struct rxe_mr *mr, u64 iova,
+		     void *addr, int skb_offset, int length,
+		     enum rxe_mr_copy_op op)
 {
-	int			err;
-	int			bytes;
-	u8			*va;
-	struct rxe_map		**map;
-	struct rxe_phys_buf	*buf;
-	int			m;
-	int			i;
-	size_t			offset;
+	struct rxe_phys_buf dmabuf;
+	struct rxe_phys_buf *buf;
+	struct rxe_map **map;
+	size_t buf_offset;
+	int bytes;
+	void *va;
+	int m;
+	int i;
+	int err = 0;
 
 	if (length == 0)
 		return 0;
 
-	if (mr->ibmr.type == IB_MR_TYPE_DMA) {
-		u8 *src, *dest;
-
-		src = (op == RXE_COPY_TO_MR) ? addr : ((void *)(uintptr_t)iova);
-
-		dest = (op == RXE_COPY_TO_MR) ? ((void *)(uintptr_t)iova) : addr;
+	switch (mr->ibmr.type) {
+	case IB_MR_TYPE_DMA:
+		va = (void *)(uintptr_t)iova;
+		switch (op) {
+		case RXE_COPY_TO_MR:
+			memcpy(va, addr, length);
+			break;
+		case RXE_COPY_FROM_MR:
+			memcpy(addr, va, length);
+			break;
+		case RXE_FRAG_TO_MR:
+			err = skb_copy_bits(skb, skb_offset, va, length);
+			if (err)
+				return err;
+			break;
+		case RXE_FRAG_FROM_MR:
+			/* limit frag length to PAGE_SIZE */
+			while (length) {
+				dmabuf.addr = iova & PAGE_MASK;
+				buf_offset = iova & ~PAGE_MASK;
+				bytes = PAGE_SIZE - buf_offset;
+				if (bytes > length)
+					bytes = length;
+				err = rxe_add_frag(skb, &dmabuf, bytes,
+						   buf_offset);
+				if (err)
+					return err;
+				iova += bytes;
+				length -= bytes;
+			}
+			break;
+		}
+		return 0;
 
-		memcpy(dest, src, length);
+	case IB_MR_TYPE_MEM_REG:
+	case IB_MR_TYPE_USER:
+		break;
 
-		return 0;
+	default:
+		pr_warn("%s: mr type (%d) not supported\n",
+			__func__, mr->ibmr.type);
+		return -EINVAL;
 	}
 
 	WARN_ON_ONCE(!mr->map);
 
 	err = mr_check_range(mr, iova, length);
-	if (err) {
-		err = -EFAULT;
-		goto err1;
-	}
+	if (err)
+		return -EFAULT;
 
-	lookup_iova(mr, iova, &m, &i, &offset);
+	lookup_iova(mr, iova, &m, &i, &buf_offset);
 
 	map = mr->map + m;
-	buf	= map[0]->buf + i;
+	buf = map[0]->buf + i;
 
 	while (length > 0) {
-		u8 *src, *dest;
-
-		va	= (u8 *)(uintptr_t)buf->addr + offset;
-		src = (op == RXE_COPY_TO_MR) ? addr : va;
-		dest = (op == RXE_COPY_TO_MR) ? va : addr;
-
-		bytes	= buf->size - offset;
-
+		va = (void *)(uintptr_t)buf->addr + buf_offset;
+		bytes = buf->size - buf_offset;
 		if (bytes > length)
 			bytes = length;
 
-		memcpy(dest, src, bytes);
+		switch (op) {
+		case RXE_COPY_TO_MR:
+			memcpy(va, addr, bytes);
+			break;
+		case RXE_COPY_FROM_MR:
+			memcpy(addr, va, bytes);
+			break;
+		case RXE_FRAG_TO_MR:
+			err = skb_copy_bits(skb, skb_offset, va, bytes);
+			if (err)
+				return err;
+			break;
+		case RXE_FRAG_FROM_MR:
+			err = rxe_add_frag(skb, buf, bytes, buf_offset);
+			if (err)
+				return err;
+			break;
+		}
 
-		length	-= bytes;
-		addr	+= bytes;
+		length -= bytes;
+		addr += bytes;
 
-		offset	= 0;
+		buf_offset = 0;
+		skb_offset += bytes;
 		buf++;
 		i++;
 
+		/* we won't overrun since we checked range above */
 		if (i == RXE_BUF_PER_MAP) {
 			i = 0;
 			map++;
@@ -460,9 +520,6 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 	}
 
 	return 0;
-
-err1:
-	return err;
 }
 
 /* copy data in or out of a wqe, i.e. sg list
@@ -535,7 +592,8 @@ int copy_data(
 		if (bytes > 0) {
 			iova = sge->addr + offset;
 
-			err = rxe_mr_copy(mr, iova, addr, bytes, op);
+			err = rxe_copy_mr_data(NULL, mr, iova, addr,
+					       0, bytes, op);
 			if (err)
 				goto err2;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 023df0562258..5f00477544fa 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -535,12 +535,15 @@ static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
 static enum resp_states write_data_in(struct rxe_qp *qp,
 				      struct rxe_pkt_info *pkt)
 {
+	struct sk_buff *skb = PKT_TO_SKB(pkt);
 	enum resp_states rc = RESPST_NONE;
-	int	err;
 	int data_len = payload_size(pkt);
+	int err;
+	int skb_offset = 0;
 
-	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
-			  payload_addr(pkt), data_len, RXE_COPY_TO_MR);
+	err = rxe_copy_mr_data(skb, qp->resp.mr, qp->resp.va + qp->resp.offset,
+			  payload_addr(pkt), skb_offset, data_len,
+			  RXE_COPY_TO_MR);
 	if (err) {
 		rc = RESPST_ERR_RKEY_VIOLATION;
 		goto out;
@@ -766,6 +769,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 	int err;
 	struct resp_res *res = qp->resp.res;
 	struct rxe_mr *mr;
+	int skb_offset = 0;
 
 	if (!res) {
 		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
@@ -806,15 +810,17 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 	if (!skb)
 		return RESPST_ERR_RNR;
 
-	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
-			  payload, RXE_COPY_FROM_MR);
-	if (mr)
-		rxe_put(mr);
+	err = rxe_copy_mr_data(skb, mr, res->read.va, payload_addr(&ack_pkt),
+			       skb_offset, payload, RXE_COPY_FROM_MR);
 	if (err) {
 		kfree_skb(skb);
+		rxe_put(mr);
 		return RESPST_ERR_RKEY_VIOLATION;
 	}
 
+	if (mr)
+		rxe_put(mr);
+
 	if (bth_pad(&ack_pkt)) {
 		u8 *pad = payload_addr(&ack_pkt) + payload;
 
-- 
2.34.1



* [PATCH for-next v2 09/18] RDMA/rxe: Add routine to compute number of frags for dma
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (6 preceding siblings ...)
  2022-10-31 20:27 ` [PATCH for-next v2 08/18] RDMA/rxe: Extend rxe_mr_copy to support skb frags Bob Pearson
@ 2022-10-31 20:27 ` Bob Pearson
  2022-11-24 19:16   ` Jason Gunthorpe
  2022-10-31 20:27 ` [PATCH for-next v2 10/18] RDMA/rxe: Extend copy_data to support skb frags Bob Pearson
                   ` (8 subsequent siblings)
  16 siblings, 1 reply; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Add a routine named rxe_num_dma_frags() to compute the number of skb
frags needed to copy length bytes described by a dma info struct.
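
For illustration, on the send side this pairs with rxe_num_mr_frags()
to decide whether a request payload can be carried as frags; a
hypothetical check, not taken from this series, might read:

	bool use_frags;	/* hypothetical flag */
	int nr_frags;

	nr_frags = rxe_num_dma_frags(qp->pd, &wqe->dma, payload);
	use_frags = nr_frags >= 0 && nr_frags <= MAX_SKB_FRAGS;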

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h |  4 +-
 drivers/infiniband/sw/rxe/rxe_mr.c  | 67 ++++++++++++++++++++++++++++-
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index c62fc2613a01..4c30ffaccc92 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -76,10 +76,12 @@ int rxe_copy_mr_data(struct sk_buff *skb, struct rxe_mr *mr, u64 iova,
 		     enum rxe_mr_copy_op op);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum rxe_mr_copy_op op);
+int rxe_num_dma_frags(const struct rxe_pd *pd, const struct rxe_dma_info *dma,
+		      int length);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
 	      void *addr, int length, enum rxe_mr_copy_op op);
 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length);
-struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
+struct rxe_mr *lookup_mr(const struct rxe_pd *pd, int access, u32 key,
 			 enum rxe_mr_lookup_type type);
 int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 37d35413da94..99d0b5afefc3 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -522,6 +522,71 @@ int rxe_copy_mr_data(struct sk_buff *skb, struct rxe_mr *mr, u64 iova,
 	return 0;
 }
 
+/**
+ * rxe_num_dma_frags() - Count the number of skb frags needed to copy
+ *			 length bytes from a dma info struct to an skb
+ * @pd: protection domain used by dma entries
+ * @dma: dma info
+ * @length: number of bytes to copy
+ *
+ * Returns: number of frags needed or negative error
+ */
+int rxe_num_dma_frags(const struct rxe_pd *pd, const struct rxe_dma_info *dma,
+		      int length)
+{
+	int cur_sge = dma->cur_sge;
+	const struct rxe_sge *sge = &dma->sge[cur_sge];
+	int buf_offset = dma->sge_offset;
+	int resid = dma->resid;
+	struct rxe_mr *mr = NULL;
+	int bytes;
+	u64 iova;
+	int ret;
+	int num_frags = 0;
+
+	if (length == 0)
+		return 0;
+
+	if (length > resid)
+		return -EINVAL;
+
+	while (length > 0) {
+		if (buf_offset >= sge->length) {
+			if (mr)
+				rxe_put(mr);
+
+			sge++;
+			cur_sge++;
+			buf_offset = 0;
+
+			if (cur_sge >= dma->num_sge)
+				return -ENOSPC;
+			if (!sge->length)
+				continue;
+		}
+
+		mr = lookup_mr(pd, 0, sge->lkey, RXE_LOOKUP_LOCAL);
+		if (!mr)
+			return -EINVAL;
+
+		bytes = min_t(int, length, sge->length - buf_offset);
+		if (bytes > 0) {
+			iova = sge->addr + buf_offset;
+			ret = rxe_num_mr_frags(mr, iova, length);
+			if (ret < 0) {
+				rxe_put(mr);
+				return ret;
+			}
+
+			buf_offset += bytes;
+			resid -= bytes;
+			length -= bytes;
+		}
+	}
+
+	return num_frags;
+}
+
 /* copy data in or out of a wqe, i.e. sg list
  * under the control of a dma descriptor
  */
@@ -658,7 +723,7 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
  * (3) verify that the mr can support the requested access
  * (4) verify that mr state is valid
  */
-struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
+struct rxe_mr *lookup_mr(const struct rxe_pd *pd, int access, u32 key,
 			 enum rxe_mr_lookup_type type)
 {
 	struct rxe_mr *mr;
-- 
2.34.1



* [PATCH for-next v2 10/18] RDMA/rxe: Extend copy_data to support skb frags
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (7 preceding siblings ...)
  2022-10-31 20:27 ` [PATCH for-next v2 09/18] RDMA/rxe: Add routine to compute number of frags for dma Bob Pearson
@ 2022-10-31 20:27 ` Bob Pearson
  2022-10-31 20:28 ` [PATCH for-next v2 11/18] RDMA/rxe: Replace rxe by qp as a parameter Bob Pearson
                   ` (7 subsequent siblings)
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

copy_data() currently supports copying between a contiguous kernel
buffer and the memory referenced by the scatter-gather list of a wqe.

Rename copy_data() to rxe_copy_dma_data().
Extend the operations to support copying between the sg list and an skb
fragment list. Fix up calls to copy_data() to use the new API.

This is in preparation for supporting fragmented skbs.
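
As a hedged sketch of where this is headed, a requester on the frag
path could reference the sge memory directly instead of copying it;
the call below illustrates the intended API only and is not part of
this patch:

	/* addr is unused and skb_offset is 0 for RXE_FRAG_FROM_MR */
	err = rxe_copy_dma_data(skb, qp->pd, 0, &wqe->dma, NULL, 0,
				payload, RXE_FRAG_FROM_MR);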

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c |  17 ++--
 drivers/infiniband/sw/rxe/rxe_loc.h  |   5 +-
 drivers/infiniband/sw/rxe/rxe_mr.c   | 122 ++++++++++++---------------
 drivers/infiniband/sw/rxe/rxe_req.c  |  11 ++-
 drivers/infiniband/sw/rxe/rxe_resp.c |   7 +-
 5 files changed, 79 insertions(+), 83 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 77640e35ae88..3c1ecc88446d 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -352,11 +352,14 @@ static inline enum comp_state do_read(struct rxe_qp *qp,
 				      struct rxe_pkt_info *pkt,
 				      struct rxe_send_wqe *wqe)
 {
+	struct sk_buff *skb = PKT_TO_SKB(pkt);
+	int skb_offset = 0;
 	int ret;
 
-	ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
-			&wqe->dma, payload_addr(pkt),
-			payload_size(pkt), RXE_COPY_TO_MR);
+	ret = rxe_copy_dma_data(skb, qp->pd, IB_ACCESS_LOCAL_WRITE,
+				&wqe->dma, payload_addr(pkt),
+				skb_offset, payload_size(pkt),
+				RXE_COPY_TO_MR);
 	if (ret) {
 		wqe->status = IB_WC_LOC_PROT_ERR;
 		return COMPST_ERROR;
@@ -372,13 +375,15 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp,
 					struct rxe_pkt_info *pkt,
 					struct rxe_send_wqe *wqe)
 {
+	struct sk_buff *skb = NULL;
+	int skb_offset = 0;
 	int ret;
 
 	u64 atomic_orig = atmack_orig(pkt);
 
-	ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
-			&wqe->dma, &atomic_orig,
-			sizeof(u64), RXE_COPY_TO_MR);
+	ret = rxe_copy_dma_data(skb, qp->pd, IB_ACCESS_LOCAL_WRITE,
+				&wqe->dma, &atomic_orig,
+				skb_offset, sizeof(u64), RXE_COPY_TO_MR);
 	if (ret) {
 		wqe->status = IB_WC_LOC_PROT_ERR;
 		return COMPST_ERROR;
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 4c30ffaccc92..dbead759123d 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -78,8 +78,9 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum rxe_mr_copy_op op);
 int rxe_num_dma_frags(const struct rxe_pd *pd, const struct rxe_dma_info *dma,
 		      int length);
-int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
-	      void *addr, int length, enum rxe_mr_copy_op op);
+int rxe_copy_dma_data(struct sk_buff *skb, struct rxe_pd *pd, int access,
+		      struct rxe_dma_info *dma, void *addr,
+		      int skb_offset, int length, enum rxe_mr_copy_op op);
 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length);
 struct rxe_mr *lookup_mr(const struct rxe_pd *pd, int access, u32 key,
 			 enum rxe_mr_lookup_type type);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 99d0b5afefc3..6fe5bbe43a60 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -587,100 +587,84 @@ int rxe_num_dma_frags(const struct rxe_pd *pd, const struct rxe_dma_info *dma,
 	return num_frags;
 }
 
-/* copy data in or out of a wqe, i.e. sg list
- * under the control of a dma descriptor
+/**
+ * rxe_copy_dma_data() - transfer data between a packet and a wqe
+ * @skb: packet buffer (FRAG MR only)
+ * @pd: PD which MRs must match
+ * @access: access permission for MRs in sge (TO MR only)
+ * @dma: dma info from a wqe
+ * @addr: payload address in packet (TO/FROM MR only)
+ * @skb_offset: offset of data in skb (RXE_FRAG_TO_MR only)
+ * @length: payload length
+ * @op: copy operation (RXE_COPY_TO/FROM_MR or RXE_FRAG_TO/FROM_MR)
+ *
+ * Iterate over scatter/gather list in dma info starting from the
+ * current location until the payload length is used up and for each
+ * entry copy or build a frag list referencing the MR obtained from
+ * the lkey in the sge. This routine is called once for each packet
+ * sent or received to/from the wqe.
+ *
+ * Returns: 0 on success or an error
  */
-int copy_data(
-	struct rxe_pd		*pd,
-	int			access,
-	struct rxe_dma_info	*dma,
-	void			*addr,
-	int			length,
-	enum rxe_mr_copy_op	op)
+int rxe_copy_dma_data(struct sk_buff *skb, struct rxe_pd *pd, int access,
+		      struct rxe_dma_info *dma, void *addr,
+		      int skb_offset, int length, enum rxe_mr_copy_op op)
 {
-	int			bytes;
-	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
-	int			offset	= dma->sge_offset;
-	int			resid	= dma->resid;
-	struct rxe_mr		*mr	= NULL;
-	u64			iova;
-	int			err;
+	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
+	int buf_offset = dma->sge_offset;
+	int resid = dma->resid;
+	struct rxe_mr *mr = NULL;
+	int bytes;
+	u64 iova;
+	int err = 0;
 
 	if (length == 0)
 		return 0;
 
-	if (length > resid) {
-		err = -EINVAL;
-		goto err2;
-	}
-
-	if (sge->length && (offset < sge->length)) {
-		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
-		if (!mr) {
-			err = -EINVAL;
-			goto err1;
-		}
-	}
+	if (length > resid)
+		return -EINVAL;
 
 	while (length > 0) {
-		bytes = length;
-
-		if (offset >= sge->length) {
-			if (mr) {
+		if (buf_offset >= sge->length) {
+			if (mr)
 				rxe_put(mr);
-				mr = NULL;
-			}
+
 			sge++;
 			dma->cur_sge++;
-			offset = 0;
-
-			if (dma->cur_sge >= dma->num_sge) {
-				err = -ENOSPC;
-				goto err2;
-			}
+			buf_offset = 0;
 
-			if (sge->length) {
-				mr = lookup_mr(pd, access, sge->lkey,
-					       RXE_LOOKUP_LOCAL);
-				if (!mr) {
-					err = -EINVAL;
-					goto err1;
-				}
-			} else {
+			if (dma->cur_sge >= dma->num_sge)
+				return -ENOSPC;
+			if (!sge->length)
 				continue;
-			}
 		}
 
-		if (bytes > sge->length - offset)
-			bytes = sge->length - offset;
+		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
+		if (!mr)
+			return -EINVAL;
 
+		bytes = min_t(int, length, sge->length - buf_offset);
 		if (bytes > 0) {
-			iova = sge->addr + offset;
-
-			err = rxe_copy_mr_data(NULL, mr, iova, addr,
-					       0, bytes, op);
+			iova = sge->addr + buf_offset;
+			err = rxe_copy_mr_data(skb, mr, iova, addr,
+					       skb_offset, bytes, op);
 			if (err)
-				goto err2;
+				goto err_put;
 
-			offset	+= bytes;
-			resid	-= bytes;
-			length	-= bytes;
-			addr	+= bytes;
+			addr += bytes;
+			buf_offset += bytes;
+			skb_offset += bytes;
+			resid -= bytes;
+			length -= bytes;
 		}
 	}
 
-	dma->sge_offset = offset;
-	dma->resid	= resid;
+	dma->sge_offset = buf_offset;
+	dma->resid = resid;
 
+err_put:
 	if (mr)
 		rxe_put(mr);
-
-	return 0;
-
-err2:
-	if (mr)
-		rxe_put(mr);
-err1:
 	return err;
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index b111a6ddf66c..ea0132797613 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -438,8 +438,10 @@ static void rxe_init_roce_hdrs(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 }
 
 static int rxe_init_payload(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
-			    struct rxe_pkt_info *pkt, u32 payload)
+			    struct rxe_pkt_info *pkt, u32 payload,
+			    struct sk_buff *skb)
 {
+	int skb_offset = 0;
 	void *data;
 	int err = 0;
 
@@ -449,8 +451,9 @@ static int rxe_init_payload(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 		wqe->dma.resid -= payload;
 		wqe->dma.sge_offset += payload;
 	} else {
-		err = copy_data(qp->pd, 0, &wqe->dma, payload_addr(pkt),
-				payload, RXE_COPY_FROM_MR);
+		err = rxe_copy_dma_data(skb, qp->pd, 0, &wqe->dma,
+					payload_addr(pkt), skb_offset,
+					payload, RXE_COPY_FROM_MR);
 	}
 
 	return err;
@@ -495,7 +498,7 @@ static struct sk_buff *rxe_init_req_packet(struct rxe_qp *qp,
 	rxe_init_roce_hdrs(qp, wqe, pkt, pad);
 
 	if (pkt->mask & RXE_WRITE_OR_SEND_MASK) {
-		err = rxe_init_payload(qp, wqe, pkt, payload);
+		err = rxe_init_payload(qp, wqe, pkt, payload, skb);
 		if (err)
 			goto err_out;
 	}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 5f00477544fa..589306de7647 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -521,10 +521,13 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
 				     int data_len)
 {
+	struct sk_buff *skb = NULL;
+	int skb_offset = 0;
 	int err;
 
-	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
-			data_addr, data_len, RXE_COPY_TO_MR);
+	err = rxe_copy_dma_data(skb, qp->pd, IB_ACCESS_LOCAL_WRITE,
+				&qp->resp.wqe->dma, data_addr,
+				skb_offset, data_len, RXE_COPY_TO_MR);
 	if (unlikely(err))
 		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
 					: RESPST_ERR_MALFORMED_WQE;
-- 
2.34.1



* [PATCH for-next v2 11/18] RDMA/rxe: Replace rxe by qp as a parameter
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (8 preceding siblings ...)
  2022-10-31 20:27 ` [PATCH for-next v2 10/18] RDMA/rxe: Extend copy_data to support skb frags Bob Pearson
@ 2022-10-31 20:28 ` Bob Pearson
  2022-10-31 20:28 ` [PATCH for-next v2 12/18] RDMA/rxe: Extend rxe_init_packet() to support frags Bob Pearson
                   ` (6 subsequent siblings)
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:28 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Replace the rxe parameter of rxe_init_packet() with qp.
This will allow some simplification.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h  | 2 +-
 drivers/infiniband/sw/rxe/rxe_net.c  | 3 ++-
 drivers/infiniband/sw/rxe/rxe_req.c  | 2 +-
 drivers/infiniband/sw/rxe/rxe_resp.c | 3 +--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index dbead759123d..4e5fbc33277d 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -100,7 +100,7 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey);
 void rxe_mw_cleanup(struct rxe_pool_elem *elem);
 
 /* rxe_net.c */
-struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
+struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
 				struct rxe_pkt_info *pkt);
 int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
 		struct sk_buff *skb);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 1e4456f5cda2..faabc444d546 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -442,9 +442,10 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	return err;
 }
 
-struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
+struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
 				struct rxe_pkt_info *pkt)
 {
+	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	unsigned int hdr_len;
 	struct sk_buff *skb = NULL;
 	struct net_device *ndev;
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index ea0132797613..0a4b8825bd55 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -491,7 +491,7 @@ static struct sk_buff *rxe_init_req_packet(struct rxe_qp *qp,
 						pad + RXE_ICRC_SIZE;
 
 	/* init skb */
-	skb = rxe_init_packet(rxe, av, pkt);
+	skb = rxe_init_packet(qp, av, pkt);
 	if (unlikely(!skb))
 		goto err_out;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 589306de7647..8503d22f9114 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -665,7 +665,6 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 					  u32 psn,
 					  u8 syndrome)
 {
-	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	struct sk_buff *skb;
 	int paylen;
 	int pad;
@@ -680,7 +679,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 	ack->mask = rxe_opcode[opcode].mask;
 	ack->psn = psn;
 
-	skb = rxe_init_packet(rxe, &qp->pri_av, ack);
+	skb = rxe_init_packet(qp, &qp->pri_av, ack);
 	if (!skb)
 		return NULL;
 
-- 
2.34.1



* [PATCH for-next v2 12/18] RDMA/rxe: Extend rxe_init_packet() to support frags
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (9 preceding siblings ...)
  2022-10-31 20:28 ` [PATCH for-next v2 11/18] RDMA/rxe: Replace rxe by qp as a parameter Bob Pearson
@ 2022-10-31 20:28 ` Bob Pearson
  2022-10-31 20:28 ` [PATCH for-next v2 13/18] RDMA/rxe: Extend rxe_icrc.c " Bob Pearson
                   ` (5 subsequent siblings)
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:28 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Add a subroutine rxe_can_use_sg() to determine if a packet is
a candidate for a fragmented skb. Add a global variable rxe_use_sg
to control whether to support nonlinear skbs. Modify rxe_init_packet()
to test if the packet should use a fragmented skb. Fix up calls to
rxe_init_packet() to use the new API but disable creating nonlinear
skbs for now.

This is in preparation for using fragmented skbs.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe.c      |  3 ++
 drivers/infiniband/sw/rxe/rxe.h      |  3 ++
 drivers/infiniband/sw/rxe/rxe_loc.h  |  2 +-
 drivers/infiniband/sw/rxe/rxe_mr.c   | 12 +++--
 drivers/infiniband/sw/rxe/rxe_net.c  | 79 +++++++++++++++++++++++++---
 drivers/infiniband/sw/rxe/rxe_req.c  |  2 +-
 drivers/infiniband/sw/rxe/rxe_resp.c |  7 ++-
 7 files changed, 92 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 51daac5c4feb..388d8103ec20 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -13,6 +13,9 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
 MODULE_DESCRIPTION("Soft RDMA transport");
 MODULE_LICENSE("Dual BSD/GPL");
 
+/* if true allow using fragmented skbs */
+bool rxe_use_sg;
+
 /* free resources for a rxe device all objects created for this device must
  * have been destroyed
  */
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 30fbdf3bc76a..c78fb497d9c3 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -30,6 +30,9 @@
 #include "rxe_verbs.h"
 #include "rxe_loc.h"
 
+/* if true allow using fragmented skbs */
+extern bool rxe_use_sg;
+
 /*
  * Version 1 and Version 2 are identical on 64 bit machines, but on 32 bit
  * machines Version 2 has a different struct layout.
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 4e5fbc33277d..12fd5811cd79 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -101,7 +101,7 @@ void rxe_mw_cleanup(struct rxe_pool_elem *elem);
 
 /* rxe_net.c */
 struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
-				struct rxe_pkt_info *pkt);
+				struct rxe_pkt_info *pkt, bool *is_frag);
 int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
 		struct sk_buff *skb);
 int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 6fe5bbe43a60..cf538d97c7a5 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -541,7 +541,7 @@ int rxe_num_dma_frags(const struct rxe_pd *pd, const struct rxe_dma_info *dma,
 	struct rxe_mr *mr = NULL;
 	int bytes;
 	u64 iova;
-	int ret;
+	int nf;
 	int num_frags = 0;
 
 	if (length == 0)
@@ -572,18 +572,22 @@ int rxe_num_dma_frags(const struct rxe_pd *pd, const struct rxe_dma_info *dma,
 		bytes = min_t(int, length, sge->length - buf_offset);
 		if (bytes > 0) {
 			iova = sge->addr + buf_offset;
-			ret = rxe_num_mr_frags(mr, iova, length);
-			if (ret < 0) {
+			nf = rxe_num_mr_frags(mr, iova, length);
+			if (nf < 0) {
 				rxe_put(mr);
-				return ret;
+				return nf;
 			}
 
+			num_frags += nf;
 			buf_offset += bytes;
 			resid -= bytes;
 			length -= bytes;
 		}
 	}
 
+	if (mr)
+		rxe_put(mr);
+
 	return num_frags;
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index faabc444d546..c6d8f5c80562 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -442,8 +442,60 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	return err;
 }
 
+/**
+ * rxe_can_use_sg() - determine if packet is a candidate for fragmenting
+ * @qp: the queue pair
+ * @pkt: packet info
+ *
+ * Limit to packets with:
+ *	rxe_use_sg set
+ *	qp is RC
+ *	ndev supports SG
+ *	#sges less than #frags for sends
+ *
+ * Returns: true if all conditions are met, else false
+ */
+static bool rxe_can_use_sg(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+	int length = pkt->paylen - rxe_opcode[pkt->opcode].length
+				 - RXE_ICRC_SIZE;
+	int nf;
+
+	if (!rxe_use_sg)
+		return false;
+	if (qp_type(pkt->qp) != IB_QPT_RC)
+		return false;
+	if (!(rxe->ndev->features & NETIF_F_SG))
+		return false;
+
+	/* check we don't have a pathological sge list with lots of
+	 * short segments. Recall we need one extra frag for icrc.
+	 */
+	if (pkt->mask & RXE_SEND_MASK) {
+		nf = rxe_num_dma_frags(qp->pd, &pkt->wqe->dma, length);
+		return (nf >= 0 && nf <= MAX_SKB_FRAGS - 1) ? true : false;
+	}
+
+	return true;
+}
+
+#define RXE_MIN_SKB_SIZE (256)
+
+/**
+ * rxe_init_packet() - allocate and initialize a new skb
+ * @qp: the queue pair
+ * @av: remote address vector
+ * @pkt: packet info
+ * @frag: optional return flag for a fragmented skb
+ *	  if frag == NULL on entry do not use a fragmented skb
+ *	  on return, if frag is not NULL, *frag is set to true
+ *	  if the packet will be fragmented, else false
+ *
+ * Returns: an skb on success else NULL
+ */
 struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
-				struct rxe_pkt_info *pkt)
+				struct rxe_pkt_info *pkt, bool *frag)
 {
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	unsigned int hdr_len;
@@ -451,6 +503,7 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
 	struct net_device *ndev;
 	const struct ib_gid_attr *attr;
 	const int port_num = 1;
+	int skb_size;
 
 	attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index);
 	if (IS_ERR(attr))
@@ -469,9 +522,19 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
 		rcu_read_unlock();
 		goto out;
 	}
-	skb = alloc_skb(pkt->paylen + hdr_len + LL_RESERVED_SPACE(ndev),
-			GFP_ATOMIC);
 
+	skb_size = LL_RESERVED_SPACE(ndev) + hdr_len + pkt->paylen;
+	if (frag) {
+		if (rxe_use_sg && (skb_size > RXE_MIN_SKB_SIZE) &&
+		    rxe_can_use_sg(qp, pkt)) {
+			skb_size = RXE_MIN_SKB_SIZE;
+			*frag = true;
+		} else {
+			*frag = false;
+		}
+	}
+
+	skb = alloc_skb(skb_size, GFP_ATOMIC);
 	if (unlikely(!skb)) {
 		rcu_read_unlock();
 		goto out;
@@ -480,7 +543,7 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
 	skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev));
 
 	/* FIXME: hold reference to this netdev until life of this skb. */
-	skb->dev	= ndev;
+	skb->dev = ndev;
 	rcu_read_unlock();
 
 	if (av->network_type == RXE_NETWORK_TYPE_IPV4)
@@ -488,10 +551,10 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
 	else
 		skb->protocol = htons(ETH_P_IPV6);
 
-	pkt->rxe	= rxe;
-	pkt->port_num	= port_num;
-	pkt->hdr	= skb_put(skb, pkt->paylen);
-	pkt->mask	|= RXE_GRH_MASK;
+	if (frag && *frag)
+		pkt->hdr = skb_put(skb, rxe_opcode[pkt->opcode].length);
+	else
+		pkt->hdr = skb_put(skb, pkt->paylen);
 
 out:
 	rdma_put_gid_attr(attr);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 0a4b8825bd55..71a65f2a5d6d 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -491,7 +491,7 @@ static struct sk_buff *rxe_init_req_packet(struct rxe_qp *qp,
 						pad + RXE_ICRC_SIZE;
 
 	/* init skb */
-	skb = rxe_init_packet(qp, av, pkt);
+	skb = rxe_init_packet(qp, av, pkt, NULL);
 	if (unlikely(!skb))
 		goto err_out;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 8503d22f9114..8868415b71b6 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -665,6 +665,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 					  u32 psn,
 					  u8 syndrome)
 {
+	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	struct sk_buff *skb;
 	int paylen;
 	int pad;
@@ -672,14 +673,16 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 
 	pad = (-payload) & 0x3;
 	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
-	ack->paylen = paylen;
 
+	ack->rxe = rxe;
 	ack->qp = qp;
 	ack->opcode = opcode;
 	ack->mask = rxe_opcode[opcode].mask;
+	ack->paylen = paylen;
 	ack->psn = psn;
+	ack->port_num = 1;
 
-	skb = rxe_init_packet(qp, &qp->pri_av, ack);
+	skb = rxe_init_packet(qp, &qp->pri_av, ack, NULL);
 	if (!skb)
 		return NULL;
 
-- 
2.34.1



* [PATCH for-next v2 13/18] RDMA/rxe: Extend rxe_icrc.c to support frags
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (10 preceding siblings ...)
  2022-10-31 20:28 ` [PATCH for-next v2 12/18] RDMA/rxe: Extend rxe_init_packet() to support frags Bob Pearson
@ 2022-10-31 20:28 ` Bob Pearson
  2022-10-31 20:28 ` [PATCH for-next v2 14/18] RDMA/rxe: Extend rxe_init_req_packet() for frags Bob Pearson
                   ` (4 subsequent siblings)
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:28 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Extend the subroutines rxe_icrc_generate() and rxe_icrc_check()
to support skb frags.

This is in preparation for supporting fragmented skbs.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_icrc.c | 65 ++++++++++++++++++++++++----
 drivers/infiniband/sw/rxe/rxe_net.c  | 55 ++++++++++++++++++-----
 drivers/infiniband/sw/rxe/rxe_recv.c |  1 +
 3 files changed, 100 insertions(+), 21 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index 46bb07c5c4df..699730a13c92 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -63,7 +63,7 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len)
 
 /**
  * rxe_icrc_hdr() - Compute the partial ICRC for the network and transport
- *		  headers of a packet.
+ *		    headers of a packet.
  * @skb: packet buffer
  * @pkt: packet information
  *
@@ -129,6 +129,56 @@ static __be32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 	return crc;
 }
 
+/**
+ * rxe_icrc_payload() - Compute the ICRC for a packet payload and also
+ *			compute the address of the icrc in the packet.
+ * @skb: packet buffer
+ * @pkt: packet information
+ * @icrc: current icrc i.e. including headers
+ * @icrcp: returned pointer to icrc in skb
+ *
+ * Return: the partial ICRC extended over the packet payload and pad
+ */
+static __be32 rxe_icrc_payload(struct sk_buff *skb, struct rxe_pkt_info *pkt,
+			       __be32 icrc, __be32 **icrcp)
+{
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+	skb_frag_t *frag;
+	u8 *addr;
+	int hdr_len;
+	int len;
+	int i;
+
+	/* handle any payload left in the linear buffer */
+	hdr_len = rxe_opcode[pkt->opcode].length;
+	addr = pkt->hdr + hdr_len;
+	len = skb_tail_pointer(skb) - skb_transport_header(skb)
+		- sizeof(struct udphdr) - hdr_len;
+	if (!shinfo->nr_frags) {
+		len -= RXE_ICRC_SIZE;
+		*icrcp = (__be32 *)(addr + len);
+	}
+	if (len > 0)
+		icrc = rxe_crc32(pkt->rxe, icrc, payload_addr(pkt), len);
+	WARN_ON(len < 0);
+
+	/* handle any payload in frags */
+	for (i = 0; i < shinfo->nr_frags; i++) {
+		frag = &shinfo->frags[i];
+		addr = page_to_virt(frag->bv_page) + frag->bv_offset;
+		len = frag->bv_len;
+		if (i == shinfo->nr_frags - 1) {
+			len -= RXE_ICRC_SIZE;
+			*icrcp = (__be32 *)(addr + len);
+		}
+		if (len > 0)
+			icrc = rxe_crc32(pkt->rxe, icrc, addr, len);
+		WARN_ON(len < 0);
+	}
+
+	return icrc;
+}
+
 /**
  * rxe_icrc_check() - Compute ICRC for a packet and compare to the ICRC
  *		      delivered in the packet.
@@ -143,13 +193,11 @@ int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 	__be32 pkt_icrc;
 	__be32 icrc;
 
-	icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE);
-	pkt_icrc = *icrcp;
-
 	icrc = rxe_icrc_hdr(skb, pkt);
-	icrc = rxe_crc32(pkt->rxe, icrc, (u8 *)payload_addr(pkt),
-				payload_size(pkt) + bth_pad(pkt));
+	icrc = rxe_icrc_payload(skb, pkt, icrc, &icrcp);
+
 	icrc = ~icrc;
+	pkt_icrc = *icrcp;
 
 	if (unlikely(icrc != pkt_icrc))
 		return -EINVAL;
@@ -167,9 +215,8 @@ void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 	__be32 *icrcp;
 	__be32 icrc;
 
-	icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE);
 	icrc = rxe_icrc_hdr(skb, pkt);
-	icrc = rxe_crc32(pkt->rxe, icrc, (u8 *)payload_addr(pkt),
-				payload_size(pkt) + bth_pad(pkt));
+	icrc = rxe_icrc_payload(skb, pkt, icrc, &icrcp);
+
 	*icrcp = ~icrc;
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index c6d8f5c80562..395e9d7d81c3 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -134,32 +134,51 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	struct rxe_dev *rxe;
 	struct net_device *ndev = skb->dev;
 	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+	u8 opcode;
+	u8 buf[1];
+	u8 *p;
 
-	/* takes a reference on rxe->ib_dev
-	 * drop when skb is freed
-	 */
+	/* Takes a reference on rxe->ib_dev. Drop when skb is freed */
 	rxe = rxe_get_dev_from_net(ndev);
 	if (!rxe && is_vlan_dev(ndev))
 		rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev));
 	if (!rxe)
-		goto drop;
+		goto err_drop;
 
-	if (skb_linearize(skb)) {
-		ib_device_put(&rxe->ib_dev);
-		goto drop;
+	/* Get bth opcode out of skb */
+	p = skb_header_pointer(skb, sizeof(struct udphdr), 1, buf);
+	if (!p)
+		goto err_device_put;
+	opcode = *p;
+
+	/* If using fragmented skbs make sure roce headers
+	 * are in linear buffer else make skb linear
+	 */
+	if (rxe_use_sg && skb_is_nonlinear(skb)) {
+		int delta = rxe_opcode[opcode].length -
+			(skb_headlen(skb) - sizeof(struct udphdr));
+
+		if (delta > 0 && !__pskb_pull_tail(skb, delta))
+			goto err_device_put;
+	} else {
+		if (skb_linearize(skb))
+			goto err_device_put;
 	}
 
 	udph = udp_hdr(skb);
 	pkt->rxe = rxe;
 	pkt->port_num = 1;
 	pkt->hdr = (u8 *)(udph + 1);
-	pkt->mask = RXE_GRH_MASK;
+	pkt->mask = rxe_opcode[opcode].mask | RXE_GRH_MASK;
 	pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);
 
 	rxe_rcv(skb);
 
 	return 0;
-drop:
+
+err_device_put:
+	ib_device_put(&rxe->ib_dev);
+err_drop:
 	kfree_skb(skb);
 
 	return 0;
@@ -385,21 +404,32 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
  */
 static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 {
-	memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
+	struct rxe_pkt_info *newpkt;
+	int err;
 
+	/* make loopback line up with rxe_udp_encap_recv */
 	if (skb->protocol == htons(ETH_P_IP))
 		skb_pull(skb, sizeof(struct iphdr));
 	else
 		skb_pull(skb, sizeof(struct ipv6hdr));
+	skb_reset_transport_header(skb);
+
+	newpkt = SKB_TO_PKT(skb);
+	memcpy(newpkt, pkt, sizeof(*newpkt));
+	newpkt->hdr = skb_transport_header(skb) + sizeof(struct udphdr);
 
 	if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev))) {
 		kfree_skb(skb);
-		return -EIO;
+		err = -EINVAL;
+		goto drop;
 	}
 
 	rxe_rcv(skb);
-
 	return 0;
+
+drop:
+	kfree_skb(skb);
+	return err;
 }
 
 int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
@@ -415,6 +445,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 		goto drop;
 	}
 
+	/* skb->data points at IP header */
 	rxe_icrc_generate(skb, pkt);
 
 	if (pkt->mask & RXE_LOOPBACK_MASK)
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 434a693cd4a5..ba786e5c6266 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -329,6 +329,7 @@ void rxe_rcv(struct sk_buff *skb)
 	if (unlikely(err))
 		goto drop;
 
+	/* skb->data points at UDP header */
 	err = rxe_icrc_check(skb, pkt);
 	if (unlikely(err))
 		goto drop;
-- 
2.34.1



* [PATCH for-next v2 14/18] RDMA/rxe: Extend rxe_init_req_packet() for frags
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (11 preceding siblings ...)
  2022-10-31 20:28 ` [PATCH for-next v2 13/18] RDMA/rxe: Extend rxe_icrc.c " Bob Pearson
@ 2022-10-31 20:28 ` Bob Pearson
  2022-10-31 20:28 ` [PATCH for-next v2 15/18] RDMA/rxe: Extend response packets " Bob Pearson
                   ` (3 subsequent siblings)
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:28 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Add code to rxe_init_req_packet() to allocate space for the
pad and icrc if the skb is fragmented.

This is in preparation for supporting fragmented skbs.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h |  9 +++-
 drivers/infiniband/sw/rxe/rxe_req.c | 74 ++++++++++++++++++++++++-----
 2 files changed, 71 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 12fd5811cd79..cab6acad7a83 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -179,8 +179,15 @@ void rxe_srq_cleanup(struct rxe_pool_elem *elem);
 
 void rxe_dealloc(struct ib_device *ib_dev);
 
-int rxe_completer(void *arg);
+/* rxe_req.c */
+int rxe_prepare_pad_icrc(struct rxe_pkt_info *pkt, struct sk_buff *skb,
+			  int payload, bool frag);
 int rxe_requester(void *arg);
+
+/* rxe_comp.c */
+int rxe_completer(void *arg);
+
+/* rxe_resp.c */
 int rxe_responder(void *arg);
 
 /* rxe_icrc.c */
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 71a65f2a5d6d..984e3e957aef 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -438,27 +438,79 @@ static void rxe_init_roce_hdrs(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 }
 
 static int rxe_init_payload(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
-			    struct rxe_pkt_info *pkt, u32 payload,
-			    struct sk_buff *skb)
+			    struct rxe_pkt_info *pkt, int pad, u32 payload,
+			    struct sk_buff *skb, bool frag)
 {
+	int len = skb_tailroom(skb);
+	int tot_len = payload + pad + RXE_ICRC_SIZE;
+	int access = 0;
 	int skb_offset = 0;
+	int op;
+	void *addr;
 	void *data;
 	int err = 0;
 
 	if (wqe->wr.send_flags & IB_SEND_INLINE) {
+		if (WARN_ON(frag))
+			return -EINVAL;
+		if (len < tot_len)
+			return -EINVAL;
 		data = &wqe->dma.inline_data[wqe->dma.sge_offset];
 		memcpy(payload_addr(pkt), data, payload);
 		wqe->dma.resid -= payload;
 		wqe->dma.sge_offset += payload;
 	} else {
-		err = rxe_copy_dma_data(skb, qp->pd, 0, &wqe->dma,
-					payload_addr(pkt), skb_offset,
-					payload, RXE_COPY_FROM_MR);
+		op = frag ? RXE_FRAG_FROM_MR : RXE_COPY_FROM_MR;
+		addr = frag ? NULL : payload_addr(pkt);
+		err = rxe_copy_dma_data(skb, qp->pd, access, &wqe->dma,
+					addr, skb_offset, payload, op);
 	}
 
 	return err;
 }
 
+/**
+ * rxe_prepare_pad_icrc() - Alloc space if fragmented and init pad and icrc
+ * @pkt: packet info
+ * @skb: packet buffer
+ * @payload: roce payload length in bytes
+ * @frag: true if skb is fragmented
+ *
+ * Returns: 0 on success else an error
+ */
+int rxe_prepare_pad_icrc(struct rxe_pkt_info *pkt, struct sk_buff *skb,
+			 int payload, bool frag)
+{
+	struct rxe_phys_buf dmabuf;
+	size_t offset;
+	u64 iova;
+	u8 *addr;
+	int err = 0;
+	int pad = (-payload) & 0x3;
+
+	if (frag) {
+		/* allocate bytes at the end of the skb linear buffer
+		 * and build a frag pointing at it
+		 */
+		WARN_ON((skb->end - skb->tail) < 8);
+		addr = skb_end_pointer(skb) - RXE_ICRC_SIZE - pad;
+		iova = (uintptr_t)addr;
+		dmabuf.addr = iova & PAGE_MASK;
+		offset = iova & ~PAGE_MASK;
+		err = rxe_add_frag(skb, &dmabuf, pad + RXE_ICRC_SIZE, offset);
+		if (err)
+			goto err;
+	} else {
+		addr = payload_addr(pkt) + payload;
+	}
+
+	/* init pad and icrc to zero */
+	memset(addr, 0, pad + RXE_ICRC_SIZE);
+
+err:
+	return err;
+}
+
 static struct sk_buff *rxe_init_req_packet(struct rxe_qp *qp,
 					   struct rxe_send_wqe *wqe,
 					   int opcode, u32 payload,
@@ -468,9 +520,9 @@ static struct sk_buff *rxe_init_req_packet(struct rxe_qp *qp,
 	struct sk_buff *skb = NULL;
 	struct rxe_av *av;
 	struct rxe_ah *ah = NULL;
-	void *padp;
 	int pad;
 	int err = -EINVAL;
+	bool frag = false;
 
 	pkt->rxe = rxe;
 	pkt->opcode = opcode;
@@ -498,15 +550,15 @@ static struct sk_buff *rxe_init_req_packet(struct rxe_qp *qp,
 	rxe_init_roce_hdrs(qp, wqe, pkt, pad);
 
 	if (pkt->mask & RXE_WRITE_OR_SEND_MASK) {
-		err = rxe_init_payload(qp, wqe, pkt, payload, skb);
+		err = rxe_init_payload(qp, wqe, pkt, pad, payload, skb, frag);
 		if (err)
 			goto err_out;
 	}
 
-	if (pad) {
-		padp = payload_addr(pkt) + payload;
-		memset(padp, 0, pad);
-	}
+	/* handle pad and icrc */
+	err = rxe_prepare_pad_icrc(pkt, skb, payload, frag);
+	if (err)
+		goto err_out;
 
 	/* IP and UDP network headers */
 	err = rxe_prepare(av, pkt, skb);
-- 
2.34.1



* [PATCH for-next v2 15/18] RDMA/rxe: Extend response packets for frags
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (12 preceding siblings ...)
  2022-10-31 20:28 ` [PATCH for-next v2 14/18] RDMA/rxe: Extend rxe_init_req_packet() for frags Bob Pearson
@ 2022-10-31 20:28 ` Bob Pearson
  2022-10-31 20:28 ` [PATCH for-next v2 16/18] RDMA/rxe: Extend send/write_data_in() " Bob Pearson
                   ` (2 subsequent siblings)
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:28 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Extend prepare_ack_packet(), read_reply() and send_common_ack() in
rxe_resp.c to support fragmented skbs.  Adjust calls to these routines
for the changed API.

This is in preparation for using fragmented skbs.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 89 +++++++++++++++++-----------
 1 file changed, 55 insertions(+), 34 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 8868415b71b6..905e19ee9ca5 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -660,10 +660,8 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
 
 static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 					  struct rxe_pkt_info *ack,
-					  int opcode,
-					  int payload,
-					  u32 psn,
-					  u8 syndrome)
+					  int opcode, int payload, u32 psn,
+					  u8 syndrome, bool *fragp)
 {
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	struct sk_buff *skb;
@@ -682,7 +680,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 	ack->psn = psn;
 	ack->port_num = 1;
 
-	skb = rxe_init_packet(qp, &qp->pri_av, ack, NULL);
+	skb = rxe_init_packet(qp, &qp->pri_av, ack, fragp);
 	if (!skb)
 		return NULL;
 
@@ -698,12 +696,14 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 		atmack_set_orig(ack, qp->resp.res->atomic.orig_val);
 
 	err = rxe_prepare(&qp->pri_av, ack, skb);
-	if (err) {
-		kfree_skb(skb);
-		return NULL;
-	}
+	if (err)
+		goto err_free_skb;
 
 	return skb;
+
+err_free_skb:
+	kfree_skb(skb);
+	return NULL;
 }
 
 /**
@@ -775,6 +775,8 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 	struct resp_res *res = qp->resp.res;
 	struct rxe_mr *mr;
 	int skb_offset = 0;
+	bool frag;
+	enum rxe_mr_copy_op op;
 
 	if (!res) {
 		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
@@ -787,8 +789,10 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 			qp->resp.mr = NULL;
 		} else {
 			mr = rxe_recheck_mr(qp, res->read.rkey);
-			if (!mr)
-				return RESPST_ERR_RKEY_VIOLATION;
+			if (!mr) {
+				state = RESPST_ERR_RKEY_VIOLATION;
+				goto err_out;
+			}
 		}
 
 		if (res->read.resid <= mtu)
@@ -797,8 +801,10 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
 	} else {
 		mr = rxe_recheck_mr(qp, res->read.rkey);
-		if (!mr)
-			return RESPST_ERR_RKEY_VIOLATION;
+		if (!mr) {
+			state = RESPST_ERR_RKEY_VIOLATION;
+			goto err_out;
+		}
 
 		if (res->read.resid > mtu)
 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
@@ -806,35 +812,35 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
 	}
 
-	res->state = rdatm_res_state_next;
-
 	payload = min_t(int, res->read.resid, mtu);
 
 	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
-				 res->cur_psn, AETH_ACK_UNLIMITED);
-	if (!skb)
-		return RESPST_ERR_RNR;
+				 res->cur_psn, AETH_ACK_UNLIMITED, &frag);
+	if (!skb) {
+		state = RESPST_ERR_RNR;
+		goto err_put_mr;
+	}
 
+	op = frag ? RXE_FRAG_FROM_MR : RXE_COPY_FROM_MR;
 	err = rxe_copy_mr_data(skb, mr, res->read.va, payload_addr(&ack_pkt),
-			       skb_offset, payload, RXE_COPY_FROM_MR);
+			       skb_offset, payload, op);
 	if (err) {
-		kfree_skb(skb);
-		rxe_put(mr);
-		return RESPST_ERR_RKEY_VIOLATION;
+		state = RESPST_ERR_RKEY_VIOLATION;
+		goto err_free_skb;
 	}
 
-	if (mr)
-		rxe_put(mr);
-
-	if (bth_pad(&ack_pkt)) {
-		u8 *pad = payload_addr(&ack_pkt) + payload;
-
-		memset(pad, 0, bth_pad(&ack_pkt));
+	err = rxe_prepare_pad_icrc(&ack_pkt, skb, payload, frag);
+	if (err) {
+		state = RESPST_ERR_RNR;
+		goto err_free_skb;
 	}
 
 	err = rxe_xmit_packet(qp, &ack_pkt, skb);
-	if (err)
-		return RESPST_ERR_RNR;
+	if (err) {
+		/* rxe_xmit_packet will consume the packet */
+		state = RESPST_ERR_RNR;
+		goto err_put_mr;
+	}
 
 	res->read.va += payload;
 	res->read.resid -= payload;
@@ -851,6 +857,16 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 		state = RESPST_CLEANUP;
 	}
 
+	/* keep these after all error exits */
+	res->state = rdatm_res_state_next;
+	rxe_put(mr);
+	return state;
+
+err_free_skb:
+	kfree_skb(skb);
+err_put_mr:
+	rxe_put(mr);
+err_out:
 	return state;
 }
 
@@ -1041,14 +1057,19 @@ static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
 				  int opcode, const char *msg)
 {
 	int err;
-	struct rxe_pkt_info ack_pkt;
+	struct rxe_pkt_info ack;
 	struct sk_buff *skb;
+	int payload = 0;
 
-	skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
+	skb = prepare_ack_packet(qp, &ack, opcode, payload,
+				 psn, syndrome, NULL);
 	if (!skb)
 		return -ENOMEM;
 
-	err = rxe_xmit_packet(qp, &ack_pkt, skb);
+	/* doesn't fail if frag == false */
+	(void)rxe_prepare_pad_icrc(&ack, skb, payload, false);
+
+	err = rxe_xmit_packet(qp, &ack, skb);
 	if (err)
 		pr_err_ratelimited("Failed sending %s\n", msg);
 
-- 
2.34.1



* [PATCH for-next v2 16/18] RDMA/rxe: Extend send/write_data_in() for frags
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (13 preceding siblings ...)
  2022-10-31 20:28 ` [PATCH for-next v2 15/18] RDMA/rxe: Extend response packets " Bob Pearson
@ 2022-10-31 20:28 ` Bob Pearson
  2022-10-31 20:28 ` [PATCH for-next v2 17/18] RDMA/rxe: Extend do_read() in rxe_comp.c " Bob Pearson
  2022-10-31 20:28 ` [PATCH for-next v2 18/18] RDMA/rxe: Enable sg code in rxe Bob Pearson
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:28 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Extend send_data_in() and write_data_in() in rxe_resp.c to
support fragmented received skbs.

This is in preparation for using fragmented skbs.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 103 +++++++++++++++++----------
 1 file changed, 65 insertions(+), 38 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 905e19ee9ca5..419e8af235aa 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -518,45 +518,89 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 	return state;
 }
 
-static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
-				     int data_len)
+/**
+ * rxe_send_data_in() - Copy payload data into receive buffer
+ * @qp: The queue pair
+ * @pkt: Request packet info
+ *
+ * Copy the packet payload into the receive buffer at the current offset.
+ * If a UD message also copy the IP header into the receive buffer.
+ *
+ * Returns: RESPST_NONE if successful else an error resp_states value.
+ */
+static enum resp_states rxe_send_data_in(struct rxe_qp *qp,
+					 struct rxe_pkt_info *pkt)
 {
-	struct sk_buff *skb = NULL;
+	struct sk_buff *skb = PKT_TO_SKB(pkt);
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	u8 *data_addr = payload_addr(pkt);
+	int data_len = payload_size(pkt);
+	union rdma_network_hdr hdr;
+	enum rxe_mr_copy_op op;
 	int skb_offset = 0;
 	int err;
 
+	/* Per IBA for UD packets copy the IP header into the receive buffer */
+	if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) {
+		if (skb->protocol == htons(ETH_P_IP)) {
+			memset(&hdr.reserved, 0, sizeof(hdr.reserved));
+			memcpy(&hdr.roce4grh, ip_hdr(skb), sizeof(hdr.roce4grh));
+		} else {
+			memcpy(&hdr.ibgrh, ipv6_hdr(skb), sizeof(hdr));
+		}
+		err = rxe_copy_dma_data(skb, qp->pd, IB_ACCESS_LOCAL_WRITE,
+					&qp->resp.wqe->dma, &hdr, skb_offset,
+					sizeof(hdr), RXE_COPY_TO_MR);
+		if (err)
+			goto err_out;
+	}
+
+	op = nr_frags ? RXE_FRAG_TO_MR : RXE_COPY_TO_MR;
+	skb_offset = data_addr - skb_transport_header(skb);
 	err = rxe_copy_dma_data(skb, qp->pd, IB_ACCESS_LOCAL_WRITE,
 				&qp->resp.wqe->dma, data_addr,
-				skb_offset, data_len, RXE_COPY_TO_MR);
-	if (unlikely(err))
-		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
-					: RESPST_ERR_MALFORMED_WQE;
+				skb_offset, data_len, op);
+	if (err)
+		goto err_out;
 
 	return RESPST_NONE;
+
+err_out:
+	return (err == -ENOSPC) ? RESPST_ERR_LENGTH
+				: RESPST_ERR_MALFORMED_WQE;
 }
 
-static enum resp_states write_data_in(struct rxe_qp *qp,
-				      struct rxe_pkt_info *pkt)
+/**
+ * rxe_write_data_in() - Copy payload data to iova
+ * @qp: The queue pair
+ * @pkt: Request packet info
+ *
+ * Copy the packet payload to current iova and update iova.
+ *
+ * Returns: RESPST_NONE if successful else an error resp_states value.
+ */
+static enum resp_states rxe_write_data_in(struct rxe_qp *qp,
+					  struct rxe_pkt_info *pkt)
 {
 	struct sk_buff *skb = PKT_TO_SKB(pkt);
-	enum resp_states rc = RESPST_NONE;
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	u8 *data_addr = payload_addr(pkt);
 	int data_len = payload_size(pkt);
+	enum rxe_mr_copy_op op;
+	int skb_offset;
 	int err;
-	int skb_offset = 0;
 
+	op = nr_frags ? RXE_FRAG_TO_MR : RXE_COPY_TO_MR;
+	skb_offset = data_addr - skb_transport_header(skb);
 	err = rxe_copy_mr_data(skb, qp->resp.mr, qp->resp.va + qp->resp.offset,
-			  payload_addr(pkt), skb_offset, data_len,
-			  RXE_COPY_TO_MR);
-	if (err) {
-		rc = RESPST_ERR_RKEY_VIOLATION;
-		goto out;
-	}
+			  data_addr, skb_offset, data_len, op);
+	if (err)
+		return RESPST_ERR_RKEY_VIOLATION;
 
 	qp->resp.va += data_len;
 	qp->resp.resid -= data_len;
 
-out:
-	return rc;
+	return RESPST_NONE;
 }
 
 static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
@@ -884,30 +928,13 @@ static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
 static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 {
 	enum resp_states err;
-	struct sk_buff *skb = PKT_TO_SKB(pkt);
-	union rdma_network_hdr hdr;
 
 	if (pkt->mask & RXE_SEND_MASK) {
-		if (qp_type(qp) == IB_QPT_UD ||
-		    qp_type(qp) == IB_QPT_GSI) {
-			if (skb->protocol == htons(ETH_P_IP)) {
-				memset(&hdr.reserved, 0,
-						sizeof(hdr.reserved));
-				memcpy(&hdr.roce4grh, ip_hdr(skb),
-						sizeof(hdr.roce4grh));
-				err = send_data_in(qp, &hdr, sizeof(hdr));
-			} else {
-				err = send_data_in(qp, ipv6_hdr(skb),
-						sizeof(hdr));
-			}
-			if (err)
-				return err;
-		}
-		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
+		err = rxe_send_data_in(qp, pkt);
 		if (err)
 			return err;
 	} else if (pkt->mask & RXE_WRITE_MASK) {
-		err = write_data_in(qp, pkt);
+		err = rxe_write_data_in(qp, pkt);
 		if (err)
 			return err;
 	} else if (pkt->mask & RXE_READ_MASK) {
-- 
2.34.1



* [PATCH for-next v2 17/18] RDMA/rxe: Extend do_read() in rxe_comp.c for frags
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (14 preceding siblings ...)
  2022-10-31 20:28 ` [PATCH for-next v2 16/18] RDMA/rxe: Extend send/write_data_in() " Bob Pearson
@ 2022-10-31 20:28 ` Bob Pearson
  2022-10-31 20:28 ` [PATCH for-next v2 18/18] RDMA/rxe: Enable sg code in rxe Bob Pearson
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:28 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Extend do_read() in rxe_comp.c to support fragmented skbs.

Rename it to rxe_do_read() and adjust its caller.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c | 40 ++++++++++++++++++----------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 3c1ecc88446d..85b3a4a6b55b 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -348,22 +348,34 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 	return COMPST_ERROR;
 }
 
-static inline enum comp_state do_read(struct rxe_qp *qp,
-				      struct rxe_pkt_info *pkt,
-				      struct rxe_send_wqe *wqe)
+/**
+ * rxe_do_read() - Process read reply packet
+ * @qp: The queue pair
+ * @pkt: Packet info
+ * @wqe: The current work request
+ *
+ * Copy payload from incoming read reply packet into current
+ * iova.
+ *
+ * Returns: the next comp_state on success else COMPST_ERROR
+ */
+static inline enum comp_state rxe_do_read(struct rxe_qp *qp,
+					  struct rxe_pkt_info *pkt,
+					  struct rxe_send_wqe *wqe)
 {
 	struct sk_buff *skb = PKT_TO_SKB(pkt);
-	int skb_offset = 0;
-	int ret;
-
-	ret = rxe_copy_dma_data(skb, qp->pd, IB_ACCESS_LOCAL_WRITE,
-				&wqe->dma, payload_addr(pkt),
-				skb_offset, payload_size(pkt),
-				RXE_COPY_TO_MR);
-	if (ret) {
-		wqe->status = IB_WC_LOC_PROT_ERR;
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	u8 *data_addr = payload_addr(pkt);
+	int data_len = payload_size(pkt);
+	enum rxe_mr_copy_op op = nr_frags ? RXE_FRAG_TO_MR : RXE_COPY_TO_MR;
+	int skb_offset = data_addr - skb_transport_header(skb);
+	int err;
+
+	err = rxe_copy_dma_data(skb, qp->pd, IB_ACCESS_LOCAL_WRITE,
+				&wqe->dma, data_addr,
+				skb_offset, data_len, op);
+	if (err)
 		return COMPST_ERROR;
-	}
 
 	if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK))
 		return COMPST_COMP_ACK;
@@ -625,7 +637,7 @@ int rxe_completer(void *arg)
 			break;
 
 		case COMPST_READ:
-			state = do_read(qp, pkt, wqe);
+			state = rxe_do_read(qp, pkt, wqe);
 			break;
 
 		case COMPST_ATOMIC:
-- 
2.34.1



* [PATCH for-next v2 18/18] RDMA/rxe: Enable sg code in rxe
  2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
                   ` (15 preceding siblings ...)
  2022-10-31 20:28 ` [PATCH for-next v2 17/18] RDMA/rxe: Extend do_read() in rxe_comp.c " Bob Pearson
@ 2022-10-31 20:28 ` Bob Pearson
  16 siblings, 0 replies; 36+ messages in thread
From: Bob Pearson @ 2022-10-31 20:28 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Make changes to enable sg code in rxe.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe.c     | 2 +-
 drivers/infiniband/sw/rxe/rxe_req.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 388d8103ec20..fd5e916ecce9 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -14,7 +14,7 @@ MODULE_DESCRIPTION("Soft RDMA transport");
 MODULE_LICENSE("Dual BSD/GPL");
 
 /* if true allow using fragmented skbs */
-bool rxe_use_sg;
+bool rxe_use_sg = true;
 
 /* free resources for a rxe device all objects created for this device must
  * have been destroyed
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 984e3e957aef..a3760a84aa5d 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -521,8 +521,8 @@ static struct sk_buff *rxe_init_req_packet(struct rxe_qp *qp,
 	struct rxe_av *av;
 	struct rxe_ah *ah = NULL;
 	int pad;
+	bool frag;
 	int err = -EINVAL;
-	bool frag = false;
 
 	pkt->rxe = rxe;
 	pkt->opcode = opcode;
@@ -543,7 +543,7 @@ static struct sk_buff *rxe_init_req_packet(struct rxe_qp *qp,
 						pad + RXE_ICRC_SIZE;
 
 	/* init skb */
-	skb = rxe_init_packet(qp, av, pkt, NULL);
+	skb = rxe_init_packet(qp, av, pkt, &frag);
 	if (unlikely(!skb))
 		goto err_out;
 
-- 
2.34.1



* Re: [PATCH for-next v2 05/18] RDMA/rxe: Add sg fragment ops
  2022-10-31 20:27 ` [PATCH for-next v2 05/18] RDMA/rxe: Add sg fragment ops Bob Pearson
@ 2022-11-24 19:05   ` Jason Gunthorpe
  0 siblings, 0 replies; 36+ messages in thread
From: Jason Gunthorpe @ 2022-11-24 19:05 UTC (permalink / raw)
  To: Bob Pearson; +Cc: leon, zyjzyj2000, linux-rdma

On Mon, Oct 31, 2022 at 03:27:54PM -0500, Bob Pearson wrote:
> +/**
> + * enum rxe_mr_copy_op - Operations performed by rxe_copy_mr/dma_data()
> + * @RXE_COPY_TO_MR:	Copy data from packet to MR(s)
> + * @RXE_COPY_FROM_MR:	Copy data from MR(s) to packet
> + * @RXE_FRAG_TO_MR:	Copy data from frag list to MR(s)
> + * @RXE_FRAG_FROM_MR:	Copy data from MR(s) to frag list
> + */
> +enum rxe_mr_copy_op {
> +	RXE_COPY_TO_MR,
> +	RXE_COPY_FROM_MR,
> +	RXE_FRAG_TO_MR,
> +	RXE_FRAG_FROM_MR,

These FRAG ones are not used in this patch, add them in a patch that
implements them

Jason


* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-10-31 20:27 ` [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c Bob Pearson
@ 2022-11-24 19:10   ` Jason Gunthorpe
  2022-11-30 20:53     ` Bob Pearson
  0 siblings, 1 reply; 36+ messages in thread
From: Jason Gunthorpe @ 2022-11-24 19:10 UTC (permalink / raw)
  To: Bob Pearson; +Cc: leon, zyjzyj2000, linux-rdma

On Mon, Oct 31, 2022 at 03:27:55PM -0500, Bob Pearson wrote:
> +int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
> +		 int length, int offset)
> +{
> +	int nr_frags = skb_shinfo(skb)->nr_frags;
> +	skb_frag_t *frag = &skb_shinfo(skb)->frags[nr_frags];
> +
> +	if (nr_frags >= MAX_SKB_FRAGS) {
> +		pr_debug("%s: nr_frags (%d) >= MAX_SKB_FRAGS\n",
> +			__func__, nr_frags);
> +		return -EINVAL;
> +	}
> +
> +	frag->bv_len = length;
> +	frag->bv_offset = offset;
> +	frag->bv_page = virt_to_page(buf->addr);

Assuming this is even OK to do, then please do the xarray conversion
I sketched first:

https://lore.kernel.org/linux-rdma/Y3gvZr6%2FNCii9Avy@nvidia.com/

And this operation is basically a xa_for_each loop taking 'struct page
*' off of the MR's xarray, slicing it, then stuffing into the
skb. Don't call virt_to_page()
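
To make the shape of that loop concrete, here is a minimal sketch (not
part of any patch in this thread). It assumes the converted MR keeps its
struct page pointers in an xarray at mr->page_list with one PAGE_SIZE
page per index, and that rxe_mr_iova_to_index() and
rxe_mr_iova_to_page_offset() helpers exist; it also sidesteps the
unstable-page question below by simply taking a reference per frag.

static int rxe_mr_fill_frags(struct sk_buff *skb, struct rxe_mr *mr,
			     u64 iova, int length)
{
	unsigned long index = rxe_mr_iova_to_index(mr, iova);
	unsigned int offset = rxe_mr_iova_to_page_offset(mr, iova);
	struct page *page;

	while (length > 0) {
		unsigned int bytes = min_t(unsigned int, length,
					   PAGE_SIZE - offset);

		if (skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)
			return -EINVAL;

		page = xa_load(&mr->page_list, index);
		if (!page)
			return -EFAULT;

		/* the skb drops this reference when the frag is released */
		get_page(page);

		/* appends the frag and updates skb->len/data_len/truesize */
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
				offset, bytes, bytes);

		length -= bytes;
		offset = 0;
		index++;
	}

	return 0;
}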

*However* I have no idea if it is even safe to stuff unstable pages
into a skb. Are there other examples doing this? Eg zero copy tcp?

Jason


* Re: [PATCH for-next v2 07/18] RDMA/rxe: Add routine to compute the number of frags
  2022-10-31 20:27 ` [PATCH for-next v2 07/18] RDMA/rxe: Add routine to compute the number of frags Bob Pearson
@ 2022-11-24 19:15   ` Jason Gunthorpe
  0 siblings, 0 replies; 36+ messages in thread
From: Jason Gunthorpe @ 2022-11-24 19:15 UTC (permalink / raw)
  To: Bob Pearson; +Cc: leon, zyjzyj2000, linux-rdma

On Mon, Oct 31, 2022 at 03:27:56PM -0500, Bob Pearson wrote:
> Add a subroutine named rxe_num_mr_frags() to compute the
> number of skb frags needed to hold length bytes in an skb
> when sending data from an mr starting at iova.
> 
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> ---
>  drivers/infiniband/sw/rxe/rxe_loc.h |  1 +
>  drivers/infiniband/sw/rxe/rxe_mr.c  | 68 +++++++++++++++++++++++++++++
>  2 files changed, 69 insertions(+)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
> index 81a611778d44..87fb052c1d0a 100644
> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> @@ -70,6 +70,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
>  int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
>  int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
>  		 int length, int offset);
> +int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length);
>  int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
>  		enum rxe_mr_copy_op op);
>  int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
> index 2dcf37f32330..23abcf2a0198 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mr.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mr.c
> @@ -320,6 +320,74 @@ int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
>  	return 0;
>  }
>  
> +/**
> + * rxe_num_mr_frags() - Compute the number of skb frags needed to copy
> + *			length bytes from an mr to an skb frag list.
> + * @mr: mr to copy data from
> + * @iova: iova in memory region as starting point
> + * @length: number of bytes to transfer
> + *
> + * Returns: the number of frags needed or a negative error
> + */
> +int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length)
> +{

This seems too complicated, and isn't quite right anyhow.

The umem code builds up the SGT by combining physically adjacent pages
into contiguous chunks. The key thing to notice is that it will
combine pages that are not part of the same folio (compound page) into
SGL entries. This is fine and well for a DMA device

However, when you build a skb frag you can only put a folio into
it, as it has exactly one struct page refcount that controls a folio
worth of memory lifetime.

So, eg, if the umem stuff allowed you to create a 64K page size MR, it
doesn't guarantee that the folios are 64K page size, and thus it
doesn't guarantee that you can use 64k skb frags later.

The best you can do is (after the xarray conversion) check what was
stuffed in the xarray and decide what the smallest folio size is within
the MR.

Then this is just simple math, num frags is computed as the number of
folios of smallest size that span the requested IOVA.
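
A version of rxe_num_mr_frags() along those lines might collapse to
something like this (illustrative only; it assumes the xarray conversion
records the smallest folio size seen in the MR in a field such as
mr->min_folio_size, a power of two):

static int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length)
{
	u64 mask = ~((u64)mr->min_folio_size - 1);	/* assumed field */

	if (length <= 0)
		return 0;

	/* number of min-size folios spanned by [iova, iova + length) */
	return (((iova + length - 1) & mask) - (iova & mask)) /
		mr->min_folio_size + 1;
}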

Jason


* Re: [PATCH for-next v2 09/18] RDMA/rxe: Add routine to compute number of frags for dma
  2022-10-31 20:27 ` [PATCH for-next v2 09/18] RDMA/rxe: Add routine to compute number of frags for dma Bob Pearson
@ 2022-11-24 19:16   ` Jason Gunthorpe
  0 siblings, 0 replies; 36+ messages in thread
From: Jason Gunthorpe @ 2022-11-24 19:16 UTC (permalink / raw)
  To: Bob Pearson; +Cc: leon, zyjzyj2000, linux-rdma

On Mon, Oct 31, 2022 at 03:27:58PM -0500, Bob Pearson wrote:
> Add routine named rxe_num_dma_frags() to compute the number of skb
> frags needed to copy length bytes from a dma info struct.
> 
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> ---
>  drivers/infiniband/sw/rxe/rxe_loc.h |  4 +-
>  drivers/infiniband/sw/rxe/rxe_mr.c  | 67 ++++++++++++++++++++++++++++-
>  2 files changed, 69 insertions(+), 2 deletions(-)

I would say this is a nice place to stop and start a new
series. Adding skb frags to MR memory is a self-contained feature.

Jason


* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-11-24 19:10   ` Jason Gunthorpe
@ 2022-11-30 20:53     ` Bob Pearson
  2022-11-30 23:36       ` Jason Gunthorpe
  0 siblings, 1 reply; 36+ messages in thread
From: Bob Pearson @ 2022-11-30 20:53 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: leon, zyjzyj2000, linux-rdma

On 11/24/22 13:10, Jason Gunthorpe wrote:
> On Mon, Oct 31, 2022 at 03:27:55PM -0500, Bob Pearson wrote:
>> +int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
>> +		 int length, int offset)
>> +{
>> +	int nr_frags = skb_shinfo(skb)->nr_frags;
>> +	skb_frag_t *frag = &skb_shinfo(skb)->frags[nr_frags];
>> +
>> +	if (nr_frags >= MAX_SKB_FRAGS) {
>> +		pr_debug("%s: nr_frags (%d) >= MAX_SKB_FRAGS\n",
>> +			__func__, nr_frags);
>> +		return -EINVAL;
>> +	}
>> +
>> +	frag->bv_len = length;
>> +	frag->bv_offset = offset;
>> +	frag->bv_page = virt_to_page(buf->addr);
> 
> Assuming this is even OK to do, then please do the xarray conversion
> I sketched first:
> 
> https://lore.kernel.org/linux-rdma/Y3gvZr6%2FNCii9Avy@nvidia.com/

I've been looking at this. Seems incorrect for IB_MR_TYPE_DMA which
do not carry a page map but simply convert iova to kernel virtual addresses.
This breaks in the mr_copy routine and atomic ops in responder.
There is also a missing rxe_mr_iova_to_index() function. It looks simple enough:
just the number of pages starting from 0.
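
For what it is worth, a minimal sketch of that missing helper (purely
illustrative; it assumes the converted MR records its page size as
mr->page_shift and keeps the starting iova in mr->ibmr.iova):

static unsigned long rxe_mr_iova_to_index(struct rxe_mr *mr, u64 iova)
{
	/* page number within the MR, counting from 0 */
	return (iova >> mr->page_shift) - (mr->ibmr.iova >> mr->page_shift);
}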

I am curious what the benefit of the 'advanced' API for xarrays buys here. Is it just
preallocating all the memory before it gets used?

I am happy to go in this direction and if we do it should be ahead of the other
outstanding changes that touch MRs.

I will try to submit a patch to do that.

Bob

> 
> And this operation is basically a xa_for_each loop taking 'struct page
> *' off of the MR's xarray, slicing it, then stuffing into the
> skb. Don't call virt_to_page()
> 
> *However* I have no idea if it is even safe to stuff unstable pages
> into a skb. Are there other examples doing this? Eg zero copy tcp?
> 
> Jason



* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-11-30 20:53     ` Bob Pearson
@ 2022-11-30 23:36       ` Jason Gunthorpe
  2022-12-01  0:16         ` Bob Pearson
  0 siblings, 1 reply; 36+ messages in thread
From: Jason Gunthorpe @ 2022-11-30 23:36 UTC (permalink / raw)
  To: Bob Pearson; +Cc: leon, zyjzyj2000, linux-rdma

On Wed, Nov 30, 2022 at 02:53:22PM -0600, Bob Pearson wrote:
> On 11/24/22 13:10, Jason Gunthorpe wrote:
> > On Mon, Oct 31, 2022 at 03:27:55PM -0500, Bob Pearson wrote:
> >> +int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
> >> +		 int length, int offset)
> >> +{
> >> +	int nr_frags = skb_shinfo(skb)->nr_frags;
> >> +	skb_frag_t *frag = &skb_shinfo(skb)->frags[nr_frags];
> >> +
> >> +	if (nr_frags >= MAX_SKB_FRAGS) {
> >> +		pr_debug("%s: nr_frags (%d) >= MAX_SKB_FRAGS\n",
> >> +			__func__, nr_frags);
> >> +		return -EINVAL;
> >> +	}
> >> +
> >> +	frag->bv_len = length;
> >> +	frag->bv_offset = offset;
> >> +	frag->bv_page = virt_to_page(buf->addr);
> > 
> > Assuming this is even OK to do, then please do the xarray conversion
> > I sketched first:
> > 
> > https://lore.kernel.org/linux-rdma/Y3gvZr6%2FNCii9Avy@nvidia.com/
> 
> I've been looking at this. Seems incorrect for IB_MR_TYPE_DMA which
> do not carry a page map but simply convert iova to kernel virtual
> addresses.

There is always a struct page involved, even in the kernel case. You
can do virt_to_page on kernel addresses

> I am curious what the benefit of the 'advanced' API for xarrays buys here. Is it just
> preallocating all the memory before it gets used?

It runs quite a bit faster

Jason


* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-11-30 23:36       ` Jason Gunthorpe
@ 2022-12-01  0:16         ` Bob Pearson
  2022-12-01  0:20           ` Jason Gunthorpe
  0 siblings, 1 reply; 36+ messages in thread
From: Bob Pearson @ 2022-12-01  0:16 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: leon, zyjzyj2000, linux-rdma

On 11/30/22 17:36, Jason Gunthorpe wrote:
> On Wed, Nov 30, 2022 at 02:53:22PM -0600, Bob Pearson wrote:
>> On 11/24/22 13:10, Jason Gunthorpe wrote:
>>> On Mon, Oct 31, 2022 at 03:27:55PM -0500, Bob Pearson wrote:
>>>> +int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
>>>> +		 int length, int offset)
>>>> +{
>>>> +	int nr_frags = skb_shinfo(skb)->nr_frags;
>>>> +	skb_frag_t *frag = &skb_shinfo(skb)->frags[nr_frags];
>>>> +
>>>> +	if (nr_frags >= MAX_SKB_FRAGS) {
>>>> +		pr_debug("%s: nr_frags (%d) >= MAX_SKB_FRAGS\n",
>>>> +			__func__, nr_frags);
>>>> +		return -EINVAL;
>>>> +	}
>>>> +
>>>> +	frag->bv_len = length;
>>>> +	frag->bv_offset = offset;
>>>> +	frag->bv_page = virt_to_page(buf->addr);
>>>
>>> Assuming this is even OK to do, then please do the xarray conversion
>>> I sketched first:
>>>
>>> https://lore.kernel.org/linux-rdma/Y3gvZr6%2FNCii9Avy@nvidia.com/
>>
>> I've been looking at this. Seems incorrect for IB_MR_TYPE_DMA which
>> do not carry a page map but simply convert iova to kernel virtual
>> addresses.
> 
> There is always a struct page involved, even in the kernel case. You
> can do virt_to_page on kernel addresses
Agreed but there isn't a page map set up for DMA mr's. You just get the whole kernel
address space. So the call to rxe_mr_copy_xarray() won't work. There isn't an
xarray to copy to/from. Much easier to just leave the DMA mr code in place since
it does what we want very simply. Also you have to treat the DMA mr separately for
atomic ops.

Bob
> 
>> I am curious what the benefit of the 'advanced' API for xarrays buys here. Is it just
>> preallocating all the memory before it gets used?
> 
> It runs quite a bit faster
> 
> Jason



* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-12-01  0:16         ` Bob Pearson
@ 2022-12-01  0:20           ` Jason Gunthorpe
  2022-12-01  0:36             ` Bob Pearson
  0 siblings, 1 reply; 36+ messages in thread
From: Jason Gunthorpe @ 2022-12-01  0:20 UTC (permalink / raw)
  To: Bob Pearson; +Cc: leon, zyjzyj2000, linux-rdma

On Wed, Nov 30, 2022 at 06:16:53PM -0600, Bob Pearson wrote:
> On 11/30/22 17:36, Jason Gunthorpe wrote:
> > On Wed, Nov 30, 2022 at 02:53:22PM -0600, Bob Pearson wrote:
> >> On 11/24/22 13:10, Jason Gunthorpe wrote:
> >>> On Mon, Oct 31, 2022 at 03:27:55PM -0500, Bob Pearson wrote:
> >>>> +int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
> >>>> +		 int length, int offset)
> >>>> +{
> >>>> +	int nr_frags = skb_shinfo(skb)->nr_frags;
> >>>> +	skb_frag_t *frag = &skb_shinfo(skb)->frags[nr_frags];
> >>>> +
> >>>> +	if (nr_frags >= MAX_SKB_FRAGS) {
> >>>> +		pr_debug("%s: nr_frags (%d) >= MAX_SKB_FRAGS\n",
> >>>> +			__func__, nr_frags);
> >>>> +		return -EINVAL;
> >>>> +	}
> >>>> +
> >>>> +	frag->bv_len = length;
> >>>> +	frag->bv_offset = offset;
> >>>> +	frag->bv_page = virt_to_page(buf->addr);
> >>>
> >>> Assuming this is even OK to do, then please do the xarray conversion
> >>> I sketched first:
> >>>
> >>> https://lore.kernel.org/linux-rdma/Y3gvZr6%2FNCii9Avy@nvidia.com/
> >>
> >> I've been looking at this. Seems incorrect for IB_MR_TYPE_DMA which
> >> do not carry a page map but simply convert iova to kernel virtual
> >> addresses.
> > 
> > There is always a struct page involved, even in the kernel case. You
> > can do virt_to_page on kernel addresses

> Agreed but there isn't a page map set up for DMA mr's. You just get the whole kernel
> address space. So the call to rxe_mr_copy_xarray() won't work. There isn't an
> xarray to copy to/from. Much easier to just leave the DMA mr code in place since
> it does what we want very simply. Also you have to treat the DMA mr separately for
> atomic ops.

You mean the all physical memory MR type? It is true, but you still
have to add the kmap and so on. It should be a similar function that
assumes the IOVA is a kernel mapped address and does virt_to_page/etc
instead of the xarray loop.
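
A rough sketch of that kind of helper for the IB_MR_TYPE_DMA case
(illustrative only; the name and exact signature are invented here):
the iova is still treated as an already mapped kernel virtual address,
but the copy goes page by page through kmap_local_page().

static void rxe_mr_copy_dma(u64 iova, u8 *addr, int length,
			    enum rxe_mr_copy_op op)
{
	while (length > 0) {
		struct page *page = virt_to_page((void *)(uintptr_t)iova);
		unsigned int offset = offset_in_page(iova);
		unsigned int bytes = min_t(unsigned int, length,
					   PAGE_SIZE - offset);
		u8 *va = kmap_local_page(page);

		if (op == RXE_COPY_TO_MR)
			memcpy(va + offset, addr, bytes);
		else
			memcpy(addr, va + offset, bytes);

		kunmap_local(va);
		iova += bytes;
		addr += bytes;
		length -= bytes;
	}
}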

Jason


* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-12-01  0:20           ` Jason Gunthorpe
@ 2022-12-01  0:36             ` Bob Pearson
  2022-12-01  0:41               ` Jason Gunthorpe
  0 siblings, 1 reply; 36+ messages in thread
From: Bob Pearson @ 2022-12-01  0:36 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: leon, zyjzyj2000, linux-rdma

On 11/30/22 18:20, Jason Gunthorpe wrote:
> On Wed, Nov 30, 2022 at 06:16:53PM -0600, Bob Pearson wrote:
>> On 11/30/22 17:36, Jason Gunthorpe wrote:
>>> On Wed, Nov 30, 2022 at 02:53:22PM -0600, Bob Pearson wrote:
>>>> On 11/24/22 13:10, Jason Gunthorpe wrote:
>>>>> On Mon, Oct 31, 2022 at 03:27:55PM -0500, Bob Pearson wrote:
>>>>>> +int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
>>>>>> +		 int length, int offset)
>>>>>> +{
>>>>>> +	int nr_frags = skb_shinfo(skb)->nr_frags;
>>>>>> +	skb_frag_t *frag = &skb_shinfo(skb)->frags[nr_frags];
>>>>>> +
>>>>>> +	if (nr_frags >= MAX_SKB_FRAGS) {
>>>>>> +		pr_debug("%s: nr_frags (%d) >= MAX_SKB_FRAGS\n",
>>>>>> +			__func__, nr_frags);
>>>>>> +		return -EINVAL;
>>>>>> +	}
>>>>>> +
>>>>>> +	frag->bv_len = length;
>>>>>> +	frag->bv_offset = offset;
>>>>>> +	frag->bv_page = virt_to_page(buf->addr);
>>>>>
>>>>> Assuming this is even OK to do, then please do the xarray conversion
>>>>> I sketched first:
>>>>>
>>>>> https://lore.kernel.org/linux-rdma/Y3gvZr6%2FNCii9Avy@nvidia.com/
>>>>
>>>> I've been looking at this. Seems incorrect for IB_MR_TYPE_DMA which
>>>> do not carry a page map but simply convert iova to kernel virtual
>>>> addresses.
>>>
>>> There is always a struct page involved, even in the kernel case. You
>>> can do virt_to_page on kernel addresses
> 
>> Agreed but there isn't a page map set up for DMA mr's. You just get the whole kernel
>> address space. So the call to rxe_mr_copy_xarray() won't work. There isn't an
>> xarray to copy to/from. Much easier to just leave the DMA mr code in place since
>> it does what we want very simply. Also you have to treat the DMA mr separately for
>> atomic ops.
> 
> You mean the all physical memory MR type? It is true, but you still
> have to add the kmap and so on. It should be a similar function that
> assumes the IOVA is a kernel mapped address and
> does virt_to_page/etc instead of the xarray loop.
> 
> Jason

I'm not looking at my patch that you responded to but at the one you posted to replace
maps by xarrays. The existing rxe driver assumes that if ibmr->type == IB_MR_TYPE_DMA
the iova is just a kernel (virtual) address that is already mapped. Maybe this is not
correct, but it has always worked this way. These are heavily used by storage stacks
(e.g. Lustre) which always use DMA MRs. Since we don't actually do any DMA we don't
need to set up the IOMMU for these and can just do memcpy's without dealing with pages.
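
For reference, what the existing DMA-mr copy amounts to is roughly the following
(a simplified sketch, not the literal rxe_mr_copy() code; names are illustrative):

#include <linux/string.h>
#include <linux/types.h>

/* the iova is treated as an already-mapped kernel virtual address */
static int dma_mr_copy(u64 iova, void *addr, unsigned int length, bool to_mr)
{
	void *kva = (void *)(uintptr_t)iova;

	if (to_mr)
		memcpy(kva, addr, length);
	else
		memcpy(addr, kva, length);

	return 0;
}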

Bob

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-12-01  0:36             ` Bob Pearson
@ 2022-12-01  0:41               ` Jason Gunthorpe
  2022-12-01  5:05                 ` Bob Pearson
  2022-12-01 15:04                 ` Bob Pearson
  0 siblings, 2 replies; 36+ messages in thread
From: Jason Gunthorpe @ 2022-12-01  0:41 UTC (permalink / raw)
  To: Bob Pearson; +Cc: leon, zyjzyj2000, linux-rdma

On Wed, Nov 30, 2022 at 06:36:56PM -0600, Bob Pearson wrote:
> I'm not looking at my patch you responded to but the one you posted to replace maps
> by xarrays.

I see, I botched that part

> The existing rxe driver assumes that if ibmr->type == IB_MR_TYPE_DMA
> that the iova is just a kernel (virtual) address that is already
> mapped.

No, it is not correct

> Maybe this is not correct but it has always worked this way. These
> are heavily used by storage stacks (e.g. Lustre) which always use
> DMA mr's. Since we don't actually do any DMAs we don't need to setup
> the iommu for these and just do memcpy's without dealing with pages.

You still should be doing the kmap

Jason

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-12-01  0:41               ` Jason Gunthorpe
@ 2022-12-01  5:05                 ` Bob Pearson
  2022-12-01 12:51                   ` Jason Gunthorpe
  2022-12-01 15:04                 ` Bob Pearson
  1 sibling, 1 reply; 36+ messages in thread
From: Bob Pearson @ 2022-12-01  5:05 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: leon, zyjzyj2000, linux-rdma

On 11/30/22 18:41, Jason Gunthorpe wrote:
> On Wed, Nov 30, 2022 at 06:36:56PM -0600, Bob Pearson wrote:
>> I'm not looking at my patch you responded to but the one you posted to replace maps
>> by xarrays.
> 
> I see, I botched that part
> 
>> The existing rxe driver assumes that if ibmr->type == IB_MR_TYPE_DMA
>> that the iova is just a kernel (virtual) address that is already
>> mapped.
> 
> No, it is not correct
> 
>> Maybe this is not correct but it has always worked this way. These
>> are heavily used by storage stacks (e.g. Lustre) which always use
>> DMA mr's. Since we don't actually do any DMAs we don't need to setup
>> the iommu for these and just do memcpy's without dealing with pages.
> 
> You still should be doing the kmap
> 
> Jason

Does this have to do with 32-bit machines? I have always tested on 64-bit machines, where
DMA MRs are always already mapped.

Bob

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-12-01  5:05                 ` Bob Pearson
@ 2022-12-01 12:51                   ` Jason Gunthorpe
  0 siblings, 0 replies; 36+ messages in thread
From: Jason Gunthorpe @ 2022-12-01 12:51 UTC (permalink / raw)
  To: Bob Pearson; +Cc: leon, zyjzyj2000, linux-rdma

On Wed, Nov 30, 2022 at 11:05:04PM -0600, Bob Pearson wrote:
> On 11/30/22 18:41, Jason Gunthorpe wrote:
> > On Wed, Nov 30, 2022 at 06:36:56PM -0600, Bob Pearson wrote:
> >> I'm not looking at my patch you responded to but the one you posted to replace maps
> >> by xarrays.
> > 
> > I see, I botched that part
> > 
> >> The existing rxe driver assumes that if ibmr->type == IB_MR_TYPE_DMA
> >> that the iova is just a kernel (virtual) address that is already
> >> mapped.
> > 
> > No, it is not correct
> > 
> >> Maybe this is not correct but it has always worked this way. These
> >> are heavily used by storage stacks (e.g. Lustre) which always use
> >> DMA mr's. Since we don't actually do any DMAs we don't need to setup
> >> the iommu for these and just do memcpy's without dealing with pages.
> > 
> > You still should be doing the kmap
> > 
> > Jason
> 
> Does this have to do with 32 bit machines? I have always tested on 64 bit machines and
> dma mr's are always already mapped.

Originally, but people are making 64-bit machines require kmap for
security purposes.

Jason

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-12-01  0:41               ` Jason Gunthorpe
  2022-12-01  5:05                 ` Bob Pearson
@ 2022-12-01 15:04                 ` Bob Pearson
  2022-12-01 15:16                   ` Bob Pearson
  1 sibling, 1 reply; 36+ messages in thread
From: Bob Pearson @ 2022-12-01 15:04 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: leon, zyjzyj2000, linux-rdma

On 11/30/22 18:41, Jason Gunthorpe wrote:
> On Wed, Nov 30, 2022 at 06:36:56PM -0600, Bob Pearson wrote:
>> I'm not looking at my patch you responded to but the one you posted to replace maps
>> by xarrays.
> 
> I see, I botched that part
> 
>> The existing rxe driver assumes that if ibmr->type == IB_MR_TYPE_DMA
>> that the iova is just a kernel (virtual) address that is already
>> mapped.
> 
> No, it is not correct
> 
>> Maybe this is not correct but it has always worked this way. These
>> are heavily used by storage stacks (e.g. Lustre) which always use
>> DMA mr's. Since we don't actually do any DMAs we don't need to setup
>> the iommu for these and just do memcpy's without dealing with pages.
> 
> You still should be doing the kmap
> 
> Jason

Something was disconnected in my memory. So I went back and looked at Lustre.
It turns out it never uses IB_MR_TYPE_DMA, and for that matter I can't find any
use cases in the rdma tree or online. So the implementation in rxe has almost
certainly never been used.

So I need to choose whether to 'fix' the current implementation or just delete
IB_MR_TYPE_DMA support. I get the idea that I need to convert the iova to a page
and kmap it, but I'm not clear how to do that. This 64-bit number (the iova) needs
to be converted to a struct page *. Without a use case to look at I don't know how
to interpret it. Apparently it's not a virtual address.
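
As a sketch only (the helper name and flag are made up), the two obvious readings
of the iova would convert to a page like this; which one is right depends on what
DMA-mr users actually put in sge->addr:

#include <linux/mm.h>
#include <linux/pfn.h>

static struct page *iova_to_page(u64 iova, bool iova_is_kva)
{
	if (iova_is_kva)
		/* a kernel virtual address in the direct map */
		return virt_to_page((void *)(uintptr_t)iova);

	/* otherwise treat it as a physical address */
	return pfn_to_page(PHYS_PFN(iova));
}

Either way the byte offset within the page is just offset_in_page(iova).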

Bob


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-12-01 15:04                 ` Bob Pearson
@ 2022-12-01 15:16                   ` Bob Pearson
  2022-12-01 15:38                     ` Bob Pearson
  0 siblings, 1 reply; 36+ messages in thread
From: Bob Pearson @ 2022-12-01 15:16 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: leon, zyjzyj2000, linux-rdma

On 12/1/22 09:04, Bob Pearson wrote:
> On 11/30/22 18:41, Jason Gunthorpe wrote:
>> On Wed, Nov 30, 2022 at 06:36:56PM -0600, Bob Pearson wrote:
>>> I'm not looking at my patch you responded to but the one you posted to replace maps
>>> by xarrays.
>>
>> I see, I botched that part
>>
>>> The existing rxe driver assumes that if ibmr->type == IB_MR_TYPE_DMA
>>> that the iova is just a kernel (virtual) address that is already
>>> mapped.
>>
>> No, it is not correct
>>
>>> Maybe this is not correct but it has always worked this way. These
>>> are heavily used by storage stacks (e.g. Lustre) which always use
>>> DMA mr's. Since we don't actually do any DMAs we don't need to setup
>>> the iommu for these and just do memcpy's without dealing with pages.
>>
>> You still should be doing the kmap
>>
>> Jason
> 
> Something was disconnected in my memory. So I went back and looked at lustre.
> Turns out it never uses IB_MR_TYPE_DMA and for that matter I can't find any
> use cases in the rdma tree or online. So, the implementation in rxe has almost
> certainly never been used.
> 
> So I need to choose to 'fix' the current implementation or just delete type dma support.
> I get the idea that I need to convert the iova to a page and kmap it but i'm not
> clear how to do that. This 64 bit numnber (iova) needs to convert to a struct page *.
> Without a use case to look at I don't know how to interpret it. Apparently it's not a
> virtual address.
> 
> Bob
> 

I did find a single use case: the MR created during alloc_pd. The comments seem to
imply that its use is just access to local kernel memory with va == pa. So I am back
to my previous thoughts: memcpy should just work.

Bob

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-12-01 15:16                   ` Bob Pearson
@ 2022-12-01 15:38                     ` Bob Pearson
  2022-12-01 15:39                       ` Jason Gunthorpe
  0 siblings, 1 reply; 36+ messages in thread
From: Bob Pearson @ 2022-12-01 15:38 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: leon, zyjzyj2000, linux-rdma

On 12/1/22 09:16, Bob Pearson wrote:
> On 12/1/22 09:04, Bob Pearson wrote:
>> On 11/30/22 18:41, Jason Gunthorpe wrote:
>>> On Wed, Nov 30, 2022 at 06:36:56PM -0600, Bob Pearson wrote:
>>>> I'm not looking at my patch you responded to but the one you posted to replace maps
>>>> by xarrays.
>>>
>>> I see, I botched that part
>>>
>>>> The existing rxe driver assumes that if ibmr->type == IB_MR_TYPE_DMA
>>>> that the iova is just a kernel (virtual) address that is already
>>>> mapped.
>>>
>>> No, it is not correct
>>>
>>>> Maybe this is not correct but it has always worked this way. These
>>>> are heavily used by storage stacks (e.g. Lustre) which always use
>>>> DMA mr's. Since we don't actually do any DMAs we don't need to setup
>>>> the iommu for these and just do memcpy's without dealing with pages.
>>>
>>> You still should be doing the kmap
>>>
>>> Jason
>>
>> Something was disconnected in my memory. So I went back and looked at lustre.
>> Turns out it never uses IB_MR_TYPE_DMA and for that matter I can't find any
>> use cases in the rdma tree or online. So, the implementation in rxe has almost
>> certainly never been used.
>>
>> So I need to choose to 'fix' the current implementation or just delete type dma support.
>> I get the idea that I need to convert the iova to a page and kmap it but i'm not
>> clear how to do that. This 64 bit numnber (iova) needs to convert to a struct page *.
>> Without a use case to look at I don't know how to interpret it. Apparently it's not a
>> virtual address.
>>
>> Bob
>>
> 
> I did find a single use case for the mr created during alloc_pd. The comments seem
> to imply that the use is just access to local kernel memory with va=pa. So I am back
> to my previous thoughts. Memcpy should just work.
> 
> Bob

Further, looking at ipoib as an example: it builds sge lists using the lkey from
get_dma_mr() and sets sge->addr to a kernel virtual memory address after first calling
ib_dma_map_single(), so the addresses are already mapped for DMA access and visible
before use. They are unmapped after the read/write operation completes. What is the
point of kmapping the addresses after DMA-mapping them?
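
A simplified sketch of that ipoib-style pattern (not the actual ipoib code; the
function is made up for illustration):

#include <rdma/ib_verbs.h>
#include <linux/dma-direction.h>

static int post_kernel_buf(struct ib_device *dev, struct ib_pd *pd,
			   void *buf, unsigned int size, struct ib_sge *sge)
{
	u64 mapping = ib_dma_map_single(dev, buf, size, DMA_TO_DEVICE);

	if (ib_dma_mapping_error(dev, mapping))
		return -ENOMEM;

	/* with rxe's virtual DMA mapping this is just the kernel VA */
	sge->addr   = mapping;
	sge->length = size;
	sge->lkey   = pd->local_dma_lkey; /* historically the get_dma_mr() lkey */

	return 0;
}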

Bob  

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-12-01 15:38                     ` Bob Pearson
@ 2022-12-01 15:39                       ` Jason Gunthorpe
  2022-12-01 17:11                         ` Bob Pearson
  0 siblings, 1 reply; 36+ messages in thread
From: Jason Gunthorpe @ 2022-12-01 15:39 UTC (permalink / raw)
  To: Bob Pearson; +Cc: leon, zyjzyj2000, linux-rdma

On Thu, Dec 01, 2022 at 09:38:10AM -0600, Bob Pearson wrote:

> Further, looking at ipoib as an example, it builds sge lists using the lkey from get_dma_mr()
> and sets the sge->addr to a kernel virtual memory address after previously calling
> ib_dma_map_single() so the addresses are mapped for dma access and visible before use.
> They are unmapped after the read/write operation completes. What is the point of kmapping
> the address after dma mapping them?

Because not everything is ipoib, and things like block will map sgls
with struct pages, not kva.

Jason

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-12-01 15:39                       ` Jason Gunthorpe
@ 2022-12-01 17:11                         ` Bob Pearson
  2022-12-01 18:00                           ` Jason Gunthorpe
  0 siblings, 1 reply; 36+ messages in thread
From: Bob Pearson @ 2022-12-01 17:11 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: leon, zyjzyj2000, linux-rdma

On 12/1/22 09:39, Jason Gunthorpe wrote:
> On Thu, Dec 01, 2022 at 09:38:10AM -0600, Bob Pearson wrote:
> 
>> Further, looking at ipoib as an example, it builds sge lists using the lkey from get_dma_mr()
>> and sets the sge->addr to a kernel virtual memory address after previously calling
>> ib_dma_map_single() so the addresses are mapped for dma access and visible before use.
>> They are unmapped after the read/write operation completes. What is the point of kmapping
>> the address after dma mapping them?
> 
> Because not everything is ipoib, and things like block will map sgls
> with struct pages, not kva.
> 
> Jason

OK, it's working now, but there is a bug in your rxe_mr_fill_pages_from_sgt() routine.
You have a

	if (xas_xa_index && WARN_ON(sg_iter.sg_pgoffset % PAGE_SIZE)) {...}

which seems to assume that sg_pgoffset contains the byte offset into the current page.
But looking at __sg_page_iter_next() it appears to be the offset, in pages, into the
current sg entry, which results in a splat when I run ib_send_bw.
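
For comparison, a sketch (assumed names, not the actual patch) that side-steps the
question by letting the iterator do the page arithmetic, since sg_page_iter_page()
already folds sg_pgoffset in:

#include <linux/scatterlist.h>
#include <linux/xarray.h>

static int fill_pages_from_sgt(struct xarray *pages, struct sg_table *sgt)
{
	struct sg_page_iter sg_iter;
	unsigned long index = 0;
	int err;

	for_each_sgtable_page(sgt, &sg_iter, 0) {
		/*
		 * sg_pgoffset is a count of whole pages into the sg entry;
		 * sg_page_iter_page() already applies it, so no byte-offset
		 * handling is needed here.
		 */
		struct page *page = sg_page_iter_page(&sg_iter);

		err = xa_err(xa_store(pages, index++, page, GFP_KERNEL));
		if (err)
			return err;
	}

	return 0;
}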

Bob

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c
  2022-12-01 17:11                         ` Bob Pearson
@ 2022-12-01 18:00                           ` Jason Gunthorpe
  0 siblings, 0 replies; 36+ messages in thread
From: Jason Gunthorpe @ 2022-12-01 18:00 UTC (permalink / raw)
  To: Bob Pearson; +Cc: leon, zyjzyj2000, linux-rdma

On Thu, Dec 01, 2022 at 11:11:10AM -0600, Bob Pearson wrote:
> On 12/1/22 09:39, Jason Gunthorpe wrote:
> > On Thu, Dec 01, 2022 at 09:38:10AM -0600, Bob Pearson wrote:
> > 
> >> Further, looking at ipoib as an example, it builds sge lists using the lkey from get_dma_mr()
> >> and sets the sge->addr to a kernel virtual memory address after previously calling
> >> ib_dma_map_single() so the addresses are mapped for dma access and visible before use.
> >> They are unmapped after the read/write operation completes. What is the point of kmapping
> >> the address after dma mapping them?
> > 
> > Because not everything is ipoib, and things like block will map sgls
> > with struct pages, not kva.
> > 
> > Jason
> 
> OK it's working now but there is a bug in your rxe_mr_fill_pages_from_sgt() routine.
> You have a
> 
> 	if (xas_xa_index && WARN_ON(sg_iter.sg_pgoffset % PAGE_SIZE)) {...}
> 
> which seems to assume that sg_pgoffset contains the byte offset in the current page.
> But looking at __sg_page_iter_next() it appears that it is the number of pages offset
> in the current sg entry which results in a splat when I run
> ib_send_bw.

The pgoffset depends on the creator of the SGL; I guess something must
be pointing the SGL at a folio and using the sg_pgoffset as a sub-folio
index?

If so, then the kmap calculation has to be adjusted to extract the
number of pages from the sg_pgoffset.
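
A sketch of that adjustment (illustrative names; this is essentially open-coding
sg_page_iter_page()):

#include <linux/highmem.h>
#include <linux/minmax.h>
#include <linux/scatterlist.h>

static void copy_from_sg_page(struct sg_page_iter *iter, void *dst,
			      unsigned int len)
{
	/* sg_pgoffset counts pages, so step that many whole pages in */
	struct page *page = nth_page(sg_page(iter->sg), iter->sg_pgoffset);
	void *va = kmap_local_page(page);

	memcpy(dst, va, min_t(unsigned int, len, PAGE_SIZE));
	kunmap_local(va);
}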

Jason

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2022-12-01 18:00 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
2022-10-31 20:27 ` [PATCH for-next v2 02/18] RDMA/rxe: Isolate request payload code in a subroutine Bob Pearson
2022-10-31 20:27 ` [PATCH for-next v2 03/18] RDMA/rxe: Remove paylen parameter from rxe_init_packet Bob Pearson
2022-10-31 20:27 ` [PATCH for-next v2 04/18] RDMA/rxe: Isolate code to build request packet Bob Pearson
2022-10-31 20:27 ` [PATCH for-next v2 05/18] RDMA/rxe: Add sg fragment ops Bob Pearson
2022-11-24 19:05   ` Jason Gunthorpe
2022-10-31 20:27 ` [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c Bob Pearson
2022-11-24 19:10   ` Jason Gunthorpe
2022-11-30 20:53     ` Bob Pearson
2022-11-30 23:36       ` Jason Gunthorpe
2022-12-01  0:16         ` Bob Pearson
2022-12-01  0:20           ` Jason Gunthorpe
2022-12-01  0:36             ` Bob Pearson
2022-12-01  0:41               ` Jason Gunthorpe
2022-12-01  5:05                 ` Bob Pearson
2022-12-01 12:51                   ` Jason Gunthorpe
2022-12-01 15:04                 ` Bob Pearson
2022-12-01 15:16                   ` Bob Pearson
2022-12-01 15:38                     ` Bob Pearson
2022-12-01 15:39                       ` Jason Gunthorpe
2022-12-01 17:11                         ` Bob Pearson
2022-12-01 18:00                           ` Jason Gunthorpe
2022-10-31 20:27 ` [PATCH for-next v2 07/18] RDMA/rxe: Add routine to compute the number of frags Bob Pearson
2022-11-24 19:15   ` Jason Gunthorpe
2022-10-31 20:27 ` [PATCH for-next v2 08/18] RDMA/rxe: Extend rxe_mr_copy to support skb frags Bob Pearson
2022-10-31 20:27 ` [PATCH for-next v2 09/18] RDMA/rxe: Add routine to compute number of frags for dma Bob Pearson
2022-11-24 19:16   ` Jason Gunthorpe
2022-10-31 20:27 ` [PATCH for-next v2 10/18] RDMA/rxe: Extend copy_data to support skb frags Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 11/18] RDMA/rxe: Replace rxe by qp as a parameter Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 12/18] RDMA/rxe: Extend rxe_init_packet() to support frags Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 13/18] RDMA/rxe: Extend rxe_icrc.c " Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 14/18] RDMA/rxe: Extend rxe_init_req_packet() for frags Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 15/18] RDMA/rxe: Extend response packets " Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 16/18] RDMA/rxe: Extend send/write_data_in() " Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 17/18] RDMA/rxe: Extend do_read() in rxe_comp,c " Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 18/18] RDMA/rxe: Enable sg code in rxe Bob Pearson
