Linux-RDMA Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH for-next v2 0/9] RDMA/rxe: Implement memory windows
@ 2021-04-15  2:54 Bob Pearson
  2021-04-15  2:54 ` [PATCH for-next v2 1/9] RDMA/rxe: Add bind MW fields to rxe_send_wr Bob Pearson
                   ` (8 more replies)
  0 siblings, 9 replies; 17+ messages in thread
From: Bob Pearson @ 2021-04-15  2:54 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

This series of patches implements memory windows for the rdma_rxe
driver. This is a shorter reimplementation of an earlier patch
set. The patches apply to and depend on the current for-next linux
rdma tree.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
v2:
  cleaned up an issue in rdma_user_rxe.h
  cleaned up a collision in rxe_resp.c

Bob Pearson (9):
  RDMA/rxe: Add bind MW fields to rxe_send_wr
  RDMA/rxe: Return errors for add index and key
  RDMA/rxe: Enable MW object pool
  RDMA/rxe: Add ib_alloc_mw and ib_dealloc_mw verbs
  RDMA/rxe: Replace WR_REG_MASK by WR_LOCAL_OP_MASK
  RDMA/rxe: Move local ops to subroutine
  RDMA/rxe: Add support for bind MW work requests
  RDMA/rxe: Implement invalidate MW operations
  RDMA/rxe: Implement memory access through MWs

 drivers/infiniband/sw/rxe/Makefile     |   1 +
 drivers/infiniband/sw/rxe/rxe.c        |   1 +
 drivers/infiniband/sw/rxe/rxe_comp.c   |   1 +
 drivers/infiniband/sw/rxe/rxe_loc.h    |  29 +-
 drivers/infiniband/sw/rxe/rxe_mr.c     |  79 ++++--
 drivers/infiniband/sw/rxe/rxe_mw.c     | 356 +++++++++++++++++++++++++
 drivers/infiniband/sw/rxe/rxe_opcode.c |  11 +-
 drivers/infiniband/sw/rxe/rxe_opcode.h |   3 +-
 drivers/infiniband/sw/rxe/rxe_param.h  |  19 +-
 drivers/infiniband/sw/rxe/rxe_pool.c   |  45 ++--
 drivers/infiniband/sw/rxe/rxe_pool.h   |   8 +-
 drivers/infiniband/sw/rxe/rxe_req.c    | 102 ++++---
 drivers/infiniband/sw/rxe/rxe_resp.c   | 110 +++++---
 drivers/infiniband/sw/rxe/rxe_verbs.c  |   5 +-
 drivers/infiniband/sw/rxe/rxe_verbs.h  |  38 ++-
 include/uapi/rdma/rdma_user_rxe.h      |  34 ++-
 16 files changed, 691 insertions(+), 151 deletions(-)
 create mode 100644 drivers/infiniband/sw/rxe/rxe_mw.c
-- 
2.27.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH for-next v2 1/9] RDMA/rxe: Add bind MW fields to rxe_send_wr
  2021-04-15  2:54 [PATCH for-next v2 0/9] RDMA/rxe: Implement memory windows Bob Pearson
@ 2021-04-15  2:54 ` Bob Pearson
  2021-04-15  2:54 ` [PATCH for-next v2 2/9] RDMA/rxe: Return errors for add index and key Bob Pearson
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 17+ messages in thread
From: Bob Pearson @ 2021-04-15  2:54 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Add fields to struct rxe_send_wr in rdma_user_rxe.h to
support bind MW work requests.

Link: https://lore.kernel.org/linux-rdma/eba02326-013f-1707-0db7-209413d2cd6f@gmail.com/
Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
v2:
  Replaced umw and kmw by mw which will work for user space
  and kernel space if ever needed. Not currently.

 include/uapi/rdma/rdma_user_rxe.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index 068433e2229d..b8f408ceb1a8 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -99,7 +99,17 @@ struct rxe_send_wr {
 			__u32	remote_qkey;
 			__u16	pkey_index;
 		} ud;
+		struct {
+			__aligned_u64	addr;
+			__aligned_u64	length;
+			__u32		mr_lkey;
+			__u32		mw_rkey;
+			__u32	rkey;
+			__u32	access;
+			__u32	flags;
+		} mw;
 		/* reg is only used by the kernel and is not part of the uapi */
+#ifdef __KERNEL__
 		struct {
 			union {
 				struct ib_mr *mr;
@@ -108,6 +118,7 @@ struct rxe_send_wr {
 			__u32	     key;
 			__u32	     access;
 		} reg;
+#endif
 	} wr;
 };
 
-- 
2.27.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH for-next v2 2/9] RDMA/rxe: Return errors for add index and key
  2021-04-15  2:54 [PATCH for-next v2 0/9] RDMA/rxe: Implement memory windows Bob Pearson
  2021-04-15  2:54 ` [PATCH for-next v2 1/9] RDMA/rxe: Add bind MW fields to rxe_send_wr Bob Pearson
@ 2021-04-15  2:54 ` Bob Pearson
  2021-04-15  2:54 ` [PATCH for-next v2 3/9] RDMA/rxe: Enable MW object pool Bob Pearson
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 17+ messages in thread
From: Bob Pearson @ 2021-04-15  2:54 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Modify rxe_add_index() and rxe_add_key() to return an
error if the index or key is already present in the pool.
Currently they print a warning and silently fail with
bad consequences to the caller.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_pool.c | 44 ++++++++++++++++++----------
 drivers/infiniband/sw/rxe/rxe_pool.h |  8 ++---
 2 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index d24901f2af3f..2b795e2fc4b3 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -183,7 +183,7 @@ static u32 alloc_index(struct rxe_pool *pool)
 	return index + pool->index.min_index;
 }
 
-static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
+static int insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
 {
 	struct rb_node **link = &pool->index.tree.rb_node;
 	struct rb_node *parent = NULL;
@@ -195,7 +195,7 @@ static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
 
 		if (elem->index == new->index) {
 			pr_warn("element already exists!\n");
-			goto out;
+			return -EINVAL;
 		}
 
 		if (elem->index > new->index)
@@ -206,11 +206,11 @@ static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
 
 	rb_link_node(&new->index_node, parent, link);
 	rb_insert_color(&new->index_node, &pool->index.tree);
-out:
-	return;
+
+	return 0;
 }
 
-static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
+static int insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
 {
 	struct rb_node **link = &pool->key.tree.rb_node;
 	struct rb_node *parent = NULL;
@@ -226,7 +226,7 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
 
 		if (cmp == 0) {
 			pr_warn("key already exists!\n");
-			goto out;
+			return -EINVAL;
 		}
 
 		if (cmp > 0)
@@ -237,26 +237,32 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
 
 	rb_link_node(&new->key_node, parent, link);
 	rb_insert_color(&new->key_node, &pool->key.tree);
-out:
-	return;
+
+	return 0;
 }
 
-void __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key)
+int __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key)
 {
 	struct rxe_pool *pool = elem->pool;
+	int err;
 
 	memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size);
-	insert_key(pool, elem);
+	err = insert_key(pool, elem);
+
+	return err;
 }
 
-void __rxe_add_key(struct rxe_pool_entry *elem, void *key)
+int __rxe_add_key(struct rxe_pool_entry *elem, void *key)
 {
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
+	int err;
 
 	write_lock_irqsave(&pool->pool_lock, flags);
-	__rxe_add_key_locked(elem, key);
+	err = __rxe_add_key_locked(elem, key);
 	write_unlock_irqrestore(&pool->pool_lock, flags);
+
+	return err;
 }
 
 void __rxe_drop_key_locked(struct rxe_pool_entry *elem)
@@ -276,22 +282,28 @@ void __rxe_drop_key(struct rxe_pool_entry *elem)
 	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
-void __rxe_add_index_locked(struct rxe_pool_entry *elem)
+int __rxe_add_index_locked(struct rxe_pool_entry *elem)
 {
 	struct rxe_pool *pool = elem->pool;
+	int err;
 
 	elem->index = alloc_index(pool);
-	insert_index(pool, elem);
+	err = insert_index(pool, elem);
+
+	return err;
 }
 
-void __rxe_add_index(struct rxe_pool_entry *elem)
+int __rxe_add_index(struct rxe_pool_entry *elem)
 {
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
+	int err;
 
 	write_lock_irqsave(&pool->pool_lock, flags);
-	__rxe_add_index_locked(elem);
+	err = __rxe_add_index_locked(elem);
 	write_unlock_irqrestore(&pool->pool_lock, flags);
+
+	return err;
 }
 
 void __rxe_drop_index_locked(struct rxe_pool_entry *elem)
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 61210b300a78..1feca1bffced 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -111,11 +111,11 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
 /* assign an index to an indexed object and insert object into
  *  pool's rb tree holding and not holding the pool_lock
  */
-void __rxe_add_index_locked(struct rxe_pool_entry *elem);
+int __rxe_add_index_locked(struct rxe_pool_entry *elem);
 
 #define rxe_add_index_locked(obj) __rxe_add_index_locked(&(obj)->pelem)
 
-void __rxe_add_index(struct rxe_pool_entry *elem);
+int __rxe_add_index(struct rxe_pool_entry *elem);
 
 #define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem)
 
@@ -133,11 +133,11 @@ void __rxe_drop_index(struct rxe_pool_entry *elem);
 /* assign a key to a keyed object and insert object into
  * pool's rb tree holding and not holding pool_lock
  */
-void __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key);
+int __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key);
 
 #define rxe_add_key_locked(obj, key) __rxe_add_key_locked(&(obj)->pelem, key)
 
-void __rxe_add_key(struct rxe_pool_entry *elem, void *key);
+int __rxe_add_key(struct rxe_pool_entry *elem, void *key);
 
 #define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key)
 
-- 
2.27.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH for-next v2 3/9] RDMA/rxe: Enable MW object pool
  2021-04-15  2:54 [PATCH for-next v2 0/9] RDMA/rxe: Implement memory windows Bob Pearson
  2021-04-15  2:54 ` [PATCH for-next v2 1/9] RDMA/rxe: Add bind MW fields to rxe_send_wr Bob Pearson
  2021-04-15  2:54 ` [PATCH for-next v2 2/9] RDMA/rxe: Return errors for add index and key Bob Pearson
@ 2021-04-15  2:54 ` Bob Pearson
  2021-04-15  2:54 ` [PATCH for-next v2 4/9] RDMA/rxe: Add ib_alloc_mw and ib_dealloc_mw verbs Bob Pearson
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 17+ messages in thread
From: Bob Pearson @ 2021-04-15  2:54 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Currently the rxe driver has a rxe_mw struct object but
nothing about memory windows is enabled. This patch
turns on memory windows and does some minor cleanup.

Set device attribute in rxe.c so max_mw = MAX_MW.
Change parameters in rxe_param.h so that MAX_MW is the same as MAX_MR.
Reduce the number of MRs and MWs to 4K from 256K.
Add device capability bits for 2a and 2b memory windows.
Removed RXE_MR_TYPE_MW from the rxe_mr_type enum.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe.c       |  1 +
 drivers/infiniband/sw/rxe/rxe_param.h | 19 ++++++++++++-------
 drivers/infiniband/sw/rxe/rxe_verbs.h |  1 -
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 95f0de0c8b49..8e0f9c489cab 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -54,6 +54,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
 	rxe->attr.max_cq			= RXE_MAX_CQ;
 	rxe->attr.max_cqe			= (1 << RXE_MAX_LOG_CQE) - 1;
 	rxe->attr.max_mr			= RXE_MAX_MR;
+	rxe->attr.max_mw			= RXE_MAX_MW;
 	rxe->attr.max_pd			= RXE_MAX_PD;
 	rxe->attr.max_qp_rd_atom		= RXE_MAX_QP_RD_ATOM;
 	rxe->attr.max_res_rd_atom		= RXE_MAX_RES_RD_ATOM;
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 25ab50d9b7c2..742e6ec93686 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -37,7 +37,6 @@ static inline enum ib_mtu eth_mtu_int_to_enum(int mtu)
 enum rxe_device_param {
 	RXE_MAX_MR_SIZE			= -1ull,
 	RXE_PAGE_SIZE_CAP		= 0xfffff000,
-	RXE_MAX_QP			= 0x10000,
 	RXE_MAX_QP_WR			= 0x4000,
 	RXE_DEVICE_CAP_FLAGS		= IB_DEVICE_BAD_PKEY_CNTR
 					| IB_DEVICE_BAD_QKEY_CNTR
@@ -49,7 +48,10 @@ enum rxe_device_param {
 					| IB_DEVICE_RC_RNR_NAK_GEN
 					| IB_DEVICE_SRQ_RESIZE
 					| IB_DEVICE_MEM_MGT_EXTENSIONS
-					| IB_DEVICE_ALLOW_USER_UNREG,
+					| IB_DEVICE_ALLOW_USER_UNREG
+					| IB_DEVICE_MEM_WINDOW
+					| IB_DEVICE_MEM_WINDOW_TYPE_2A
+					| IB_DEVICE_MEM_WINDOW_TYPE_2B,
 	RXE_MAX_SGE			= 32,
 	RXE_MAX_WQE_SIZE		= sizeof(struct rxe_send_wqe) +
 					  sizeof(struct ib_sge) * RXE_MAX_SGE,
@@ -58,7 +60,6 @@ enum rxe_device_param {
 	RXE_MAX_SGE_RD			= 32,
 	RXE_MAX_CQ			= 16384,
 	RXE_MAX_LOG_CQE			= 15,
-	RXE_MAX_MR			= 256 * 1024,
 	RXE_MAX_PD			= 0x7ffc,
 	RXE_MAX_QP_RD_ATOM		= 128,
 	RXE_MAX_RES_RD_ATOM		= 0x3f000,
@@ -67,7 +68,6 @@ enum rxe_device_param {
 	RXE_MAX_MCAST_QP_ATTACH		= 56,
 	RXE_MAX_TOT_MCAST_QP_ATTACH	= 0x70000,
 	RXE_MAX_AH			= 100,
-	RXE_MAX_SRQ			= 960,
 	RXE_MAX_SRQ_WR			= 0x4000,
 	RXE_MIN_SRQ_WR			= 1,
 	RXE_MAX_SRQ_SGE			= 27,
@@ -80,16 +80,21 @@ enum rxe_device_param {
 
 	RXE_NUM_PORT			= 1,
 
+	RXE_MAX_QP			= 0x10000,
 	RXE_MIN_QP_INDEX		= 16,
 	RXE_MAX_QP_INDEX		= 0x00020000,
 
+	RXE_MAX_SRQ			= 0x00001000,
 	RXE_MIN_SRQ_INDEX		= 0x00020001,
 	RXE_MAX_SRQ_INDEX		= 0x00040000,
 
+	RXE_MAX_MR			= 0x00001000,
+	RXE_MAX_MW			= 0x00001000,
 	RXE_MIN_MR_INDEX		= 0x00000001,
-	RXE_MAX_MR_INDEX		= 0x00040000,
-	RXE_MIN_MW_INDEX		= 0x00040001,
-	RXE_MAX_MW_INDEX		= 0x00060000,
+	RXE_MAX_MR_INDEX		= 0x00010000,
+	RXE_MIN_MW_INDEX		= 0x00010001,
+	RXE_MAX_MW_INDEX		= 0x00020000,
+
 	RXE_MAX_PKT_PER_ACK		= 64,
 
 	RXE_MAX_UNACKED_PSNS		= 128,
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 11eba7a3ba8f..8d32e3f50813 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -273,7 +273,6 @@ enum rxe_mr_type {
 	RXE_MR_TYPE_NONE,
 	RXE_MR_TYPE_DMA,
 	RXE_MR_TYPE_MR,
-	RXE_MR_TYPE_MW,
 };
 
 #define RXE_BUF_PER_MAP		(PAGE_SIZE / sizeof(struct rxe_phys_buf))
-- 
2.27.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH for-next v2 4/9] RDMA/rxe: Add ib_alloc_mw and ib_dealloc_mw verbs
  2021-04-15  2:54 [PATCH for-next v2 0/9] RDMA/rxe: Implement memory windows Bob Pearson
                   ` (2 preceding siblings ...)
  2021-04-15  2:54 ` [PATCH for-next v2 3/9] RDMA/rxe: Enable MW object pool Bob Pearson
@ 2021-04-15  2:54 ` Bob Pearson
  2021-04-15  2:54 ` [PATCH for-next v2 5/9] RDMA/rxe: Replace WR_REG_MASK by WR_LOCAL_OP_MASK Bob Pearson
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 17+ messages in thread
From: Bob Pearson @ 2021-04-15  2:54 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Add ib_alloc_mw and ib_dealloc_mw verbs APIs.

Added new file rxe_mw.c focused on MWs.
Changed the 8 bit random key generator.
Added a cleanup routine for MWs.
Added verbs routines to ib_device_ops.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/Makefile    |  1 +
 drivers/infiniband/sw/rxe/rxe_loc.h   |  6 +++
 drivers/infiniband/sw/rxe/rxe_mr.c    | 20 +++++-----
 drivers/infiniband/sw/rxe/rxe_mw.c    | 53 +++++++++++++++++++++++++++
 drivers/infiniband/sw/rxe/rxe_pool.c  |  1 +
 drivers/infiniband/sw/rxe/rxe_verbs.c |  3 ++
 drivers/infiniband/sw/rxe/rxe_verbs.h |  2 +
 7 files changed, 75 insertions(+), 11 deletions(-)
 create mode 100644 drivers/infiniband/sw/rxe/rxe_mw.c

diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
index 66af72dca759..1e24673e9318 100644
--- a/drivers/infiniband/sw/rxe/Makefile
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -15,6 +15,7 @@ rdma_rxe-y := \
 	rxe_qp.o \
 	rxe_cq.o \
 	rxe_mr.o \
+	rxe_mw.o \
 	rxe_opcode.o \
 	rxe_mmap.o \
 	rxe_icrc.o \
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index ef8061d2fbe0..edf575930a98 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -76,6 +76,7 @@ enum copy_direction {
 	from_mr_obj,
 };
 
+u8 rxe_get_next_key(u32 last_key);
 void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr);
 
 int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
@@ -106,6 +107,11 @@ void rxe_mr_cleanup(struct rxe_pool_entry *arg);
 
 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
 
+/* rxe_mw.c */
+int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
+int rxe_dealloc_mw(struct ib_mw *ibmw);
+void rxe_mw_cleanup(struct rxe_pool_entry *arg);
+
 /* rxe_net.c */
 void rxe_loopback(struct sk_buff *skb);
 int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 9f63947bab12..7f2cfc1ce659 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -7,19 +7,17 @@
 #include "rxe.h"
 #include "rxe_loc.h"
 
-/*
- * lfsr (linear feedback shift register) with period 255
+/* Return a random 8 bit key value that is
+ * different than the last_key. Set last_key to -1
+ * if this is the first key for an MR or MW
  */
-static u8 rxe_get_key(void)
+u8 rxe_get_next_key(u32 last_key)
 {
-	static u32 key = 1;
-
-	key = key << 1;
-
-	key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
-		^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));
+	u8 key;
 
-	key &= 0xff;
+	do {
+		get_random_bytes(&key, 1);
+	} while (key == last_key);
 
 	return key;
 }
@@ -47,7 +45,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
 
 static void rxe_mr_init(int access, struct rxe_mr *mr)
 {
-	u32 lkey = mr->pelem.index << 8 | rxe_get_key();
+	u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
 	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
 
 	mr->ibmr.lkey = lkey;
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
new file mode 100644
index 000000000000..69128e298d44
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved.
+ */
+
+#include "rxe.h"
+
+int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
+{
+	struct rxe_mw *mw = to_rmw(ibmw);
+	struct rxe_pd *pd = to_rpd(ibmw->pd);
+	struct rxe_dev *rxe = to_rdev(ibmw->device);
+	int ret;
+
+	rxe_add_ref(pd);
+
+	ret = rxe_add_to_pool(&rxe->mw_pool, mw);
+	if (ret) {
+		rxe_drop_ref(pd);
+		return ret;
+	}
+
+	rxe_add_index(mw);
+	ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1);
+	mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ?
+			RXE_MW_STATE_FREE : RXE_MW_STATE_VALID;
+	spin_lock_init(&mw->lock);
+
+	return 0;
+}
+
+int rxe_dealloc_mw(struct ib_mw *ibmw)
+{
+	struct rxe_mw *mw = to_rmw(ibmw);
+	struct rxe_pd *pd = to_rpd(ibmw->pd);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mw->lock, flags);
+	mw->state = RXE_MW_STATE_INVALID;
+	spin_unlock_irqrestore(&mw->lock, flags);
+
+	rxe_drop_ref(mw);
+	rxe_drop_ref(pd);
+
+	return 0;
+}
+
+void rxe_mw_cleanup(struct rxe_pool_entry *elem)
+{
+	struct rxe_mw *mw = container_of(elem, typeof(*mw), pelem);
+
+	rxe_drop_index(mw);
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 2b795e2fc4b3..5b3277e8c35d 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -65,6 +65,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 		.name		= "rxe-mw",
 		.size		= sizeof(struct rxe_mw),
 		.elem_offset	= offsetof(struct rxe_mw, pelem),
+		.cleanup	= rxe_mw_cleanup,
 		.flags		= RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
 		.max_index	= RXE_MAX_MW_INDEX,
 		.min_index	= RXE_MIN_MW_INDEX,
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index aeb5e232c195..fff81bf78a86 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1060,6 +1060,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 
 	.alloc_hw_stats = rxe_ib_alloc_hw_stats,
 	.alloc_mr = rxe_alloc_mr,
+	.alloc_mw = rxe_alloc_mw,
 	.alloc_pd = rxe_alloc_pd,
 	.alloc_ucontext = rxe_alloc_ucontext,
 	.attach_mcast = rxe_attach_mcast,
@@ -1069,6 +1070,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.create_srq = rxe_create_srq,
 	.create_user_ah = rxe_create_ah,
 	.dealloc_driver = rxe_dealloc,
+	.dealloc_mw = rxe_dealloc_mw,
 	.dealloc_pd = rxe_dealloc_pd,
 	.dealloc_ucontext = rxe_dealloc_ucontext,
 	.dereg_mr = rxe_dereg_mr,
@@ -1106,6 +1108,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 
 	INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
+	INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw),
 	INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
 	INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 8d32e3f50813..c8597ae8c833 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -323,6 +323,8 @@ enum rxe_mw_state {
 struct rxe_mw {
 	struct ib_mw ibmw;
 	struct rxe_pool_entry pelem;
+	spinlock_t lock;
+	enum rxe_mw_state state;
 };
 
 struct rxe_mc_grp {
-- 
2.27.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH for-next v2 5/9] RDMA/rxe: Replace WR_REG_MASK by WR_LOCAL_OP_MASK
  2021-04-15  2:54 [PATCH for-next v2 0/9] RDMA/rxe: Implement memory windows Bob Pearson
                   ` (3 preceding siblings ...)
  2021-04-15  2:54 ` [PATCH for-next v2 4/9] RDMA/rxe: Add ib_alloc_mw and ib_dealloc_mw verbs Bob Pearson
@ 2021-04-15  2:54 ` Bob Pearson
  2021-04-15  2:54 ` [PATCH for-next v2 6/9] RDMA/rxe: Move local ops to subroutine Bob Pearson
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 17+ messages in thread
From: Bob Pearson @ 2021-04-15  2:54 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Rxe has two mask bits WR_LOCAL_MASK and WR_REG_MASK with
WR_REG_MASK used to indicate any local operation and WR_LOCAL_MASK
unused. This patch replaces both of these with one mask bit
WR_LOCAL_OP_MASK which is clearer.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_opcode.c | 4 ++--
 drivers/infiniband/sw/rxe/rxe_opcode.h | 3 +--
 drivers/infiniband/sw/rxe/rxe_req.c    | 2 +-
 drivers/infiniband/sw/rxe/rxe_verbs.c  | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index 0cb4b01fd910..1e4b67b048f3 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -87,13 +87,13 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
 	[IB_WR_LOCAL_INV]				= {
 		.name	= "IB_WR_LOCAL_INV",
 		.mask	= {
-			[IB_QPT_RC]	= WR_REG_MASK,
+			[IB_QPT_RC]	= WR_LOCAL_OP_MASK,
 		},
 	},
 	[IB_WR_REG_MR]					= {
 		.name	= "IB_WR_REG_MR",
 		.mask	= {
-			[IB_QPT_RC]	= WR_REG_MASK,
+			[IB_QPT_RC]	= WR_LOCAL_OP_MASK,
 		},
 	},
 };
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index 1041ac9a9233..e02f039b8c44 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -19,8 +19,7 @@ enum rxe_wr_mask {
 	WR_SEND_MASK			= BIT(2),
 	WR_READ_MASK			= BIT(3),
 	WR_WRITE_MASK			= BIT(4),
-	WR_LOCAL_MASK			= BIT(5),
-	WR_REG_MASK			= BIT(6),
+	WR_LOCAL_OP_MASK		= BIT(5),
 
 	WR_READ_OR_WRITE_MASK		= WR_READ_MASK | WR_WRITE_MASK,
 	WR_READ_WRITE_OR_SEND_MASK	= WR_READ_OR_WRITE_MASK | WR_SEND_MASK,
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 3664cdae7e1f..0d4dcd514c55 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -593,7 +593,7 @@ int rxe_requester(void *arg)
 	if (unlikely(!wqe))
 		goto exit;
 
-	if (wqe->mask & WR_REG_MASK) {
+	if (wqe->mask & WR_LOCAL_OP_MASK) {
 		if (wqe->wr.opcode == IB_WR_LOCAL_INV) {
 			struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 			struct rxe_mr *rmr;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index fff81bf78a86..d22f011a20f3 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -577,7 +577,7 @@ static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 	init_send_wr(qp, &wqe->wr, ibwr);
 
 	/* local operation */
-	if (unlikely(mask & WR_REG_MASK)) {
+	if (unlikely(mask & WR_LOCAL_OP_MASK)) {
 		wqe->mask = mask;
 		wqe->state = wqe_state_posted;
 		return;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH for-next v2 6/9] RDMA/rxe: Move local ops to subroutine
  2021-04-15  2:54 [PATCH for-next v2 0/9] RDMA/rxe: Implement memory windows Bob Pearson
                   ` (4 preceding siblings ...)
  2021-04-15  2:54 ` [PATCH for-next v2 5/9] RDMA/rxe: Replace WR_REG_MASK by WR_LOCAL_OP_MASK Bob Pearson
@ 2021-04-15  2:54 ` Bob Pearson
  2021-04-15  2:54 ` [PATCH for-next v2 7/9] RDMA/rxe: Add support for bind MW work requests Bob Pearson
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 17+ messages in thread
From: Bob Pearson @ 2021-04-15  2:54 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Simplify rxe_requester() by moving the local operations
to a subroutine. Add an error return for illegal send WR opcode.
Moved next_index ahead of rxe_run_task. This fixes a small bug where
work completions were delayed until after the next wqe, which was not
the intended behavior.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_req.c | 89 +++++++++++++++++------------
 1 file changed, 54 insertions(+), 35 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 0d4dcd514c55..0cf97e3db29f 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -555,6 +555,56 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 			  jiffies + qp->qp_timeout_jiffies);
 }
 
+static int do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+	u8 opcode = wqe->wr.opcode;
+	struct rxe_dev *rxe;
+	struct rxe_mr *mr;
+	u32 rkey;
+
+	switch (opcode) {
+	case IB_WR_LOCAL_INV:
+		rxe = to_rdev(qp->ibqp.device);
+		rkey = wqe->wr.ex.invalidate_rkey;
+		mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+		if (!mr) {
+			pr_err("No MR for rkey %#x\n", rkey);
+			wqe->state = wqe_state_error;
+			wqe->status = IB_WC_LOC_QP_OP_ERR;
+			return -EINVAL;
+		}
+		mr->state = RXE_MR_STATE_FREE;
+		rxe_drop_ref(mr);
+		break;
+	case IB_WR_REG_MR:
+		mr = to_rmr(wqe->wr.wr.reg.mr);
+
+		rxe_add_ref(mr);
+		mr->state = RXE_MR_STATE_VALID;
+		mr->access = wqe->wr.wr.reg.access;
+		mr->ibmr.lkey = wqe->wr.wr.reg.key;
+		mr->ibmr.rkey = wqe->wr.wr.reg.key;
+		mr->iova = wqe->wr.wr.reg.mr->iova;
+		rxe_drop_ref(mr);
+		break;
+	default:
+		pr_err("Unexpected send wqe opcode %d\n", opcode);
+		wqe->state = wqe_state_error;
+		wqe->status = IB_WC_LOC_QP_OP_ERR;
+		return -EINVAL;
+	}
+
+	wqe->state = wqe_state_done;
+	wqe->status = IB_WC_SUCCESS;
+	qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
+
+	if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+	    qp->sq_sig_type == IB_SIGNAL_ALL_WR)
+		rxe_run_task(&qp->comp.task, 1);
+
+	return 0;
+}
+
 int rxe_requester(void *arg)
 {
 	struct rxe_qp *qp = (struct rxe_qp *)arg;
@@ -594,42 +644,11 @@ int rxe_requester(void *arg)
 		goto exit;
 
 	if (wqe->mask & WR_LOCAL_OP_MASK) {
-		if (wqe->wr.opcode == IB_WR_LOCAL_INV) {
-			struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
-			struct rxe_mr *rmr;
-
-			rmr = rxe_pool_get_index(&rxe->mr_pool,
-						 wqe->wr.ex.invalidate_rkey >> 8);
-			if (!rmr) {
-				pr_err("No mr for key %#x\n",
-				       wqe->wr.ex.invalidate_rkey);
-				wqe->state = wqe_state_error;
-				wqe->status = IB_WC_MW_BIND_ERR;
-				goto exit;
-			}
-			rmr->state = RXE_MR_STATE_FREE;
-			rxe_drop_ref(rmr);
-			wqe->state = wqe_state_done;
-			wqe->status = IB_WC_SUCCESS;
-		} else if (wqe->wr.opcode == IB_WR_REG_MR) {
-			struct rxe_mr *rmr = to_rmr(wqe->wr.wr.reg.mr);
-
-			rmr->state = RXE_MR_STATE_VALID;
-			rmr->access = wqe->wr.wr.reg.access;
-			rmr->ibmr.lkey = wqe->wr.wr.reg.key;
-			rmr->ibmr.rkey = wqe->wr.wr.reg.key;
-			rmr->iova = wqe->wr.wr.reg.mr->iova;
-			wqe->state = wqe_state_done;
-			wqe->status = IB_WC_SUCCESS;
-		} else {
+		ret = do_local_ops(qp, wqe);
+		if (unlikely(ret))
 			goto exit;
-		}
-		if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
-		    qp->sq_sig_type == IB_SIGNAL_ALL_WR)
-			rxe_run_task(&qp->comp.task, 1);
-		qp->req.wqe_index = next_index(qp->sq.queue,
-						qp->req.wqe_index);
-		goto next_wqe;
+		else
+			goto next_wqe;
 	}
 
 	if (unlikely(qp_type(qp) == IB_QPT_RC &&
-- 
2.27.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH for-next v2 7/9] RDMA/rxe: Add support for bind MW work requests
  2021-04-15  2:54 [PATCH for-next v2 0/9] RDMA/rxe: Implement memory windows Bob Pearson
                   ` (5 preceding siblings ...)
  2021-04-15  2:54 ` [PATCH for-next v2 6/9] RDMA/rxe: Move local ops to subroutine Bob Pearson
@ 2021-04-15  2:54 ` Bob Pearson
  2021-04-20 14:32   ` Zhu Yanjun
  2021-04-15  2:54 ` [PATCH for-next v2 8/9] RDMA/rxe: Implement invalidate MW operations Bob Pearson
  2021-04-15  2:54 ` [PATCH for-next v2 9/9] RDMA/rxe: Implement memory access through MWs Bob Pearson
  8 siblings, 1 reply; 17+ messages in thread
From: Bob Pearson @ 2021-04-15  2:54 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Add support for bind MW work requests from user space.
Since rdma/core does not support bind mw in ib_send_wr
there is no way to support bind mw in kernel space.

Added bind_mw local operation in rxe_req.c
Added bind_mw WR operation in rxe_opcode.c
Added bind_mw WC in rxe_comp.c
Added additional fields to rxe_mw in rxe_verbs.h
Added do_dealloc_mw() subroutine to cleanup an mw
when rxe_dealloc_mw is called.
Added code to implement bind_mw operation in rxe_mw.c

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
v2:
  Dropped kernel support for bind_mw in rxe_mw.c
  Replaced umw with mw in rxe_send_wr.

 drivers/infiniband/sw/rxe/rxe_comp.c   |   1 +
 drivers/infiniband/sw/rxe/rxe_loc.h    |   1 +
 drivers/infiniband/sw/rxe/rxe_mw.c     | 204 ++++++++++++++++++++++++-
 drivers/infiniband/sw/rxe/rxe_opcode.c |   7 +
 drivers/infiniband/sw/rxe/rxe_req.c    |   9 ++
 drivers/infiniband/sw/rxe/rxe_verbs.h  |  15 +-
 6 files changed, 232 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 2af26737d32d..bc5488af5f55 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -103,6 +103,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
 	case IB_WR_RDMA_READ_WITH_INV:		return IB_WC_RDMA_READ;
 	case IB_WR_LOCAL_INV:			return IB_WC_LOCAL_INV;
 	case IB_WR_REG_MR:			return IB_WC_REG_MR;
+	case IB_WR_BIND_MW:			return IB_WC_BIND_MW;
 
 	default:
 		return 0xff;
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index edf575930a98..e6f574973298 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -110,6 +110,7 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
 /* rxe_mw.c */
 int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
 int rxe_dealloc_mw(struct ib_mw *ibmw);
+int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
 void rxe_mw_cleanup(struct rxe_pool_entry *arg);
 
 /* rxe_net.c */
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 69128e298d44..6ced54126b72 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -29,6 +29,29 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
 	return 0;
 }
 
+static void do_dealloc_mw(struct rxe_mw *mw)
+{
+	if (mw->mr) {
+		struct rxe_mr *mr = mw->mr;
+
+		mw->mr = NULL;
+		atomic_dec(&mr->num_mw);
+		rxe_drop_ref(mr);
+	}
+
+	if (mw->qp) {
+		struct rxe_qp *qp = mw->qp;
+
+		mw->qp = NULL;
+		rxe_drop_ref(qp);
+	}
+
+	mw->access = 0;
+	mw->addr = 0;
+	mw->length = 0;
+	mw->state = RXE_MW_STATE_INVALID;
+}
+
 int rxe_dealloc_mw(struct ib_mw *ibmw)
 {
 	struct rxe_mw *mw = to_rmw(ibmw);
@@ -36,7 +59,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
 	unsigned long flags;
 
 	spin_lock_irqsave(&mw->lock, flags);
-	mw->state = RXE_MW_STATE_INVALID;
+	do_dealloc_mw(mw);
 	spin_unlock_irqrestore(&mw->lock, flags);
 
 	rxe_drop_ref(mw);
@@ -45,6 +68,185 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
 	return 0;
 }
 
+/*
+ * Validate a bind MW work request. Called by rxe_bind_mw() with mw->lock
+ * held. Returns 0 if the bind may proceed or -EINVAL if a check fails.
+ *
+ * The access checks test the flags requested in the wqe: mw->access is
+ * not set to the requested value until do_bind_mw() runs after these
+ * checks pass, so here it still describes the previous binding, if any.
+ */
+static int check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+			 struct rxe_mw *mw, struct rxe_mr *mr)
+{
+	if (mw->ibmw.type == IB_MW_TYPE_1) {
+		if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
+			pr_err_once("attempt to bind a type 1 MW not in the valid state\n");
+			return -EINVAL;
+		}
+
+		/* o10-36.2.2 */
+		if (unlikely((wqe->wr.wr.mw.access & IB_ZERO_BASED))) {
+			pr_err_once("attempt to bind a zero based type 1 MW\n");
+			return -EINVAL;
+		}
+	}
+
+	if (mw->ibmw.type == IB_MW_TYPE_2) {
+		/* o10-37.2.30 */
+		if (unlikely(mw->state != RXE_MW_STATE_FREE)) {
+			pr_err_once("attempt to bind a type 2 MW not in the free state\n");
+			return -EINVAL;
+		}
+
+		/* C10-72 */
+		if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) {
+			pr_err_once("attempt to bind type 2 MW with qp with different PD\n");
+			return -EINVAL;
+		}
+
+		/* o10-37.2.40 */
+		if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) {
+			pr_err_once("attempt to invalidate type 2 MW by binding with NULL or zero length MR\n");
+			return -EINVAL;
+		}
+	}
+
+	if (unlikely((wqe->wr.wr.mw.rkey & 0xff) == (mw->ibmw.rkey & 0xff))) {
+		pr_err_once("attempt to bind MW with same key\n");
+		return -EINVAL;
+	}
+
+	/* remaining checks only apply to a nonzero MR */
+	if (!mr)
+		return 0;
+
+	if (unlikely(mr->access & IB_ZERO_BASED)) {
+		pr_err_once("attempt to bind MW to zero based MR\n");
+		return -EINVAL;
+	}
+
+	/* C10-73 */
+	if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) {
+		pr_err_once("attempt to bind an MW to an MR without bind access\n");
+		return -EINVAL;
+	}
+
+	/* C10-74 */
+	if (unlikely((wqe->wr.wr.mw.access &
+		      (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
+		     !(mr->access & IB_ACCESS_LOCAL_WRITE))) {
+		pr_err_once("attempt to bind an writeable MW to an MR without local write access\n");
+		return -EINVAL;
+	}
+
+	/* C10-75 */
+	if (wqe->wr.wr.mw.access & IB_ZERO_BASED) {
+		if (unlikely(wqe->wr.wr.mw.length > mr->length)) {
+			pr_err_once("attempt to bind a ZB MW outside of the MR\n");
+			return -EINVAL;
+		}
+	} else {
+		if (unlikely((wqe->wr.wr.mw.addr < mr->iova) ||
+			     ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
+			      (mr->iova + mr->length)))) {
+			pr_err_once("attempt to bind a VA MW outside of the MR\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/* Apply a bind that check_bind_mw() accepted; caller holds mw->lock.
+ * Moves the MW's MR reference and num_mw count from the old MR to mr.
+ */
+static int do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+		      struct rxe_mw *mw, struct rxe_mr *mr)
+{
+	u32 key = wqe->wr.wr.mw.rkey & 0x000000ff;
+
+	mw->ibmw.rkey = (mw->ibmw.rkey & 0xffffff00) | key;
+	mw->access = wqe->wr.wr.mw.access;
+	mw->state = RXE_MW_STATE_VALID;
+	mw->addr = wqe->wr.wr.mw.addr;
+	mw->length = wqe->wr.wr.mw.length;
+
+	if (mw->mr) {
+		/* as in do_dealloc_mw(): use the MR before dropping the ref */
+		atomic_dec(&mw->mr->num_mw);
+		rxe_drop_ref(mw->mr);
+		mw->mr = NULL;
+	}
+
+	if (mw->length) {
+		mw->mr = mr;
+		atomic_inc(&mr->num_mw);
+		rxe_add_ref(mr);
+	}
+
+	if (mw->ibmw.type == IB_MW_TYPE_2) {
+		rxe_add_ref(qp);
+		mw->qp = qp;
+	}
+
+	return 0;
+}
+
+int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+	int ret;
+	struct rxe_mw *mw;
+	struct rxe_mr *mr;
+	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+	unsigned long flags;
+
+	mw = rxe_pool_get_index(&rxe->mw_pool,
+				wqe->wr.wr.mw.mw_rkey >> 8);
+	if (unlikely(!mw)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (unlikely(mw->ibmw.rkey != wqe->wr.wr.mw.mw_rkey)) {
+		ret = -EINVAL;
+		goto err_drop_mw;
+	}
+
+	if (likely(wqe->wr.wr.mw.length)) {
+		mr = rxe_pool_get_index(&rxe->mr_pool,
+					wqe->wr.wr.mw.mr_lkey >> 8);
+		if (unlikely(!mr)) {
+			ret = -EINVAL;
+			goto err_drop_mw;
+		}
+
+		if (unlikely(mr->ibmr.lkey != wqe->wr.wr.mw.mr_lkey)) {
+			ret = -EINVAL;
+			goto err_drop_mr;
+		}
+	} else {
+		mr = NULL;
+	}
+
+	spin_lock_irqsave(&mw->lock, flags);
+
+	ret = check_bind_mw(qp, wqe, mw, mr);
+	if (ret)
+		goto err_unlock;
+
+	ret = do_bind_mw(qp, wqe, mw, mr);
+err_unlock:
+	spin_unlock_irqrestore(&mw->lock, flags);
+err_drop_mr:
+	if (mr)
+		rxe_drop_ref(mr);
+err_drop_mw:
+	rxe_drop_ref(mw);
+err:
+	return ret;
+}
+
 void rxe_mw_cleanup(struct rxe_pool_entry *elem)
 {
 	struct rxe_mw *mw = container_of(elem, typeof(*mw), pelem);
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index 1e4b67b048f3..3ef5a10a6efd 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -96,6 +96,13 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
 			[IB_QPT_RC]	= WR_LOCAL_OP_MASK,
 		},
 	},
+	[IB_WR_BIND_MW]					= {
+		.name	= "IB_WR_BIND_MW",
+		.mask	= {
+			[IB_QPT_RC]	= WR_LOCAL_OP_MASK,
+			[IB_QPT_UC]	= WR_LOCAL_OP_MASK,
+		},
+	},
 };
 
 struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 0cf97e3db29f..243602584a28 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -561,6 +561,7 @@ static int do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 	struct rxe_dev *rxe;
 	struct rxe_mr *mr;
 	u32 rkey;
+	int ret;
 
 	switch (opcode) {
 	case IB_WR_LOCAL_INV:
@@ -587,6 +588,14 @@ static int do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 		mr->iova = wqe->wr.wr.reg.mr->iova;
 		rxe_drop_ref(mr);
 		break;
+	case IB_WR_BIND_MW:
+		ret = rxe_bind_mw(qp, wqe);
+		if (ret) {
+			wqe->state = wqe_state_error;
+			wqe->status = IB_WC_MW_BIND_ERR;
+			return -EINVAL;
+		}
+		break;
 	default:
 		pr_err("Unexpected send wqe opcode %d\n", opcode);
 		wqe->state = wqe_state_error;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index c8597ae8c833..7da47b8c707b 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -312,6 +312,8 @@ struct rxe_mr {
 	u32			num_map;
 
 	struct rxe_map		**map;
+
+	atomic_t		num_mw;
 };
 
 enum rxe_mw_state {
@@ -321,10 +323,15 @@ enum rxe_mw_state {
 };
 
 struct rxe_mw {
-	struct ib_mw ibmw;
-	struct rxe_pool_entry pelem;
-	spinlock_t lock;
-	enum rxe_mw_state state;
+	struct			ib_mw ibmw;
+	struct			rxe_pool_entry pelem;
+	spinlock_t		lock;
+	enum rxe_mw_state	state;
+	struct rxe_qp		*qp;	/* Type 2 only */
+	struct rxe_mr		*mr;
+	int			access;
+	u64			addr;
+	u64			length;
 };
 
 struct rxe_mc_grp {
-- 
2.27.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH for-next v2 8/9] RDMA/rxe: Implement invalidate MW operations
  2021-04-15  2:54 [PATCH for-next v2 0/9] RDMA/rxe: Implement memory windows Bob Pearson
                   ` (6 preceding siblings ...)
  2021-04-15  2:54 ` [PATCH for-next v2 7/9] RDMA/rxe: Add support for bind MW work requests Bob Pearson
@ 2021-04-15  2:54 ` Bob Pearson
  2021-04-20  6:38   ` Zhu Yanjun
  2021-04-15  2:54 ` [PATCH for-next v2 9/9] RDMA/rxe: Implement memory access through MWs Bob Pearson
  8 siblings, 1 reply; 17+ messages in thread
From: Bob Pearson @ 2021-04-15  2:54 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Implement invalidate MW and clean up invalidate MR operations.

Added code to perform remote invalidate for send with invalidate.
Added code to perform local invalidation.
Deleted some blank lines in rxe_loc.h.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h   | 23 ++++-----
 drivers/infiniband/sw/rxe/rxe_mr.c    | 59 +++++++++++++++++------
 drivers/infiniband/sw/rxe/rxe_mw.c    | 67 +++++++++++++++++++++++++++
 drivers/infiniband/sw/rxe/rxe_req.c   | 22 +++++----
 drivers/infiniband/sw/rxe/rxe_resp.c  | 52 +++++++++++++--------
 drivers/infiniband/sw/rxe/rxe_verbs.h | 23 +++++----
 6 files changed, 178 insertions(+), 68 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index e6f574973298..7f1117c51e30 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -76,41 +76,34 @@ enum copy_direction {
 	from_mr_obj,
 };
 
+enum lookup_type {
+	lookup_local,
+	lookup_remote,
+};
+
 u8 rxe_get_next_key(u32 last_key);
 void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr);
-
 int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 		     int access, struct ib_udata *udata, struct rxe_mr *mr);
-
 int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr);
-
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum copy_direction dir, u32 *crcp);
-
 int copy_data(struct rxe_pd *pd, int access,
 	      struct rxe_dma_info *dma, void *addr, int length,
 	      enum copy_direction dir, u32 *crcp);
-
 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length);
-
-enum lookup_type {
-	lookup_local,
-	lookup_remote,
-};
-
 struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
 			 enum lookup_type type);
-
 int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
-
-void rxe_mr_cleanup(struct rxe_pool_entry *arg);
-
 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
+int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey);
+void rxe_mr_cleanup(struct rxe_pool_entry *arg);
 
 /* rxe_mw.c */
 int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
 int rxe_dealloc_mw(struct ib_mw *ibmw);
 int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
+int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey);
 void rxe_mw_cleanup(struct rxe_pool_entry *arg);
 
 /* rxe_net.c */
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 7f2cfc1ce659..0f1791ed0350 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -55,21 +55,6 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
 	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
 }
 
-void rxe_mr_cleanup(struct rxe_pool_entry *arg)
-{
-	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
-	int i;
-
-	ib_umem_release(mr->umem);
-
-	if (mr->map) {
-		for (i = 0; i < mr->num_map; i++)
-			kfree(mr->map[i]);
-
-		kfree(mr->map);
-	}
-}
-
 static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
 {
 	int i;
@@ -540,3 +525,47 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
 
 	return mr;
 }
+
+int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
+{
+	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+	struct rxe_mr *mr;
+	int ret;
+
+	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+	if (!mr) {
+		pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (rkey != mr->ibmr.rkey) {
+		pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
+			__func__, rkey, mr->ibmr.rkey);
+		ret = -EINVAL;
+		goto err_drop_ref;
+	}
+
+	mr->state = RXE_MR_STATE_FREE;
+	ret = 0;
+
+err_drop_ref:
+	rxe_drop_ref(mr);
+err:
+	return ret;
+}
+
+void rxe_mr_cleanup(struct rxe_pool_entry *arg)
+{
+	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
+	int i;
+
+	ib_umem_release(mr->umem);
+
+	if (mr->map) {
+		for (i = 0; i < mr->num_map; i++)
+			kfree(mr->map[i]);
+
+		kfree(mr->map);
+	}
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 6ced54126b72..4c1830b4a8bf 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -247,6 +247,73 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 	return ret;
 }
 
+static int check_invalidate_mw(struct rxe_qp *qp, struct rxe_mw *mw)
+{
+	if (unlikely(mw->state == RXE_MW_STATE_INVALID))
+		return -EINVAL;
+
+	/* o10-37.2.26 */
+	if (unlikely(mw->ibmw.type == IB_MW_TYPE_1))
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Caller holds mw->lock; puts a type 2 MW back in the free state. */
+static void do_invalidate_mw(struct rxe_mw *mw)
+{
+	struct rxe_qp *qp = mw->qp;
+	struct rxe_mr *mr = mw->mr;
+
+	/* both are NULL if the MW is already free (repeated invalidate) */
+	mw->qp = NULL;
+	mw->mr = NULL;
+	if (qp)
+		rxe_drop_ref(qp);
+	if (mr) {
+		atomic_dec(&mr->num_mw);
+		rxe_drop_ref(mr);
+	}
+
+	mw->access = 0;
+	mw->addr = 0;
+	mw->length = 0;
+	mw->state = RXE_MW_STATE_FREE;
+}
+
+int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey)
+{
+	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+	unsigned long flags;
+	struct rxe_mw *mw;
+	int ret;
+
+	mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
+	if (!mw) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (rkey != mw->ibmw.rkey) {
+		ret = -EINVAL;
+		goto err_drop_ref;
+	}
+
+	spin_lock_irqsave(&mw->lock, flags);
+
+	ret = check_invalidate_mw(qp, mw);
+	if (ret)
+		goto err_unlock;
+
+	do_invalidate_mw(mw);
+err_unlock:
+	spin_unlock_irqrestore(&mw->lock, flags);
+err_drop_ref:
+	rxe_drop_ref(mw);
+err:
+	return ret;
+}
+
 void rxe_mw_cleanup(struct rxe_pool_entry *elem)
 {
 	struct rxe_mw *mw = container_of(elem, typeof(*mw), pelem);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 243602584a28..66fc208d0ec1 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -558,25 +558,25 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 static int do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 {
 	u8 opcode = wqe->wr.opcode;
-	struct rxe_dev *rxe;
 	struct rxe_mr *mr;
-	u32 rkey;
 	int ret;
+	u32 rkey;
 
 	switch (opcode) {
 	case IB_WR_LOCAL_INV:
-		rxe = to_rdev(qp->ibqp.device);
 		rkey = wqe->wr.ex.invalidate_rkey;
-		mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
-		if (!mr) {
-			pr_err("No MR for rkey %#x\n", rkey);
+		if (rkey_is_mw(rkey))
+			ret = rxe_invalidate_mw(qp, rkey);
+		else
+			ret = rxe_invalidate_mr(qp, rkey);
+
+		if (ret) {
 			wqe->state = wqe_state_error;
 			wqe->status = IB_WC_LOC_QP_OP_ERR;
-			return -EINVAL;
+			return ret;
 		}
-		mr->state = RXE_MR_STATE_FREE;
-		rxe_drop_ref(mr);
 		break;
+
 	case IB_WR_REG_MR:
 		mr = to_rmr(wqe->wr.wr.reg.mr);
 
@@ -588,14 +588,16 @@ static int do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 		mr->iova = wqe->wr.wr.reg.mr->iova;
 		rxe_drop_ref(mr);
 		break;
+
 	case IB_WR_BIND_MW:
 		ret = rxe_bind_mw(qp, wqe);
 		if (ret) {
 			wqe->state = wqe_state_error;
 			wqe->status = IB_WC_MW_BIND_ERR;
-			return -EINVAL;
+			return ret;
 		}
 		break;
+
 	default:
 		pr_err("Unexpected send wqe opcode %d\n", opcode);
 		wqe->state = wqe_state_error;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 2b220659bddb..21adc9209107 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -35,6 +35,7 @@ enum resp_states {
 	RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
 	RESPST_ERR_RNR,
 	RESPST_ERR_RKEY_VIOLATION,
+	RESPST_ERR_INVALIDATE_RKEY,
 	RESPST_ERR_LENGTH,
 	RESPST_ERR_CQ_OVERFLOW,
 	RESPST_ERROR,
@@ -68,6 +69,7 @@ static char *resp_state_name[] = {
 	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ]	= "ERR_TOO_MANY_RDMA_ATM_REQ",
 	[RESPST_ERR_RNR]			= "ERR_RNR",
 	[RESPST_ERR_RKEY_VIOLATION]		= "ERR_RKEY_VIOLATION",
+	[RESPST_ERR_INVALIDATE_RKEY]		= "ERR_INVALIDATE_RKEY_VIOLATION",
 	[RESPST_ERR_LENGTH]			= "ERR_LENGTH",
 	[RESPST_ERR_CQ_OVERFLOW]		= "ERR_CQ_OVERFLOW",
 	[RESPST_ERROR]				= "ERROR",
@@ -751,6 +753,14 @@ static void build_rdma_network_hdr(union rdma_network_hdr *hdr,
 		memcpy(&hdr->ibgrh, ipv6_hdr(skb), sizeof(hdr->ibgrh));
 }
 
+static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
+{
+	if (rkey_is_mw(rkey))
+		return rxe_invalidate_mw(qp, rkey);
+	else
+		return rxe_invalidate_mr(qp, rkey);
+}
+
 /* Executes a new request. A retried request never reach that function (send
  * and writes are discarded, and reads and atomics are retried elsewhere.
  */
@@ -790,6 +800,14 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 		WARN_ON_ONCE(1);
 	}
 
+	if (pkt->mask & RXE_IETH_MASK) {
+		u32 rkey = ieth_rkey(pkt);
+
+		err = invalidate_rkey(qp, rkey);
+		if (err)
+			return RESPST_ERR_INVALIDATE_RKEY;
+	}
+
 	/* next expected psn, read handles this separately */
 	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
 	qp->resp.ack_psn = qp->resp.psn;
@@ -822,13 +840,13 @@ static enum resp_states do_complete(struct rxe_qp *qp,
 	memset(&cqe, 0, sizeof(cqe));
 
 	if (qp->rcq->is_user) {
-		uwc->status             = qp->resp.status;
-		uwc->qp_num             = qp->ibqp.qp_num;
-		uwc->wr_id              = wqe->wr_id;
+		uwc->status		= qp->resp.status;
+		uwc->qp_num		= qp->ibqp.qp_num;
+		uwc->wr_id		= wqe->wr_id;
 	} else {
-		wc->status              = qp->resp.status;
-		wc->qp                  = &qp->ibqp;
-		wc->wr_id               = wqe->wr_id;
+		wc->status		= qp->resp.status;
+		wc->qp			= &qp->ibqp;
+		wc->wr_id		= wqe->wr_id;
 	}
 
 	if (wc->status == IB_WC_SUCCESS) {
@@ -883,27 +901,14 @@ static enum resp_states do_complete(struct rxe_qp *qp,
 			}
 
 			if (pkt->mask & RXE_IETH_MASK) {
-				struct rxe_mr *rmr;
-
 				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
 				wc->ex.invalidate_rkey = ieth_rkey(pkt);
-
-				rmr = rxe_pool_get_index(&rxe->mr_pool,
-							 wc->ex.invalidate_rkey >> 8);
-				if (unlikely(!rmr)) {
-					pr_err("Bad rkey %#x invalidation\n",
-					       wc->ex.invalidate_rkey);
-					return RESPST_ERROR;
-				}
-				rmr->state = RXE_MR_STATE_FREE;
-				rxe_drop_ref(rmr);
 			}
 
-			wc->qp			= &qp->ibqp;
-
 			if (pkt->mask & RXE_DETH_MASK)
 				wc->src_qp = deth_sqp(pkt);
 
+			wc->qp			= &qp->ibqp;
 			wc->port_num		= qp->attr.port_num;
 		}
 	}
@@ -1314,6 +1319,13 @@ int rxe_responder(void *arg)
 			}
 			break;
 
+		case RESPST_ERR_INVALIDATE_RKEY:
+			/* RC - Class J. */
+			qp->resp.goto_error = 1;
+			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
+			state = RESPST_COMPLETE;
+			break;
+
 		case RESPST_ERR_LENGTH:
 			if (qp_type(qp) == IB_QPT_RC) {
 				/* Class C */
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 7da47b8c707b..b286a14ec282 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -286,6 +286,13 @@ struct rxe_map {
 	struct rxe_phys_buf	buf[RXE_BUF_PER_MAP];
 };
 
+static inline int rkey_is_mw(u32 rkey)
+{
+	u32 index = rkey >> 8;
+
+	return (index >= RXE_MIN_MW_INDEX) && (index <= RXE_MAX_MW_INDEX);
+}
+
 struct rxe_mr {
 	struct rxe_pool_entry	pelem;
 	struct ib_mr		ibmr;
@@ -311,23 +318,23 @@ struct rxe_mr {
 	u32			max_buf;
 	u32			num_map;
 
-	struct rxe_map		**map;
-
 	atomic_t		num_mw;
+
+	struct rxe_map		**map;
 };
 
 enum rxe_mw_state {
-	RXE_MW_STATE_INVALID = RXE_MR_STATE_INVALID,
-	RXE_MW_STATE_FREE = RXE_MR_STATE_FREE,
-	RXE_MW_STATE_VALID = RXE_MR_STATE_VALID,
+	RXE_MW_STATE_INVALID	= RXE_MR_STATE_INVALID,
+	RXE_MW_STATE_FREE	= RXE_MR_STATE_FREE,
+	RXE_MW_STATE_VALID	= RXE_MR_STATE_VALID,
 };
 
 struct rxe_mw {
-	struct			ib_mw ibmw;
-	struct			rxe_pool_entry pelem;
+	struct ib_mw		ibmw;
+	struct rxe_pool_entry	pelem;
 	spinlock_t		lock;
 	enum rxe_mw_state	state;
-	struct rxe_qp		*qp;	/* Type 2 only */
+	struct rxe_qp		*qp; /* Type 2 only */
 	struct rxe_mr		*mr;
 	int			access;
 	u64			addr;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH for-next v2 9/9] RDMA/rxe: Implement memory access through MWs
  2021-04-15  2:54 [PATCH for-next v2 0/9] RDMA/rxe: Implement memory windows Bob Pearson
                   ` (7 preceding siblings ...)
  2021-04-15  2:54 ` [PATCH for-next v2 8/9] RDMA/rxe: Implement invalidate MW operations Bob Pearson
@ 2021-04-15  2:54 ` Bob Pearson
  2021-04-20  6:34   ` Zhu Yanjun
  8 siblings, 1 reply; 17+ messages in thread
From: Bob Pearson @ 2021-04-15  2:54 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Add code to implement memory access through memory windows.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
v2:
  Removed a copy of changes in ea4922518940 "Fix missing acks from responder"
  that was submitted separately.

 drivers/infiniband/sw/rxe/rxe_loc.h   |  1 +
 drivers/infiniband/sw/rxe/rxe_mw.c    | 23 +++++++++++
 drivers/infiniband/sw/rxe/rxe_resp.c  | 55 +++++++++++++++++++--------
 drivers/infiniband/sw/rxe/rxe_verbs.h | 11 ++++++
 4 files changed, 75 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 7f1117c51e30..99158d11dae7 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -104,6 +104,7 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
 int rxe_dealloc_mw(struct ib_mw *ibmw);
 int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
 int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey);
+struct rxe_mw *lookup_mw(struct rxe_qp *qp, int access, u32 rkey);
 void rxe_mw_cleanup(struct rxe_pool_entry *arg);
 
 /* rxe_net.c */
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 4c1830b4a8bf..e443e4672e00 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -314,6 +314,29 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey)
 	return ret;
 }
 
+struct rxe_mw *lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
+{
+	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+	struct rxe_pd *pd = to_rpd(qp->ibqp.pd);
+	struct rxe_mw *mw;
+	int index = rkey >> 8;
+
+	mw = rxe_pool_get_index(&rxe->mw_pool, index);
+	if (!mw)
+		return NULL;
+
+	if (unlikely((mw_rkey(mw) != rkey) || mw_pd(mw) != pd ||
+		     (mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) ||
+		     (mw->length == 0) ||
+		     (access && !(access & mw->access)) ||
+		     mw->state != RXE_MW_STATE_VALID)) {
+		rxe_drop_ref(mw);
+		return NULL;
+	}
+
+	return mw;
+}
+
 void rxe_mw_cleanup(struct rxe_pool_entry *elem)
 {
 	struct rxe_mw *mw = container_of(elem, typeof(*mw), pelem);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 21adc9209107..9410b8576abe 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -394,6 +394,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 				   struct rxe_pkt_info *pkt)
 {
 	struct rxe_mr *mr = NULL;
+	struct rxe_mw *mw = NULL;
 	u64 va;
 	u32 rkey;
 	u32 resid;
@@ -405,6 +406,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 	if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) {
 		if (pkt->mask & RXE_RETH_MASK) {
 			qp->resp.va = reth_va(pkt);
+			qp->resp.offset = 0;
 			qp->resp.rkey = reth_rkey(pkt);
 			qp->resp.resid = reth_len(pkt);
 			qp->resp.length = reth_len(pkt);
@@ -413,6 +415,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 						     : IB_ACCESS_REMOTE_WRITE;
 	} else if (pkt->mask & RXE_ATOMIC_MASK) {
 		qp->resp.va = atmeth_va(pkt);
+		qp->resp.offset = 0;
 		qp->resp.rkey = atmeth_rkey(pkt);
 		qp->resp.resid = sizeof(u64);
 		access = IB_ACCESS_REMOTE_ATOMIC;
@@ -432,18 +435,36 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 	resid	= qp->resp.resid;
 	pktlen	= payload_size(pkt);
 
-	mr = lookup_mr(qp->pd, access, rkey, lookup_remote);
-	if (!mr) {
-		state = RESPST_ERR_RKEY_VIOLATION;
-		goto err;
-	}
+	if (rkey_is_mw(rkey)) {
+		mw = lookup_mw(qp, access, rkey);
+		if (!mw) {
+			pr_err("%s: no MW matches rkey %#x\n", __func__, rkey);
+			state = RESPST_ERR_RKEY_VIOLATION;
+			goto err;
+		}
 
-	if (unlikely(mr->state == RXE_MR_STATE_FREE)) {
-		state = RESPST_ERR_RKEY_VIOLATION;
-		goto err;
+		mr = mw->mr;
+		if (!mr) {
+			pr_err("%s: MW doesn't have an MR\n", __func__);
+			state = RESPST_ERR_RKEY_VIOLATION;
+			goto err;
+		}
+
+		if (mw->access & IB_ZERO_BASED)
+			qp->resp.offset = mw->addr;
+
+		rxe_drop_ref(mw);
+		rxe_add_ref(mr);
+	} else {
+		mr = lookup_mr(qp->pd, access, rkey, lookup_remote);
+		if (!mr) {
+			pr_err("%s: no MR matches rkey %#x\n", __func__, rkey);
+			state = RESPST_ERR_RKEY_VIOLATION;
+			goto err;
+		}
 	}
 
-	if (mr_check_range(mr, va, resid)) {
+	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
 		state = RESPST_ERR_RKEY_VIOLATION;
 		goto err;
 	}
@@ -477,6 +498,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 err:
 	if (mr)
 		rxe_drop_ref(mr);
+	if (mw)
+		rxe_drop_ref(mw);
+
 	return state;
 }
 
@@ -501,8 +525,8 @@ static enum resp_states write_data_in(struct rxe_qp *qp,
 	int	err;
 	int data_len = payload_size(pkt);
 
-	err = rxe_mr_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt), data_len,
-			  to_mr_obj, NULL);
+	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
+			  payload_addr(pkt), data_len, to_mr_obj, NULL);
 	if (err) {
 		rc = RESPST_ERR_RKEY_VIOLATION;
 		goto out;
@@ -521,7 +545,6 @@ static DEFINE_SPINLOCK(atomic_ops_lock);
 static enum resp_states process_atomic(struct rxe_qp *qp,
 				       struct rxe_pkt_info *pkt)
 {
-	u64 iova = atmeth_va(pkt);
 	u64 *vaddr;
 	enum resp_states ret;
 	struct rxe_mr *mr = qp->resp.mr;
@@ -531,7 +554,7 @@ static enum resp_states process_atomic(struct rxe_qp *qp,
 		goto out;
 	}
 
-	vaddr = iova_to_vaddr(mr, iova, sizeof(u64));
+	vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, sizeof(u64));
 
 	/* check vaddr is 8 bytes aligned. */
 	if (!vaddr || (uintptr_t)vaddr & 7) {
@@ -655,8 +678,10 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 		res->type		= RXE_READ_MASK;
 		res->replay		= 0;
 
-		res->read.va		= qp->resp.va;
-		res->read.va_org	= qp->resp.va;
+		res->read.va		= qp->resp.va +
+					  qp->resp.offset;
+		res->read.va_org	= qp->resp.va +
+					  qp->resp.offset;
 
 		res->first_psn		= req_pkt->psn;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index b286a14ec282..9f35e2c042d0 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -183,6 +183,7 @@ struct rxe_resp_info {
 
 	/* RDMA read / atomic only */
 	u64			va;
+	u64			offset;
 	struct rxe_mr		*mr;
 	u32			resid;
 	u32			rkey;
@@ -470,6 +471,16 @@ static inline u32 mr_rkey(struct rxe_mr *mr)
 	return mr->ibmr.rkey;
 }
 
+static inline struct rxe_pd *mw_pd(struct rxe_mw *mw)
+{
+	return to_rpd(mw->ibmw.pd);
+}
+
+static inline u32 mw_rkey(struct rxe_mw *mw)
+{
+	return mw->ibmw.rkey;
+}
+
 int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
 
 void rxe_mc_cleanup(struct rxe_pool_entry *arg);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next v2 9/9] RDMA/rxe: Implement memory access through MWs
  2021-04-15  2:54 ` [PATCH for-next v2 9/9] RDMA/rxe: Implement memory access through MWs Bob Pearson
@ 2021-04-20  6:34   ` Zhu Yanjun
  2021-04-20 12:04     ` Jason Gunthorpe
  0 siblings, 1 reply; 17+ messages in thread
From: Zhu Yanjun @ 2021-04-20  6:34 UTC (permalink / raw)
  To: Bob Pearson; +Cc: Jason Gunthorpe, RDMA mailing list, Bob Pearson

On Thu, Apr 15, 2021 at 10:55 AM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> Add code to implement memory access through memory windows.
>
> Signed-off-by: Bob Pearson <rpearson@hpe.com>
> ---
> v2:
>   Removed a copy of changes in ea4922518940 "Fix missing acks from responder"
>   that was submitted separately.
>
>  drivers/infiniband/sw/rxe/rxe_loc.h   |  1 +
>  drivers/infiniband/sw/rxe/rxe_mw.c    | 23 +++++++++++
>  drivers/infiniband/sw/rxe/rxe_resp.c  | 55 +++++++++++++++++++--------
>  drivers/infiniband/sw/rxe/rxe_verbs.h | 11 ++++++
>  4 files changed, 75 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
> index 7f1117c51e30..99158d11dae7 100644
> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> @@ -104,6 +104,7 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
>  int rxe_dealloc_mw(struct ib_mw *ibmw);
>  int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
>  int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey);
> +struct rxe_mw *lookup_mw(struct rxe_qp *qp, int access, u32 rkey);
>  void rxe_mw_cleanup(struct rxe_pool_entry *arg);
>
>  /* rxe_net.c */
> diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
> index 4c1830b4a8bf..e443e4672e00 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mw.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mw.c
> @@ -314,6 +314,29 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey)
>         return ret;
>  }
>
> +struct rxe_mw *lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
> +{
> +       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
> +       struct rxe_pd *pd = to_rpd(qp->ibqp.pd);
> +       struct rxe_mw *mw;
> +       int index = rkey >> 8;
> +
> +       mw = rxe_pool_get_index(&rxe->mw_pool, index);
> +       if (!mw)
> +               return NULL;
> +
> +       if (unlikely((mw_rkey(mw) != rkey) || mw_pd(mw) != pd ||
> +                    (mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) ||
> +                    (mw->length == 0) ||
> +                    (access && !(access & mw->access)) ||
> +                    mw->state != RXE_MW_STATE_VALID)) {
> +               rxe_drop_ref(mw);
> +               return NULL;
> +       }
> +
> +       return mw;
> +}
> +
>  void rxe_mw_cleanup(struct rxe_pool_entry *elem)
>  {
>         struct rxe_mw *mw = container_of(elem, typeof(*mw), pelem);
> diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
> index 21adc9209107..9410b8576abe 100644
> --- a/drivers/infiniband/sw/rxe/rxe_resp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
> @@ -394,6 +394,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>                                    struct rxe_pkt_info *pkt)
>  {
>         struct rxe_mr *mr = NULL;
> +       struct rxe_mw *mw = NULL;
>         u64 va;
>         u32 rkey;
>         u32 resid;
> @@ -405,6 +406,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>         if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) {
>                 if (pkt->mask & RXE_RETH_MASK) {
>                         qp->resp.va = reth_va(pkt);
> +                       qp->resp.offset = 0;
>                         qp->resp.rkey = reth_rkey(pkt);
>                         qp->resp.resid = reth_len(pkt);
>                         qp->resp.length = reth_len(pkt);
> @@ -413,6 +415,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>                                                      : IB_ACCESS_REMOTE_WRITE;
>         } else if (pkt->mask & RXE_ATOMIC_MASK) {
>                 qp->resp.va = atmeth_va(pkt);
> +               qp->resp.offset = 0;
>                 qp->resp.rkey = atmeth_rkey(pkt);
>                 qp->resp.resid = sizeof(u64);
>                 access = IB_ACCESS_REMOTE_ATOMIC;
> @@ -432,18 +435,36 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>         resid   = qp->resp.resid;
>         pktlen  = payload_size(pkt);
>
> -       mr = lookup_mr(qp->pd, access, rkey, lookup_remote);
> -       if (!mr) {
> -               state = RESPST_ERR_RKEY_VIOLATION;
> -               goto err;
> -       }
> +       if (rkey_is_mw(rkey)) {
> +               mw = lookup_mw(qp, access, rkey);
> +               if (!mw) {
> +                       pr_err("%s: no MW matches rkey %#x\n", __func__, rkey);
> +                       state = RESPST_ERR_RKEY_VIOLATION;
> +                       goto err;
> +               }
>
> -       if (unlikely(mr->state == RXE_MR_STATE_FREE)) {
> -               state = RESPST_ERR_RKEY_VIOLATION;
> -               goto err;
> +               mr = mw->mr;
> +               if (!mr) {
> +                       pr_err("%s: MW doesn't have an MR\n", __func__);
> +                       state = RESPST_ERR_RKEY_VIOLATION;
> +                       goto err;
> +               }
> +
> +               if (mw->access & IB_ZERO_BASED)
> +                       qp->resp.offset = mw->addr;
> +
> +               rxe_drop_ref(mw);
> +               rxe_add_ref(mr);
> +       } else {
> +               mr = lookup_mr(qp->pd, access, rkey, lookup_remote);
> +               if (!mr) {
> +                       pr_err("%s: no MR matches rkey %#x\n", __func__, rkey);
> +                       state = RESPST_ERR_RKEY_VIOLATION;
> +                       goto err;
> +               }
>         }
>
> -       if (mr_check_range(mr, va, resid)) {
> +       if (mr_check_range(mr, va + qp->resp.offset, resid)) {
>                 state = RESPST_ERR_RKEY_VIOLATION;
>                 goto err;
>         }
> @@ -477,6 +498,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>  err:
>         if (mr)
>                 rxe_drop_ref(mr);
> +       if (mw)
> +               rxe_drop_ref(mw);
> +
>         return state;
>  }
>
> @@ -501,8 +525,8 @@ static enum resp_states write_data_in(struct rxe_qp *qp,
>         int     err;
>         int data_len = payload_size(pkt);
>
> -       err = rxe_mr_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt), data_len,
> -                         to_mr_obj, NULL);
> +       err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
> +                         payload_addr(pkt), data_len, to_mr_obj, NULL);
>         if (err) {
>                 rc = RESPST_ERR_RKEY_VIOLATION;
>                 goto out;
> @@ -521,7 +545,6 @@ static DEFINE_SPINLOCK(atomic_ops_lock);
>  static enum resp_states process_atomic(struct rxe_qp *qp,
>                                        struct rxe_pkt_info *pkt)
>  {
> -       u64 iova = atmeth_va(pkt);
>         u64 *vaddr;
>         enum resp_states ret;
>         struct rxe_mr *mr = qp->resp.mr;
> @@ -531,7 +554,7 @@ static enum resp_states process_atomic(struct rxe_qp *qp,
>                 goto out;
>         }
>
> -       vaddr = iova_to_vaddr(mr, iova, sizeof(u64));
> +       vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, sizeof(u64));
>
>         /* check vaddr is 8 bytes aligned. */
>         if (!vaddr || (uintptr_t)vaddr & 7) {
> @@ -655,8 +678,10 @@ static enum resp_states read_reply(struct rxe_qp *qp,
>                 res->type               = RXE_READ_MASK;
>                 res->replay             = 0;
>
> -               res->read.va            = qp->resp.va;
> -               res->read.va_org        = qp->resp.va;
> +               res->read.va            = qp->resp.va +
> +                                         qp->resp.offset;
> +               res->read.va_org        = qp->resp.va +
> +                                         qp->resp.offset;
>
>                 res->first_psn          = req_pkt->psn;
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
> index b286a14ec282..9f35e2c042d0 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> @@ -183,6 +183,7 @@ struct rxe_resp_info {
>
>         /* RDMA read / atomic only */
>         u64                     va;
> +       u64                     offset;
>         struct rxe_mr           *mr;
>         u32                     resid;
>         u32                     rkey;
> @@ -470,6 +471,16 @@ static inline u32 mr_rkey(struct rxe_mr *mr)
>         return mr->ibmr.rkey;
>  }
>
> +static inline struct rxe_pd *mw_pd(struct rxe_mw *mw)

inline
Can we remove the inline keyword and let the compiler decide?

> +{
> +       return to_rpd(mw->ibmw.pd);
> +}
> +
> +static inline u32 mw_rkey(struct rxe_mw *mw)

inline
the same.

Zhu Yanjun
> +{
> +       return mw->ibmw.rkey;
> +}
> +
>  int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
>
>  void rxe_mc_cleanup(struct rxe_pool_entry *arg);
> --
> 2.27.0
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next v2 8/9] RDMA/rxe: Implement invalidate MW operations
  2021-04-15  2:54 ` [PATCH for-next v2 8/9] RDMA/rxe: Implement invalidate MW operations Bob Pearson
@ 2021-04-20  6:38   ` Zhu Yanjun
  2021-04-21  4:22     ` Bob Pearson
  0 siblings, 1 reply; 17+ messages in thread
From: Zhu Yanjun @ 2021-04-20  6:38 UTC (permalink / raw)
  To: Bob Pearson; +Cc: Jason Gunthorpe, RDMA mailing list, Bob Pearson

On Thu, Apr 15, 2021 at 10:55 AM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> Implemented invalidate MW and cleaned up invalidate MR operations.
>
> Added code to perform remote invalidate for send with invalidate.
> Added code to perform local invalidation.
> Deleted some blank lines in rxe_loc.h.
>
> Signed-off-by: Bob Pearson <rpearson@hpe.com>
> ---
>  drivers/infiniband/sw/rxe/rxe_loc.h   | 23 ++++-----
>  drivers/infiniband/sw/rxe/rxe_mr.c    | 59 +++++++++++++++++------
>  drivers/infiniband/sw/rxe/rxe_mw.c    | 67 +++++++++++++++++++++++++++
>  drivers/infiniband/sw/rxe/rxe_req.c   | 22 +++++----
>  drivers/infiniband/sw/rxe/rxe_resp.c  | 52 +++++++++++++--------
>  drivers/infiniband/sw/rxe/rxe_verbs.h | 23 +++++----
>  6 files changed, 178 insertions(+), 68 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
> index e6f574973298..7f1117c51e30 100644
> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> @@ -76,41 +76,34 @@ enum copy_direction {
>         from_mr_obj,
>  };
>
> +enum lookup_type {
> +       lookup_local,
> +       lookup_remote,
> +};
> +

https://www.kernel.org/doc/Documentation/process/coding-style.rst
"

12) Macros, Enums and RTL
-------------------------

Names of macros defining constants and labels in enums are capitalized.

"
Zhu Yanjun

>  u8 rxe_get_next_key(u32 last_key);
>  void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr);
> -
>  int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
>                      int access, struct ib_udata *udata, struct rxe_mr *mr);
> -
>  int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr);
> -
>  int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
>                 enum copy_direction dir, u32 *crcp);
> -
>  int copy_data(struct rxe_pd *pd, int access,
>               struct rxe_dma_info *dma, void *addr, int length,
>               enum copy_direction dir, u32 *crcp);
> -
>  void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length);
> -
> -enum lookup_type {
> -       lookup_local,
> -       lookup_remote,
> -};
> -
>  struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
>                          enum lookup_type type);
> -
>  int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
> -
> -void rxe_mr_cleanup(struct rxe_pool_entry *arg);
> -
>  int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
> +int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey);
> +void rxe_mr_cleanup(struct rxe_pool_entry *arg);
>
>  /* rxe_mw.c */
>  int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
>  int rxe_dealloc_mw(struct ib_mw *ibmw);
>  int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
> +int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey);
>  void rxe_mw_cleanup(struct rxe_pool_entry *arg);
>
>  /* rxe_net.c */
> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
> index 7f2cfc1ce659..0f1791ed0350 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mr.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mr.c
> @@ -55,21 +55,6 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
>         mr->map_shift = ilog2(RXE_BUF_PER_MAP);
>  }
>
> -void rxe_mr_cleanup(struct rxe_pool_entry *arg)
> -{
> -       struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
> -       int i;
> -
> -       ib_umem_release(mr->umem);
> -
> -       if (mr->map) {
> -               for (i = 0; i < mr->num_map; i++)
> -                       kfree(mr->map[i]);
> -
> -               kfree(mr->map);
> -       }
> -}
> -
>  static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
>  {
>         int i;
> @@ -540,3 +525,47 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
>
>         return mr;
>  }
> +
> +int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
> +{
> +       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
> +       struct rxe_mr *mr;
> +       int ret;
> +
> +       mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
> +       if (!mr) {
> +               pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
> +               ret = -EINVAL;
> +               goto err;
> +       }
> +
> +       if (rkey != mr->ibmr.rkey) {
> +               pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
> +                       __func__, rkey, mr->ibmr.rkey);
> +               ret = -EINVAL;
> +               goto err_drop_ref;
> +       }
> +
> +       mr->state = RXE_MR_STATE_FREE;
> +       ret = 0;
> +
> +err_drop_ref:
> +       rxe_drop_ref(mr);
> +err:
> +       return ret;
> +}
> +
> +void rxe_mr_cleanup(struct rxe_pool_entry *arg)
> +{
> +       struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
> +       int i;
> +
> +       ib_umem_release(mr->umem);
> +
> +       if (mr->map) {
> +               for (i = 0; i < mr->num_map; i++)
> +                       kfree(mr->map[i]);
> +
> +               kfree(mr->map);
> +       }
> +}
> diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
> index 6ced54126b72..4c1830b4a8bf 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mw.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mw.c
> @@ -247,6 +247,73 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
>         return ret;
>  }
>
> +static int check_invalidate_mw(struct rxe_qp *qp, struct rxe_mw *mw)
> +{
> +       if (unlikely(mw->state == RXE_MW_STATE_INVALID))
> +               return -EINVAL;
> +
> +       /* o10-37.2.26 */
> +       if (unlikely(mw->ibmw.type == IB_MW_TYPE_1))
> +               return -EINVAL;
> +
> +       return 0;
> +}
> +
> +static void do_invalidate_mw(struct rxe_mw *mw)
> +{
> +       struct rxe_qp *qp;
> +       struct rxe_mr *mr;
> +
> +       /* valid type 2 MW will always have a QP pointer */
> +       qp = mw->qp;
> +       mw->qp = NULL;
> +       rxe_drop_ref(qp);
> +
> +       /* valid type 2 MW will always have an MR pointer */
> +       mr = mw->mr;
> +       mw->mr = NULL;
> +       atomic_dec(&mr->num_mw);
> +       rxe_drop_ref(mr);
> +
> +       mw->access = 0;
> +       mw->addr = 0;
> +       mw->length = 0;
> +       mw->state = RXE_MW_STATE_FREE;
> +}
> +
> +int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey)
> +{
> +       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
> +       unsigned long flags;
> +       struct rxe_mw *mw;
> +       int ret;
> +
> +       mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
> +       if (!mw) {
> +               ret = -EINVAL;
> +               goto err;
> +       }
> +
> +       if (rkey != mw->ibmw.rkey) {
> +               ret = -EINVAL;
> +               goto err_drop_ref;
> +       }
> +
> +       spin_lock_irqsave(&mw->lock, flags);
> +
> +       ret = check_invalidate_mw(qp, mw);
> +       if (ret)
> +               goto err_unlock;
> +
> +       do_invalidate_mw(mw);
> +err_unlock:
> +       spin_unlock_irqrestore(&mw->lock, flags);
> +err_drop_ref:
> +       rxe_drop_ref(mw);
> +err:
> +       return ret;
> +}
> +
>  void rxe_mw_cleanup(struct rxe_pool_entry *elem)
>  {
>         struct rxe_mw *mw = container_of(elem, typeof(*mw), pelem);
> diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
> index 243602584a28..66fc208d0ec1 100644
> --- a/drivers/infiniband/sw/rxe/rxe_req.c
> +++ b/drivers/infiniband/sw/rxe/rxe_req.c
> @@ -558,25 +558,25 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
>  static int do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
>  {
>         u8 opcode = wqe->wr.opcode;
> -       struct rxe_dev *rxe;
>         struct rxe_mr *mr;
> -       u32 rkey;
>         int ret;
> +       u32 rkey;
>
>         switch (opcode) {
>         case IB_WR_LOCAL_INV:
> -               rxe = to_rdev(qp->ibqp.device);
>                 rkey = wqe->wr.ex.invalidate_rkey;
> -               mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
> -               if (!mr) {
> -                       pr_err("No MR for rkey %#x\n", rkey);
> +               if (rkey_is_mw(rkey))
> +                       ret = rxe_invalidate_mw(qp, rkey);
> +               else
> +                       ret = rxe_invalidate_mr(qp, rkey);
> +
> +               if (ret) {
>                         wqe->state = wqe_state_error;
>                         wqe->status = IB_WC_LOC_QP_OP_ERR;
> -                       return -EINVAL;
> +                       return ret;
>                 }
> -               mr->state = RXE_MR_STATE_FREE;
> -               rxe_drop_ref(mr);
>                 break;
> +
>         case IB_WR_REG_MR:
>                 mr = to_rmr(wqe->wr.wr.reg.mr);
>
> @@ -588,14 +588,16 @@ static int do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
>                 mr->iova = wqe->wr.wr.reg.mr->iova;
>                 rxe_drop_ref(mr);
>                 break;
> +
>         case IB_WR_BIND_MW:
>                 ret = rxe_bind_mw(qp, wqe);
>                 if (ret) {
>                         wqe->state = wqe_state_error;
>                         wqe->status = IB_WC_MW_BIND_ERR;
> -                       return -EINVAL;
> +                       return ret;
>                 }
>                 break;
> +
>         default:
>                 pr_err("Unexpected send wqe opcode %d\n", opcode);
>                 wqe->state = wqe_state_error;
> diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
> index 2b220659bddb..21adc9209107 100644
> --- a/drivers/infiniband/sw/rxe/rxe_resp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
> @@ -35,6 +35,7 @@ enum resp_states {
>         RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
>         RESPST_ERR_RNR,
>         RESPST_ERR_RKEY_VIOLATION,
> +       RESPST_ERR_INVALIDATE_RKEY,
>         RESPST_ERR_LENGTH,
>         RESPST_ERR_CQ_OVERFLOW,
>         RESPST_ERROR,
> @@ -68,6 +69,7 @@ static char *resp_state_name[] = {
>         [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ]      = "ERR_TOO_MANY_RDMA_ATM_REQ",
>         [RESPST_ERR_RNR]                        = "ERR_RNR",
>         [RESPST_ERR_RKEY_VIOLATION]             = "ERR_RKEY_VIOLATION",
> +       [RESPST_ERR_INVALIDATE_RKEY]            = "ERR_INVALIDATE_RKEY_VIOLATION",
>         [RESPST_ERR_LENGTH]                     = "ERR_LENGTH",
>         [RESPST_ERR_CQ_OVERFLOW]                = "ERR_CQ_OVERFLOW",
>         [RESPST_ERROR]                          = "ERROR",
> @@ -751,6 +753,14 @@ static void build_rdma_network_hdr(union rdma_network_hdr *hdr,
>                 memcpy(&hdr->ibgrh, ipv6_hdr(skb), sizeof(hdr->ibgrh));
>  }
>
> +static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
> +{
> +       if (rkey_is_mw(rkey))
> +               return rxe_invalidate_mw(qp, rkey);
> +       else
> +               return rxe_invalidate_mr(qp, rkey);
> +}
> +
>  /* Executes a new request. A retried request never reach that function (send
>   * and writes are discarded, and reads and atomics are retried elsewhere.
>   */
> @@ -790,6 +800,14 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
>                 WARN_ON_ONCE(1);
>         }
>
> +       if (pkt->mask & RXE_IETH_MASK) {
> +               u32 rkey = ieth_rkey(pkt);
> +
> +               err = invalidate_rkey(qp, rkey);
> +               if (err)
> +                       return RESPST_ERR_INVALIDATE_RKEY;
> +       }
> +
>         /* next expected psn, read handles this separately */
>         qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
>         qp->resp.ack_psn = qp->resp.psn;
> @@ -822,13 +840,13 @@ static enum resp_states do_complete(struct rxe_qp *qp,
>         memset(&cqe, 0, sizeof(cqe));
>
>         if (qp->rcq->is_user) {
> -               uwc->status             = qp->resp.status;
> -               uwc->qp_num             = qp->ibqp.qp_num;
> -               uwc->wr_id              = wqe->wr_id;
> +               uwc->status             = qp->resp.status;
> +               uwc->qp_num             = qp->ibqp.qp_num;
> +               uwc->wr_id              = wqe->wr_id;
>         } else {
> -               wc->status              = qp->resp.status;
> -               wc->qp                  = &qp->ibqp;
> -               wc->wr_id               = wqe->wr_id;
> +               wc->status              = qp->resp.status;
> +               wc->qp                  = &qp->ibqp;
> +               wc->wr_id               = wqe->wr_id;
>         }
>
>         if (wc->status == IB_WC_SUCCESS) {
> @@ -883,27 +901,14 @@ static enum resp_states do_complete(struct rxe_qp *qp,
>                         }
>
>                         if (pkt->mask & RXE_IETH_MASK) {
> -                               struct rxe_mr *rmr;
> -
>                                 wc->wc_flags |= IB_WC_WITH_INVALIDATE;
>                                 wc->ex.invalidate_rkey = ieth_rkey(pkt);
> -
> -                               rmr = rxe_pool_get_index(&rxe->mr_pool,
> -                                                        wc->ex.invalidate_rkey >> 8);
> -                               if (unlikely(!rmr)) {
> -                                       pr_err("Bad rkey %#x invalidation\n",
> -                                              wc->ex.invalidate_rkey);
> -                                       return RESPST_ERROR;
> -                               }
> -                               rmr->state = RXE_MR_STATE_FREE;
> -                               rxe_drop_ref(rmr);
>                         }
>
> -                       wc->qp                  = &qp->ibqp;
> -
>                         if (pkt->mask & RXE_DETH_MASK)
>                                 wc->src_qp = deth_sqp(pkt);
>
> +                       wc->qp                  = &qp->ibqp;
>                         wc->port_num            = qp->attr.port_num;
>                 }
>         }
> @@ -1314,6 +1319,13 @@ int rxe_responder(void *arg)
>                         }
>                         break;
>
> +               case RESPST_ERR_INVALIDATE_RKEY:
> +                       /* RC - Class J. */
> +                       qp->resp.goto_error = 1;
> +                       qp->resp.status = IB_WC_REM_INV_REQ_ERR;
> +                       state = RESPST_COMPLETE;
> +                       break;
> +
>                 case RESPST_ERR_LENGTH:
>                         if (qp_type(qp) == IB_QPT_RC) {
>                                 /* Class C */
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
> index 7da47b8c707b..b286a14ec282 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> @@ -286,6 +286,13 @@ struct rxe_map {
>         struct rxe_phys_buf     buf[RXE_BUF_PER_MAP];
>  };
>
> +static inline int rkey_is_mw(u32 rkey)
> +{
> +       u32 index = rkey >> 8;
> +
> +       return (index >= RXE_MIN_MW_INDEX) && (index <= RXE_MAX_MW_INDEX);
> +}
> +
>  struct rxe_mr {
>         struct rxe_pool_entry   pelem;
>         struct ib_mr            ibmr;
> @@ -311,23 +318,23 @@ struct rxe_mr {
>         u32                     max_buf;
>         u32                     num_map;
>
> -       struct rxe_map          **map;
> -
>         atomic_t                num_mw;
> +
> +       struct rxe_map          **map;
>  };
>
>  enum rxe_mw_state {
> -       RXE_MW_STATE_INVALID = RXE_MR_STATE_INVALID,
> -       RXE_MW_STATE_FREE = RXE_MR_STATE_FREE,
> -       RXE_MW_STATE_VALID = RXE_MR_STATE_VALID,
> +       RXE_MW_STATE_INVALID    = RXE_MR_STATE_INVALID,
> +       RXE_MW_STATE_FREE       = RXE_MR_STATE_FREE,
> +       RXE_MW_STATE_VALID      = RXE_MR_STATE_VALID,
>  };
>
>  struct rxe_mw {
> -       struct                  ib_mw ibmw;
> -       struct                  rxe_pool_entry pelem;
> +       struct ib_mw            ibmw;
> +       struct rxe_pool_entry   pelem;
>         spinlock_t              lock;
>         enum rxe_mw_state       state;
> -       struct rxe_qp           *qp;    /* Type 2 only */
> +       struct rxe_qp           *qp; /* Type 2 only */
>         struct rxe_mr           *mr;
>         int                     access;
>         u64                     addr;
> --
> 2.27.0
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next v2 9/9] RDMA/rxe: Implement memory access through MWs
  2021-04-20  6:34   ` Zhu Yanjun
@ 2021-04-20 12:04     ` Jason Gunthorpe
  2021-04-21  4:09       ` Bob Pearson
  0 siblings, 1 reply; 17+ messages in thread
From: Jason Gunthorpe @ 2021-04-20 12:04 UTC (permalink / raw)
  To: Zhu Yanjun; +Cc: Bob Pearson, RDMA mailing list, Bob Pearson

On Tue, Apr 20, 2021 at 02:34:07PM +0800, Zhu Yanjun wrote:
> > diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
> > index b286a14ec282..9f35e2c042d0 100644
> > +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> > @@ -183,6 +183,7 @@ struct rxe_resp_info {
> >
> >         /* RDMA read / atomic only */
> >         u64                     va;
> > +       u64                     offset;
> >         struct rxe_mr           *mr;
> >         u32                     resid;
> >         u32                     rkey;
> > @@ -470,6 +471,16 @@ static inline u32 mr_rkey(struct rxe_mr *mr)
> >         return mr->ibmr.rkey;
> >  }
> >
> > +static inline struct rxe_pd *mw_pd(struct rxe_mw *mw)
> 
> inline
> Can we remove the inline keyword and let the compiler decide?

Not in a header

Jason

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next v2 7/9] RDMA/rxe: Add support for bind MW work requests
  2021-04-15  2:54 ` [PATCH for-next v2 7/9] RDMA/rxe: Add support for bind MW work requests Bob Pearson
@ 2021-04-20 14:32   ` Zhu Yanjun
  2021-04-21  4:24     ` Bob Pearson
  0 siblings, 1 reply; 17+ messages in thread
From: Zhu Yanjun @ 2021-04-20 14:32 UTC (permalink / raw)
  To: Bob Pearson; +Cc: Jason Gunthorpe, RDMA mailing list, Bob Pearson

On Thu, Apr 15, 2021 at 10:55 AM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> Add support for bind MW work requests from user space.
> Since rdma/core does not support bind mw in ib_send_wr
> there is no way to support bind mw in kernel space.
>
> Added bind_mw local operation in rxe_req.c
> Added bind_mw WR operation in rxe_opcode.c
> Added bind_mw WC in rxe_comp.c
> Added additional fields to rxe_mw in rxe_verbs.h
> Added do_dealloc_mw() subroutine to cleanup an mw
> when rxe_dealloc_mw is called.
> Added code to implement bind_mw operation in rxe_mw.c
>
> Signed-off-by: Bob Pearson <rpearson@hpe.com>
> ---
> v2:
>   Dropped kernel support for bind_mw in rxe_mw.c
>   Replaced umw with mw in rxe_send_wr.
>
>  drivers/infiniband/sw/rxe/rxe_comp.c   |   1 +
>  drivers/infiniband/sw/rxe/rxe_loc.h    |   1 +
>  drivers/infiniband/sw/rxe/rxe_mw.c     | 204 ++++++++++++++++++++++++-
>  drivers/infiniband/sw/rxe/rxe_opcode.c |   7 +
>  drivers/infiniband/sw/rxe/rxe_req.c    |   9 ++
>  drivers/infiniband/sw/rxe/rxe_verbs.h  |  15 +-
>  6 files changed, 232 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
> index 2af26737d32d..bc5488af5f55 100644
> --- a/drivers/infiniband/sw/rxe/rxe_comp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_comp.c
> @@ -103,6 +103,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
>         case IB_WR_RDMA_READ_WITH_INV:          return IB_WC_RDMA_READ;
>         case IB_WR_LOCAL_INV:                   return IB_WC_LOCAL_INV;
>         case IB_WR_REG_MR:                      return IB_WC_REG_MR;
> +       case IB_WR_BIND_MW:                     return IB_WC_BIND_MW;
>
>         default:
>                 return 0xff;
> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
> index edf575930a98..e6f574973298 100644
> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> @@ -110,6 +110,7 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
>  /* rxe_mw.c */
>  int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
>  int rxe_dealloc_mw(struct ib_mw *ibmw);
> +int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
>  void rxe_mw_cleanup(struct rxe_pool_entry *arg);
>
>  /* rxe_net.c */
> diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
> index 69128e298d44..6ced54126b72 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mw.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mw.c
> @@ -29,6 +29,29 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
>         return 0;
>  }
>
> +static void do_dealloc_mw(struct rxe_mw *mw)
> +{
> +       if (mw->mr) {
> +               struct rxe_mr *mr = mw->mr;
> +
> +               mw->mr = NULL;
> +               atomic_dec(&mr->num_mw);
> +               rxe_drop_ref(mr);
> +       }
> +
> +       if (mw->qp) {
> +               struct rxe_qp *qp = mw->qp;
> +
> +               mw->qp = NULL;
> +               rxe_drop_ref(qp);
> +       }
> +
> +       mw->access = 0;
> +       mw->addr = 0;
> +       mw->length = 0;
> +       mw->state = RXE_MW_STATE_INVALID;
> +}
> +
>  int rxe_dealloc_mw(struct ib_mw *ibmw)
>  {
>         struct rxe_mw *mw = to_rmw(ibmw);
> @@ -36,7 +59,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
>         unsigned long flags;
>
>         spin_lock_irqsave(&mw->lock, flags);
> -       mw->state = RXE_MW_STATE_INVALID;
> +       do_dealloc_mw(mw);
>         spin_unlock_irqrestore(&mw->lock, flags);
>
>         rxe_drop_ref(mw);
> @@ -45,6 +68,185 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
>         return 0;
>  }
>
> +static int check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
> +                        struct rxe_mw *mw, struct rxe_mr *mr)
> +{
> +       if (mw->ibmw.type == IB_MW_TYPE_1) {
> +               if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
> +                       pr_err_once(
> +                               "attempt to bind a type 1 MW not in the valid state\n");
> +                       return -EINVAL;
> +               }
> +
> +               /* o10-36.2.2 */
> +               if (unlikely((mw->access & IB_ZERO_BASED))) {
> +                       pr_err_once("attempt to bind a zero based type 1 MW\n");
> +                       return -EINVAL;
> +               }
> +       }
> +
> +       if (mw->ibmw.type == IB_MW_TYPE_2) {
> +               /* o10-37.2.30 */
> +               if (unlikely(mw->state != RXE_MW_STATE_FREE)) {
> +                       pr_err_once(
> +                               "attempt to bind a type 2 MW not in the free state\n");
> +                       return -EINVAL;
> +               }
> +
> +               /* C10-72 */
> +               if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) {
> +                       pr_err_once(
> +                               "attempt to bind type 2 MW with qp with different PD\n");
> +                       return -EINVAL;
> +               }
> +
> +               /* o10-37.2.40 */
> +               if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) {
> +                       pr_err_once(
> +                               "attempt to invalidate type 2 MW by binding with NULL or zero length MR\n");
> +                       return -EINVAL;
> +               }
> +       }
> +
> +       if (unlikely((wqe->wr.wr.mw.rkey & 0xff) == (mw->ibmw.rkey & 0xff))) {
> +               pr_err_once("attempt to bind MW with same key\n");
> +               return -EINVAL;
> +       }
> +
> +       /* remaining checks only apply to a nonzero MR */
> +       if (!mr)
> +               return 0;
> +
> +       if (unlikely(mr->access & IB_ZERO_BASED)) {
> +               pr_err_once("attempt to bind MW to zero based MR\n");
> +               return -EINVAL;
> +       }
> +
> +       /* C10-73 */
> +       if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) {
> +               pr_err_once(
> +                       "attempt to bind an MW to an MR without bind access\n");
> +               return -EINVAL;
> +       }
> +
> +       /* C10-74 */
> +       if (unlikely((mw->access &
> +                     (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
> +                    !(mr->access & IB_ACCESS_LOCAL_WRITE))) {
> +               pr_err_once(
> +                       "attempt to bind an writeable MW to an MR without local write access\n");
> +               return -EINVAL;
> +       }
> +
> +       /* C10-75 */
> +       if (mw->access & IB_ZERO_BASED) {
> +               if (unlikely(wqe->wr.wr.mw.length > mr->length)) {
> +                       pr_err_once(
> +                               "attempt to bind a ZB MW outside of the MR\n");
> +                       return -EINVAL;
> +               }
> +       } else {
> +               if (unlikely((wqe->wr.wr.mw.addr < mr->iova) ||
> +                            ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
> +                             (mr->iova + mr->length)))) {
> +                       pr_err_once(
> +                               "attempt to bind a VA MW outside of the MR\n");
> +                       return -EINVAL;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +static int do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
> +                     struct rxe_mw *mw, struct rxe_mr *mr)
> +{
> +       u32 rkey;
> +       u32 new_rkey;
> +
> +       rkey = mw->ibmw.rkey;
> +       new_rkey = (rkey & 0xffffff00) | (wqe->wr.wr.mw.rkey & 0x000000ff);
> +
> +       mw->ibmw.rkey = new_rkey;
> +       mw->access = wqe->wr.wr.mw.access;
> +       mw->state = RXE_MW_STATE_VALID;
> +       mw->addr = wqe->wr.wr.mw.addr;
> +       mw->length = wqe->wr.wr.mw.length;
> +
> +       if (mw->mr) {
> +               rxe_drop_ref(mw->mr);
> +               atomic_dec(&mw->mr->num_mw);
> +               mw->mr = NULL;
> +       }
> +
> +       if (mw->length) {
> +               mw->mr = mr;
> +               atomic_inc(&mr->num_mw);
> +               rxe_add_ref(mr);
> +       }
> +
> +       if (mw->ibmw.type == IB_MW_TYPE_2) {
> +               rxe_add_ref(qp);
> +               mw->qp = qp;
> +       }
> +
> +       return 0;

Is it necessary to return 0?

No other (error) value is ever returned.

Zhu Yanjun
> +}
> +
> +int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
> +{
> +       int ret;
> +       struct rxe_mw *mw;
> +       struct rxe_mr *mr;
> +       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
> +       unsigned long flags;
> +
> +       mw = rxe_pool_get_index(&rxe->mw_pool,
> +                               wqe->wr.wr.mw.mw_rkey >> 8);
> +       if (unlikely(!mw)) {
> +               ret = -EINVAL;
> +               goto err;
> +       }
> +
> +       if (unlikely(mw->ibmw.rkey != wqe->wr.wr.mw.mw_rkey)) {
> +               ret = -EINVAL;
> +               goto err_drop_mw;
> +       }
> +
> +       if (likely(wqe->wr.wr.mw.length)) {
> +               mr = rxe_pool_get_index(&rxe->mr_pool,
> +                                       wqe->wr.wr.mw.mr_lkey >> 8);
> +               if (unlikely(!mr)) {
> +                       ret = -EINVAL;
> +                       goto err_drop_mw;
> +               }
> +
> +               if (unlikely(mr->ibmr.lkey != wqe->wr.wr.mw.mr_lkey)) {
> +                       ret = -EINVAL;
> +                       goto err_drop_mr;
> +               }
> +       } else {
> +               mr = NULL;
> +       }
> +
> +       spin_lock_irqsave(&mw->lock, flags);
> +
> +       ret = check_bind_mw(qp, wqe, mw, mr);
> +       if (ret)
> +               goto err_unlock;
> +
> +       ret = do_bind_mw(qp, wqe, mw, mr);
> +err_unlock:
> +       spin_unlock_irqrestore(&mw->lock, flags);
> +err_drop_mr:
> +       if (mr)
> +               rxe_drop_ref(mr);
> +err_drop_mw:
> +       rxe_drop_ref(mw);
> +err:
> +       return ret;
> +}
> +
>  void rxe_mw_cleanup(struct rxe_pool_entry *elem)
>  {
>         struct rxe_mw *mw = container_of(elem, typeof(*mw), pelem);
> diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
> index 1e4b67b048f3..3ef5a10a6efd 100644
> --- a/drivers/infiniband/sw/rxe/rxe_opcode.c
> +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
> @@ -96,6 +96,13 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
>                         [IB_QPT_RC]     = WR_LOCAL_OP_MASK,
>                 },
>         },
> +       [IB_WR_BIND_MW]                                 = {
> +               .name   = "IB_WR_BIND_MW",
> +               .mask   = {
> +                       [IB_QPT_RC]     = WR_LOCAL_OP_MASK,
> +                       [IB_QPT_UC]     = WR_LOCAL_OP_MASK,
> +               },
> +       },
>  };
>
>  struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
> diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
> index 0cf97e3db29f..243602584a28 100644
> --- a/drivers/infiniband/sw/rxe/rxe_req.c
> +++ b/drivers/infiniband/sw/rxe/rxe_req.c
> @@ -561,6 +561,7 @@ static int do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
>         struct rxe_dev *rxe;
>         struct rxe_mr *mr;
>         u32 rkey;
> +       int ret;
>
>         switch (opcode) {
>         case IB_WR_LOCAL_INV:
> @@ -587,6 +588,14 @@ static int do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
>                 mr->iova = wqe->wr.wr.reg.mr->iova;
>                 rxe_drop_ref(mr);
>                 break;
> +       case IB_WR_BIND_MW:
> +               ret = rxe_bind_mw(qp, wqe);
> +               if (ret) {
> +                       wqe->state = wqe_state_error;
> +                       wqe->status = IB_WC_MW_BIND_ERR;
> +                       return -EINVAL;
> +               }
> +               break;
>         default:
>                 pr_err("Unexpected send wqe opcode %d\n", opcode);
>                 wqe->state = wqe_state_error;
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
> index c8597ae8c833..7da47b8c707b 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> @@ -312,6 +312,8 @@ struct rxe_mr {
>         u32                     num_map;
>
>         struct rxe_map          **map;
> +
> +       atomic_t                num_mw;
>  };
>
>  enum rxe_mw_state {
> @@ -321,10 +323,15 @@ enum rxe_mw_state {
>  };
>
>  struct rxe_mw {
> -       struct ib_mw ibmw;
> -       struct rxe_pool_entry pelem;
> -       spinlock_t lock;
> -       enum rxe_mw_state state;
> +       struct                  ib_mw ibmw;
> +       struct                  rxe_pool_entry pelem;
> +       spinlock_t              lock;
> +       enum rxe_mw_state       state;
> +       struct rxe_qp           *qp;    /* Type 2 only */
> +       struct rxe_mr           *mr;
> +       int                     access;
> +       u64                     addr;
> +       u64                     length;
>  };
>
>  struct rxe_mc_grp {
> --
> 2.27.0
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next v2 9/9] RDMA/rxe: Implement memory access through MWs
  2021-04-20 12:04     ` Jason Gunthorpe
@ 2021-04-21  4:09       ` Bob Pearson
  0 siblings, 0 replies; 17+ messages in thread
From: Bob Pearson @ 2021-04-21  4:09 UTC (permalink / raw)
  To: Jason Gunthorpe, Zhu Yanjun; +Cc: RDMA mailing list, Bob Pearson

On 4/20/21 7:04 AM, Jason Gunthorpe wrote:
> On Tue, Apr 20, 2021 at 02:34:07PM +0800, Zhu Yanjun wrote:
>>> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
>>> index b286a14ec282..9f35e2c042d0 100644
>>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
>>> @@ -183,6 +183,7 @@ struct rxe_resp_info {
>>>
>>>         /* RDMA read / atomic only */
>>>         u64                     va;
>>> +       u64                     offset;
>>>         struct rxe_mr           *mr;
>>>         u32                     resid;
>>>         u32                     rkey;
>>> @@ -470,6 +471,16 @@ static inline u32 mr_rkey(struct rxe_mr *mr)
>>>         return mr->ibmr.rkey;
>>>  }
>>>
>>> +static inline struct rxe_pd *mw_pd(struct rxe_mw *mw)
>>
>> inline
>> Can we remove the inline keyword and let the compiler decide?
> 
> Not in a header
> 
> Jason
> 
I think this is the cleanest way to add these trivial functions. They don't really generate any code,
just a different offset from the pointer.

Bob

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next v2 8/9] RDMA/rxe: Implement invalidate MW operations
  2021-04-20  6:38   ` Zhu Yanjun
@ 2021-04-21  4:22     ` Bob Pearson
  0 siblings, 0 replies; 17+ messages in thread
From: Bob Pearson @ 2021-04-21  4:22 UTC (permalink / raw)
  To: Zhu Yanjun; +Cc: Jason Gunthorpe, RDMA mailing list, Bob Pearson

On 4/20/21 1:38 AM, Zhu Yanjun wrote:
> On Thu, Apr 15, 2021 at 10:55 AM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>
>> Implement invalidate MW and cleaned up invalidate MR operations.
>>
>> Added code to perform remote invalidate for send with invalidate.
>> Added code to perform local invalidation.
>> Deleted some blank lines in rxe_loc.h.
>>
>> Signed-off-by: Bob Pearson <rpearson@hpe.com>
>> ---
>>  drivers/infiniband/sw/rxe/rxe_loc.h   | 23 ++++-----
>>  drivers/infiniband/sw/rxe/rxe_mr.c    | 59 +++++++++++++++++------
>>  drivers/infiniband/sw/rxe/rxe_mw.c    | 67 +++++++++++++++++++++++++++
>>  drivers/infiniband/sw/rxe/rxe_req.c   | 22 +++++----
>>  drivers/infiniband/sw/rxe/rxe_resp.c  | 52 +++++++++++++--------
>>  drivers/infiniband/sw/rxe/rxe_verbs.h | 23 +++++----
>>  6 files changed, 178 insertions(+), 68 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
>> index e6f574973298..7f1117c51e30 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
>> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
>> @@ -76,41 +76,34 @@ enum copy_direction {
>>         from_mr_obj,
>>  };
>>
>> +enum lookup_type {
>> +       lookup_local,
>> +       lookup_remote,
>> +};
>> +
> 
> https://www.kernel.org/doc/Documentation/process/coding-style.rst
> "
> 
> 12) Macros, Enums and RTL
> -------------------------
> 
> Names of macros defining constants and labels in enums are capitalized.
> 
> "
> Zhu Yanjun
> 
Agreed. It's the original code. I just moved it ahead of the prototypes. I've been wanting to fix this,
so this is the perfect excuse.

Bob

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next v2 7/9] RDMA/rxe: Add support for bind MW work requests
  2021-04-20 14:32   ` Zhu Yanjun
@ 2021-04-21  4:24     ` Bob Pearson
  0 siblings, 0 replies; 17+ messages in thread
From: Bob Pearson @ 2021-04-21  4:24 UTC (permalink / raw)
  To: Zhu Yanjun; +Cc: Jason Gunthorpe, RDMA mailing list, Bob Pearson

On 4/20/21 9:32 AM, Zhu Yanjun wrote:
> On Thu, Apr 15, 2021 at 10:55 AM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>
>> Add support for bind MW work requests from user space.
>> Since rdma/core does not support bind mw in ib_send_wr
>> there is no way to support bind mw in kernel space.
>>
>> Added bind_mw local operation in rxe_req.c
>> Added bind_mw WR operation in rxe_opcode.c
>> Added bind_mw WC in rxe_comp.c
>> Added additional fields to rxe_mw in rxe_verbs.h
>> Added do_dealloc_mw() subroutine to cleanup an mw
>> when rxe_dealloc_mw is called.
>> Added code to implement bind_mw operation in rxe_mw.c
>>
>> Signed-off-by: Bob Pearson <rpearson@hpe.com>
>> ---
>> v2:
>>   Dropped kernel support for bind_mw in rxe_mw.c
>>   Replaced umw with mw in rxe_send_wr.
>>
>>  drivers/infiniband/sw/rxe/rxe_comp.c   |   1 +
>>  drivers/infiniband/sw/rxe/rxe_loc.h    |   1 +
>>  drivers/infiniband/sw/rxe/rxe_mw.c     | 204 ++++++++++++++++++++++++-
>>  drivers/infiniband/sw/rxe/rxe_opcode.c |   7 +
>>  drivers/infiniband/sw/rxe/rxe_req.c    |   9 ++
>>  drivers/infiniband/sw/rxe/rxe_verbs.h  |  15 +-
>>  6 files changed, 232 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
>> index 2af26737d32d..bc5488af5f55 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_comp.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_comp.c
>> @@ -103,6 +103,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
>>         case IB_WR_RDMA_READ_WITH_INV:          return IB_WC_RDMA_READ;
>>         case IB_WR_LOCAL_INV:                   return IB_WC_LOCAL_INV;
>>         case IB_WR_REG_MR:                      return IB_WC_REG_MR;
>> +       case IB_WR_BIND_MW:                     return IB_WC_BIND_MW;
>>
>>         default:
>>                 return 0xff;
>> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
>> index edf575930a98..e6f574973298 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
>> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
>> @@ -110,6 +110,7 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
>>  /* rxe_mw.c */
>>  int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
>>  int rxe_dealloc_mw(struct ib_mw *ibmw);
>> +int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
>>  void rxe_mw_cleanup(struct rxe_pool_entry *arg);
>>
>>  /* rxe_net.c */
>> diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
>> index 69128e298d44..6ced54126b72 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_mw.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_mw.c
>> @@ -29,6 +29,29 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
>>         return 0;
>>  }
>>
>> +static void do_dealloc_mw(struct rxe_mw *mw)
>> +{
>> +       if (mw->mr) {
>> +               struct rxe_mr *mr = mw->mr;
>> +
>> +               mw->mr = NULL;
>> +               atomic_dec(&mr->num_mw);
>> +               rxe_drop_ref(mr);
>> +       }
>> +
>> +       if (mw->qp) {
>> +               struct rxe_qp *qp = mw->qp;
>> +
>> +               mw->qp = NULL;
>> +               rxe_drop_ref(qp);
>> +       }
>> +
>> +       mw->access = 0;
>> +       mw->addr = 0;
>> +       mw->length = 0;
>> +       mw->state = RXE_MW_STATE_INVALID;
>> +}
>> +
>>  int rxe_dealloc_mw(struct ib_mw *ibmw)
>>  {
>>         struct rxe_mw *mw = to_rmw(ibmw);
>> @@ -36,7 +59,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
>>         unsigned long flags;
>>
>>         spin_lock_irqsave(&mw->lock, flags);
>> -       mw->state = RXE_MW_STATE_INVALID;
>> +       do_dealloc_mw(mw);
>>         spin_unlock_irqrestore(&mw->lock, flags);
>>
>>         rxe_drop_ref(mw);
>> @@ -45,6 +68,185 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
>>         return 0;
>>  }
>>
>> +static int check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
>> +                        struct rxe_mw *mw, struct rxe_mr *mr)
>> +{
>> +       if (mw->ibmw.type == IB_MW_TYPE_1) {
>> +               if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
>> +                       pr_err_once(
>> +                               "attempt to bind a type 1 MW not in the valid state\n");
>> +                       return -EINVAL;
>> +               }
>> +
>> +               /* o10-36.2.2 */
>> +               if (unlikely((mw->access & IB_ZERO_BASED))) {
>> +                       pr_err_once("attempt to bind a zero based type 1 MW\n");
>> +                       return -EINVAL;
>> +               }
>> +       }
>> +
>> +       if (mw->ibmw.type == IB_MW_TYPE_2) {
>> +               /* o10-37.2.30 */
>> +               if (unlikely(mw->state != RXE_MW_STATE_FREE)) {
>> +                       pr_err_once(
>> +                               "attempt to bind a type 2 MW not in the free state\n");
>> +                       return -EINVAL;
>> +               }
>> +
>> +               /* C10-72 */
>> +               if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) {
>> +                       pr_err_once(
>> +                               "attempt to bind type 2 MW with qp with different PD\n");
>> +                       return -EINVAL;
>> +               }
>> +
>> +               /* o10-37.2.40 */
>> +               if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) {
>> +                       pr_err_once(
>> +                               "attempt to invalidate type 2 MW by binding with NULL or zero length MR\n");
>> +                       return -EINVAL;
>> +               }
>> +       }
>> +
>> +       if (unlikely((wqe->wr.wr.mw.rkey & 0xff) == (mw->ibmw.rkey & 0xff))) {
>> +               pr_err_once("attempt to bind MW with same key\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* remaining checks only apply to a nonzero MR */
>> +       if (!mr)
>> +               return 0;
>> +
>> +       if (unlikely(mr->access & IB_ZERO_BASED)) {
>> +               pr_err_once("attempt to bind MW to zero based MR\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* C10-73 */
>> +       if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) {
>> +               pr_err_once(
>> +                       "attempt to bind an MW to an MR without bind access\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* C10-74 */
>> +       if (unlikely((mw->access &
>> +                     (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
>> +                    !(mr->access & IB_ACCESS_LOCAL_WRITE))) {
>> +               pr_err_once(
>> +                       "attempt to bind an writeable MW to an MR without local write access\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* C10-75 */
>> +       if (mw->access & IB_ZERO_BASED) {
>> +               if (unlikely(wqe->wr.wr.mw.length > mr->length)) {
>> +                       pr_err_once(
>> +                               "attempt to bind a ZB MW outside of the MR\n");
>> +                       return -EINVAL;
>> +               }
>> +       } else {
>> +               if (unlikely((wqe->wr.wr.mw.addr < mr->iova) ||
>> +                            ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
>> +                             (mr->iova + mr->length)))) {
>> +                       pr_err_once(
>> +                               "attempt to bind a VA MW outside of the MR\n");
>> +                       return -EINVAL;
>> +               }
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +static int do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
>> +                     struct rxe_mw *mw, struct rxe_mr *mr)
>> +{
>> +       u32 rkey;
>> +       u32 new_rkey;
>> +
>> +       rkey = mw->ibmw.rkey;
>> +       new_rkey = (rkey & 0xffffff00) | (wqe->wr.wr.mw.rkey & 0x000000ff);
>> +
>> +       mw->ibmw.rkey = new_rkey;
>> +       mw->access = wqe->wr.wr.mw.access;
>> +       mw->state = RXE_MW_STATE_VALID;
>> +       mw->addr = wqe->wr.wr.mw.addr;
>> +       mw->length = wqe->wr.wr.mw.length;
>> +
>> +       if (mw->mr) {
>> +               rxe_drop_ref(mw->mr);
>> +               atomic_dec(&mw->mr->num_mw);
>> +               mw->mr = NULL;
>> +       }
>> +
>> +       if (mw->length) {
>> +               mw->mr = mr;
>> +               atomic_inc(&mr->num_mw);
>> +               rxe_add_ref(mr);
>> +       }
>> +
>> +       if (mw->ibmw.type == IB_MW_TYPE_2) {
>> +               rxe_add_ref(qp);
>> +               mw->qp = qp;
>> +       }
>> +
>> +       return 0;
> 
> Is it necessary to return 0?
> 
> No other (error) value is ever returned.
> 
> Zhu Yanjun

Good catch. It was left over from the past. I'll get rid of it.

Bob

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, back to index

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-15  2:54 [PATCH for-next v2 0/9] RDMA/rxe: Implement memory windows Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 1/9] RDMA/rxe: Add bind MW fields to rxe_send_wr Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 2/9] RDMA/rxe: Return errors for add index and key Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 3/9] RDMA/rxe: Enable MW object pool Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 4/9] RDMA/rxe: Add ib_alloc_mw and ib_dealloc_mw verbs Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 5/9] RDMA/rxe: Replace WR_REG_MASK by WR_LOCAL_OP_MASK Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 6/9] RDMA/rxe: Move local ops to subroutine Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 7/9] RDMA/rxe: Add support for bind MW work requests Bob Pearson
2021-04-20 14:32   ` Zhu Yanjun
2021-04-21  4:24     ` Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 8/9] RDMA/rxe: Implement invalidate MW operations Bob Pearson
2021-04-20  6:38   ` Zhu Yanjun
2021-04-21  4:22     ` Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 9/9] RDMA/rxe: Implement memory access through MWs Bob Pearson
2021-04-20  6:34   ` Zhu Yanjun
2021-04-20 12:04     ` Jason Gunthorpe
2021-04-21  4:09       ` Bob Pearson

Linux-RDMA Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-rdma/0 linux-rdma/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-rdma linux-rdma/ https://lore.kernel.org/linux-rdma \
		linux-rdma@vger.kernel.org
	public-inbox-index linux-rdma

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-rdma


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git