All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions
@ 2020-12-16 23:15 Bob Pearson
  2020-12-16 23:15 ` [PATCH for-next 1/7] RDMA/rxe: Remove unneeded RXE_POOL_ATOMIC flag Bob Pearson
                   ` (7 more replies)
  0 siblings, 8 replies; 13+ messages in thread
From: Bob Pearson @ 2020-12-16 23:15 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

This patch series makes various cleanups and extensions to the
object pool core in RDMA/rxe. They are mostly extracted from an
earlier patch set that implemented memory windows and extended
verbs APIs but are separated out since they stand on their own.

Bob Pearson (7):
  RDMA/rxe: Remove unneeded RXE_POOL_ATOMIC flag
  RDMA/rxe: Let pools support both keys and indices
  RDMA/rxe: Add elem_offset field to rxe_type_info
  RDMA/rxe: Make pool lookup and alloc APIs type safe
  RDMA/rxe: Make add/drop key/index APIs type safe
  RDMA/rxe: Add unlocked versions of pool APIs
  RDMA/rxe: Fix race in rxe_mcast.c

 drivers/infiniband/sw/rxe/rxe_mcast.c |  64 +++++---
 drivers/infiniband/sw/rxe/rxe_pool.c  | 226 +++++++++++++++++---------
 drivers/infiniband/sw/rxe/rxe_pool.h  |  94 ++++++++---
 drivers/infiniband/sw/rxe/rxe_verbs.c |  16 +-
 4 files changed, 268 insertions(+), 132 deletions(-)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH for-next 1/7] RDMA/rxe: Remove unneeded RXE_POOL_ATOMIC flag
  2020-12-16 23:15 [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions Bob Pearson
@ 2020-12-16 23:15 ` Bob Pearson
  2020-12-16 23:15 ` [PATCH for-next 2/7] RDMA/rxe: Let pools support both keys and indices Bob Pearson
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Bob Pearson @ 2020-12-16 23:15 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Remove RXE_POOL_ATOMIC flag from rxe_type_info for AH objects.
These objects are now allocated by rdma/core so there is no
further reason for this flag.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_pool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index b374eb53e2fe..11571ff64a8f 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -25,7 +25,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 	[RXE_TYPE_AH] = {
 		.name		= "rxe-ah",
 		.size		= sizeof(struct rxe_ah),
-		.flags		= RXE_POOL_ATOMIC | RXE_POOL_NO_ALLOC,
+		.flags		= RXE_POOL_NO_ALLOC,
 	},
 	[RXE_TYPE_SRQ] = {
 		.name		= "rxe-srq",
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH for-next 2/7] RDMA/rxe: Let pools support both keys and indices
  2020-12-16 23:15 [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions Bob Pearson
  2020-12-16 23:15 ` [PATCH for-next 1/7] RDMA/rxe: Remove unneeded RXE_POOL_ATOMIC flag Bob Pearson
@ 2020-12-16 23:15 ` Bob Pearson
  2020-12-16 23:15 ` [PATCH for-next 3/7] RDMA/rxe: Add elem_offset field to rxe_type_info Bob Pearson
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Bob Pearson @ 2020-12-16 23:15 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Allow both indices and keys to exist for objects in pools.
Previously you were limited to one or the other.

This is required for later implementing rxe memory windows.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_pool.c | 73 ++++++++++++++--------------
 drivers/infiniband/sw/rxe/rxe_pool.h | 32 +++++++-----
 2 files changed, 58 insertions(+), 47 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 11571ff64a8f..f1ecf5983742 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -94,18 +94,18 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
 		goto out;
 	}
 
-	pool->max_index = max;
-	pool->min_index = min;
+	pool->index.max_index = max;
+	pool->index.min_index = min;
 
 	size = BITS_TO_LONGS(max - min + 1) * sizeof(long);
-	pool->table = kmalloc(size, GFP_KERNEL);
-	if (!pool->table) {
+	pool->index.table = kmalloc(size, GFP_KERNEL);
+	if (!pool->index.table) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	pool->table_size = size;
-	bitmap_zero(pool->table, max - min + 1);
+	pool->index.table_size = size;
+	bitmap_zero(pool->index.table, max - min + 1);
 
 out:
 	return err;
@@ -127,7 +127,8 @@ int rxe_pool_init(
 	pool->max_elem		= max_elem;
 	pool->elem_size		= ALIGN(size, RXE_POOL_ALIGN);
 	pool->flags		= rxe_type_info[type].flags;
-	pool->tree		= RB_ROOT;
+	pool->index.tree	= RB_ROOT;
+	pool->key.tree		= RB_ROOT;
 	pool->cleanup		= rxe_type_info[type].cleanup;
 
 	atomic_set(&pool->num_elem, 0);
@@ -145,8 +146,8 @@ int rxe_pool_init(
 	}
 
 	if (rxe_type_info[type].flags & RXE_POOL_KEY) {
-		pool->key_offset = rxe_type_info[type].key_offset;
-		pool->key_size = rxe_type_info[type].key_size;
+		pool->key.key_offset = rxe_type_info[type].key_offset;
+		pool->key.key_size = rxe_type_info[type].key_size;
 	}
 
 	pool->state = RXE_POOL_STATE_VALID;
@@ -160,7 +161,7 @@ static void rxe_pool_release(struct kref *kref)
 	struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
 
 	pool->state = RXE_POOL_STATE_INVALID;
-	kfree(pool->table);
+	kfree(pool->index.table);
 }
 
 static void rxe_pool_put(struct rxe_pool *pool)
@@ -185,27 +186,27 @@ void rxe_pool_cleanup(struct rxe_pool *pool)
 static u32 alloc_index(struct rxe_pool *pool)
 {
 	u32 index;
-	u32 range = pool->max_index - pool->min_index + 1;
+	u32 range = pool->index.max_index - pool->index.min_index + 1;
 
-	index = find_next_zero_bit(pool->table, range, pool->last);
+	index = find_next_zero_bit(pool->index.table, range, pool->index.last);
 	if (index >= range)
-		index = find_first_zero_bit(pool->table, range);
+		index = find_first_zero_bit(pool->index.table, range);
 
 	WARN_ON_ONCE(index >= range);
-	set_bit(index, pool->table);
-	pool->last = index;
-	return index + pool->min_index;
+	set_bit(index, pool->index.table);
+	pool->index.last = index;
+	return index + pool->index.min_index;
 }
 
 static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
 {
-	struct rb_node **link = &pool->tree.rb_node;
+	struct rb_node **link = &pool->index.tree.rb_node;
 	struct rb_node *parent = NULL;
 	struct rxe_pool_entry *elem;
 
 	while (*link) {
 		parent = *link;
-		elem = rb_entry(parent, struct rxe_pool_entry, node);
+		elem = rb_entry(parent, struct rxe_pool_entry, index_node);
 
 		if (elem->index == new->index) {
 			pr_warn("element already exists!\n");
@@ -218,25 +219,25 @@ static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
 			link = &(*link)->rb_right;
 	}
 
-	rb_link_node(&new->node, parent, link);
-	rb_insert_color(&new->node, &pool->tree);
+	rb_link_node(&new->index_node, parent, link);
+	rb_insert_color(&new->index_node, &pool->index.tree);
 out:
 	return;
 }
 
 static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
 {
-	struct rb_node **link = &pool->tree.rb_node;
+	struct rb_node **link = &pool->key.tree.rb_node;
 	struct rb_node *parent = NULL;
 	struct rxe_pool_entry *elem;
 	int cmp;
 
 	while (*link) {
 		parent = *link;
-		elem = rb_entry(parent, struct rxe_pool_entry, node);
+		elem = rb_entry(parent, struct rxe_pool_entry, key_node);
 
-		cmp = memcmp((u8 *)elem + pool->key_offset,
-			     (u8 *)new + pool->key_offset, pool->key_size);
+		cmp = memcmp((u8 *)elem + pool->key.key_offset,
+			     (u8 *)new + pool->key.key_offset, pool->key.key_size);
 
 		if (cmp == 0) {
 			pr_warn("key already exists!\n");
@@ -249,8 +250,8 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
 			link = &(*link)->rb_right;
 	}
 
-	rb_link_node(&new->node, parent, link);
-	rb_insert_color(&new->node, &pool->tree);
+	rb_link_node(&new->key_node, parent, link);
+	rb_insert_color(&new->key_node, &pool->key.tree);
 out:
 	return;
 }
@@ -262,7 +263,7 @@ void rxe_add_key(void *arg, void *key)
 	unsigned long flags;
 
 	write_lock_irqsave(&pool->pool_lock, flags);
-	memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
+	memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size);
 	insert_key(pool, elem);
 	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
@@ -274,7 +275,7 @@ void rxe_drop_key(void *arg)
 	unsigned long flags;
 
 	write_lock_irqsave(&pool->pool_lock, flags);
-	rb_erase(&elem->node, &pool->tree);
+	rb_erase(&elem->key_node, &pool->key.tree);
 	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
@@ -297,8 +298,8 @@ void rxe_drop_index(void *arg)
 	unsigned long flags;
 
 	write_lock_irqsave(&pool->pool_lock, flags);
-	clear_bit(elem->index - pool->min_index, pool->table);
-	rb_erase(&elem->node, &pool->tree);
+	clear_bit(elem->index - pool->index.min_index, pool->index.table);
+	rb_erase(&elem->index_node, &pool->index.tree);
 	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
@@ -402,10 +403,10 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
 	if (pool->state != RXE_POOL_STATE_VALID)
 		goto out;
 
-	node = pool->tree.rb_node;
+	node = pool->index.tree.rb_node;
 
 	while (node) {
-		elem = rb_entry(node, struct rxe_pool_entry, node);
+		elem = rb_entry(node, struct rxe_pool_entry, index_node);
 
 		if (elem->index > index)
 			node = node->rb_left;
@@ -434,13 +435,13 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
 	if (pool->state != RXE_POOL_STATE_VALID)
 		goto out;
 
-	node = pool->tree.rb_node;
+	node = pool->key.tree.rb_node;
 
 	while (node) {
-		elem = rb_entry(node, struct rxe_pool_entry, node);
+		elem = rb_entry(node, struct rxe_pool_entry, key_node);
 
-		cmp = memcmp((u8 *)elem + pool->key_offset,
-			     key, pool->key_size);
+		cmp = memcmp((u8 *)elem + pool->key.key_offset,
+			     key, pool->key.key_size);
 
 		if (cmp > 0)
 			node = node->rb_left;
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 432745ffc8d4..3d722aae5f15 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -56,8 +56,11 @@ struct rxe_pool_entry {
 	struct kref		ref_cnt;
 	struct list_head	list;
 
-	/* only used if indexed or keyed */
-	struct rb_node		node;
+	/* only used if keyed */
+	struct rb_node		key_node;
+
+	/* only used if indexed */
+	struct rb_node		index_node;
 	u32			index;
 };
 
@@ -74,15 +77,22 @@ struct rxe_pool {
 	unsigned int		max_elem;
 	atomic_t		num_elem;
 
-	/* only used if indexed or keyed */
-	struct rb_root		tree;
-	unsigned long		*table;
-	size_t			table_size;
-	u32			max_index;
-	u32			min_index;
-	u32			last;
-	size_t			key_offset;
-	size_t			key_size;
+	/* only used if indexed */
+	struct {
+		struct rb_root		tree;
+		unsigned long		*table;
+		size_t			table_size;
+		u32			last;
+		u32			max_index;
+		u32			min_index;
+	} index;
+
+	/* only used if keyed */
+	struct {
+		struct rb_root		tree;
+		size_t			key_offset;
+		size_t			key_size;
+	} key;
 };
 
 /* initialize a pool of objects with given limit on
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH for-next 3/7] RDMA/rxe: Add elem_offset field to rxe_type_info
  2020-12-16 23:15 [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions Bob Pearson
  2020-12-16 23:15 ` [PATCH for-next 1/7] RDMA/rxe: Remove unneeded RXE_POOL_ATOMIC flag Bob Pearson
  2020-12-16 23:15 ` [PATCH for-next 2/7] RDMA/rxe: Let pools support both keys and indices Bob Pearson
@ 2020-12-16 23:15 ` Bob Pearson
  2020-12-16 23:15 ` [PATCH for-next 4/7] RDMA/rxe: Make pool lookup and alloc APIs type safe Bob Pearson
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Bob Pearson @ 2020-12-16 23:15 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

The rxe verbs objects each include an rdma-core object 'ib_xxx'
and a rxe_pool_entry 'pelem' in addition to rxe specific data.
Originally these all had pelem first and ib_xxx second. Currently
about half have ib_xxx first and half have pelem first. Saving
the offset of the pelem field in rxe_type_info will enable making
the rxe_pool APIs type safe as the pelem field continues to vary.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_pool.c | 10 ++++++++++
 drivers/infiniband/sw/rxe/rxe_pool.h |  1 +
 2 files changed, 11 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index f1ecf5983742..4d667b78af9b 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -15,21 +15,25 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 	[RXE_TYPE_UC] = {
 		.name		= "rxe-uc",
 		.size		= sizeof(struct rxe_ucontext),
+		.elem_offset	= offsetof(struct rxe_ucontext, pelem),
 		.flags          = RXE_POOL_NO_ALLOC,
 	},
 	[RXE_TYPE_PD] = {
 		.name		= "rxe-pd",
 		.size		= sizeof(struct rxe_pd),
+		.elem_offset	= offsetof(struct rxe_pd, pelem),
 		.flags		= RXE_POOL_NO_ALLOC,
 	},
 	[RXE_TYPE_AH] = {
 		.name		= "rxe-ah",
 		.size		= sizeof(struct rxe_ah),
+		.elem_offset	= offsetof(struct rxe_ah, pelem),
 		.flags		= RXE_POOL_NO_ALLOC,
 	},
 	[RXE_TYPE_SRQ] = {
 		.name		= "rxe-srq",
 		.size		= sizeof(struct rxe_srq),
+		.elem_offset	= offsetof(struct rxe_srq, pelem),
 		.flags		= RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
 		.min_index	= RXE_MIN_SRQ_INDEX,
 		.max_index	= RXE_MAX_SRQ_INDEX,
@@ -37,6 +41,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 	[RXE_TYPE_QP] = {
 		.name		= "rxe-qp",
 		.size		= sizeof(struct rxe_qp),
+		.elem_offset	= offsetof(struct rxe_qp, pelem),
 		.cleanup	= rxe_qp_cleanup,
 		.flags		= RXE_POOL_INDEX,
 		.min_index	= RXE_MIN_QP_INDEX,
@@ -45,12 +50,14 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 	[RXE_TYPE_CQ] = {
 		.name		= "rxe-cq",
 		.size		= sizeof(struct rxe_cq),
+		.elem_offset	= offsetof(struct rxe_cq, pelem),
 		.flags          = RXE_POOL_NO_ALLOC,
 		.cleanup	= rxe_cq_cleanup,
 	},
 	[RXE_TYPE_MR] = {
 		.name		= "rxe-mr",
 		.size		= sizeof(struct rxe_mem),
+		.elem_offset	= offsetof(struct rxe_mem, pelem),
 		.cleanup	= rxe_mem_cleanup,
 		.flags		= RXE_POOL_INDEX,
 		.max_index	= RXE_MAX_MR_INDEX,
@@ -59,6 +66,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 	[RXE_TYPE_MW] = {
 		.name		= "rxe-mw",
 		.size		= sizeof(struct rxe_mem),
+		.elem_offset	= offsetof(struct rxe_mem, pelem),
 		.flags		= RXE_POOL_INDEX,
 		.max_index	= RXE_MAX_MW_INDEX,
 		.min_index	= RXE_MIN_MW_INDEX,
@@ -66,6 +74,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 	[RXE_TYPE_MC_GRP] = {
 		.name		= "rxe-mc_grp",
 		.size		= sizeof(struct rxe_mc_grp),
+		.elem_offset	= offsetof(struct rxe_mc_grp, pelem),
 		.cleanup	= rxe_mc_cleanup,
 		.flags		= RXE_POOL_KEY,
 		.key_offset	= offsetof(struct rxe_mc_grp, mgid),
@@ -74,6 +83,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 	[RXE_TYPE_MC_ELEM] = {
 		.name		= "rxe-mc_elem",
 		.size		= sizeof(struct rxe_mc_elem),
+		.elem_offset	= offsetof(struct rxe_mc_elem, pelem),
 		.flags		= RXE_POOL_ATOMIC,
 	},
 };
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 3d722aae5f15..b37df6198e8a 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -36,6 +36,7 @@ struct rxe_pool_entry;
 struct rxe_type_info {
 	const char		*name;
 	size_t			size;
+	size_t			elem_offset;
 	void			(*cleanup)(struct rxe_pool_entry *obj);
 	enum rxe_pool_flags	flags;
 	u32			max_index;
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH for-next 4/7] RDMA/rxe: Make pool lookup and alloc APIs type safe
  2020-12-16 23:15 [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions Bob Pearson
                   ` (2 preceding siblings ...)
  2020-12-16 23:15 ` [PATCH for-next 3/7] RDMA/rxe: Add elem_offset field to rxe_type_info Bob Pearson
@ 2020-12-16 23:15 ` Bob Pearson
  2021-01-12 20:57   ` Jason Gunthorpe
  2020-12-16 23:15 ` [PATCH for-next 5/7] RDMA/rxe: Make add/drop key/index " Bob Pearson
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 13+ messages in thread
From: Bob Pearson @ 2020-12-16 23:15 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

The allocate, lookup index, lookup key and cleanup routines
in rxe_pool.c currently are not type safe against relocating
the pelem field in the objects. Planned changes to move
allocation of objects into rdma-core make addressing this a
requirement.

Use the elem_offset field in rxe_type_info to make these APIs
safe against moving the pelem field.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_pool.c | 55 +++++++++++++++++++---------
 1 file changed, 38 insertions(+), 17 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 4d667b78af9b..2873ecfb84c2 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -315,7 +315,9 @@ void rxe_drop_index(void *arg)
 
 void *rxe_alloc(struct rxe_pool *pool)
 {
+	struct rxe_type_info *info = &rxe_type_info[pool->type];
 	struct rxe_pool_entry *elem;
+	u8 *obj;
 	unsigned long flags;
 
 	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
@@ -334,16 +336,17 @@ void *rxe_alloc(struct rxe_pool *pool)
 	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
 		goto out_cnt;
 
-	elem = kzalloc(rxe_type_info[pool->type].size,
-				 (pool->flags & RXE_POOL_ATOMIC) ?
-				 GFP_ATOMIC : GFP_KERNEL);
-	if (!elem)
+	obj = kzalloc(info->size, (pool->flags & RXE_POOL_ATOMIC) ?
+		      GFP_ATOMIC : GFP_KERNEL);
+	if (!obj)
 		goto out_cnt;
 
+	elem = (struct rxe_pool_entry *)(obj + info->elem_offset);
+
 	elem->pool = pool;
 	kref_init(&elem->ref_cnt);
 
-	return elem;
+	return obj;
 
 out_cnt:
 	atomic_dec(&pool->num_elem);
@@ -391,12 +394,17 @@ void rxe_elem_release(struct kref *kref)
 	struct rxe_pool_entry *elem =
 		container_of(kref, struct rxe_pool_entry, ref_cnt);
 	struct rxe_pool *pool = elem->pool;
+	struct rxe_type_info *info = &rxe_type_info[pool->type];
+	u8 *obj;
 
 	if (pool->cleanup)
 		pool->cleanup(elem);
 
-	if (!(pool->flags & RXE_POOL_NO_ALLOC))
-		kfree(elem);
+	if (!(pool->flags & RXE_POOL_NO_ALLOC)) {
+		obj = (u8 *)elem - info->elem_offset;
+		kfree(obj);
+	}
+
 	atomic_dec(&pool->num_elem);
 	ib_device_put(&pool->rxe->ib_dev);
 	rxe_pool_put(pool);
@@ -404,8 +412,10 @@ void rxe_elem_release(struct kref *kref)
 
 void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
 {
-	struct rb_node *node = NULL;
-	struct rxe_pool_entry *elem = NULL;
+	struct rxe_type_info *info = &rxe_type_info[pool->type];
+	struct rb_node *node;
+	struct rxe_pool_entry *elem;
+	u8 *obj = NULL;
 	unsigned long flags;
 
 	read_lock_irqsave(&pool->pool_lock, flags);
@@ -422,21 +432,28 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
 			node = node->rb_left;
 		else if (elem->index < index)
 			node = node->rb_right;
-		else {
-			kref_get(&elem->ref_cnt);
+		else
 			break;
-		}
+	}
+
+	if (node) {
+		kref_get(&elem->ref_cnt);
+		obj = (u8 *)elem - info->elem_offset;
+	} else {
+		obj = NULL;
 	}
 
 out:
 	read_unlock_irqrestore(&pool->pool_lock, flags);
-	return node ? elem : NULL;
+	return obj;
 }
 
 void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
 {
-	struct rb_node *node = NULL;
-	struct rxe_pool_entry *elem = NULL;
+	struct rxe_type_info *info = &rxe_type_info[pool->type];
+	struct rb_node *node;
+	struct rxe_pool_entry *elem;
+	u8 *obj = NULL;
 	int cmp;
 	unsigned long flags;
 
@@ -461,10 +478,14 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
 			break;
 	}
 
-	if (node)
+	if (node) {
 		kref_get(&elem->ref_cnt);
+		obj = (u8 *)elem - info->elem_offset;
+	} else {
+		obj = NULL;
+	}
 
 out:
 	read_unlock_irqrestore(&pool->pool_lock, flags);
-	return node ? elem : NULL;
+	return obj;
 }
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH for-next 5/7] RDMA/rxe: Make add/drop key/index APIs type safe
  2020-12-16 23:15 [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions Bob Pearson
                   ` (3 preceding siblings ...)
  2020-12-16 23:15 ` [PATCH for-next 4/7] RDMA/rxe: Make pool lookup and alloc APIs type safe Bob Pearson
@ 2020-12-16 23:15 ` Bob Pearson
  2020-12-16 23:15 ` [PATCH for-next 6/7] RDMA/rxe: Add unlocked versions of pool APIs Bob Pearson
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Bob Pearson @ 2020-12-16 23:15 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Replace 'void *' parameters with 'struct rxe_pool_entry *' and
use a macro to allow
   rxe_add_index,
   rxe_drop_index,
   rxe_add_key,
   rxe_drop_key and
   rxe_add_to_pool
APIs to be type safe against changing the position of pelem in
the objects.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_pool.c  | 14 +++++---------
 drivers/infiniband/sw/rxe/rxe_pool.h  | 20 +++++++++++++++-----
 drivers/infiniband/sw/rxe/rxe_verbs.c | 16 ++++++++--------
 3 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 2873ecfb84c2..c8a6d0d7f01a 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -266,9 +266,8 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
 	return;
 }
 
-void rxe_add_key(void *arg, void *key)
+void __rxe_add_key(struct rxe_pool_entry *elem, void *key)
 {
-	struct rxe_pool_entry *elem = arg;
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
 
@@ -278,9 +277,8 @@ void rxe_add_key(void *arg, void *key)
 	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
-void rxe_drop_key(void *arg)
+void __rxe_drop_key(struct rxe_pool_entry *elem)
 {
-	struct rxe_pool_entry *elem = arg;
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
 
@@ -289,9 +287,8 @@ void rxe_drop_key(void *arg)
 	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
-void rxe_add_index(void *arg)
+void __rxe_add_index(struct rxe_pool_entry *elem)
 {
-	struct rxe_pool_entry *elem = arg;
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
 
@@ -301,9 +298,8 @@ void rxe_add_index(void *arg)
 	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
-void rxe_drop_index(void *arg)
+void __rxe_drop_index(struct rxe_pool_entry *elem)
 {
-	struct rxe_pool_entry *elem = arg;
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
 
@@ -356,7 +352,7 @@ void *rxe_alloc(struct rxe_pool *pool)
 	return NULL;
 }
 
-int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
 {
 	unsigned long flags;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index b37df6198e8a..5db0bdde185e 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -110,23 +110,33 @@ void rxe_pool_cleanup(struct rxe_pool *pool);
 void *rxe_alloc(struct rxe_pool *pool);
 
 /* connect already allocated object to pool */
-int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
+
+#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->pelem)
 
 /* assign an index to an indexed object and insert object into
  *  pool's rb tree
  */
-void rxe_add_index(void *elem);
+void __rxe_add_index(struct rxe_pool_entry *elem);
+
+#define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem)
 
 /* drop an index and remove object from rb tree */
-void rxe_drop_index(void *elem);
+void __rxe_drop_index(struct rxe_pool_entry *elem);
+
+#define rxe_drop_index(obj) __rxe_drop_index(&(obj)->pelem)
 
 /* assign a key to a keyed object and insert object into
  *  pool's rb tree
  */
-void rxe_add_key(void *elem, void *key);
+void __rxe_add_key(struct rxe_pool_entry *elem, void *key);
+
+#define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key)
 
 /* remove elem from rb tree */
-void rxe_drop_key(void *elem);
+void __rxe_drop_key(struct rxe_pool_entry *elem);
+
+#define rxe_drop_key(obj) __rxe_drop_key(&(obj)->pelem)
 
 /* lookup an indexed object from index. takes a reference on object */
 void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index a031514e2f41..7483a33bcec5 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -106,12 +106,12 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
 	return IB_LINK_LAYER_ETHERNET;
 }
 
-static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
+static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata)
 {
-	struct rxe_dev *rxe = to_rdev(uctx->device);
-	struct rxe_ucontext *uc = to_ruc(uctx);
+	struct rxe_dev *rxe = to_rdev(ibuc->device);
+	struct rxe_ucontext *uc = to_ruc(ibuc);
 
-	return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem);
+	return rxe_add_to_pool(&rxe->uc_pool, uc);
 }
 
 static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
@@ -145,7 +145,7 @@ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	struct rxe_dev *rxe = to_rdev(ibpd->device);
 	struct rxe_pd *pd = to_rpd(ibpd);
 
-	return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem);
+	return rxe_add_to_pool(&rxe->pd_pool, pd);
 }
 
 static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
@@ -169,7 +169,7 @@ static int rxe_create_ah(struct ib_ah *ibah,
 	if (err)
 		return err;
 
-	err = rxe_add_to_pool(&rxe->ah_pool, &ah->pelem);
+	err = rxe_add_to_pool(&rxe->ah_pool, ah);
 	if (err)
 		return err;
 
@@ -273,7 +273,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
 	if (err)
 		goto err1;
 
-	err = rxe_add_to_pool(&rxe->srq_pool, &srq->pelem);
+	err = rxe_add_to_pool(&rxe->srq_pool, srq);
 	if (err)
 		goto err1;
 
@@ -774,7 +774,7 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	if (err)
 		return err;
 
-	return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem);
+	return rxe_add_to_pool(&rxe->cq_pool, cq);
 }
 
 static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH for-next 6/7] RDMA/rxe: Add unlocked versions of pool APIs
  2020-12-16 23:15 [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions Bob Pearson
                   ` (4 preceding siblings ...)
  2020-12-16 23:15 ` [PATCH for-next 5/7] RDMA/rxe: Make add/drop key/index " Bob Pearson
@ 2020-12-16 23:15 ` Bob Pearson
  2021-01-12 20:41   ` Jason Gunthorpe
  2020-12-16 23:15 ` [PATCH 7/7] RDMA/rxe: Fix race in rxe_mcast.c Bob Pearson
  2021-01-13  0:27 ` [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions Jason Gunthorpe
  7 siblings, 1 reply; 13+ messages in thread
From: Bob Pearson @ 2020-12-16 23:15 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

The existing pool APIs use the rw_lock pool_lock to protect critical
sections that change the pool state. This does not suffice to correctly
implement a typical sequence like the following

        elem = <lookup key in pool>

        if found use elem else

        elem = <alloc new elem in pool>

        <add key to elem>

Which is racy if multiple threads are attempting to perform this at the
same time. We want the second thread to use the elem created by the
first thread not create two equivalent elems.

This patch adds new APIs that are the same as existing APIs but
do not take the pool_lock. A caller can then take the lock and
perform a sequence of pool operations and then release the lock.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_pool.c | 82 +++++++++++++++++++++-------
 drivers/infiniband/sw/rxe/rxe_pool.h | 41 ++++++++++++--
 2 files changed, 97 insertions(+), 26 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index c8a6d0d7f01a..d26730eec720 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -266,65 +266,89 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
 	return;
 }
 
+void __rxe_add_key_nl(struct rxe_pool_entry *elem, void *key)
+{
+	struct rxe_pool *pool = elem->pool;
+
+	memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size);
+	insert_key(pool, elem);
+}
+
 void __rxe_add_key(struct rxe_pool_entry *elem, void *key)
 {
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
 
 	write_lock_irqsave(&pool->pool_lock, flags);
-	memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size);
-	insert_key(pool, elem);
+	__rxe_add_key_nl(elem, key);
 	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
+void __rxe_drop_key_nl(struct rxe_pool_entry *elem)
+{
+	struct rxe_pool *pool = elem->pool;
+
+	rb_erase(&elem->key_node, &pool->key.tree);
+}
+
 void __rxe_drop_key(struct rxe_pool_entry *elem)
 {
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
 
 	write_lock_irqsave(&pool->pool_lock, flags);
-	rb_erase(&elem->key_node, &pool->key.tree);
+	__rxe_drop_key_nl(elem);
 	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
+void __rxe_add_index_nl(struct rxe_pool_entry *elem)
+{
+	struct rxe_pool *pool = elem->pool;
+
+	elem->index = alloc_index(pool);
+	insert_index(pool, elem);
+}
+
 void __rxe_add_index(struct rxe_pool_entry *elem)
 {
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
 
 	write_lock_irqsave(&pool->pool_lock, flags);
-	elem->index = alloc_index(pool);
-	insert_index(pool, elem);
+	__rxe_add_index_nl(elem);
 	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
+void __rxe_drop_index_nl(struct rxe_pool_entry *elem)
+{
+	struct rxe_pool *pool = elem->pool;
+
+	clear_bit(elem->index - pool->index.min_index, pool->index.table);
+	rb_erase(&elem->index_node, &pool->index.tree);
+}
+
 void __rxe_drop_index(struct rxe_pool_entry *elem)
 {
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
 
 	write_lock_irqsave(&pool->pool_lock, flags);
-	clear_bit(elem->index - pool->index.min_index, pool->index.table);
-	rb_erase(&elem->index_node, &pool->index.tree);
+	__rxe_drop_index_nl(elem);
 	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
-void *rxe_alloc(struct rxe_pool *pool)
+void *rxe_alloc_nl(struct rxe_pool *pool)
 {
 	struct rxe_type_info *info = &rxe_type_info[pool->type];
 	struct rxe_pool_entry *elem;
 	u8 *obj;
-	unsigned long flags;
 
 	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
 
-	read_lock_irqsave(&pool->pool_lock, flags);
-	if (pool->state != RXE_POOL_STATE_VALID) {
-		read_unlock_irqrestore(&pool->pool_lock, flags);
+	if (pool->state != RXE_POOL_STATE_VALID)
 		return NULL;
-	}
+
 	kref_get(&pool->ref_cnt);
-	read_unlock_irqrestore(&pool->pool_lock, flags);
 
 	if (!ib_device_try_get(&pool->rxe->ib_dev))
 		goto out_put_pool;
@@ -352,11 +376,21 @@ void *rxe_alloc(struct rxe_pool *pool)
 	return NULL;
 }
 
-int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
+void *rxe_alloc(struct rxe_pool *pool)
 {
+	u8 *obj;
 	unsigned long flags;
 
-	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
+	read_lock_irqsave(&pool->pool_lock, flags);
+	obj = rxe_alloc_nl(pool);
+	read_unlock_irqrestore(&pool->pool_lock, flags);
+
+	return obj;
+}
+
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
+{
+	unsigned long flags;
 
 	read_lock_irqsave(&pool->pool_lock, flags);
 	if (pool->state != RXE_POOL_STATE_VALID) {
@@ -444,16 +478,13 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
 	return obj;
 }
 
-void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
+void *rxe_pool_get_key_nl(struct rxe_pool *pool, void *key)
 {
 	struct rxe_type_info *info = &rxe_type_info[pool->type];
 	struct rb_node *node;
 	struct rxe_pool_entry *elem;
 	u8 *obj = NULL;
 	int cmp;
-	unsigned long flags;
-
-	read_lock_irqsave(&pool->pool_lock, flags);
 
 	if (pool->state != RXE_POOL_STATE_VALID)
 		goto out;
@@ -482,6 +513,17 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
 	}
 
 out:
+	return obj;
+}
+
+void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
+{
+	u8 *obj = NULL;
+	unsigned long flags;
+
+	read_lock_irqsave(&pool->pool_lock, flags);
+	obj = rxe_pool_get_key_nl(pool, key);
 	read_unlock_irqrestore(&pool->pool_lock, flags);
+
 	return obj;
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 5db0bdde185e..373e08554c1c 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -109,41 +109,70 @@ void rxe_pool_cleanup(struct rxe_pool *pool);
 /* allocate an object from pool */
 void *rxe_alloc(struct rxe_pool *pool);
 
+/* allocate an object from pool - no lock */
+void *rxe_alloc_nl(struct rxe_pool *pool);
+
 /* connect already allocated object to pool */
 int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
 
 #define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->pelem)
 
 /* assign an index to an indexed object and insert object into
- *  pool's rb tree
+ *  pool's rb tree with and without holding the pool_lock
  */
 void __rxe_add_index(struct rxe_pool_entry *elem);
 
 #define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem)
 
-/* drop an index and remove object from rb tree */
+void __rxe_add_index_nl(struct rxe_pool_entry *elem);
+
+#define rxe_add_index_nl(obj) __rxe_add_index_nl(&(obj)->pelem)
+
+/* drop an index and remove object from rb tree
+ * with and without holding the pool_lock
+ */
 void __rxe_drop_index(struct rxe_pool_entry *elem);
 
 #define rxe_drop_index(obj) __rxe_drop_index(&(obj)->pelem)
 
+void __rxe_drop_index_nl(struct rxe_pool_entry *elem);
+
+#define rxe_drop_index_nl(obj) __rxe_drop_index_nl(&(obj)->pelem)
+
 /* assign a key to a keyed object and insert object into
- *  pool's rb tree
+ * pool's rb tree with and without holding pool_lock
  */
 void __rxe_add_key(struct rxe_pool_entry *elem, void *key);
 
 #define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key)
 
-/* remove elem from rb tree */
+void __rxe_add_key_nl(struct rxe_pool_entry *elem, void *key);
+
+#define rxe_add_key_nl(obj, key) __rxe_add_key_nl(&(obj)->pelem, key)
+
+/* remove elem from rb tree with and without holding pool_lock */
 void __rxe_drop_key(struct rxe_pool_entry *elem);
 
 #define rxe_drop_key(obj) __rxe_drop_key(&(obj)->pelem)
 
-/* lookup an indexed object from index. takes a reference on object */
+void __rxe_drop_key_nl(struct rxe_pool_entry *elem);
+
+#define rxe_drop_key_nl(obj) __rxe_drop_key_nl(&(obj)->pelem)
+
+/* lookup an indexed object from index with and without holding pool_lock.
+ * takes a reference on object
+ */
 void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);
 
-/* lookup keyed object from key. takes a reference on the object */
+void *rxe_pool_get_index_nl(struct rxe_pool *pool, u32 index);
+
+/* lookup keyed object from key with and without holding pool_lock.
+ * takes a reference on the object
+ */
 void *rxe_pool_get_key(struct rxe_pool *pool, void *key);
 
+void *rxe_pool_get_key_nl(struct rxe_pool *pool, void *key);
+
 /* cleanup an object when all references are dropped */
 void rxe_elem_release(struct kref *kref);
 
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 7/7] RDMA/rxe: Fix race in rxe_mcast.c
  2020-12-16 23:15 [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions Bob Pearson
                   ` (5 preceding siblings ...)
  2020-12-16 23:15 ` [PATCH for-next 6/7] RDMA/rxe: Add unlocked versions of pool APIs Bob Pearson
@ 2020-12-16 23:15 ` Bob Pearson
  2021-01-13  0:27 ` [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions Jason Gunthorpe
  7 siblings, 0 replies; 13+ messages in thread
From: Bob Pearson @ 2020-12-16 23:15 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Fix a race in rxe_mcast.c that occurs when two QPs try at the
same time to attach a multicast address. Both QPs lookup the mgid
address in a pool of multicast groups and if they do not find it
create a new group elem.

Fix this by locking the lookup/alloc/add key sequence and using
the unlocked APIs added in this patch set.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_mcast.c | 64 +++++++++++++++++----------
 1 file changed, 40 insertions(+), 24 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index c02315aed8d1..5be47ce7d319 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -7,45 +7,61 @@
 #include "rxe.h"
 #include "rxe_loc.h"
 
+/* caller should hold mc_grp_pool->pool_lock */
+static struct rxe_mc_grp *create_grp(struct rxe_dev *rxe,
+				     struct rxe_pool *pool,
+				     union ib_gid *mgid)
+{
+	int err;
+	struct rxe_mc_grp *grp;
+
+	grp = rxe_alloc_nl(&rxe->mc_grp_pool);
+	if (!grp)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&grp->qp_list);
+	spin_lock_init(&grp->mcg_lock);
+	grp->rxe = rxe;
+	rxe_add_key_nl(grp, mgid);
+
+	err = rxe_mcast_add(rxe, mgid);
+	if (unlikely(err)) {
+		rxe_drop_key_nl(grp);
+		rxe_drop_ref(grp);
+		return ERR_PTR(err);
+	}
+
+	return grp;
+}
+
 int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
 		      struct rxe_mc_grp **grp_p)
 {
 	int err;
 	struct rxe_mc_grp *grp;
+	struct rxe_pool *pool = &rxe->mc_grp_pool;
+	unsigned long flags;
 
-	if (rxe->attr.max_mcast_qp_attach == 0) {
-		err = -EINVAL;
-		goto err1;
-	}
+	if (rxe->attr.max_mcast_qp_attach == 0)
+		return -EINVAL;
 
-	grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
+	write_lock_irqsave(&pool->pool_lock, flags);
+
+	grp = rxe_pool_get_key_nl(pool, mgid);
 	if (grp)
 		goto done;
 
-	grp = rxe_alloc(&rxe->mc_grp_pool);
-	if (!grp) {
-		err = -ENOMEM;
-		goto err1;
+	grp = create_grp(rxe, pool, mgid);
+	if (IS_ERR(grp)) {
+		write_unlock_irqrestore(&pool->pool_lock, flags);
+		err = PTR_ERR(grp);
+		return err;
 	}
 
-	INIT_LIST_HEAD(&grp->qp_list);
-	spin_lock_init(&grp->mcg_lock);
-	grp->rxe = rxe;
-
-	rxe_add_key(grp, mgid);
-
-	err = rxe_mcast_add(rxe, mgid);
-	if (err)
-		goto err2;
-
 done:
+	write_unlock_irqrestore(&pool->pool_lock, flags);
 	*grp_p = grp;
 	return 0;
-
-err2:
-	rxe_drop_ref(grp);
-err1:
-	return err;
 }
 
 int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH for-next 6/7] RDMA/rxe: Add unlocked versions of pool APIs
  2020-12-16 23:15 ` [PATCH for-next 6/7] RDMA/rxe: Add unlocked versions of pool APIs Bob Pearson
@ 2021-01-12 20:41   ` Jason Gunthorpe
  0 siblings, 0 replies; 13+ messages in thread
From: Jason Gunthorpe @ 2021-01-12 20:41 UTC (permalink / raw)
  To: Bob Pearson; +Cc: zyjzyj2000, linux-rdma, Bob Pearson

On Wed, Dec 16, 2020 at 05:15:49PM -0600, Bob Pearson wrote:

> -int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
> +void *rxe_alloc(struct rxe_pool *pool)
>  {
> +	u8 *obj;

obj shouldn't be a u8 * here

Jason

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH for-next 4/7] RDMA/rxe: Make pool lookup and alloc APIs type safe
  2020-12-16 23:15 ` [PATCH for-next 4/7] RDMA/rxe: Make pool lookup and alloc APIs type safe Bob Pearson
@ 2021-01-12 20:57   ` Jason Gunthorpe
  0 siblings, 0 replies; 13+ messages in thread
From: Jason Gunthorpe @ 2021-01-12 20:57 UTC (permalink / raw)
  To: Bob Pearson; +Cc: zyjzyj2000, linux-rdma, Bob Pearson

On Wed, Dec 16, 2020 at 05:15:47PM -0600, Bob Pearson wrote:
> The allocate, lookup index, lookup key and cleanup routines
> in rxe_pool.c currently are not type safe against relocating
> the pelem field in the objects. Planned changes to move
> allocation of objects into rdma-core make addressing this a
> requirement.
> 
> Use the elem_offset field in rxe_type_info make these APIs
> safe against moving the pelem field.
> 
> Signed-off-by: Bob Pearson <rpearson@hpe.com>
>  drivers/infiniband/sw/rxe/rxe_pool.c | 55 +++++++++++++++++++---------
>  1 file changed, 38 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
> index 4d667b78af9b..2873ecfb84c2 100644
> +++ b/drivers/infiniband/sw/rxe/rxe_pool.c
> @@ -315,7 +315,9 @@ void rxe_drop_index(void *arg)
>  
>  void *rxe_alloc(struct rxe_pool *pool)
>  {
> +	struct rxe_type_info *info = &rxe_type_info[pool->type];
>  	struct rxe_pool_entry *elem;
> +	u8 *obj;
>  	unsigned long flags;
>  
>  	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
> @@ -334,16 +336,17 @@ void *rxe_alloc(struct rxe_pool *pool)
>  	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
>  		goto out_cnt;
>  
> -	elem = kzalloc(rxe_type_info[pool->type].size,
> -				 (pool->flags & RXE_POOL_ATOMIC) ?
> -				 GFP_ATOMIC : GFP_KERNEL);
> -	if (!elem)
> +	obj = kzalloc(info->size, (pool->flags & RXE_POOL_ATOMIC) ?
> +		      GFP_ATOMIC : GFP_KERNEL);
> +	if (!obj)
>  		goto out_cnt;
>  
> +	elem = (struct rxe_pool_entry *)(obj + info->elem_offset);

This makes more sense squashed with the

https://patchwork.kernel.org/project/linux-rdma/patch/20201216231550.27224-4-rpearson@hpe.com/

patch

But I would suggest a simpler answer, the pool APIs should always work
with 'struct rxe_pool_entry *'

Here it should return it:

struct rxe_pool_entry *_rxe_alloc(struct rxe_pool *pool, size_t size)

And then use a compile time macro to enforce that pelem is always
first:

#define rxe_alloc(pool, type) \
   container_of(_rxe_alloc(pool, sizeof(type) + BUILD_BUG_ON(offsetof(type, pelem) != 0)), type, pelem)

This would eliminate all the ugly void *'s from the API. Just the
allocator needs the BUILD_BUG_ON, but all the other places really
ought to use container_of() not void * to transform the rxe_pool_entry
to its larger struct.

Also I noticed this when I was looking - which also means size can be
removed from the struct rxe_type_info as the allocation here was the
only use.

diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index d26730eec720c6..d515314510ed6a 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -128,14 +128,12 @@ int rxe_pool_init(
 	unsigned int		max_elem)
 {
 	int			err = 0;
-	size_t			size = rxe_type_info[type].size;
 
 	memset(pool, 0, sizeof(*pool));
 
 	pool->rxe		= rxe;
 	pool->type		= type;
 	pool->max_elem		= max_elem;
-	pool->elem_size		= ALIGN(size, RXE_POOL_ALIGN);
 	pool->flags		= rxe_type_info[type].flags;
 	pool->index.tree	= RB_ROOT;
 	pool->key.tree		= RB_ROOT;
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 373e08554c1c9d..f34da4d33e243b 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -68,7 +68,6 @@ struct rxe_pool_entry {
 struct rxe_pool {
 	struct rxe_dev		*rxe;
 	rwlock_t		pool_lock; /* protects pool add/del/search */
-	size_t			elem_size;
 	struct kref		ref_cnt;
 	void			(*cleanup)(struct rxe_pool_entry *obj);
 	enum rxe_pool_state	state;

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions
  2020-12-16 23:15 [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions Bob Pearson
                   ` (6 preceding siblings ...)
  2020-12-16 23:15 ` [PATCH 7/7] RDMA/rxe: Fix race in rxe_mcast.c Bob Pearson
@ 2021-01-13  0:27 ` Jason Gunthorpe
  7 siblings, 0 replies; 13+ messages in thread
From: Jason Gunthorpe @ 2021-01-13  0:27 UTC (permalink / raw)
  To: Bob Pearson; +Cc: zyjzyj2000, linux-rdma, Bob Pearson

On Wed, Dec 16, 2020 at 05:15:43PM -0600, Bob Pearson wrote:
> This patch series makes various cleanups and extensions to the
> object pool core in RDMA/rxe. They are mostly extracted from an
> earlier patch set that implemented memory windows and extended
> verbs APIs but are separated out since they stand on their own.
> 
> Bob Pearson (7):
>   RDMA/rxe: Remove unneeded RXE_POOL_ATOMIC flag
>   RDMA/rxe: Let pools support both keys and indices
>   RDMA/rxe: Add elem_offset field to rxe_type_info
>   RDMA/rxe: Make pool lookup and alloc APIs type safe
>   RDMA/rxe: Make add/drop key/index APIs type safe
>   RDMA/rxe: Add unlocked versions of pool APIs
>   RDMA/rxe: Fix race in rxe_mcast.c

The bug fix is worth doing and though not ideal it is not worse than
anything else in rxe. If you want to revise things as I suggested a
followup would be fine.

Also, the pool index mode can be completely replaced with an xarray,
including the bitmap allocator thing.

So applied to for-next

Thanks,
Jason

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions
  2020-12-14 23:49 Bob Pearson
@ 2020-12-15  5:21 ` Leon Romanovsky
  0 siblings, 0 replies; 13+ messages in thread
From: Leon Romanovsky @ 2020-12-15  5:21 UTC (permalink / raw)
  To: Bob Pearson; +Cc: jgg, zyjzyj2000, linux-rdma, Bob Pearson

On Mon, Dec 14, 2020 at 05:49:12PM -0600, Bob Pearson wrote:
> This patch series makes various cleanups and extensions to the
> object pool core in RDMA/rxe. They are mostly extracted from an
> earlier patch set that implemented memory windows and extended
> verbs APIs but are separated out since they stand on their own.
>
> Bob Pearson (7):
>   RDMA/rxe: Remove unneeded RXE_POOL_ATOMIC flag
>   RDMA/rxe: Let pools support both keys and indices
>   RDMA/rxe: Add elem_offset field to rxe_type_info
>   RDMA/rxe: Make pool lookup and alloc APIs type safe
>   RDMA/rxe: Make add/drop key/index APIs type safe
>   RDMA/rxe: Add unlocked versions of pool APIs
>   RDMA/rxe: Fix race in rxe_mcast.c

I don't see the patches in the ML.
https://lore.kernel.org/linux-rdma/20201214234919.4639-1-rpearson@hpe.com/
Did you send them?

Thanks

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions
@ 2020-12-14 23:49 Bob Pearson
  2020-12-15  5:21 ` Leon Romanovsky
  0 siblings, 1 reply; 13+ messages in thread
From: Bob Pearson @ 2020-12-14 23:49 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

This patch series makes various cleanups and extensions to the
object pool core in RDMA/rxe. They are mostly extracted from an
earlier patch set that implemented memory windows and extended
verbs APIs but are separated out since they stand on their own.

Bob Pearson (7):
  RDMA/rxe: Remove unneeded RXE_POOL_ATOMIC flag
  RDMA/rxe: Let pools support both keys and indices
  RDMA/rxe: Add elem_offset field to rxe_type_info
  RDMA/rxe: Make pool lookup and alloc APIs type safe
  RDMA/rxe: Make add/drop key/index APIs type safe
  RDMA/rxe: Add unlocked versions of pool APIs
  RDMA/rxe: Fix race in rxe_mcast.c

 drivers/infiniband/sw/rxe/rxe_mcast.c |  64 +++++---
 drivers/infiniband/sw/rxe/rxe_pool.c  | 226 +++++++++++++++++---------
 drivers/infiniband/sw/rxe/rxe_pool.h  |  94 ++++++++---
 drivers/infiniband/sw/rxe/rxe_verbs.c |  16 +-
 4 files changed, 268 insertions(+), 132 deletions(-)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2021-01-13  0:46 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-16 23:15 [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions Bob Pearson
2020-12-16 23:15 ` [PATCH for-next 1/7] RDMA/rxe: Remove unneeded RXE_POOL_ATOMIC flag Bob Pearson
2020-12-16 23:15 ` [PATCH for-next 2/7] RDMA/rxe: Let pools support both keys and indices Bob Pearson
2020-12-16 23:15 ` [PATCH for-next 3/7] RDMA/rxe: Add elem_offset field to rxe_type_info Bob Pearson
2020-12-16 23:15 ` [PATCH for-next 4/7] RDMA/rxe: Make pool lookup and alloc APIs type safe Bob Pearson
2021-01-12 20:57   ` Jason Gunthorpe
2020-12-16 23:15 ` [PATCH for-next 5/7] RDMA/rxe: Make add/drop key/index " Bob Pearson
2020-12-16 23:15 ` [PATCH for-next 6/7] RDMA/rxe: Add unlocked versions of pool APIs Bob Pearson
2021-01-12 20:41   ` Jason Gunthorpe
2020-12-16 23:15 ` [PATCH 7/7] RDMA/rxe: Fix race in rxe_mcast.c Bob Pearson
2021-01-13  0:27 ` [PATCH for-next 0/7] RDMA/rxe: cleanup and extensions Jason Gunthorpe
  -- strict thread matches above, loose matches on Subject: below --
2020-12-14 23:49 Bob Pearson
2020-12-15  5:21 ` Leon Romanovsky

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.