* [PATCH rdma-next 0/9] MR cache fixes and refactoring
From: Leon Romanovsky @ 2020-02-27 12:33 UTC
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, Artemy Kovalyov, Eli Cohen, Jack Morgenstein,
	linux-rdma, Michael Guralnik, netdev, Or Gerlitz, Saeed Mahameed,
	Yishai Hadas

From: Leon Romanovsky <leonro@mellanox.com>

Hi,

This series fixes various corner cases in the mlx5_ib MR cache
implementation; see the individual commit messages for details.

Thanks

Jason Gunthorpe (8):
  RDMA/mlx5: Rename the tracking variables for the MR cache
  RDMA/mlx5: Simplify how the MR cache bucket is located
  RDMA/mlx5: Always remove MRs from the cache before destroying them
  RDMA/mlx5: Fix MR cache size and limit debugfs
  RDMA/mlx5: Lock access to ent->available_mrs/limit when doing
    queue_work
  RDMA/mlx5: Fix locking in MR cache work queue
  RDMA/mlx5: Revise how the hysteresis scheme works for cache filling
  RDMA/mlx5: Allow MRs to be created in the cache synchronously

Michael Guralnik (1):
  RDMA/mlx5: Move asynchronous mkey creation to mlx5_ib

 drivers/infiniband/hw/mlx5/mlx5_ib.h         |  33 +-
 drivers/infiniband/hw/mlx5/mr.c              | 642 +++++++++++--------
 drivers/infiniband/hw/mlx5/odp.c             |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/mr.c |  22 +-
 include/linux/mlx5/driver.h                  |   6 -
 5 files changed, 404 insertions(+), 301 deletions(-)

--
2.24.1



* [PATCH mlx5-next 1/9] RDMA/mlx5: Move asynchronous mkey creation to mlx5_ib
From: Leon Romanovsky @ 2020-02-27 12:33 UTC
  To: Doug Ledford, Jason Gunthorpe
  Cc: Michael Guralnik, linux-rdma, netdev, Saeed Mahameed

From: Michael Guralnik <michaelgur@mellanox.com>

As mlx5_ib is the only user of mlx5_core_create_mkey_cb(), move the
logic into mlx5_ib and clean up the code in mlx5_core.

Signed-off-by: Michael Guralnik <michaelgur@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mr.c              | 25 ++++++++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/mr.c | 22 +++--------------
 include/linux/mlx5/driver.h                  |  6 -----
 3 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 6fa0a83c19de..dea14477a676 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -79,6 +79,25 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
 		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
 }
 
+static int create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_ib_mr *mr,
+			  struct mlx5_async_ctx *async_ctx, u32 *in, int inlen,
+			  mlx5_async_cbk_t callback)
+{
+	void *mkc;
+	u8 key;
+
+	spin_lock_irq(&dev->priv.mkey_lock);
+	key = dev->priv.mkey_key++;
+	spin_unlock_irq(&dev->priv.mkey_lock);
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
+	MLX5_SET(mkc, mkc, mkey_7_0, key);
+
+	return mlx5_cmd_exec_cb(async_ctx, in, inlen, mr->out, sizeof(mr->out),
+				callback, &mr->cb_work);
+}
+
 static void reg_mr_callback(int status, struct mlx5_async_work *context)
 {
 	struct mlx5_ib_mr *mr =
@@ -163,10 +182,8 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 		spin_lock_irq(&ent->lock);
 		ent->pending++;
 		spin_unlock_irq(&ent->lock);
-		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
-					       &dev->async_ctx, in, inlen,
-					       mr->out, sizeof(mr->out),
-					       reg_mr_callback, &mr->cb_work);
+		err = create_mkey_cb(dev->mdev, mr, &dev->async_ctx, in, inlen,
+				     reg_mr_callback);
 		if (err) {
 			spin_lock_irq(&ent->lock);
 			ent->pending--;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 42cc3c7ac5b6..83841e4119d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -36,12 +36,9 @@
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
-int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
-			     struct mlx5_core_mkey *mkey,
-			     struct mlx5_async_ctx *async_ctx, u32 *in,
-			     int inlen, u32 *out, int outlen,
-			     mlx5_async_cbk_t callback,
-			     struct mlx5_async_work *context)
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
+			  struct mlx5_core_mkey *mkey,
+			  u32 *in, int inlen)
 {
 	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
 	u32 mkey_index;
@@ -57,10 +54,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
 	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
 	MLX5_SET(mkc, mkc, mkey_7_0, key);
 
-	if (callback)
-		return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
-					callback, context);
-
 	err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
 	if (err)
 		return err;
@@ -75,15 +68,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
 		      mkey_index, key, mkey->key);
 	return 0;
 }
-EXPORT_SYMBOL(mlx5_core_create_mkey_cb);
-
-int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
-			  struct mlx5_core_mkey *mkey,
-			  u32 *in, int inlen)
-{
-	return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen,
-					NULL, 0, NULL, NULL);
-}
 EXPORT_SYMBOL(mlx5_core_create_mkey);
 
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f2b4225ed650..7225e9ca0f25 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -947,12 +947,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
 						      gfp_t flags, int npages);
 void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
 				 struct mlx5_cmd_mailbox *head);
-int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
-			     struct mlx5_core_mkey *mkey,
-			     struct mlx5_async_ctx *async_ctx, u32 *in,
-			     int inlen, u32 *out, int outlen,
-			     mlx5_async_cbk_t callback,
-			     struct mlx5_async_work *context);
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 			  struct mlx5_core_mkey *mkey,
 			  u32 *in, int inlen);
-- 
2.24.1



* [PATCH rdma-next 2/9] RDMA/mlx5: Rename the tracking variables for the MR cache
From: Leon Romanovsky @ 2020-02-27 12:33 UTC
  To: Doug Ledford, Jason Gunthorpe; +Cc: linux-rdma

From: Jason Gunthorpe <jgg@mellanox.com>

The old names do not clearly indicate the intent.
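
In other words, the intended invariant, restating the new mlx5_ib.h
comment below:

	/*
	 * At all times:
	 *   total_mrs == available_mrs + MRs currently handed out to
	 *                users (and returnable to the cache)
	 */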

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 19 +++++++---
 drivers/infiniband/hw/mlx5/mr.c      | 54 ++++++++++++++--------------
 2 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index fd812a24db25..2b566829427e 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -703,15 +703,26 @@ struct mlx5_cache_ent {
 	u32			access_mode;
 	u32			page;
 
-	u32			size;
-	u32                     cur;
+	/*
+	 * - available_mrs is the length of list head, ie the number of MRs
+	 *   available for immediate allocation.
+	 * - total_mrs is available_mrs plus all in use MRs that could be
+	 *   returned to the cache.
+	 * - limit is the low water mark for available_mrs, 2* limit is the
+	 *   upper water mark.
+	 * - pending is the number of MRs currently being created
+	 */
+	u32 total_mrs;
+	u32 available_mrs;
+	u32 limit;
+	u32 pending;
+
+	/* Statistics */
 	u32                     miss;
-	u32			limit;
 
 	struct mlx5_ib_dev     *dev;
 	struct work_struct	work;
 	struct delayed_work	dwork;
-	int			pending;
 	struct completion	compl;
 };
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index dea14477a676..9e504f18864a 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -130,8 +130,8 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 
 	spin_lock_irqsave(&ent->lock, flags);
 	list_add_tail(&mr->list, &ent->head);
-	ent->cur++;
-	ent->size++;
+	ent->available_mrs++;
+	ent->total_mrs++;
 	spin_unlock_irqrestore(&ent->lock, flags);
 
 	if (!completion_done(&ent->compl))
@@ -215,8 +215,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 		}
 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 		list_move(&mr->list, &del_list);
-		ent->cur--;
-		ent->size--;
+		ent->available_mrs--;
+		ent->total_mrs--;
 		spin_unlock_irq(&ent->lock);
 		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 	}
@@ -249,16 +249,16 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
 	if (var < ent->limit)
 		return -EINVAL;
 
-	if (var > ent->size) {
+	if (var > ent->total_mrs) {
 		do {
-			err = add_keys(dev, c, var - ent->size);
+			err = add_keys(dev, c, var - ent->total_mrs);
 			if (err && err != -EAGAIN)
 				return err;
 
 			usleep_range(3000, 5000);
 		} while (err);
-	} else if (var < ent->size) {
-		remove_keys(dev, c, ent->size - var);
+	} else if (var < ent->total_mrs) {
+		remove_keys(dev, c, ent->total_mrs - var);
 	}
 
 	return count;
@@ -271,7 +271,7 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
 	char lbuf[20];
 	int err;
 
-	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
+	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs);
 	if (err < 0)
 		return err;
 
@@ -304,13 +304,13 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
 	if (sscanf(lbuf, "%u", &var) != 1)
 		return -EINVAL;
 
-	if (var > ent->size)
+	if (var > ent->total_mrs)
 		return -EINVAL;
 
 	ent->limit = var;
 
-	if (ent->cur < ent->limit) {
-		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
+	if (ent->available_mrs < ent->limit) {
+		err = add_keys(dev, c, 2 * ent->limit - ent->available_mrs);
 		if (err)
 			return err;
 	}
@@ -344,7 +344,7 @@ static int someone_adding(struct mlx5_mr_cache *cache)
 	int i;
 
 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
-		if (cache->ent[i].cur < cache->ent[i].limit)
+		if (cache->ent[i].available_mrs < cache->ent[i].limit)
 			return 1;
 	}
 
@@ -362,9 +362,9 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 		return;
 
 	ent = &dev->cache.ent[i];
-	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
+	if (ent->available_mrs < 2 * ent->limit && !dev->fill_delay) {
 		err = add_keys(dev, i, 1);
-		if (ent->cur < 2 * ent->limit) {
+		if (ent->available_mrs < 2 * ent->limit) {
 			if (err == -EAGAIN) {
 				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
 					    i + 2);
@@ -379,7 +379,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 				queue_work(cache->wq, &ent->work);
 			}
 		}
-	} else if (ent->cur > 2 * ent->limit) {
+	} else if (ent->available_mrs > 2 * ent->limit) {
 		/*
 		 * The remove_keys() logic is performed as garbage collection
 		 * task. Such task is intended to be run when no other active
@@ -395,7 +395,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 		if (!need_resched() && !someone_adding(cache) &&
 		    time_after(jiffies, cache->last_add + 300 * HZ)) {
 			remove_keys(dev, i, 1);
-			if (ent->cur > ent->limit)
+			if (ent->available_mrs > ent->limit)
 				queue_work(cache->wq, &ent->work);
 		} else {
 			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
@@ -446,9 +446,9 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
 			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 					      list);
 			list_del(&mr->list);
-			ent->cur--;
+			ent->available_mrs--;
 			spin_unlock_irq(&ent->lock);
-			if (ent->cur < ent->limit)
+			if (ent->available_mrs < ent->limit)
 				queue_work(cache->wq, &ent->work);
 			return mr;
 		}
@@ -481,9 +481,9 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 					      list);
 			list_del(&mr->list);
-			ent->cur--;
+			ent->available_mrs--;
 			spin_unlock_irq(&ent->lock);
-			if (ent->cur < ent->limit)
+			if (ent->available_mrs < ent->limit)
 				queue_work(cache->wq, &ent->work);
 			break;
 		}
@@ -515,7 +515,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		mr->allocated_from_cache = false;
 		destroy_mkey(dev, mr);
 		ent = &cache->ent[c];
-		if (ent->cur < ent->limit)
+		if (ent->available_mrs < ent->limit)
 			queue_work(cache->wq, &ent->work);
 		return;
 	}
@@ -523,8 +523,8 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	ent = &cache->ent[c];
 	spin_lock_irq(&ent->lock);
 	list_add_tail(&mr->list, &ent->head);
-	ent->cur++;
-	if (ent->cur > 2 * ent->limit)
+	ent->available_mrs++;
+	if (ent->available_mrs > 2 * ent->limit)
 		shrink = 1;
 	spin_unlock_irq(&ent->lock);
 
@@ -549,8 +549,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 		}
 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 		list_move(&mr->list, &del_list);
-		ent->cur--;
-		ent->size--;
+		ent->available_mrs--;
+		ent->total_mrs--;
 		spin_unlock_irq(&ent->lock);
 		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 	}
@@ -588,7 +588,7 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
 		dir = debugfs_create_dir(ent->name, cache->root);
 		debugfs_create_file("size", 0600, dir, ent, &size_fops);
 		debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
-		debugfs_create_u32("cur", 0400, dir, &ent->cur);
+		debugfs_create_u32("cur", 0400, dir, &ent->available_mrs);
 		debugfs_create_u32("miss", 0600, dir, &ent->miss);
 	}
 }
-- 
2.24.1



* [PATCH rdma-next 3/9] RDMA/mlx5: Simplify how the MR cache bucket is located
From: Leon Romanovsky @ 2020-02-27 12:33 UTC
  To: Doug Ledford, Jason Gunthorpe; +Cc: linux-rdma

From: Jason Gunthorpe <jgg@mellanox.com>

There are many bad APIs here that accept a cache bucket index instead
of a bucket pointer. Many of the callers already have a bucket pointer,
so this results in a lot of confusing uses of order2idx().

Pass the struct mlx5_cache_ent into add_keys(), remove_keys(), and
alloc_cached_mr().

Once the MR is in the cache, store the cache bucket pointer directly in
the MR, replacing the 'bool allocated_from_cache'.

In the end, only one place needs to form an index from an order:
alloc_mr_from_cache(). Increase the safety of this function by
disallowing it from accessing cache entries in the ODP special area.
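
For example, with the first cache bucket holding order 2 MRs (as set up
at cache init time), the new mr_cache_ent_from_order() maps roughly as:

	/*
	 * order <= 2                           -> &cache->ent[0]
	 * order == 5                           -> &cache->ent[3]
	 * order >  MR_CACHE_LAST_STD_ENTRY + 2 -> NULL (-E2BIG to caller)
	 */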

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |   7 +-
 drivers/infiniband/hw/mlx5/mr.c      | 155 +++++++++++----------------
 drivers/infiniband/hw/mlx5/odp.c     |   2 +-
 3 files changed, 69 insertions(+), 95 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 2b566829427e..7aef89cc472d 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -621,8 +621,8 @@ struct mlx5_ib_mr {
 	struct ib_umem	       *umem;
 	struct mlx5_shared_mr_info	*smr_info;
 	struct list_head	list;
-	int			order;
-	bool			allocated_from_cache;
+	unsigned int		order;
+	struct mlx5_cache_ent  *cache_ent;
 	int			npages;
 	struct mlx5_ib_dev     *dev;
 	u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
@@ -1276,7 +1276,8 @@ int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+				       unsigned int entry);
 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr);
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 9e504f18864a..68e5a3955b1d 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -63,16 +63,6 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 }
 
-static int order2idx(struct mlx5_ib_dev *dev, int order)
-{
-	struct mlx5_mr_cache *cache = &dev->cache;
-
-	if (order < cache->ent[0].order)
-		return 0;
-	else
-		return order - cache->ent[0].order;
-}
-
 static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
 {
 	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
@@ -103,9 +93,7 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 	struct mlx5_ib_mr *mr =
 		container_of(context, struct mlx5_ib_mr, cb_work);
 	struct mlx5_ib_dev *dev = mr->dev;
-	struct mlx5_mr_cache *cache = &dev->cache;
-	int c = order2idx(dev, mr->order);
-	struct mlx5_cache_ent *ent = &cache->ent[c];
+	struct mlx5_cache_ent *ent = mr->cache_ent;
 	u8 key;
 	unsigned long flags;
 
@@ -126,7 +114,7 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
 	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
 
-	cache->last_add = jiffies;
+	dev->cache.last_add = jiffies;
 
 	spin_lock_irqsave(&ent->lock, flags);
 	list_add_tail(&mr->list, &ent->head);
@@ -138,10 +126,8 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 		complete(&ent->compl);
 }
 
-static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+static int add_keys(struct mlx5_cache_ent *ent, int num)
 {
-	struct mlx5_mr_cache *cache = &dev->cache;
-	struct mlx5_cache_ent *ent = &cache->ent[c];
 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	struct mlx5_ib_mr *mr;
 	void *mkc;
@@ -166,8 +152,8 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 			break;
 		}
 		mr->order = ent->order;
-		mr->allocated_from_cache = true;
-		mr->dev = dev;
+		mr->cache_ent = ent;
+		mr->dev = ent->dev;
 
 		MLX5_SET(mkc, mkc, free, 1);
 		MLX5_SET(mkc, mkc, umr_en, 1);
@@ -182,13 +168,14 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 		spin_lock_irq(&ent->lock);
 		ent->pending++;
 		spin_unlock_irq(&ent->lock);
-		err = create_mkey_cb(dev->mdev, mr, &dev->async_ctx, in, inlen,
+
+		err = create_mkey_cb(ent->dev->mdev, mr, &ent->dev->async_ctx, in, inlen,
 				     reg_mr_callback);
 		if (err) {
 			spin_lock_irq(&ent->lock);
 			ent->pending--;
 			spin_unlock_irq(&ent->lock);
-			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
+			mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
 			kfree(mr);
 			break;
 		}
@@ -198,10 +185,8 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 	return err;
 }
 
-static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
+static void remove_keys(struct mlx5_cache_ent *ent, int num)
 {
-	struct mlx5_mr_cache *cache = &dev->cache;
-	struct mlx5_cache_ent *ent = &cache->ent[c];
 	struct mlx5_ib_mr *tmp_mr;
 	struct mlx5_ib_mr *mr;
 	LIST_HEAD(del_list);
@@ -218,7 +203,7 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 		ent->available_mrs--;
 		ent->total_mrs--;
 		spin_unlock_irq(&ent->lock);
-		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+		mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
 	}
 
 	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
@@ -231,18 +216,14 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
 			  size_t count, loff_t *pos)
 {
 	struct mlx5_cache_ent *ent = filp->private_data;
-	struct mlx5_ib_dev *dev = ent->dev;
 	char lbuf[20] = {0};
 	u32 var;
 	int err;
-	int c;
 
 	count = min(count, sizeof(lbuf) - 1);
 	if (copy_from_user(lbuf, buf, count))
 		return -EFAULT;
 
-	c = order2idx(dev, ent->order);
-
 	if (sscanf(lbuf, "%u", &var) != 1)
 		return -EINVAL;
 
@@ -251,14 +232,14 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
 
 	if (var > ent->total_mrs) {
 		do {
-			err = add_keys(dev, c, var - ent->total_mrs);
+			err = add_keys(ent, var - ent->total_mrs);
 			if (err && err != -EAGAIN)
 				return err;
 
 			usleep_range(3000, 5000);
 		} while (err);
 	} else if (var < ent->total_mrs) {
-		remove_keys(dev, c, ent->total_mrs - var);
+		remove_keys(ent, ent->total_mrs - var);
 	}
 
 	return count;
@@ -289,18 +270,14 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
 			   size_t count, loff_t *pos)
 {
 	struct mlx5_cache_ent *ent = filp->private_data;
-	struct mlx5_ib_dev *dev = ent->dev;
 	char lbuf[20] = {0};
 	u32 var;
 	int err;
-	int c;
 
 	count = min(count, sizeof(lbuf) - 1);
 	if (copy_from_user(lbuf, buf, count))
 		return -EFAULT;
 
-	c = order2idx(dev, ent->order);
-
 	if (sscanf(lbuf, "%u", &var) != 1)
 		return -EINVAL;
 
@@ -310,7 +287,7 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
 	ent->limit = var;
 
 	if (ent->available_mrs < ent->limit) {
-		err = add_keys(dev, c, 2 * ent->limit - ent->available_mrs);
+		err = add_keys(ent, 2 * ent->limit - ent->available_mrs);
 		if (err)
 			return err;
 	}
@@ -355,24 +332,22 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 {
 	struct mlx5_ib_dev *dev = ent->dev;
 	struct mlx5_mr_cache *cache = &dev->cache;
-	int i = order2idx(dev, ent->order);
 	int err;
 
 	if (cache->stopped)
 		return;
 
-	ent = &dev->cache.ent[i];
 	if (ent->available_mrs < 2 * ent->limit && !dev->fill_delay) {
-		err = add_keys(dev, i, 1);
+		err = add_keys(ent, 1);
 		if (ent->available_mrs < 2 * ent->limit) {
 			if (err == -EAGAIN) {
 				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
-					    i + 2);
+					    ent->order);
 				queue_delayed_work(cache->wq, &ent->dwork,
 						   msecs_to_jiffies(3));
 			} else if (err) {
 				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
-					     i + 2, err);
+					     ent->order, err);
 				queue_delayed_work(cache->wq, &ent->dwork,
 						   msecs_to_jiffies(1000));
 			} else {
@@ -394,7 +369,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 		 */
 		if (!need_resched() && !someone_adding(cache) &&
 		    time_after(jiffies, cache->last_add + 300 * HZ)) {
-			remove_keys(dev, i, 1);
+			remove_keys(ent, 1);
 			if (ent->available_mrs > ent->limit)
 				queue_work(cache->wq, &ent->work);
 		} else {
@@ -419,17 +394,18 @@ static void cache_work_func(struct work_struct *work)
 	__cache_work_func(ent);
 }
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
+/* Allocate a special entry from the cache */
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+				       unsigned int entry)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent;
 	struct mlx5_ib_mr *mr;
 	int err;
 
-	if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
-		mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
+	if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
+		    entry >= ARRAY_SIZE(cache->ent)))
 		return ERR_PTR(-EINVAL);
-	}
 
 	ent = &cache->ent[entry];
 	while (1) {
@@ -437,7 +413,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
 		if (list_empty(&ent->head)) {
 			spin_unlock_irq(&ent->lock);
 
-			err = add_keys(dev, entry, 1);
+			err = add_keys(ent, 1);
 			if (err && err != -EAGAIN)
 				return ERR_PTR(err);
 
@@ -455,26 +431,16 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
 	}
 }
 
-static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
+static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent)
 {
-	struct mlx5_mr_cache *cache = &dev->cache;
+	struct mlx5_ib_dev *dev = req_ent->dev;
 	struct mlx5_ib_mr *mr = NULL;
-	struct mlx5_cache_ent *ent;
-	int last_umr_cache_entry;
-	int c;
-	int i;
-
-	c = order2idx(dev, order);
-	last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
-	if (c < 0 || c > last_umr_cache_entry) {
-		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
-		return NULL;
-	}
+	struct mlx5_cache_ent *ent = req_ent;
 
-	for (i = c; i <= last_umr_cache_entry; i++) {
-		ent = &cache->ent[i];
-
-		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
+	/* Try larger MR pools from the cache to satisfy the allocation */
+	for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) {
+		mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order,
+			    ent - dev->cache.ent);
 
 		spin_lock_irq(&ent->lock);
 		if (!list_empty(&ent->head)) {
@@ -484,43 +450,36 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 			ent->available_mrs--;
 			spin_unlock_irq(&ent->lock);
 			if (ent->available_mrs < ent->limit)
-				queue_work(cache->wq, &ent->work);
+				queue_work(dev->cache.wq, &ent->work);
 			break;
 		}
 		spin_unlock_irq(&ent->lock);
 
-		queue_work(cache->wq, &ent->work);
+		queue_work(dev->cache.wq, &ent->work);
 	}
 
 	if (!mr)
-		cache->ent[c].miss++;
+		req_ent->miss++;
 
 	return mr;
 }
 
 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-	struct mlx5_mr_cache *cache = &dev->cache;
-	struct mlx5_cache_ent *ent;
+	struct mlx5_cache_ent *ent = mr->cache_ent;
 	int shrink = 0;
-	int c;
 
-	if (!mr->allocated_from_cache)
+	if (!ent)
 		return;
 
-	c = order2idx(dev, mr->order);
-	WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);
-
 	if (mlx5_mr_cache_invalidate(mr)) {
-		mr->allocated_from_cache = false;
+		mr->cache_ent = NULL;
 		destroy_mkey(dev, mr);
-		ent = &cache->ent[c];
 		if (ent->available_mrs < ent->limit)
-			queue_work(cache->wq, &ent->work);
+			queue_work(dev->cache.wq, &ent->work);
 		return;
 	}
 
-	ent = &cache->ent[c];
 	spin_lock_irq(&ent->lock);
 	list_add_tail(&mr->list, &ent->head);
 	ent->available_mrs++;
@@ -529,7 +488,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	spin_unlock_irq(&ent->lock);
 
 	if (shrink)
-		queue_work(cache->wq, &ent->work);
+		queue_work(dev->cache.wq, &ent->work);
 }
 
 static void clean_keys(struct mlx5_ib_dev *dev, int c)
@@ -857,22 +816,38 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
 	return err;
 }
 
-static struct mlx5_ib_mr *alloc_mr_from_cache(
-				  struct ib_pd *pd, struct ib_umem *umem,
-				  u64 virt_addr, u64 len, int npages,
-				  int page_shift, int order, int access_flags)
+static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
+						      unsigned int order)
+{
+	struct mlx5_mr_cache *cache = &dev->cache;
+
+	if (order < cache->ent[0].order)
+		return &cache->ent[0];
+	order = order - cache->ent[0].order;
+	if (order > MR_CACHE_LAST_STD_ENTRY)
+		return NULL;
+	return &cache->ent[order];
+}
+
+static struct mlx5_ib_mr *
+alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
+		    u64 len, int npages, int page_shift, unsigned int order,
+		    int access_flags)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
 	struct mlx5_ib_mr *mr;
 	int err = 0;
 	int i;
 
+	if (!ent)
+		return ERR_PTR(-E2BIG);
 	for (i = 0; i < 1; i++) {
-		mr = alloc_cached_mr(dev, order);
+		mr = alloc_cached_mr(ent);
 		if (mr)
 			break;
 
-		err = add_keys(dev, order2idx(dev, order), 1);
+		err = add_keys(ent, 1);
 		if (err && err != -EAGAIN) {
 			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
 			break;
@@ -1456,7 +1431,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		/*
 		 * UMR can't be used - MKey needs to be replaced.
 		 */
-		if (mr->allocated_from_cache)
+		if (mr->cache_ent)
 			err = mlx5_mr_cache_invalidate(mr);
 		else
 			err = destroy_mkey(dev, mr);
@@ -1472,7 +1447,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 			goto err;
 		}
 
-		mr->allocated_from_cache = false;
+		mr->cache_ent = NULL;
 	} else {
 		/*
 		 * Send a UMR WQE
@@ -1559,8 +1534,6 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
 
 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-	int allocated_from_cache = mr->allocated_from_cache;
-
 	if (mr->sig) {
 		if (mlx5_core_destroy_psv(dev->mdev,
 					  mr->sig->psv_memory.psv_idx))
@@ -1575,7 +1548,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		mr->sig = NULL;
 	}
 
-	if (!allocated_from_cache) {
+	if (!mr->cache_ent) {
 		destroy_mkey(dev, mr);
 		mlx5_free_priv_descs(mr);
 	}
@@ -1592,7 +1565,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	else
 		clean_mr(dev, mr);
 
-	if (mr->allocated_from_cache)
+	if (mr->cache_ent)
 		mlx5_mr_cache_free(dev, mr);
 	else
 		kfree(mr);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 4216814ba871..224f480fc441 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -197,7 +197,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
 	odp->private = NULL;
 	mutex_unlock(&odp->umem_mutex);
 
-	if (!mr->allocated_from_cache) {
+	if (!mr->cache_ent) {
 		mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey);
 		WARN_ON(mr->descs);
 	}
-- 
2.24.1



* [PATCH rdma-next 4/9] RDMA/mlx5: Always remove MRs from the cache before destroying them
From: Leon Romanovsky @ 2020-02-27 12:33 UTC
  To: Doug Ledford, Jason Gunthorpe; +Cc: Artemy Kovalyov, linux-rdma, Yishai Hadas

From: Jason Gunthorpe <jgg@mellanox.com>

The cache bucket tracks the total number of MRs that exist, both inside
and outside of the cache. Removing an MR from the cache (by setting
cache_ent to NULL) without updating total_mrs will cause the tracking to
leak and be inflated.

Further fix the rereg_mr path to always destroy the MR. reg_create will
always overwrite all the MR data in mlx5_ib_mr, so the MR must be
completely destroyed, in all cases, before this function can be
called. Detach the MR from the cache and unconditionally destroy it to
avoid leaking HW mkeys.

Fixes: afd1417404fb ("IB/mlx5: Use direct mkey destroy command upon UMR unreg failure")
Fixes: 56e11d628c5d ("IB/mlx5: Added support for re-registration of MRs")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 68e5a3955b1d..c951bb14de56 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -464,6 +464,16 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent)
 	return mr;
 }

+static void detach_mr_from_cache(struct mlx5_ib_mr *mr)
+{
+	struct mlx5_cache_ent *ent = mr->cache_ent;
+
+	mr->cache_ent = NULL;
+	spin_lock_irq(&ent->lock);
+	ent->total_mrs--;
+	spin_unlock_irq(&ent->lock);
+}
+
 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
 	struct mlx5_cache_ent *ent = mr->cache_ent;
@@ -473,7 +483,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		return;

 	if (mlx5_mr_cache_invalidate(mr)) {
-		mr->cache_ent = NULL;
+		detach_mr_from_cache(mr);
 		destroy_mkey(dev, mr);
 		if (ent->available_mrs < ent->limit)
 			queue_work(dev->cache.wq, &ent->work);
@@ -1432,9 +1442,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		 * UMR can't be used - MKey needs to be replaced.
 		 */
 		if (mr->cache_ent)
-			err = mlx5_mr_cache_invalidate(mr);
-		else
-			err = destroy_mkey(dev, mr);
+			detach_mr_from_cache(mr);
+		err = destroy_mkey(dev, mr);
 		if (err)
 			goto err;

@@ -1446,8 +1455,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 			mr = to_mmr(ib_mr);
 			goto err;
 		}
-
-		mr->cache_ent = NULL;
 	} else {
 		/*
 		 * Send a UMR WQE
--
2.24.1



* [PATCH rdma-next 5/9] RDMA/mlx5: Fix MR cache size and limit debugfs
From: Leon Romanovsky @ 2020-02-27 12:33 UTC
  To: Doug Ledford, Jason Gunthorpe
  Cc: Eli Cohen, Jack Morgenstein, linux-rdma, Or Gerlitz

From: Jason Gunthorpe <jgg@mellanox.com>

The size_write() function is supposed to adjust total_mrs to match the
user's request, but lacks locking and safety checks.

total_mrs can only be adjusted by at most available_mrs; MRs already
assigned to users cannot be revoked. Ensure that the user provides a
target value within the range permitted by available_mrs and the
high/low water marks.
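
For example (hypothetical numbers): with total_mrs = 10 and
available_mrs = 4, 6 MRs are in use, so writing the debugfs 'size'
file behaves as:

	echo  5 > size   /* rejected: 5 < 6, in-use MRs can't be revoked */
	echo 12 > size   /* available_mrs target = 12 - 6 = 6, accepted
			  * only if limit <= 6 <= 2 * limit */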

limit_write() has confusing and incorrect sanity checking, and has no
ability to deallocate on limit reduction.

Since both functions use the same algorithm to adjust available_mrs,
consolidate it into one function and write it correctly. Fix the locking
by holding the spinlock for all accesses to ent->X.

Always fail if the user provides a malformed string.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 152 ++++++++++++++++++--------------
 1 file changed, 88 insertions(+), 64 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index c951bb14de56..00d7dc4cd91b 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -126,7 +126,7 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 		complete(&ent->compl);
 }

-static int add_keys(struct mlx5_cache_ent *ent, int num)
+static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	struct mlx5_ib_mr *mr;
@@ -185,30 +185,54 @@ static int add_keys(struct mlx5_cache_ent *ent, int num)
 	return err;
 }

-static void remove_keys(struct mlx5_cache_ent *ent, int num)
+static void remove_cache_mr(struct mlx5_cache_ent *ent)
 {
-	struct mlx5_ib_mr *tmp_mr;
 	struct mlx5_ib_mr *mr;
-	LIST_HEAD(del_list);
-	int i;

-	for (i = 0; i < num; i++) {
-		spin_lock_irq(&ent->lock);
-		if (list_empty(&ent->head)) {
-			spin_unlock_irq(&ent->lock);
-			break;
-		}
-		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
-		list_move(&mr->list, &del_list);
-		ent->available_mrs--;
-		ent->total_mrs--;
+	spin_lock_irq(&ent->lock);
+	if (list_empty(&ent->head)) {
 		spin_unlock_irq(&ent->lock);
-		mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
+		return;
 	}
+	mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+	list_del(&mr->list);
+	ent->available_mrs--;
+	ent->total_mrs--;
+	spin_unlock_irq(&ent->lock);
+	mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
+	kfree(mr);
+}

-	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
-		list_del(&mr->list);
-		kfree(mr);
+static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
+				bool limit_fill)
+{
+	int err;
+
+	lockdep_assert_held(&ent->lock);
+
+	while (true) {
+		if (limit_fill)
+			target = ent->limit * 2;
+		if (target == ent->available_mrs + ent->pending)
+			return 0;
+		if (target > ent->available_mrs + ent->pending) {
+			u32 todo = target - (ent->available_mrs + ent->pending);
+
+			spin_unlock_irq(&ent->lock);
+			err = add_keys(ent, todo);
+			if (err == -EAGAIN)
+				usleep_range(3000, 5000);
+			spin_lock_irq(&ent->lock);
+			if (err) {
+				if (err != -EAGAIN)
+					return err;
+			} else
+				return 0;
+		} else {
+			spin_unlock_irq(&ent->lock);
+			remove_cache_mr(ent);
+			spin_lock_irq(&ent->lock);
+		}
 	}
 }

@@ -216,33 +240,38 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
 			  size_t count, loff_t *pos)
 {
 	struct mlx5_cache_ent *ent = filp->private_data;
-	char lbuf[20] = {0};
-	u32 var;
+	u32 target;
 	int err;

-	count = min(count, sizeof(lbuf) - 1);
-	if (copy_from_user(lbuf, buf, count))
-		return -EFAULT;
-
-	if (sscanf(lbuf, "%u", &var) != 1)
-		return -EINVAL;
-
-	if (var < ent->limit)
-		return -EINVAL;
-
-	if (var > ent->total_mrs) {
-		do {
-			err = add_keys(ent, var - ent->total_mrs);
-			if (err && err != -EAGAIN)
-				return err;
+	err = kstrtou32_from_user(buf, count, 0, &target);
+	if (err)
+		return err;

-			usleep_range(3000, 5000);
-		} while (err);
-	} else if (var < ent->total_mrs) {
-		remove_keys(ent, ent->total_mrs - var);
+	/*
+	 * Target is the new value of total_mrs the user requests, however we
+	 * cannot free MRs that are in use. Compute the target value for
+	 * available_mrs.
+	 */
+	spin_lock_irq(&ent->lock);
+	if (target < ent->total_mrs - ent->available_mrs) {
+		err = -EINVAL;
+		goto err_unlock;
+	}
+	target = target - (ent->total_mrs - ent->available_mrs);
+	if (target < ent->limit || target > ent->limit*2) {
+		err = -EINVAL;
+		goto err_unlock;
 	}
+	err = resize_available_mrs(ent, target, false);
+	if (err)
+		goto err_unlock;
+	spin_unlock_irq(&ent->lock);

 	return count;
+
+err_unlock:
+	spin_unlock_irq(&ent->lock);
+	return err;
 }

 static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
@@ -270,28 +299,23 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
 			   size_t count, loff_t *pos)
 {
 	struct mlx5_cache_ent *ent = filp->private_data;
-	char lbuf[20] = {0};
 	u32 var;
 	int err;

-	count = min(count, sizeof(lbuf) - 1);
-	if (copy_from_user(lbuf, buf, count))
-		return -EFAULT;
-
-	if (sscanf(lbuf, "%u", &var) != 1)
-		return -EINVAL;
-
-	if (var > ent->total_mrs)
-		return -EINVAL;
+	err = kstrtou32_from_user(buf, count, 0, &var);
+	if (err)
+		return err;

+	/*
+	 * Upon set we immediately fill the cache to high water mark implied by
+	 * the limit.
+	 */
+	spin_lock_irq(&ent->lock);
 	ent->limit = var;
-
-	if (ent->available_mrs < ent->limit) {
-		err = add_keys(ent, 2 * ent->limit - ent->available_mrs);
-		if (err)
-			return err;
-	}
-
+	err = resize_available_mrs(ent, 0, true);
+	spin_unlock_irq(&ent->lock);
+	if (err)
+		return err;
 	return count;
 }

@@ -356,20 +380,20 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 		}
 	} else if (ent->available_mrs > 2 * ent->limit) {
 		/*
-		 * The remove_keys() logic is performed as garbage collection
-		 * task. Such task is intended to be run when no other active
-		 * processes are running.
+		 * The remove_cache_mr() logic is performed as garbage
+		 * collection task. Such task is intended to be run when no
+		 * other active processes are running.
 		 *
 		 * The need_resched() will return TRUE if there are user tasks
 		 * to be activated in near future.
 		 *
-		 * In such case, we don't execute remove_keys() and postpone
-		 * the garbage collection work to try to run in next cycle,
-		 * in order to free CPU resources to other tasks.
+		 * In such case, we don't execute remove_cache_mr() and postpone
+		 * the garbage collection work to try to run in next cycle, in
+		 * order to free CPU resources to other tasks.
 		 */
 		if (!need_resched() && !someone_adding(cache) &&
 		    time_after(jiffies, cache->last_add + 300 * HZ)) {
-			remove_keys(ent, 1);
+			remove_cache_mr(ent);
 			if (ent->available_mrs > ent->limit)
 				queue_work(cache->wq, &ent->work);
 		} else {
--
2.24.1



* [PATCH rdma-next 6/9] RDMA/mlx5: Lock access to ent->available_mrs/limit when doing queue_work
From: Leon Romanovsky @ 2020-02-27 12:33 UTC
  To: Doug Ledford, Jason Gunthorpe; +Cc: linux-rdma

From: Jason Gunthorpe <jgg@mellanox.com>

Accesses to these members need to be locked. There is no reason not to
hold a spinlock while calling queue_work(), so move the tests into a
helper and always call it under lock.

The helper should be called when available_mrs is adjusted.
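
Every site that adjusts available_mrs then follows the same pattern
(sketch):

	spin_lock_irq(&ent->lock);
	ent->available_mrs--;           /* or ++ */
	queue_adjust_cache_locked(ent); /* may queue_work() under lock */
	spin_unlock_irq(&ent->lock);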

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 40 ++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 00d7dc4cd91b..17dda58360c4 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -120,6 +120,10 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 	list_add_tail(&mr->list, &ent->head);
 	ent->available_mrs++;
 	ent->total_mrs++;
+	/*
+	 * Creating is always done in response to some demand, so do not call
+	 * queue_adjust_cache_locked().
+	 */
 	spin_unlock_irqrestore(&ent->lock, flags);
 
 	if (!completion_done(&ent->compl))
@@ -352,6 +356,20 @@ static int someone_adding(struct mlx5_mr_cache *cache)
 	return 0;
 }
 
+/*
+ * Check if the bucket is outside the high/low water mark and schedule an async
+ * update. The cache refill has hysteresis, once the low water mark is hit it is
+ * refilled up to the high mark.
+ */
+static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
+{
+	lockdep_assert_held(&ent->lock);
+
+	if (ent->available_mrs < ent->limit ||
+	    ent->available_mrs > 2 * ent->limit)
+		queue_work(ent->dev->cache.wq, &ent->work);
+}
+
 static void __cache_work_func(struct mlx5_cache_ent *ent)
 {
 	struct mlx5_ib_dev *dev = ent->dev;
@@ -447,9 +465,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
 					      list);
 			list_del(&mr->list);
 			ent->available_mrs--;
+			queue_adjust_cache_locked(ent);
 			spin_unlock_irq(&ent->lock);
-			if (ent->available_mrs < ent->limit)
-				queue_work(cache->wq, &ent->work);
 			return mr;
 		}
 	}
@@ -472,14 +489,12 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent)
 					      list);
 			list_del(&mr->list);
 			ent->available_mrs--;
+			queue_adjust_cache_locked(ent);
 			spin_unlock_irq(&ent->lock);
-			if (ent->available_mrs < ent->limit)
-				queue_work(dev->cache.wq, &ent->work);
 			break;
 		}
+		queue_adjust_cache_locked(ent);
 		spin_unlock_irq(&ent->lock);
-
-		queue_work(dev->cache.wq, &ent->work);
 	}
 
 	if (!mr)
@@ -501,7 +516,6 @@ static void detach_mr_from_cache(struct mlx5_ib_mr *mr)
 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
 	struct mlx5_cache_ent *ent = mr->cache_ent;
-	int shrink = 0;
 
 	if (!ent)
 		return;
@@ -509,20 +523,14 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	if (mlx5_mr_cache_invalidate(mr)) {
 		detach_mr_from_cache(mr);
 		destroy_mkey(dev, mr);
-		if (ent->available_mrs < ent->limit)
-			queue_work(dev->cache.wq, &ent->work);
 		return;
 	}
 
 	spin_lock_irq(&ent->lock);
 	list_add_tail(&mr->list, &ent->head);
 	ent->available_mrs++;
-	if (ent->available_mrs > 2 * ent->limit)
-		shrink = 1;
+	queue_adjust_cache_locked(ent);
 	spin_unlock_irq(&ent->lock);
-
-	if (shrink)
-		queue_work(dev->cache.wq, &ent->work);
 }
 
 static void clean_keys(struct mlx5_ib_dev *dev, int c)
@@ -638,7 +646,9 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 			ent->limit = dev->mdev->profile->mr_cache[i].limit;
 		else
 			ent->limit = 0;
-		queue_work(cache->wq, &ent->work);
+		spin_lock_irq(&ent->lock);
+		queue_adjust_cache_locked(ent);
+		spin_unlock_irq(&ent->lock);
 	}
 
 	mlx5_mr_cache_debugfs_init(dev);
-- 
2.24.1



* [PATCH rdma-next 7/9] RDMA/mlx5: Fix locking in MR cache work queue
From: Leon Romanovsky @ 2020-02-27 12:33 UTC
  To: Doug Ledford, Jason Gunthorpe; +Cc: linux-rdma

From: Jason Gunthorpe <jgg@mellanox.com>

All of the members of mlx5_cache_ent must be accessed while holding the
spinlock; add the missing locking in __cache_work_func().

Using cache->stopped and flush_workqueue() is an inherently racy way to
shut down self-scheduling work on a queue. Replace it with ent->disabled
under lock, and always check disabled before queuing any new work. Use
cancel_work_sync() to shut down the queue.

Use READ_ONCE/WRITE_ONCE for cache->last_add to manage concurrency, as
coherency is less important here.

Split fill_delay out of the bitfield: C bitfield updates are not
atomic, so racing writers can corrupt adjacent bits. Use
READ_ONCE/WRITE_ONCE, though test_bit()/set_bit() would also work.
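
As a sketch of the problem, the flags in mlx5_ib_dev share a byte:

	u8 ib_active:1;
	u8 fill_delay:1;	/* same storage unit as ib_active */

so dev->fill_delay = 1 compiles to a read-modify-write of the whole
byte, and a concurrent dev->ib_active = 0 on another CPU can be lost
when the stale byte is stored back. Giving fill_delay a full u8 of its
own and accessing it with READ_ONCE()/WRITE_ONCE() avoids the shared
read-modify-write.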

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |   5 +-
 drivers/infiniband/hw/mlx5/mr.c      | 121 +++++++++++++++++----------
 2 files changed, 80 insertions(+), 46 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 7aef89cc472d..08554bd8941e 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -703,6 +703,8 @@ struct mlx5_cache_ent {
 	u32			access_mode;
 	u32			page;
 
+	u8 disabled:1;
+
 	/*
 	 * - available_mrs is the length of list head, ie the number of MRs
 	 *   available for immediate allocation.
@@ -729,7 +731,6 @@ struct mlx5_cache_ent {
 struct mlx5_mr_cache {
 	struct workqueue_struct *wq;
 	struct mlx5_cache_ent	ent[MAX_MR_CACHE_ENTRIES];
-	int			stopped;
 	struct dentry		*root;
 	unsigned long		last_add;
 };
@@ -999,10 +1000,10 @@ struct mlx5_ib_dev {
 	 */
 	struct mutex			cap_mask_mutex;
 	u8				ib_active:1;
-	u8				fill_delay:1;
 	u8				is_rep:1;
 	u8				lag_active:1;
 	u8				wc_support:1;
+	u8				fill_delay;
 	struct umr_common		umrc;
 	/* sync used page count stats
 	 */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 17dda58360c4..f475284c618c 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -97,13 +97,13 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 	u8 key;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ent->lock, flags);
-	ent->pending--;
-	spin_unlock_irqrestore(&ent->lock, flags);
 	if (status) {
 		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
 		kfree(mr);
-		dev->fill_delay = 1;
+		spin_lock_irqsave(&ent->lock, flags);
+		ent->pending--;
+		WRITE_ONCE(dev->fill_delay, 1);
+		spin_unlock_irqrestore(&ent->lock, flags);
 		mod_timer(&dev->delay_timer, jiffies + HZ);
 		return;
 	}
@@ -114,12 +114,13 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
 	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
 
-	dev->cache.last_add = jiffies;
+	WRITE_ONCE(dev->cache.last_add, jiffies);
 
 	spin_lock_irqsave(&ent->lock, flags);
 	list_add_tail(&mr->list, &ent->head);
 	ent->available_mrs++;
 	ent->total_mrs++;
+	ent->pending--;
 	/*
 	 * Creating is always done in response to some demand, so do not call
 	 * queue_adjust_cache_locked().
@@ -145,11 +146,6 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	for (i = 0; i < num; i++) {
-		if (ent->pending >= MAX_PENDING_REG_MR) {
-			err = -EAGAIN;
-			break;
-		}
-
 		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 		if (!mr) {
 			err = -ENOMEM;
@@ -170,6 +166,12 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 		MLX5_SET(mkc, mkc, log_page_size, ent->page);
 
 		spin_lock_irq(&ent->lock);
+		if (ent->pending >= MAX_PENDING_REG_MR) {
+			err = -EAGAIN;
+			spin_unlock_irq(&ent->lock);
+			kfree(mr);
+			break;
+		}
 		ent->pending++;
 		spin_unlock_irq(&ent->lock);
 
@@ -189,15 +191,13 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 	return err;
 }
 
-static void remove_cache_mr(struct mlx5_cache_ent *ent)
+static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
 {
 	struct mlx5_ib_mr *mr;
 
-	spin_lock_irq(&ent->lock);
-	if (list_empty(&ent->head)) {
-		spin_unlock_irq(&ent->lock);
+	lockdep_assert_held(&ent->lock);
+	if (list_empty(&ent->head))
 		return;
-	}
 	mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 	list_del(&mr->list);
 	ent->available_mrs--;
@@ -205,6 +205,7 @@ static void remove_cache_mr(struct mlx5_cache_ent *ent)
 	spin_unlock_irq(&ent->lock);
 	mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
 	kfree(mr);
+	spin_lock_irq(&ent->lock);
 }
 
 static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
@@ -233,9 +234,7 @@ static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
 			} else
 				return 0;
 		} else {
-			spin_unlock_irq(&ent->lock);
-			remove_cache_mr(ent);
-			spin_lock_irq(&ent->lock);
+			remove_cache_mr_locked(ent);
 		}
 	}
 }
@@ -344,16 +343,21 @@ static const struct file_operations limit_fops = {
 	.read	= limit_read,
 };
 
-static int someone_adding(struct mlx5_mr_cache *cache)
+static bool someone_adding(struct mlx5_mr_cache *cache)
 {
-	int i;
+	unsigned int i;
 
 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
-		if (cache->ent[i].available_mrs < cache->ent[i].limit)
-			return 1;
-	}
+		struct mlx5_cache_ent *ent = &cache->ent[i];
+		bool ret;
 
-	return 0;
+		spin_lock_irq(&ent->lock);
+		ret = ent->available_mrs < ent->limit;
+		spin_unlock_irq(&ent->lock);
+		if (ret)
+			return true;
+	}
+	return false;
 }
 
 /*
@@ -365,6 +369,8 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
 {
 	lockdep_assert_held(&ent->lock);
 
+	if (ent->disabled)
+		return;
 	if (ent->available_mrs < ent->limit ||
 	    ent->available_mrs > 2 * ent->limit)
 		queue_work(ent->dev->cache.wq, &ent->work);
@@ -376,27 +382,42 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 	struct mlx5_mr_cache *cache = &dev->cache;
 	int err;
 
-	if (cache->stopped)
-		return;
+	spin_lock_irq(&ent->lock);
+	if (ent->disabled)
+		goto out;
 
-	if (ent->available_mrs < 2 * ent->limit && !dev->fill_delay) {
+	if (ent->available_mrs + ent->pending < 2 * ent->limit &&
+	    !READ_ONCE(dev->fill_delay)) {
+		spin_unlock_irq(&ent->lock);
 		err = add_keys(ent, 1);
-		if (ent->available_mrs < 2 * ent->limit) {
+
+		spin_lock_irq(&ent->lock);
+		if (ent->disabled)
+			goto out;
+		if (err) {
 			if (err == -EAGAIN) {
 				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
 					    ent->order);
 				queue_delayed_work(cache->wq, &ent->dwork,
 						   msecs_to_jiffies(3));
-			} else if (err) {
-				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
-					     ent->order, err);
+			} else {
+				mlx5_ib_warn(
+					dev,
+					"command failed order %d, err %d\n",
+					ent->order, err);
 				queue_delayed_work(cache->wq, &ent->dwork,
 						   msecs_to_jiffies(1000));
-			} else {
-				queue_work(cache->wq, &ent->work);
 			}
 		}
+		/*
+		 * Once we start populating due to hitting a low water mark
+		 * continue until we pass the high water mark.
+		 */
+		if (ent->available_mrs + ent->pending < 2 * ent->limit)
+			queue_work(cache->wq, &ent->work);
 	} else if (ent->available_mrs > 2 * ent->limit) {
+		bool need_delay;
+
 		/*
 		 * The remove_cache_mr() logic is performed as garbage
 		 * collection task. Such task is intended to be run when no
@@ -409,15 +430,20 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 		 * the garbage collection work to try to run in next cycle, in
 		 * order to free CPU resources to other tasks.
 		 */
-		if (!need_resched() && !someone_adding(cache) &&
-		    time_after(jiffies, cache->last_add + 300 * HZ)) {
-			remove_cache_mr(ent);
-			if (ent->available_mrs > ent->limit)
-				queue_work(cache->wq, &ent->work);
-		} else {
+		spin_unlock_irq(&ent->lock);
+		need_delay = need_resched() || someone_adding(cache) ||
+			     time_after(jiffies,
+					READ_ONCE(cache->last_add) + 300 * HZ);
+		spin_lock_irq(&ent->lock);
+		if (ent->disabled)
+			goto out;
+		if (need_delay)
 			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
-		}
+		remove_cache_mr_locked(ent);
+		queue_adjust_cache_locked(ent);
 	}
+out:
+	spin_unlock_irq(&ent->lock);
 }
 
 static void delayed_cache_work_func(struct work_struct *work)
@@ -598,7 +624,7 @@ static void delay_time_func(struct timer_list *t)
 {
 	struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
 
-	dev->fill_delay = 0;
+	WRITE_ONCE(dev->fill_delay, 0);
 }
 
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
@@ -658,13 +684,20 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 {
-	int i;
+	unsigned int i;
 
 	if (!dev->cache.wq)
 		return 0;
 
-	dev->cache.stopped = 1;
-	flush_workqueue(dev->cache.wq);
+	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+		struct mlx5_cache_ent *ent = &dev->cache.ent[i];
+
+		spin_lock_irq(&ent->lock);
+		ent->disabled = true;
+		spin_unlock_irq(&ent->lock);
+		cancel_work_sync(&ent->work);
+		cancel_delayed_work_sync(&ent->dwork);
+	}
 
 	mlx5_mr_cache_debugfs_cleanup(dev);
 	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
-- 
2.24.1



* [PATCH rdma-next 8/9] RDMA/mlx5: Revise how the hysteresis scheme works for cache filling
  2020-02-27 12:33 [PATCH rdma-next 0/9] MR cache fixes and refactoring Leon Romanovsky
                   ` (6 preceding siblings ...)
  2020-02-27 12:33 ` [PATCH rdma-next 7/9] RDMA/mlx5: Fix locking in MR cache work queue Leon Romanovsky
@ 2020-02-27 12:33 ` Leon Romanovsky
  2020-02-27 12:34 ` [PATCH rdma-next 9/9] RDMA/mlx5: Allow MRs to be created in the cache synchronously Leon Romanovsky
  8 siblings, 0 replies; 13+ messages in thread
From: Leon Romanovsky @ 2020-02-27 12:33 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe; +Cc: linux-rdma

From: Jason Gunthorpe <jgg@mellanox.com>

Currently, if the work queue is running, it is in 'hysteresis' mode and
will fill until the cache reaches the high water mark. This implicit state
is very tricky and does not interact well with 'pending'.

Instead of self re-scheduling the work queue after add_keys() has
started to create the new MR, have the queue scheduled from
reg_mr_callback() only after the requested MR has actually been added.

This avoids the bad design of an in-rush of queued work doing back-to-back
add_keys() calls until EAGAIN and then sleeping. The add_keys() calls are
now paced one at a time as they complete, slowly filling up the cache.

Also, fix 'pending' so that it is only manipulated under the lock.
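
For reference, the resulting water-mark logic, condensed from
queue_adjust_cache_locked() in the diff below (a simplified sketch; the
ent->disabled and dev->fill_delay early returns are omitted):

	if (ent->available_mrs < ent->limit) {
		/* Below the low water mark: start filling */
		ent->fill_to_high_water = true;
		queue_work(ent->dev->cache.wq, &ent->work);
	} else if (ent->fill_to_high_water &&
		   ent->available_mrs + ent->pending < 2 * ent->limit) {
		/* Keep filling until we pass the high water mark */
		queue_work(ent->dev->cache.wq, &ent->work);
	} else if (ent->available_mrs == 2 * ent->limit) {
		/* High water mark reached: stop filling */
		ent->fill_to_high_water = false;
	} else if (ent->available_mrs > 2 * ent->limit) {
		/* Queue deletion of excess entries, delayed while
		 * creations are still in flight */
		ent->fill_to_high_water = false;
		if (ent->pending)
			queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
					   msecs_to_jiffies(1000));
		else
			queue_work(ent->dev->cache.wq, &ent->work);
	}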

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  1 +
 drivers/infiniband/hw/mlx5/mr.c      | 41 ++++++++++++++++++----------
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 08554bd8941e..e997837e600c 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -704,6 +704,7 @@ struct mlx5_cache_ent {
 	u32			page;
 
 	u8 disabled:1;
+	u8 fill_to_high_water:1;
 
 	/*
 	 * - available_mrs is the length of list head, ie the number of MRs
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index f475284c618c..c15de55c5a73 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -50,6 +50,7 @@ enum {
 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
+static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
 
 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
 {
@@ -120,11 +121,9 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 	list_add_tail(&mr->list, &ent->head);
 	ent->available_mrs++;
 	ent->total_mrs++;
+	/* If we are doing fill_to_high_water then keep going. */
+	queue_adjust_cache_locked(ent);
 	ent->pending--;
-	/*
-	 * Creating is always done in response to some demand, so do not call
-	 * queue_adjust_cache_locked().
-	 */
 	spin_unlock_irqrestore(&ent->lock, flags);
 
 	if (!completion_done(&ent->compl))
@@ -369,11 +368,29 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
 {
 	lockdep_assert_held(&ent->lock);
 
-	if (ent->disabled)
+	if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
 		return;
-	if (ent->available_mrs < ent->limit ||
-	    ent->available_mrs > 2 * ent->limit)
+	if (ent->available_mrs < ent->limit) {
+		ent->fill_to_high_water = true;
+		queue_work(ent->dev->cache.wq, &ent->work);
+	} else if (ent->fill_to_high_water &&
+		   ent->available_mrs + ent->pending < 2 * ent->limit) {
+		/*
+		 * Once we start populating due to hitting a low water mark
+		 * continue until we pass the high water mark.
+		 */
 		queue_work(ent->dev->cache.wq, &ent->work);
+	} else if (ent->available_mrs == 2 * ent->limit) {
+		ent->fill_to_high_water = false;
+	} else if (ent->available_mrs > 2 * ent->limit) {
+		/* Queue deletion of excess entries */
+		ent->fill_to_high_water = false;
+		if (ent->pending)
+			queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
+					   msecs_to_jiffies(1000));
+		else
+			queue_work(ent->dev->cache.wq, &ent->work);
+	}
 }
 
 static void __cache_work_func(struct mlx5_cache_ent *ent)
@@ -386,11 +403,11 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 	if (ent->disabled)
 		goto out;
 
-	if (ent->available_mrs + ent->pending < 2 * ent->limit &&
+	if (ent->fill_to_high_water &&
+	    ent->available_mrs + ent->pending < 2 * ent->limit &&
 	    !READ_ONCE(dev->fill_delay)) {
 		spin_unlock_irq(&ent->lock);
 		err = add_keys(ent, 1);
-
 		spin_lock_irq(&ent->lock);
 		if (ent->disabled)
 			goto out;
@@ -409,12 +426,6 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 						   msecs_to_jiffies(1000));
 			}
 		}
-		/*
-		 * Once we start populating due to hitting a low water mark
-		 * continue until we pass the high water mark.
-		 */
-		if (ent->available_mrs + ent->pending < 2 * ent->limit)
-			queue_work(cache->wq, &ent->work);
 	} else if (ent->available_mrs > 2 * ent->limit) {
 		bool need_delay;
 
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH rdma-next 9/9] RDMA/mlx5: Allow MRs to be created in the cache synchronously
  2020-02-27 12:33 [PATCH rdma-next 0/9] MR cache fixes and refactoring Leon Romanovsky
                   ` (7 preceding siblings ...)
  2020-02-27 12:33 ` [PATCH rdma-next 8/9] RDMA/mlx5: Revise how the hysteresis scheme works for cache filling Leon Romanovsky
@ 2020-02-27 12:34 ` Leon Romanovsky
  8 siblings, 0 replies; 13+ messages in thread
From: Leon Romanovsky @ 2020-02-27 12:34 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe; +Cc: linux-rdma

From: Jason Gunthorpe <jgg@mellanox.com>

If the cache is completely out of MRs, and we are running in cache mode,
then directly, and synchronously, create an MR that is compatible with the
cache bucket, using a sleeping mailbox command. This ensures that the
thread waiting for the MR will always get one.

When an MR allocated in this way is freed, it is still compatible with
the cache bucket and is recycled back into it.

Delete the very buggy ent->compl scheme that was previously used for
synchronous MR allocation.
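
The allocation flow this creates in mlx5_mr_cache_alloc(), condensed from
the diff below (a sketch; the IS_ERR/ERR_PTR error paths are elided):

	spin_lock_irq(&ent->lock);
	if (list_empty(&ent->head)) {
		spin_unlock_irq(&ent->lock);
		/*
		 * Cache is empty: synchronously create a
		 * bucket-compatible MR with a sleeping mailbox command
		 * instead of waiting on the removed ent->compl.
		 */
		mr = create_cache_mr(ent);
	} else {
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->available_mrs--;
		queue_adjust_cache_locked(ent);
		spin_unlock_irq(&ent->lock);
	}
	return mr;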

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |   1 -
 drivers/infiniband/hw/mlx5/mr.c      | 173 ++++++++++++++++-----------
 2 files changed, 105 insertions(+), 69 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e997837e600c..0ea8d57827fb 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -726,7 +726,6 @@ struct mlx5_cache_ent {
 	struct mlx5_ib_dev     *dev;
 	struct work_struct	work;
 	struct delayed_work	dwork;
-	struct completion	compl;
 };
 
 struct mlx5_mr_cache {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index c15de55c5a73..e64ac5a8434e 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -89,13 +89,29 @@ static int create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_ib_mr *mr,
 				callback, &mr->cb_work);
 }
 
+static void finalize_new_mr(struct mlx5_ib_mr *mr)
+{
+	struct mlx5_ib_dev *dev = mr->dev;
+	unsigned long flags;
+	u8 key;
+
+	mr->mmkey.type = MLX5_MKEY_MR;
+	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
+	key = dev->mdev->priv.mkey_key++;
+	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
+	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out,
+						  mkey_index)) |
+			key;
+
+	WRITE_ONCE(dev->cache.last_add, jiffies);
+}
+
 static void reg_mr_callback(int status, struct mlx5_async_work *context)
 {
 	struct mlx5_ib_mr *mr =
 		container_of(context, struct mlx5_ib_mr, cb_work);
 	struct mlx5_ib_dev *dev = mr->dev;
 	struct mlx5_cache_ent *ent = mr->cache_ent;
-	u8 key;
 	unsigned long flags;
 
 	if (status) {
@@ -109,13 +125,7 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 		return;
 	}
 
-	mr->mmkey.type = MLX5_MKEY_MR;
-	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
-	key = dev->mdev->priv.mkey_key++;
-	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
-	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
-
-	WRITE_ONCE(dev->cache.last_add, jiffies);
+	finalize_new_mr(mr);
 
 	spin_lock_irqsave(&ent->lock, flags);
 	list_add_tail(&mr->list, &ent->head);
@@ -125,14 +135,34 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 	queue_adjust_cache_locked(ent);
 	ent->pending--;
 	spin_unlock_irqrestore(&ent->lock, flags);
+}
+
+static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
+{
+	struct mlx5_ib_mr *mr;
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return NULL;
+	mr->order = ent->order;
+	mr->cache_ent = ent;
+	mr->dev = ent->dev;
 
-	if (!completion_done(&ent->compl))
-		complete(&ent->compl);
+	MLX5_SET(mkc, mkc, free, 1);
+	MLX5_SET(mkc, mkc, umr_en, 1);
+	MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
+	MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
+
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+	MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
+	MLX5_SET(mkc, mkc, log_page_size, ent->page);
+	return mr;
 }
 
+/* Asynchronously schedule new MRs to be populated in the cache. */
 static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 {
-	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+	size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	struct mlx5_ib_mr *mr;
 	void *mkc;
 	u32 *in;
@@ -145,25 +175,11 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	for (i = 0; i < num; i++) {
-		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+		mr = alloc_cache_mr(ent, mkc);
 		if (!mr) {
 			err = -ENOMEM;
 			break;
 		}
-		mr->order = ent->order;
-		mr->cache_ent = ent;
-		mr->dev = ent->dev;
-
-		MLX5_SET(mkc, mkc, free, 1);
-		MLX5_SET(mkc, mkc, umr_en, 1);
-		MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
-		MLX5_SET(mkc, mkc, access_mode_4_2,
-			 (ent->access_mode >> 2) & 0x7);
-
-		MLX5_SET(mkc, mkc, qpn, 0xffffff);
-		MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
-		MLX5_SET(mkc, mkc, log_page_size, ent->page);
-
 		spin_lock_irq(&ent->lock);
 		if (ent->pending >= MAX_PENDING_REG_MR) {
 			err = -EAGAIN;
@@ -190,6 +206,44 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 	return err;
 }
 
+/* Synchronously create a MR in the cache */
+static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
+{
+	size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+	struct mlx5_ib_mr *mr;
+	void *mkc;
+	u32 *in;
+	int err;
+
+	in = kzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return ERR_PTR(-ENOMEM);
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+	mr = alloc_cache_mr(ent, mkc);
+	if (!mr) {
+		err = -ENOMEM;
+		goto free_in;
+	}
+
+	err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
+	if (err)
+		goto free_mr;
+
+	mr->mmkey.type = MLX5_MKEY_MR;
+	WRITE_ONCE(ent->dev->cache.last_add, jiffies);
+	spin_lock_irq(&ent->lock);
+	ent->total_mrs++;
+	spin_unlock_irq(&ent->lock);
+	kfree(in);
+	return mr;
+free_mr:
+	kfree(mr);
+free_in:
+	kfree(in);
+	return ERR_PTR(err);
+}
+
 static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
 {
 	struct mlx5_ib_mr *mr;
@@ -412,12 +466,12 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 		if (ent->disabled)
 			goto out;
 		if (err) {
-			if (err == -EAGAIN) {
-				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
-					    ent->order);
-				queue_delayed_work(cache->wq, &ent->dwork,
-						   msecs_to_jiffies(3));
-			} else {
+			/*
+			 * EAGAIN only happens if pending is positive, so we
+			 * will be rescheduled from reg_mr_callback(). The only
+			 * failure path here is ENOMEM.
+			 */
+			if (err != -EAGAIN) {
 				mlx5_ib_warn(
 					dev,
 					"command failed order %d, err %d\n",
@@ -480,36 +534,30 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
 	struct mlx5_mr_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent;
 	struct mlx5_ib_mr *mr;
-	int err;
 
 	if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
 		    entry >= ARRAY_SIZE(cache->ent)))
 		return ERR_PTR(-EINVAL);
 
 	ent = &cache->ent[entry];
-	while (1) {
-		spin_lock_irq(&ent->lock);
-		if (list_empty(&ent->head)) {
-			spin_unlock_irq(&ent->lock);
-
-			err = add_keys(ent, 1);
-			if (err && err != -EAGAIN)
-				return ERR_PTR(err);
-
-			wait_for_completion(&ent->compl);
-		} else {
-			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
-					      list);
-			list_del(&mr->list);
-			ent->available_mrs--;
-			queue_adjust_cache_locked(ent);
-			spin_unlock_irq(&ent->lock);
+	spin_lock_irq(&ent->lock);
+	if (list_empty(&ent->head)) {
+		spin_unlock_irq(&ent->lock);
+		mr = create_cache_mr(ent);
+		if (IS_ERR(mr))
 			return mr;
-		}
+	} else {
+		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+		list_del(&mr->list);
+		ent->available_mrs--;
+		queue_adjust_cache_locked(ent);
+		spin_unlock_irq(&ent->lock);
 	}
+	return mr;
 }
 
-static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent)
+/* Return a MR already available in the cache */
+static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
 {
 	struct mlx5_ib_dev *dev = req_ent->dev;
 	struct mlx5_ib_mr *mr = NULL;
@@ -661,7 +709,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 		ent->dev = dev;
 		ent->limit = 0;
 
-		init_completion(&ent->compl);
 		INIT_WORK(&ent->work, cache_work_func);
 		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 
@@ -925,26 +972,16 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
 	struct mlx5_ib_mr *mr;
-	int err = 0;
-	int i;
 
 	if (!ent)
 		return ERR_PTR(-E2BIG);
-	for (i = 0; i < 1; i++) {
-		mr = alloc_cached_mr(ent);
-		if (mr)
-			break;
-
-		err = add_keys(ent, 1);
-		if (err && err != -EAGAIN) {
-			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
-			break;
-		}
+	mr = get_cache_mr(ent);
+	if (!mr) {
+		mr = create_cache_mr(ent);
+		if (IS_ERR(mr))
+			return mr;
 	}
 
-	if (!mr)
-		return ERR_PTR(-EAGAIN);
-
 	mr->ibmr.pd = pd;
 	mr->umem = umem;
 	mr->access_flags = access_flags;
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH mlx5-next 1/9] RDMA/mlx5: Move asynchronous mkey creation to mlx5_ib
  2020-02-27 12:33 ` [PATCH mlx5-next 1/9] RDMA/mlx5: Move asynchronous mkey creation to mlx5_ib Leon Romanovsky
@ 2020-02-27 19:41   ` Saeed Mahameed
  2020-02-27 20:00     ` Jason Gunthorpe
  0 siblings, 1 reply; 13+ messages in thread
From: Saeed Mahameed @ 2020-02-27 19:41 UTC (permalink / raw)
  To: Jason Gunthorpe, leon, dledford; +Cc: Michael Guralnik, netdev, linux-rdma

On Thu, 2020-02-27 at 14:33 +0200, Leon Romanovsky wrote:
> From: Michael Guralnik <michaelgur@mellanox.com>
> 
> As mlx5_ib is the only user of the mlx5_core_create_mkey_cb, move the
> logic inside mlx5_ib and cleanup the code in mlx5_core.
> 

I have a WIP series that is moving the whole mr.c to mlx5_ib.
https://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git/log/?h=topic/mr-relocate


> Signed-off-by: Michael Guralnik <michaelgur@mellanox.com>
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> ---
>  drivers/infiniband/hw/mlx5/mr.c              | 25 ++++++++++++++++
> ----
>  drivers/net/ethernet/mellanox/mlx5/core/mr.c | 22 +++--------------
>  include/linux/mlx5/driver.h                  |  6 -----
>  3 files changed, 24 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/mr.c
> b/drivers/infiniband/hw/mlx5/mr.c
> index 6fa0a83c19de..dea14477a676 100644
> --- a/drivers/infiniband/hw/mlx5/mr.c
> +++ b/drivers/infiniband/hw/mlx5/mr.c
> @@ -79,6 +79,25 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr
> *mr, u64 start, u64 length)
>  		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
>  }
>  
> +static int create_mkey_cb(struct mlx5_core_dev *dev, struct
> mlx5_ib_mr *mr,
> +			  struct mlx5_async_ctx *async_ctx, u32 *in,
> int inlen,
> +			  mlx5_async_cbk_t callback)
> +{
> +	void *mkc;
> +	u8 key;
> +
> +	spin_lock_irq(&dev->priv.mkey_lock);
> +	key = dev->priv.mkey_key++;

You know I don't like mlx5_ib sniffing around mlx5_core->priv...

This is handled correctly in my series; I can rebase it and make it
ready in a couple of days. Let me know if this will be good enough for
you.

> +	spin_unlock_irq(&dev->priv.mkey_lock);
> +	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
> +
> +	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
> +	MLX5_SET(mkc, mkc, mkey_7_0, key);
> +
> +	return mlx5_cmd_exec_cb(async_ctx, in, inlen, mr->out,
> sizeof(mr->out),
> +				callback, &mr->cb_work);
> +}
> +
>  static void reg_mr_callback(int status, struct mlx5_async_work
> *context)
>  {
>  	struct mlx5_ib_mr *mr =
> @@ -163,10 +182,8 @@ static int add_keys(struct mlx5_ib_dev *dev, int
> c, int num)
>  		spin_lock_irq(&ent->lock);
>  		ent->pending++;
>  		spin_unlock_irq(&ent->lock);
> -		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
> -					       &dev->async_ctx, in,
> inlen,
> -					       mr->out, sizeof(mr-
> >out),
> -					       reg_mr_callback, &mr-
> >cb_work);
> +		err = create_mkey_cb(dev->mdev, mr, &dev->async_ctx,
> in, inlen,
> +				     reg_mr_callback);
>  		if (err) {
>  			spin_lock_irq(&ent->lock);
>  			ent->pending--;
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
> b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
> index 42cc3c7ac5b6..83841e4119d7 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
> @@ -36,12 +36,9 @@
>  #include <linux/mlx5/cmd.h>
>  #include "mlx5_core.h"
>  
> -int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
> -			     struct mlx5_core_mkey *mkey,
> -			     struct mlx5_async_ctx *async_ctx, u32 *in,
> -			     int inlen, u32 *out, int outlen,
> -			     mlx5_async_cbk_t callback,
> -			     struct mlx5_async_work *context)
> +int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
> +			  struct mlx5_core_mkey *mkey,
> +			  u32 *in, int inlen)
>  {
>  	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
>  	u32 mkey_index;
> @@ -57,10 +54,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev
> *dev,
>  	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
>  	MLX5_SET(mkc, mkc, mkey_7_0, key);
>  
> -	if (callback)
> -		return mlx5_cmd_exec_cb(async_ctx, in, inlen, out,
> outlen,
> -					callback, context);
> -
>  	err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
>  	if (err)
>  		return err;
> @@ -75,15 +68,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev
> *dev,
>  		      mkey_index, key, mkey->key);
>  	return 0;
>  }
> -EXPORT_SYMBOL(mlx5_core_create_mkey_cb);
> -
> -int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
> -			  struct mlx5_core_mkey *mkey,
> -			  u32 *in, int inlen)
> -{
> -	return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen,
> -					NULL, 0, NULL, NULL);
> -}
>  EXPORT_SYMBOL(mlx5_core_create_mkey);
>  
>  int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
> diff --git a/include/linux/mlx5/driver.h
> b/include/linux/mlx5/driver.h
> index f2b4225ed650..7225e9ca0f25 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -947,12 +947,6 @@ struct mlx5_cmd_mailbox
> *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
>  						      gfp_t flags, int
> npages);
>  void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
>  				 struct mlx5_cmd_mailbox *head);
> -int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
> -			     struct mlx5_core_mkey *mkey,
> -			     struct mlx5_async_ctx *async_ctx, u32 *in,
> -			     int inlen, u32 *out, int outlen,
> -			     mlx5_async_cbk_t callback,
> -			     struct mlx5_async_work *context);
>  int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
>  			  struct mlx5_core_mkey *mkey,
>  			  u32 *in, int inlen);

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH mlx5-next 1/9] RDMA/mlx5: Move asynchronous mkey creation to mlx5_ib
  2020-02-27 19:41   ` Saeed Mahameed
@ 2020-02-27 20:00     ` Jason Gunthorpe
  2020-02-27 20:42       ` Saeed Mahameed
  0 siblings, 1 reply; 13+ messages in thread
From: Jason Gunthorpe @ 2020-02-27 20:00 UTC (permalink / raw)
  To: Saeed Mahameed; +Cc: leon, dledford, Michael Guralnik, netdev, linux-rdma

On Thu, Feb 27, 2020 at 07:41:24PM +0000, Saeed Mahameed wrote:
> On Thu, 2020-02-27 at 14:33 +0200, Leon Romanovsky wrote:
> > From: Michael Guralnik <michaelgur@mellanox.com>
> > 
> > As mlx5_ib is the only user of the mlx5_core_create_mkey_cb, move the
> > logic inside mlx5_ib and cleanup the code in mlx5_core.
> > 
> 
> I have a WIP series that is moving the whole mr.c to mlx5_ib.
> https://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git/log/?h=topic/mr-relocate
> 
> 
> > Signed-off-by: Michael Guralnik <michaelgur@mellanox.com>
> > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> >  drivers/infiniband/hw/mlx5/mr.c              | 25 ++++++++++++++++
> >  drivers/net/ethernet/mellanox/mlx5/core/mr.c | 22 +++--------------
> >  include/linux/mlx5/driver.h                  |  6 -----
> >  3 files changed, 24 insertions(+), 29 deletions(-)
> > 
> > diff --git a/drivers/infiniband/hw/mlx5/mr.c
> > b/drivers/infiniband/hw/mlx5/mr.c
> > index 6fa0a83c19de..dea14477a676 100644
> > +++ b/drivers/infiniband/hw/mlx5/mr.c
> > @@ -79,6 +79,25 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr
> > *mr, u64 start, u64 length)
> >  		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
> >  }
> >  
> > +static int create_mkey_cb(struct mlx5_core_dev *dev, struct
> > mlx5_ib_mr *mr,
> > +			  struct mlx5_async_ctx *async_ctx, u32 *in,
> > int inlen,
> > +			  mlx5_async_cbk_t callback)
> > +{
> > +	void *mkc;
> > +	u8 key;
> > +
> > +	spin_lock_irq(&dev->priv.mkey_lock);
> > +	key = dev->priv.mkey_key++;
> 
> You know I don't like mlx5_ib sniffing around mlx5_core->priv...
> 
> This is handled correctly in my series; I can rebase it and make it
> ready in a couple of days. Let me know if this will be good enough for
> you.

How about Michael just taking the two relevant patches into this series?

{IB,net}/mlx5: Setup mkey variant before mr create command invocation
{IB,net}/mlx5: Assign mkey variant in mlx5_ib only

And this partially moves toward your series. I think it will take more
than a few days to rebase and check all the parts of your series.

Jason

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH mlx5-next 1/9] RDMA/mlx5: Move asynchronous mkey creation to mlx5_ib
  2020-02-27 20:00     ` Jason Gunthorpe
@ 2020-02-27 20:42       ` Saeed Mahameed
  0 siblings, 0 replies; 13+ messages in thread
From: Saeed Mahameed @ 2020-02-27 20:42 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Michael Guralnik, netdev, linux-rdma, leon, dledford

On Thu, 2020-02-27 at 16:00 -0400, Jason Gunthorpe wrote:
> On Thu, Feb 27, 2020 at 07:41:24PM +0000, Saeed Mahameed wrote:
> > On Thu, 2020-02-27 at 14:33 +0200, Leon Romanovsky wrote:
> > > From: Michael Guralnik <michaelgur@mellanox.com>
> > > 
> > > As mlx5_ib is the only user of the mlx5_core_create_mkey_cb, move
> > > the
> > > logic inside mlx5_ib and cleanup the code in mlx5_core.
> > > 
> > 
> > I have a WIP series that is moving the whole mr.c to mlx5_ib.
> > https://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git/log/?h=topic/mr-relocate
> > 
> > 
> > > Signed-off-by: Michael Guralnik <michaelgur@mellanox.com>
> > > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > >  drivers/infiniband/hw/mlx5/mr.c              | 25
> > > ++++++++++++++++
> > >  drivers/net/ethernet/mellanox/mlx5/core/mr.c | 22 +++-----------
> > > ---
> > >  include/linux/mlx5/driver.h                  |  6 -----
> > >  3 files changed, 24 insertions(+), 29 deletions(-)
> > > 
> > > diff --git a/drivers/infiniband/hw/mlx5/mr.c
> > > b/drivers/infiniband/hw/mlx5/mr.c
> > > index 6fa0a83c19de..dea14477a676 100644
> > > +++ b/drivers/infiniband/hw/mlx5/mr.c
> > > @@ -79,6 +79,25 @@ static bool use_umr_mtt_update(struct
> > > mlx5_ib_mr
> > > *mr, u64 start, u64 length)
> > >  		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
> > >  }
> > >  
> > > +static int create_mkey_cb(struct mlx5_core_dev *dev, struct
> > > mlx5_ib_mr *mr,
> > > +			  struct mlx5_async_ctx *async_ctx, u32 *in,
> > > int inlen,
> > > +			  mlx5_async_cbk_t callback)
> > > +{
> > > +	void *mkc;
> > > +	u8 key;
> > > +
> > > +	spin_lock_irq(&dev->priv.mkey_lock);
> > > +	key = dev->priv.mkey_key++;
> > 
> > You know I don't like mlx5_ib sniffing around mlx5_core->priv...
> > 
> > This is handled correctly in my series; I can rebase it and make it
> > ready in a couple of days. Let me know if this will be good enough
> > for you.
> 
> How about Michael just taking the two relevant patches into this series?
> 
> {IB,net}/mlx5: Setup mkey variant before mr create command invocation
> {IB,net}/mlx5: Assign mkey variant in mlx5_ib only
> 

Also take "IB/mlx5: Replace spinlock protected write with atomic var";
it is a good one :)
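
Roughly, that patch replaces the mkey_lock protected counter with an
atomic owned by mlx5_ib, something like the sketch below (the atomic_t
field name is a guess; only the patch title is from this thread):

	/* before: mlx5_ib reaches into mlx5_core's priv under a lock */
	spin_lock_irq(&dev->mdev->priv.mkey_lock);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irq(&dev->mdev->priv.mkey_lock);

	/* after: mlx5_ib owns the variant counter, no lock needed */
	key = (u8)atomic_inc_return(&dev->mkey_var);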

> And this partially moves toward your series. I think it will take more
> than a few days to rebase and check all the parts of your series.

Okay.

-Saeed.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread

Thread overview: 13+ messages
2020-02-27 12:33 [PATCH rdma-next 0/9] MR cache fixes and refactoring Leon Romanovsky
2020-02-27 12:33 ` [PATCH mlx5-next 1/9] RDMA/mlx5: Move asynchronous mkey creation to mlx5_ib Leon Romanovsky
2020-02-27 19:41   ` Saeed Mahameed
2020-02-27 20:00     ` Jason Gunthorpe
2020-02-27 20:42       ` Saeed Mahameed
2020-02-27 12:33 ` [PATCH rdma-next 2/9] RDMA/mlx5: Rename the tracking variables for the MR cache Leon Romanovsky
2020-02-27 12:33 ` [PATCH rdma-next 3/9] RDMA/mlx5: Simplify how the MR cache bucket is located Leon Romanovsky
2020-02-27 12:33 ` [PATCH rdma-next 4/9] RDMA/mlx5: Always remove MRs from the cache before destroying them Leon Romanovsky
2020-02-27 12:33 ` [PATCH rdma-next 5/9] RDMA/mlx5: Fix MR cache size and limit debugfs Leon Romanovsky
2020-02-27 12:33 ` [PATCH rdma-next 6/9] RDMA/mlx5: Lock access to ent->available_mrs/limit when doing queue_work Leon Romanovsky
2020-02-27 12:33 ` [PATCH rdma-next 7/9] RDMA/mlx5: Fix locking in MR cache work queue Leon Romanovsky
2020-02-27 12:33 ` [PATCH rdma-next 8/9] RDMA/mlx5: Revise how the hysteresis scheme works for cache filling Leon Romanovsky
2020-02-27 12:34 ` [PATCH rdma-next 9/9] RDMA/mlx5: Allow MRs to be created in the cache synchronously Leon Romanovsky
