* [PATCH rdma-next 00/12] Refactor UMR post send logic
@ 2022-04-12  7:23 Leon Romanovsky
  2022-04-12  7:23 ` [PATCH rdma-next 01/12] RDMA/mlx5: Move init and cleanup of UMR to umr.c Leon Romanovsky
                   ` (12 more replies)
  0 siblings, 13 replies; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:23 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Leon Romanovsky, Aharon Landau, linux-kernel, linux-rdma,
	Michael Guralnik

From: Leon Romanovsky <leonro@nvidia.com>

Hi,

UMR QPs are special QPs that require slightly different logic than
other QP types. This series from Aharon refactors the UMR QP logic into
a separate file and dedicated functions to clean up the post send flow.
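
For orientation only: the end state of the series (patch 6 onward) is
that UMR users write the WQE segments directly and hand them to a
dedicated post-and-wait helper instead of packing a struct mlx5_umr_wr.
The key pieces introduced by the patches below look roughly like this
(a sketch, not additional code in this series):

  /* Per-operation WQE layout added to umr.h (patch 6) */
  struct mlx5r_umr_wqe {
  	struct mlx5_wqe_umr_ctrl_seg ctrl_seg;
  	struct mlx5_mkey_seg mkey_seg;
  	struct mlx5_wqe_data_seg data_seg;
  };

  /* Internal to umr.c (patch 6): post the pre-built WQE on the UMR QP
   * and wait for its completion.
   */
  int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
  			     struct mlx5r_umr_wqe *wqe, bool with_data);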

Thanks

Aharon Landau (12):
  RDMA/mlx5: Move init and cleanup of UMR to umr.c
  RDMA/mlx5: Move umr checks to umr.h
  RDMA/mlx5: Move mkey ctrl segment logic to umr.c
  RDMA/mlx5: Simplify get_umr_update_access_mask()
  RDMA/mlx5: Expose wqe posting helpers outside of wr.c
  RDMA/mlx5: Introduce mlx5_umr_post_send_wait()
  RDMA/mlx5: Use mlx5_umr_post_send_wait() to revoke MRs
  RDMA/mlx5: Use mlx5_umr_post_send_wait() to rereg pd access
  RDMA/mlx5: Move creation and free of translation tables to umr.c
  RDMA/mlx5: Use mlx5_umr_post_send_wait() to update MR pas
  RDMA/mlx5: Use mlx5_umr_post_send_wait() to update xlt
  RDMA/mlx5: Clean UMR QP type flow from mlx5_ib_post_send()

 drivers/infiniband/hw/mlx5/Makefile  |   1 +
 drivers/infiniband/hw/mlx5/main.c    | 109 +----
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  89 +---
 drivers/infiniband/hw/mlx5/mr.c      | 421 +---------------
 drivers/infiniband/hw/mlx5/odp.c     |  63 ++-
 drivers/infiniband/hw/mlx5/qp.c      |   1 +
 drivers/infiniband/hw/mlx5/umr.c     | 700 +++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/umr.h     |  97 ++++
 drivers/infiniband/hw/mlx5/wr.c      | 377 +++------------
 drivers/infiniband/hw/mlx5/wr.h      |  60 +++
 10 files changed, 976 insertions(+), 942 deletions(-)
 create mode 100644 drivers/infiniband/hw/mlx5/umr.c
 create mode 100644 drivers/infiniband/hw/mlx5/umr.h

-- 
2.35.1



* [PATCH rdma-next 01/12] RDMA/mlx5: Move init and cleanup of UMR to umr.c
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
@ 2022-04-12  7:23 ` Leon Romanovsky
  2022-04-12  7:23 ` [PATCH rdma-next 02/12] RDMA/mlx5: Move umr checks to umr.h Leon Romanovsky
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:23 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

From: Aharon Landau <aharonl@nvidia.com>

The first patch in a series to split the UMR logic into a dedicated
file. As a start, move the init and cleanup of the UMR resources to
umr.c.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/Makefile |   1 +
 drivers/infiniband/hw/mlx5/main.c   | 109 ++--------------------------
 drivers/infiniband/hw/mlx5/umr.c    | 106 +++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/umr.h    |  12 +++
 4 files changed, 125 insertions(+), 103 deletions(-)
 create mode 100644 drivers/infiniband/hw/mlx5/umr.c
 create mode 100644 drivers/infiniband/hw/mlx5/umr.h

diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index f43380106bd0..612ee8190a2d 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -19,6 +19,7 @@ mlx5_ib-y := ah.o \
 	     restrack.o \
 	     srq.o \
 	     srq_cmd.o \
+	     umr.o \
 	     wr.o
 
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 09a78a03cf73..e2d88027a9a7 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -41,6 +41,7 @@
 #include "wr.h"
 #include "restrack.h"
 #include "counters.h"
+#include "umr.h"
 #include <rdma/uverbs_std_types.h>
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/mlx5_user_ioctl_verbs.h>
@@ -4004,12 +4005,7 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
 	if (err)
 		mlx5_ib_warn(dev, "mr cache cleanup failed\n");
 
-	if (dev->umrc.qp)
-		ib_destroy_qp(dev->umrc.qp);
-	if (dev->umrc.cq)
-		ib_free_cq(dev->umrc.cq);
-	if (dev->umrc.pd)
-		ib_dealloc_pd(dev->umrc.pd);
+	mlx5r_umr_resource_cleanup(dev);
 }
 
 static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
@@ -4017,112 +4013,19 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
 	ib_unregister_device(&dev->ib_dev);
 }
 
-enum {
-	MAX_UMR_WR = 128,
-};
-
 static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
 {
-	struct ib_qp_init_attr *init_attr = NULL;
-	struct ib_qp_attr *attr = NULL;
-	struct ib_pd *pd;
-	struct ib_cq *cq;
-	struct ib_qp *qp;
 	int ret;
 
-	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
-	init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
-	if (!attr || !init_attr) {
-		ret = -ENOMEM;
-		goto error_0;
-	}
-
-	pd = ib_alloc_pd(&dev->ib_dev, 0);
-	if (IS_ERR(pd)) {
-		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
-		ret = PTR_ERR(pd);
-		goto error_0;
-	}
-
-	cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
-	if (IS_ERR(cq)) {
-		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
-		ret = PTR_ERR(cq);
-		goto error_2;
-	}
-
-	init_attr->send_cq = cq;
-	init_attr->recv_cq = cq;
-	init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-	init_attr->cap.max_send_wr = MAX_UMR_WR;
-	init_attr->cap.max_send_sge = 1;
-	init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
-	init_attr->port_num = 1;
-	qp = ib_create_qp(pd, init_attr);
-	if (IS_ERR(qp)) {
-		mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
-		ret = PTR_ERR(qp);
-		goto error_3;
-	}
-
-	attr->qp_state = IB_QPS_INIT;
-	attr->port_num = 1;
-	ret = ib_modify_qp(qp, attr,
-			   IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
-	if (ret) {
-		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
-		goto error_4;
-	}
-
-	memset(attr, 0, sizeof(*attr));
-	attr->qp_state = IB_QPS_RTR;
-	attr->path_mtu = IB_MTU_256;
-
-	ret = ib_modify_qp(qp, attr, IB_QP_STATE);
-	if (ret) {
-		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
-		goto error_4;
-	}
-
-	memset(attr, 0, sizeof(*attr));
-	attr->qp_state = IB_QPS_RTS;
-	ret = ib_modify_qp(qp, attr, IB_QP_STATE);
-	if (ret) {
-		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
-		goto error_4;
-	}
-
-	dev->umrc.qp = qp;
-	dev->umrc.cq = cq;
-	dev->umrc.pd = pd;
+	ret = mlx5r_umr_resource_init(dev);
+	if (ret)
+		return ret;
 
-	sema_init(&dev->umrc.sem, MAX_UMR_WR);
 	ret = mlx5_mr_cache_init(dev);
 	if (ret) {
 		mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
-		goto error_4;
+		mlx5r_umr_resource_cleanup(dev);
 	}
-
-	kfree(attr);
-	kfree(init_attr);
-
-	return 0;
-
-error_4:
-	ib_destroy_qp(qp);
-	dev->umrc.qp = NULL;
-
-error_3:
-	ib_free_cq(cq);
-	dev->umrc.cq = NULL;
-
-error_2:
-	ib_dealloc_pd(pd);
-	dev->umrc.pd = NULL;
-
-error_0:
-	kfree(attr);
-	kfree(init_attr);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
new file mode 100644
index 000000000000..46eaf919eb49
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#include "mlx5_ib.h"
+#include "umr.h"
+
+enum {
+	MAX_UMR_WR = 128,
+};
+
+static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
+{
+	struct ib_qp_attr attr = {};
+	int ret;
+
+	attr.qp_state = IB_QPS_INIT;
+	attr.port_num = 1;
+	ret = ib_modify_qp(qp, &attr,
+			   IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
+	if (ret) {
+		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+		return ret;
+	}
+
+	memset(&attr, 0, sizeof(attr));
+	attr.qp_state = IB_QPS_RTR;
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret) {
+		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
+		return ret;
+	}
+
+	memset(&attr, 0, sizeof(attr));
+	attr.qp_state = IB_QPS_RTS;
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret) {
+		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
+{
+	struct ib_qp_init_attr init_attr = {};
+	struct ib_pd *pd;
+	struct ib_cq *cq;
+	struct ib_qp *qp;
+	int ret;
+
+	pd = ib_alloc_pd(&dev->ib_dev, 0);
+	if (IS_ERR(pd)) {
+		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+		return PTR_ERR(pd);
+	}
+
+	cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
+	if (IS_ERR(cq)) {
+		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
+		ret = PTR_ERR(cq);
+		goto destroy_pd;
+	}
+
+	init_attr.send_cq = cq;
+	init_attr.recv_cq = cq;
+	init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+	init_attr.cap.max_send_wr = MAX_UMR_WR;
+	init_attr.cap.max_send_sge = 1;
+	init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
+	init_attr.port_num = 1;
+	qp = ib_create_qp(pd, &init_attr);
+	if (IS_ERR(qp)) {
+		mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
+		ret = PTR_ERR(qp);
+		goto destroy_cq;
+	}
+
+	ret = mlx5r_umr_qp_rst2rts(dev, qp);
+	if (ret)
+		goto destroy_qp;
+
+	dev->umrc.qp = qp;
+	dev->umrc.cq = cq;
+	dev->umrc.pd = pd;
+
+	sema_init(&dev->umrc.sem, MAX_UMR_WR);
+
+	return 0;
+
+destroy_qp:
+	ib_destroy_qp(qp);
+destroy_cq:
+	ib_free_cq(cq);
+destroy_pd:
+	ib_dealloc_pd(pd);
+	return ret;
+}
+
+void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
+{
+	ib_destroy_qp(dev->umrc.qp);
+	ib_free_cq(dev->umrc.cq);
+	ib_dealloc_pd(dev->umrc.pd);
+}
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
new file mode 100644
index 000000000000..cb1a2c95aac2
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef _MLX5_IB_UMR_H
+#define _MLX5_IB_UMR_H
+
+#include "mlx5_ib.h"
+
+int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev);
+void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev);
+
+#endif /* _MLX5_IB_UMR_H */
-- 
2.35.1



* [PATCH rdma-next 02/12] RDMA/mlx5: Move umr checks to umr.h
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
  2022-04-12  7:23 ` [PATCH rdma-next 01/12] RDMA/mlx5: Move init and cleanup of UMR to umr.c Leon Romanovsky
@ 2022-04-12  7:23 ` Leon Romanovsky
  2022-04-12  7:23 ` [PATCH rdma-next 03/12] RDMA/mlx5: Move mkey ctrl segment logic to umr.c Leon Romanovsky
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:23 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

From: Aharon Landau <aharonl@nvidia.com>

Move mlx5_ib_can_load_pas_with_umr() and
mlx5_ib_can_reconfig_with_umr() to umr.h and rename them accordingly.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 56 ---------------------------
 drivers/infiniband/hw/mlx5/mr.c      | 19 +++++-----
 drivers/infiniband/hw/mlx5/odp.c     |  7 ++--
 drivers/infiniband/hw/mlx5/umr.h     | 57 ++++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/wr.c      |  3 +-
 5 files changed, 72 insertions(+), 70 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 4f04bb55c4c6..18ba11e4e2e6 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1471,9 +1471,6 @@ static inline int is_qp1(enum ib_qp_type qp_type)
 	return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI;
 }
 
-#define MLX5_MAX_UMR_SHIFT 16
-#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
-
 static inline u32 check_cq_create_flags(u32 flags)
 {
 	/*
@@ -1545,59 +1542,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
 			struct mlx5_bfreg_info *bfregi, u32 bfregn,
 			bool dyn_bfreg);
 
-static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev,
-						 size_t length)
-{
-	/*
-	 * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
-	 * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
-	 * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
-	 * can never be enabled without this capability. Simplify this weird
-	 * quirky hardware by just saying it can't use PAS lists with UMR at
-	 * all.
-	 */
-	if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
-		return false;
-
-	/*
-	 * length is the size of the MR in bytes when mlx5_ib_update_xlt() is
-	 * used.
-	 */
-	if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
-	    length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
-		return false;
-	return true;
-}
-
-/*
- * true if an existing MR can be reconfigured to new access_flags using UMR.
- * Older HW cannot use UMR to update certain elements of the MKC. See
- * umr_check_mkey_mask(), get_umr_update_access_mask() and umr_check_mkey_mask()
- */
-static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev,
-						 unsigned int current_access_flags,
-						 unsigned int target_access_flags)
-{
-	unsigned int diffs = current_access_flags ^ target_access_flags;
-
-	if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
-	    MLX5_CAP_GEN(dev->mdev, atomic) &&
-	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
-		return false;
-
-	if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
-	    MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
-	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
-		return false;
-
-	if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
-	    MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
-	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
-		return false;
-
-	return true;
-}
-
 static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
 				       struct mlx5_ib_mkey *mmkey)
 {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 956f8e875daa..6d87a93e03db 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -44,6 +44,7 @@
 #include <rdma/ib_verbs.h>
 #include "dm.h"
 #include "mlx5_ib.h"
+#include "umr.h"
 
 /*
  * We can't use an array for xlt_emergency_page because dma_map_single doesn't
@@ -598,7 +599,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
 	struct mlx5_ib_mr *mr;
 
 	/* Matches access in alloc_cache_mr() */
-	if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
+	if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
 		return ERR_PTR(-EOPNOTSUPP);
 
 	spin_lock_irq(&ent->lock);
@@ -738,7 +739,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
 		if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
 		    !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
-		    mlx5_ib_can_load_pas_with_umr(dev, 0))
+		    mlx5r_umr_can_load_pas(dev, 0))
 			ent->limit = dev->mdev->profile.mr_cache[i].limit;
 		else
 			ent->limit = 0;
@@ -946,7 +947,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 	 * cache then synchronously create an uncached one.
 	 */
 	if (!ent || ent->limit == 0 ||
-	    !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
+	    !mlx5r_umr_can_reconfig(dev, 0, access_flags)) {
 		mutex_lock(&dev->slow_path_mutex);
 		mr = reg_create(pd, umem, iova, access_flags, page_size, false);
 		mutex_unlock(&dev->slow_path_mutex);
@@ -1438,7 +1439,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
 	bool xlt_with_umr;
 	int err;
 
-	xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
+	xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
 	if (xlt_with_umr) {
 		mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
 	} else {
@@ -1501,7 +1502,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
 	}
 
 	/* ODP requires xlt update via umr to work. */
-	if (!mlx5_ib_can_load_pas_with_umr(dev, length))
+	if (!mlx5r_umr_can_load_pas(dev, length))
 		return ERR_PTR(-EINVAL);
 
 	odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
@@ -1591,7 +1592,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
 		    offset, virt_addr, length, fd, access_flags);
 
 	/* dmabuf requires xlt update via umr to work. */
-	if (!mlx5_ib_can_load_pas_with_umr(dev, length))
+	if (!mlx5r_umr_can_load_pas(dev, length))
 		return ERR_PTR(-EINVAL);
 
 	umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
@@ -1666,8 +1667,8 @@ static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
 	if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
 		      IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
 		return false;
-	return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags,
-					     target_access_flags);
+	return mlx5r_umr_can_reconfig(dev, current_access_flags,
+				      target_access_flags);
 }
 
 static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
@@ -1704,7 +1705,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
 	/* We only track the allocated sizes of MRs from the cache */
 	if (!mr->cache_ent)
 		return false;
-	if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length))
+	if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
 		return false;
 
 	*page_size =
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 41c964a45f89..99dc06f589d4 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -38,6 +38,7 @@
 
 #include "mlx5_ib.h"
 #include "cmd.h"
+#include "umr.h"
 #include "qp.h"
 
 #include <linux/mlx5/eq.h>
@@ -323,8 +324,7 @@ static void internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 
 	memset(caps, 0, sizeof(*caps));
 
-	if (!MLX5_CAP_GEN(dev->mdev, pg) ||
-	    !mlx5_ib_can_load_pas_with_umr(dev, 0))
+	if (!MLX5_CAP_GEN(dev->mdev, pg) || !mlx5r_umr_can_load_pas(dev, 0))
 		return;
 
 	caps->general_caps = IB_ODP_SUPPORT;
@@ -487,8 +487,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 	struct mlx5_ib_mr *imr;
 	int err;
 
-	if (!mlx5_ib_can_load_pas_with_umr(dev,
-					   MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
+	if (!mlx5r_umr_can_load_pas(dev, MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
 		return ERR_PTR(-EOPNOTSUPP);
 
 	umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
index cb1a2c95aac2..eea9505575f6 100644
--- a/drivers/infiniband/hw/mlx5/umr.h
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -6,7 +6,64 @@
 
 #include "mlx5_ib.h"
 
+
+#define MLX5_MAX_UMR_SHIFT 16
+#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+
 int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev);
 void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev);
 
+static inline bool mlx5r_umr_can_load_pas(struct mlx5_ib_dev *dev,
+					  size_t length)
+{
+	/*
+	 * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
+	 * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
+	 * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
+	 * can never be enabled without this capability. Simplify this weird
+	 * quirky hardware by just saying it can't use PAS lists with UMR at
+	 * all.
+	 */
+	if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+		return false;
+
+	/*
+	 * length is the size of the MR in bytes when mlx5_ib_update_xlt() is
+	 * used.
+	 */
+	if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
+	    length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
+		return false;
+	return true;
+}
+
+/*
+ * true if an existing MR can be reconfigured to new access_flags using UMR.
+ * Older HW cannot use UMR to update certain elements of the MKC. See
+ * get_umr_update_access_mask() and umr_check_mkey_mask()
+ */
+static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev,
+					  unsigned int current_access_flags,
+					  unsigned int target_access_flags)
+{
+	unsigned int diffs = current_access_flags ^ target_access_flags;
+
+	if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
+	    MLX5_CAP_GEN(dev->mdev, atomic) &&
+	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+		return false;
+
+	if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
+	    MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
+	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+		return false;
+
+	if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
+	    MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
+	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+		return false;
+
+	return true;
+}
+
 #endif /* _MLX5_IB_UMR_H */
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index 51e48ca9016e..4a0fb10d9de9 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -7,6 +7,7 @@
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/driver.h>
 #include "wr.h"
+#include "umr.h"
 
 static const u32 mlx5_ib_opcode[] = {
 	[IB_WR_SEND]				= MLX5_OPCODE_SEND,
@@ -870,7 +871,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
 	 * Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and
 	 * kernel ULPs are not aware of it, so we don't set it here.
 	 */
-	if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) {
+	if (!mlx5r_umr_can_reconfig(dev, 0, wr->access)) {
 		mlx5_ib_warn(
 			to_mdev(qp->ibqp.device),
 			"Fast update for MR access flags is not possible\n");
-- 
2.35.1



* [PATCH rdma-next 03/12] RDMA/mlx5: Move mkey ctrl segment logic to umr.c
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
  2022-04-12  7:23 ` [PATCH rdma-next 01/12] RDMA/mlx5: Move init and cleanup of UMR to umr.c Leon Romanovsky
  2022-04-12  7:23 ` [PATCH rdma-next 02/12] RDMA/mlx5: Move umr checks to umr.h Leon Romanovsky
@ 2022-04-12  7:23 ` Leon Romanovsky
  2022-04-25 15:22   ` Jason Gunthorpe
  2022-04-12  7:23 ` [PATCH rdma-next 04/12] RDMA/mlx5: Simplify get_umr_update_access_mask() Leon Romanovsky
                   ` (9 subsequent siblings)
  12 siblings, 1 reply; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:23 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

From: Aharon Landau <aharonl@nvidia.com>

Move set_reg_umr_segment() and its helpers to umr.c.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |   3 -
 drivers/infiniband/hw/mlx5/qp.c      |   1 +
 drivers/infiniband/hw/mlx5/umr.c     | 129 ++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/umr.h     |  13 +++
 drivers/infiniband/hw/mlx5/wr.c      | 142 +--------------------------
 5 files changed, 147 insertions(+), 141 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 18ba11e4e2e6..d77a27503488 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -311,9 +311,6 @@ struct mlx5_ib_flow_db {
 #define MLX5_IB_QPT_DCT		IB_QPT_RESERVED4
 #define MLX5_IB_WR_UMR		IB_WR_RESERVED1
 
-#define MLX5_IB_UMR_OCTOWORD	       16
-#define MLX5_IB_UMR_XLT_ALIGNMENT      64
-
 #define MLX5_IB_UPD_XLT_ZAP	      BIT(0)
 #define MLX5_IB_UPD_XLT_ENABLE	      BIT(1)
 #define MLX5_IB_UPD_XLT_ATOMIC	      BIT(2)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 3f467557d34e..d2f243d3c4e2 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -40,6 +40,7 @@
 #include "ib_rep.h"
 #include "counters.h"
 #include "cmd.h"
+#include "umr.h"
 #include "qp.h"
 #include "wr.h"
 
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 46eaf919eb49..d3626a9dc8ab 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -4,6 +4,135 @@
 #include "mlx5_ib.h"
 #include "umr.h"
 
+static __be64 get_umr_enable_mr_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_KEY |
+		 MLX5_MKEY_MASK_FREE;
+
+	return cpu_to_be64(result);
+}
+
+static __be64 get_umr_disable_mr_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_FREE;
+
+	return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_translation_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_LEN |
+		 MLX5_MKEY_MASK_PAGE_SIZE |
+		 MLX5_MKEY_MASK_START_ADDR;
+
+	return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_access_mask(int atomic,
+					 int relaxed_ordering_write,
+					 int relaxed_ordering_read)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_LR |
+		 MLX5_MKEY_MASK_LW |
+		 MLX5_MKEY_MASK_RR |
+		 MLX5_MKEY_MASK_RW;
+
+	if (atomic)
+		result |= MLX5_MKEY_MASK_A;
+
+	if (relaxed_ordering_write)
+		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
+
+	if (relaxed_ordering_read)
+		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
+
+	return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_pd_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_PD;
+
+	return cpu_to_be64(result);
+}
+
+static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
+{
+	if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
+	    MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+		return -EPERM;
+
+	if (mask & MLX5_MKEY_MASK_A &&
+	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+		return -EPERM;
+
+	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
+	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+		return -EPERM;
+
+	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
+	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+		return -EPERM;
+
+	return 0;
+}
+
+int mlx5r_umr_set_umr_ctrl_seg(struct mlx5_ib_dev *dev,
+			       struct mlx5_wqe_umr_ctrl_seg *umr,
+			       const struct ib_send_wr *wr)
+{
+	const struct mlx5_umr_wr *umrwr = umr_wr(wr);
+
+	memset(umr, 0, sizeof(*umr));
+
+	if (!umrwr->ignore_free_state) {
+		if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
+			 /* fail if free */
+			umr->flags = MLX5_UMR_CHECK_FREE;
+		else
+			/* fail if not free */
+			umr->flags = MLX5_UMR_CHECK_NOT_FREE;
+	}
+
+	umr->xlt_octowords =
+		cpu_to_be16(mlx5r_umr_get_xlt_octo(umrwr->xlt_size));
+	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
+		u64 offset = mlx5r_umr_get_xlt_octo(umrwr->offset);
+
+		umr->xlt_offset = cpu_to_be16(offset & 0xffff);
+		umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
+		umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+	}
+	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
+		umr->mkey_mask |= get_umr_update_translation_mask();
+	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
+		umr->mkey_mask |= get_umr_update_access_mask(
+			!!MLX5_CAP_GEN(dev->mdev, atomic),
+			!!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr),
+			!!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr));
+		umr->mkey_mask |= get_umr_update_pd_mask();
+	}
+	if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
+		umr->mkey_mask |= get_umr_enable_mr_mask();
+	if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
+		umr->mkey_mask |= get_umr_disable_mr_mask();
+
+	if (!wr->num_sge)
+		umr->flags |= MLX5_UMR_INLINE;
+
+	return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
+}
+
 enum {
 	MAX_UMR_WR = 128,
 };
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
index eea9505575f6..0fe6cdd633d4 100644
--- a/drivers/infiniband/hw/mlx5/umr.h
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -10,6 +10,9 @@
 #define MLX5_MAX_UMR_SHIFT 16
 #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
 
+#define MLX5_IB_UMR_OCTOWORD	       16
+#define MLX5_IB_UMR_XLT_ALIGNMENT      64
+
 int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev);
 void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev);
 
@@ -66,4 +69,14 @@ static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev,
 	return true;
 }
 
+static inline u64 mlx5r_umr_get_xlt_octo(u64 bytes)
+{
+	return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
+	       MLX5_IB_UMR_OCTOWORD;
+}
+
+int mlx5r_umr_set_umr_ctrl_seg(struct mlx5_ib_dev *dev,
+			       struct mlx5_wqe_umr_ctrl_seg *umr,
+			       const struct ib_send_wr *wr);
+
 #endif /* _MLX5_IB_UMR_H */
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index 4a0fb10d9de9..24457eb5e4b7 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -166,12 +166,6 @@ static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
 	dseg->addr       = cpu_to_be64(sg->addr);
 }
 
-static u64 get_xlt_octo(u64 bytes)
-{
-	return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
-	       MLX5_IB_UMR_OCTOWORD;
-}
-
 static __be64 frwr_mkey_mask(bool atomic)
 {
 	u64 result;
@@ -223,7 +217,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
 	memset(umr, 0, sizeof(*umr));
 
 	umr->flags = flags;
-	umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
+	umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
 	umr->mkey_mask = frwr_mkey_mask(atomic);
 }
 
@@ -234,134 +228,6 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
 	umr->flags = MLX5_UMR_INLINE;
 }
 
-static __be64 get_umr_enable_mr_mask(void)
-{
-	u64 result;
-
-	result = MLX5_MKEY_MASK_KEY |
-		 MLX5_MKEY_MASK_FREE;
-
-	return cpu_to_be64(result);
-}
-
-static __be64 get_umr_disable_mr_mask(void)
-{
-	u64 result;
-
-	result = MLX5_MKEY_MASK_FREE;
-
-	return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_translation_mask(void)
-{
-	u64 result;
-
-	result = MLX5_MKEY_MASK_LEN |
-		 MLX5_MKEY_MASK_PAGE_SIZE |
-		 MLX5_MKEY_MASK_START_ADDR;
-
-	return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_access_mask(int atomic,
-					 int relaxed_ordering_write,
-					 int relaxed_ordering_read)
-{
-	u64 result;
-
-	result = MLX5_MKEY_MASK_LR |
-		 MLX5_MKEY_MASK_LW |
-		 MLX5_MKEY_MASK_RR |
-		 MLX5_MKEY_MASK_RW;
-
-	if (atomic)
-		result |= MLX5_MKEY_MASK_A;
-
-	if (relaxed_ordering_write)
-		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
-
-	if (relaxed_ordering_read)
-		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
-
-	return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_pd_mask(void)
-{
-	u64 result;
-
-	result = MLX5_MKEY_MASK_PD;
-
-	return cpu_to_be64(result);
-}
-
-static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
-{
-	if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
-	    MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
-		return -EPERM;
-
-	if (mask & MLX5_MKEY_MASK_A &&
-	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
-		return -EPERM;
-
-	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
-	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
-		return -EPERM;
-
-	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
-	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
-		return -EPERM;
-
-	return 0;
-}
-
-static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
-			       struct mlx5_wqe_umr_ctrl_seg *umr,
-			       const struct ib_send_wr *wr)
-{
-	const struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
-	memset(umr, 0, sizeof(*umr));
-
-	if (!umrwr->ignore_free_state) {
-		if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
-			 /* fail if free */
-			umr->flags = MLX5_UMR_CHECK_FREE;
-		else
-			/* fail if not free */
-			umr->flags = MLX5_UMR_CHECK_NOT_FREE;
-	}
-
-	umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
-	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
-		u64 offset = get_xlt_octo(umrwr->offset);
-
-		umr->xlt_offset = cpu_to_be16(offset & 0xffff);
-		umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
-		umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
-	}
-	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
-		umr->mkey_mask |= get_umr_update_translation_mask();
-	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
-		umr->mkey_mask |= get_umr_update_access_mask(
-			!!(MLX5_CAP_GEN(dev->mdev, atomic)),
-			!!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)),
-			!!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)));
-		umr->mkey_mask |= get_umr_update_pd_mask();
-	}
-	if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
-		umr->mkey_mask |= get_umr_enable_mr_mask();
-	if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
-		umr->mkey_mask |= get_umr_disable_mr_mask();
-
-	if (!wr->num_sge)
-		umr->flags |= MLX5_UMR_INLINE;
-
-	return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
-}
-
 static u8 get_umr_flags(int acc)
 {
 	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
@@ -761,7 +627,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
 	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
 				    MLX5_MKEY_BSF_EN | pdn);
 	seg->len = cpu_to_be64(length);
-	seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
+	seg->xlt_oct_size = cpu_to_be32(mlx5r_umr_get_xlt_octo(size));
 	seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
 }
 
@@ -771,7 +637,7 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 	memset(umr, 0, sizeof(*umr));
 
 	umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
-	umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
+	umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
 	umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
 	umr->mkey_mask = sig_mkey_mask();
 }
@@ -1262,7 +1128,7 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 
 	qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
 	(*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey);
-	err = set_reg_umr_segment(dev, *seg, wr);
+	err = mlx5r_umr_set_umr_ctrl_seg(dev, *seg, wr);
 	if (unlikely(err))
 		goto out;
 	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
-- 
2.35.1



* [PATCH rdma-next 04/12] RDMA/mlx5: Simplify get_umr_update_access_mask()
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
                   ` (2 preceding siblings ...)
  2022-04-12  7:23 ` [PATCH rdma-next 03/12] RDMA/mlx5: Move mkey ctrl segment logic to umr.c Leon Romanovsky
@ 2022-04-12  7:23 ` Leon Romanovsky
  2022-04-12  7:24 ` [PATCH rdma-next 05/12] RDMA/mlx5: Expose wqe posting helpers outside of wr.c Leon Romanovsky
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:23 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

From: Aharon Landau <aharonl@nvidia.com>

Instead of fetching the update access capabilities before each call to
get_umr_update_access_mask(), pass struct mlx5_ib_dev and read the
capabilities inside the function.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/umr.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index d3626a9dc8ab..8131501dc052 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -34,9 +34,7 @@ static __be64 get_umr_update_translation_mask(void)
 	return cpu_to_be64(result);
 }
 
-static __be64 get_umr_update_access_mask(int atomic,
-					 int relaxed_ordering_write,
-					 int relaxed_ordering_read)
+static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
 {
 	u64 result;
 
@@ -45,13 +43,13 @@ static __be64 get_umr_update_access_mask(int atomic,
 		 MLX5_MKEY_MASK_RR |
 		 MLX5_MKEY_MASK_RW;
 
-	if (atomic)
+	if (MLX5_CAP_GEN(dev->mdev, atomic))
 		result |= MLX5_MKEY_MASK_A;
 
-	if (relaxed_ordering_write)
+	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
 		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
 
-	if (relaxed_ordering_read)
+	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
 		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
 
 	return cpu_to_be64(result);
@@ -116,10 +114,7 @@ int mlx5r_umr_set_umr_ctrl_seg(struct mlx5_ib_dev *dev,
 	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
 		umr->mkey_mask |= get_umr_update_translation_mask();
 	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
-		umr->mkey_mask |= get_umr_update_access_mask(
-			!!MLX5_CAP_GEN(dev->mdev, atomic),
-			!!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr),
-			!!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr));
+		umr->mkey_mask |= get_umr_update_access_mask(dev);
 		umr->mkey_mask |= get_umr_update_pd_mask();
 	}
 	if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
-- 
2.35.1



* [PATCH rdma-next 05/12] RDMA/mlx5: Expose wqe posting helpers outside of wr.c
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
                   ` (3 preceding siblings ...)
  2022-04-12  7:23 ` [PATCH rdma-next 04/12] RDMA/mlx5: Simplify get_umr_update_access_mask() Leon Romanovsky
@ 2022-04-12  7:24 ` Leon Romanovsky
  2022-04-12  7:24 ` [PATCH rdma-next 06/12] RDMA/mlx5: Introduce mlx5_umr_post_send_wait() Leon Romanovsky
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:24 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

From: Aharon Landau <aharonl@nvidia.com>

Split the WQE posting logic into helpers, generalize it, and expose it
for future use in the UMR post send flow.
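
For illustration only (not part of this patch): a caller outside wr.c is
expected to compose the exported helpers roughly as the next patch in
the series does. The function name below is hypothetical:

  static int post_one_umr_wqe_sketch(struct mlx5_ib_qp *qp, __be32 general_id,
  				     const void *src, size_t len)
  {
  	struct mlx5_wqe_ctrl_seg *ctrl;
  	void *seg, *cur_edge;
  	unsigned long flags;
  	unsigned int idx;
  	int size, err;

  	spin_lock_irqsave(&qp->sq.lock, flags);

  	/* Reserve an SQ slot and write the control segment. */
  	err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
  			      general_id, false, false);
  	if (err)
  		goto out;

  	/* Copy pre-built segments into the SQ, handling the SQ edge. */
  	mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, src, len);

  	mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, 0, 0,
  			 MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR);
  	mlx5r_ring_db(qp, 1, ctrl);
  out:
  	spin_unlock_irqrestore(&qp->sq.lock, flags);
  	return err;
  }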

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/wr.c | 172 +++++++++++---------------------
 drivers/infiniband/hw/mlx5/wr.h |  60 +++++++++++
 2 files changed, 121 insertions(+), 111 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index 24457eb5e4b7..7949f83b2cd2 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -26,58 +26,7 @@ static const u32 mlx5_ib_opcode[] = {
 	[MLX5_IB_WR_UMR]			= MLX5_OPCODE_UMR,
 };
 
-/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
- * next nearby edge and get new address translation for current WQE position.
- * @sq - SQ buffer.
- * @seg: Current WQE position (16B aligned).
- * @wqe_sz: Total current WQE size [16B].
- * @cur_edge: Updated current edge.
- */
-static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
-					 u32 wqe_sz, void **cur_edge)
-{
-	u32 idx;
-
-	if (likely(*seg != *cur_edge))
-		return;
-
-	idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
-	*cur_edge = get_sq_edge(sq, idx);
-
-	*seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
-}
-
-/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's
- * pointers. At the end @seg is aligned to 16B regardless the copied size.
- * @sq - SQ buffer.
- * @cur_edge: Updated current edge.
- * @seg: Current WQE position (16B aligned).
- * @wqe_sz: Total current WQE size [16B].
- * @src: Pointer to copy from.
- * @n: Number of bytes to copy.
- */
-static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
-				   void **seg, u32 *wqe_sz, const void *src,
-				   size_t n)
-{
-	while (likely(n)) {
-		size_t leftlen = *cur_edge - *seg;
-		size_t copysz = min_t(size_t, leftlen, n);
-		size_t stride;
-
-		memcpy(*seg, src, copysz);
-
-		n -= copysz;
-		src += copysz;
-		stride = !n ? ALIGN(copysz, 16) : copysz;
-		*seg += stride;
-		*wqe_sz += stride >> 4;
-		handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
-	}
-}
-
-static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq,
-			    struct ib_cq *ib_cq)
+int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
 {
 	struct mlx5_ib_cq *cq;
 	unsigned int cur;
@@ -123,9 +72,9 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
 		eseg->mss = cpu_to_be16(ud_wr->mss);
 		eseg->inline_hdr.sz = cpu_to_be16(left);
 
-		/* memcpy_send_wqe should get a 16B align address. Hence, we
-		 * first copy up to the current edge and then, if needed,
-		 * continue to memcpy_send_wqe.
+		/* mlx5r_memcpy_send_wqe should get a 16B align address. Hence,
+		 * we first copy up to the current edge and then, if needed,
+		 * continue to mlx5r_memcpy_send_wqe.
 		 */
 		copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
 			       left);
@@ -139,8 +88,8 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
 			handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 			left -= copysz;
 			pdata += copysz;
-			memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
-					left);
+			mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size,
+					      pdata, left);
 		}
 
 		return;
@@ -766,8 +715,8 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 
 	if (umr_inline) {
-		memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
-				mr_list_size);
+		mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
+				      mr_list_size);
 		*size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
 	} else {
 		set_reg_data_seg(*seg, mr, pd);
@@ -809,23 +758,22 @@ static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
 	}
 }
 
-static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
-		       struct mlx5_wqe_ctrl_seg **ctrl,
-		       const struct ib_send_wr *wr, unsigned int *idx,
-		       int *size, void **cur_edge, int nreq,
-		       bool send_signaled, bool solicited)
+int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+		    struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
+		    int *size, void **cur_edge, int nreq, __be32 general_id,
+		    bool send_signaled, bool solicited)
 {
-	if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
+	if (unlikely(mlx5r_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
 		return -ENOMEM;
 
 	*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
 	*seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
 	*ctrl = *seg;
 	*(uint32_t *)(*seg + 8) = 0;
-	(*ctrl)->imm = send_ieth(wr);
+	(*ctrl)->general_id = general_id;
 	(*ctrl)->fm_ce_se = qp->sq_signal_bits |
-		(send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
-		(solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
+			    (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+			    (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
 
 	*seg += sizeof(**ctrl);
 	*size = sizeof(**ctrl) / 16;
@@ -839,16 +787,14 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 		     const struct ib_send_wr *wr, unsigned int *idx, int *size,
 		     void **cur_edge, int nreq)
 {
-	return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
-			   wr->send_flags & IB_SEND_SIGNALED,
-			   wr->send_flags & IB_SEND_SOLICITED);
+	return mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
+			       send_ieth(wr), wr->send_flags & IB_SEND_SIGNALED,
+			       wr->send_flags & IB_SEND_SOLICITED);
 }
 
-static void finish_wqe(struct mlx5_ib_qp *qp,
-		       struct mlx5_wqe_ctrl_seg *ctrl,
-		       void *seg, u8 size, void *cur_edge,
-		       unsigned int idx, u64 wr_id, int nreq, u8 fence,
-		       u32 mlx5_opcode)
+void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
+		      void *seg, u8 size, void *cur_edge, unsigned int idx,
+		      u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode)
 {
 	u8 opmod = 0;
 
@@ -912,8 +858,8 @@ static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	/*
 	 * SET_PSV WQEs are not signaled and solicited on error.
 	 */
-	err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
-			  false, true);
+	err = mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
+			      send_ieth(wr), false, true);
 	if (unlikely(err)) {
 		mlx5_ib_warn(dev, "\n");
 		err = -ENOMEM;
@@ -924,8 +870,8 @@ static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 		mlx5_ib_warn(dev, "\n");
 		goto out;
 	}
-	finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
-		   next_fence, MLX5_OPCODE_SET_PSV);
+	mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
+			 nreq, next_fence, MLX5_OPCODE_SET_PSV);
 
 out:
 	return err;
@@ -965,8 +911,8 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
 		if (unlikely(err))
 			goto out;
 
-		finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
-			   nreq, fence, MLX5_OPCODE_UMR);
+		mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx,
+				 wr->wr_id, nreq, fence, MLX5_OPCODE_UMR);
 
 		err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq);
 		if (unlikely(err)) {
@@ -997,8 +943,8 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
 		mlx5_ib_warn(dev, "\n");
 		goto out;
 	}
-	finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
-		   fence, MLX5_OPCODE_UMR);
+	mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
+			 nreq, fence, MLX5_OPCODE_UMR);
 
 	sig_attrs = mr->ibmr.sig_attrs;
 	err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
@@ -1142,6 +1088,32 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	return err;
 }
 
+void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
+		   struct mlx5_wqe_ctrl_seg *ctrl)
+{
+	struct mlx5_bf *bf = &qp->bf;
+
+	qp->sq.head += nreq;
+
+	/* Make sure that descriptors are written before
+	 * updating doorbell record and ringing the doorbell
+	 */
+	wmb();
+
+	qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+
+	/* Make sure doorbell record is visible to the HCA before
+	 * we hit doorbell.
+	 */
+	wmb();
+
+	mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
+	/* Make sure doorbells don't leak out of SQ spinlock
+	 * and reach the HCA out of order.
+	 */
+	bf->offset ^= bf->buf_size;
+}
+
 int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 		      const struct ib_send_wr **bad_wr, bool drain)
 {
@@ -1150,7 +1122,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	struct mlx5_core_dev *mdev = dev->mdev;
 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
 	struct mlx5_wqe_xrc_seg *xrc;
-	struct mlx5_bf *bf;
 	void *cur_edge;
 	int size;
 	unsigned long flags;
@@ -1172,8 +1143,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	if (qp->type == IB_QPT_GSI)
 		return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
 
-	bf = &qp->bf;
-
 	spin_lock_irqsave(&qp->sq.lock, flags);
 
 	for (nreq = 0; wr; nreq++, wr = wr->next) {
@@ -1285,35 +1254,16 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 		}
 
 		qp->next_fence = next_fence;
-		finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
-			   fence, mlx5_ib_opcode[wr->opcode]);
+		mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id,
+				 nreq, fence, mlx5_ib_opcode[wr->opcode]);
 skip_psv:
 		if (0)
 			dump_wqe(qp, idx, size);
 	}
 
 out:
-	if (likely(nreq)) {
-		qp->sq.head += nreq;
-
-		/* Make sure that descriptors are written before
-		 * updating doorbell record and ringing the doorbell
-		 */
-		wmb();
-
-		qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
-
-		/* Make sure doorbell record is visible to the HCA before
-		 * we hit doorbell.
-		 */
-		wmb();
-
-		mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
-		/* Make sure doorbells don't leak out of SQ spinlock
-		 * and reach the HCA out of order.
-		 */
-		bf->offset ^= bf->buf_size;
-	}
+	if (likely(nreq))
+		mlx5r_ring_db(qp, nreq, ctrl);
 
 	spin_unlock_irqrestore(&qp->sq.lock, flags);
 
@@ -1353,7 +1303,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
 
 	for (nreq = 0; wr; nreq++, wr = wr->next) {
-		if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+		if (mlx5r_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
 			err = -ENOMEM;
 			*bad_wr = wr;
 			goto out;
diff --git a/drivers/infiniband/hw/mlx5/wr.h b/drivers/infiniband/hw/mlx5/wr.h
index 4f0057516402..2dc89438000d 100644
--- a/drivers/infiniband/hw/mlx5/wr.h
+++ b/drivers/infiniband/hw/mlx5/wr.h
@@ -41,6 +41,66 @@ static inline void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
 	return fragment_end + MLX5_SEND_WQE_BB;
 }
 
+/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
+ * next nearby edge and get new address translation for current WQE position.
+ * @sq: SQ buffer.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @cur_edge: Updated current edge.
+ */
+static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
+					 u32 wqe_sz, void **cur_edge)
+{
+	u32 idx;
+
+	if (likely(*seg != *cur_edge))
+		return;
+
+	idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
+	*cur_edge = get_sq_edge(sq, idx);
+
+	*seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
+}
+
+/* mlx5r_memcpy_send_wqe - copy data from src to WQE and update the relevant
+ * WQ's pointers. At the end @seg is aligned to 16B regardless the copied size.
+ * @sq: SQ buffer.
+ * @cur_edge: Updated current edge.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @src: Pointer to copy from.
+ * @n: Number of bytes to copy.
+ */
+static inline void mlx5r_memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
+					 void **seg, u32 *wqe_sz,
+					 const void *src, size_t n)
+{
+	while (likely(n)) {
+		size_t leftlen = *cur_edge - *seg;
+		size_t copysz = min_t(size_t, leftlen, n);
+		size_t stride;
+
+		memcpy(*seg, src, copysz);
+
+		n -= copysz;
+		src += copysz;
+		stride = !n ? ALIGN(copysz, 16) : copysz;
+		*seg += stride;
+		*wqe_sz += stride >> 4;
+		handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
+	}
+}
+
+int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq);
+int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+		    struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
+		    int *size, void **cur_edge, int nreq, __be32 general_id,
+		    bool send_signaled, bool solicited);
+void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
+		      void *seg, u8 size, void *cur_edge, unsigned int idx,
+		      u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode);
+void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
+		   struct mlx5_wqe_ctrl_seg *ctrl);
 int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 		      const struct ib_send_wr **bad_wr, bool drain);
 int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
-- 
2.35.1



* [PATCH rdma-next 06/12] RDMA/mlx5: Introduce mlx5_umr_post_send_wait()
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
                   ` (4 preceding siblings ...)
  2022-04-12  7:24 ` [PATCH rdma-next 05/12] RDMA/mlx5: Expose wqe posting helpers outside of wr.c Leon Romanovsky
@ 2022-04-12  7:24 ` Leon Romanovsky
  2022-04-12  7:24 ` [PATCH rdma-next 07/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to revoke MRs Leon Romanovsky
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:24 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

From: Aharon Landau <aharonl@nvidia.com>

Introduce mlx5_umr_post_send_wait(), which uses a UMR-adjusted flow for
posting WQEs. The next patches will gradually move UMR operations to use
this flow. Once that is done, mlx5_ib_post_send_wait() will be removed.

mlx5_umr_post_send_wait() receives already-written WQE segments and only
memcpys them into the SQ. This way, we avoid packing all the data into a
WR just to unpack it into the WQE.
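
As a rough illustration of the intended usage (internal to umr.c; the
function name and mask choice here are hypothetical, later patches add
the real callers):

  static int example_disable_mkey(struct mlx5_ib_dev *dev, u32 mkey)
  {
  	struct mlx5r_umr_wqe wqe = {};

  	/* Write the segments directly instead of packing a mlx5_umr_wr. */
  	wqe.ctrl_seg.mkey_mask = get_umr_disable_mr_mask();
  	wqe.ctrl_seg.flags = MLX5_UMR_INLINE;

  	/* No data segment is needed, so post the shorter WQE. */
  	return mlx5r_umr_post_send_wait(dev, mkey, &wqe, false);
  }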

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/umr.c | 92 ++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/umr.h | 12 +++++
 2 files changed, 104 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 8131501dc052..f17f64cb1925 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -3,6 +3,7 @@
 
 #include "mlx5_ib.h"
 #include "umr.h"
+#include "wr.h"
 
 static __be64 get_umr_enable_mr_mask(void)
 {
@@ -228,3 +229,94 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
 	ib_free_cq(dev->umrc.cq);
 	ib_dealloc_pd(dev->umrc.pd);
 }
+
+static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
+			       struct mlx5r_umr_wqe *wqe, bool with_data)
+{
+	unsigned int wqe_size =
+		with_data ? sizeof(struct mlx5r_umr_wqe) :
+			    sizeof(struct mlx5r_umr_wqe) -
+				    sizeof(struct mlx5_wqe_data_seg);
+	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_ib_qp *qp = to_mqp(ibqp);
+	struct mlx5_wqe_ctrl_seg *ctrl;
+	union {
+		struct ib_cqe *ib_cqe;
+		u64 wr_id;
+	} id;
+	void *cur_edge, *seg;
+	unsigned long flags;
+	unsigned int idx;
+	int size, err;
+
+	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
+		return -EIO;
+
+	spin_lock_irqsave(&qp->sq.lock, flags);
+
+	err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
+			      cpu_to_be32(mkey), false, false);
+	if (WARN_ON(err))
+		goto out;
+
+	qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
+
+	mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);
+
+	id.ib_cqe = cqe;
+	mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
+			 MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR);
+
+	mlx5r_ring_db(qp, 1, ctrl);
+
+out:
+	spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+	return err;
+}
+
+static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct mlx5_ib_umr_context *context =
+		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
+
+	context->status = wc->status;
+	complete(&context->done);
+}
+
+static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
+{
+	context->cqe.done = mlx5r_umr_done;
+	init_completion(&context->done);
+}
+
+static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
+				   struct mlx5r_umr_wqe *wqe, bool with_data)
+{
+	struct umr_common *umrc = &dev->umrc;
+	struct mlx5r_umr_context umr_context;
+	int err;
+
+	err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
+	if (WARN_ON(err))
+		return err;
+
+	mlx5r_umr_init_context(&umr_context);
+
+	down(&umrc->sem);
+	err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
+				  with_data);
+	if (err)
+		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
+	else {
+		wait_for_completion(&umr_context.done);
+		if (umr_context.status != IB_WC_SUCCESS) {
+			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
+				     umr_context.status);
+			err = -EFAULT;
+		}
+	}
+	up(&umrc->sem);
+	return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
index 0fe6cdd633d4..d984213caf60 100644
--- a/drivers/infiniband/hw/mlx5/umr.h
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -79,4 +79,16 @@ int mlx5r_umr_set_umr_ctrl_seg(struct mlx5_ib_dev *dev,
 			       struct mlx5_wqe_umr_ctrl_seg *umr,
 			       const struct ib_send_wr *wr);
 
+struct mlx5r_umr_context {
+	struct ib_cqe cqe;
+	enum ib_wc_status status;
+	struct completion done;
+};
+
+struct mlx5r_umr_wqe {
+	struct mlx5_wqe_umr_ctrl_seg ctrl_seg;
+	struct mlx5_mkey_seg mkey_seg;
+	struct mlx5_wqe_data_seg data_seg;
+};
+
 #endif /* _MLX5_IB_UMR_H */
-- 
2.35.1



* [PATCH rdma-next 07/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to revoke MRs
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
                   ` (5 preceding siblings ...)
  2022-04-12  7:24 ` [PATCH rdma-next 06/12] RDMA/mlx5: Introduce mlx5_umr_post_send_wait() Leon Romanovsky
@ 2022-04-12  7:24 ` Leon Romanovsky
  2022-04-12  7:24 ` [PATCH rdma-next 08/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to rereg pd access Leon Romanovsky
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:24 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

From: Aharon Landau <aharonl@nvidia.com>

Move the revoke_mr logic to umr.c, and use mlx5_umr_post_send_wait()
instead of mlx5_ib_post_send_wait().

In the new implementation, do not zero out the access flags. Before
reusing the MR, we will update it to the required access.
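
For illustration, the intended reuse flow is roughly as follows (a sketch
only; mlx5r_umr_rereg_pd_access() is introduced later in this series, the
surrounding variables are placeholders, and error handling is trimmed):

	/* Fence all DMA on the MR; its access bits are left as they are */
	err = mlx5r_umr_revoke_mr(mr);
	if (err)
		return err;

	/* ... later, before the cached MR is handed out again ... */
	err = mlx5r_umr_rereg_pd_access(mr, new_pd, new_access_flags);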

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mr.c  | 31 +++----------------------------
 drivers/infiniband/hw/mlx5/umr.c | 29 +++++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/umr.h |  2 ++
 3 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 6d87a93e03db..32ad93e69a89 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1629,31 +1629,6 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
 	return ERR_PTR(err);
 }
 
-/**
- * revoke_mr - Fence all DMA on the MR
- * @mr: The MR to fence
- *
- * Upon return the NIC will not be doing any DMA to the pages under the MR,
- * and any DMA in progress will be completed. Failure of this function
- * indicates the HW has failed catastrophically.
- */
-static int revoke_mr(struct mlx5_ib_mr *mr)
-{
-	struct mlx5_umr_wr umrwr = {};
-
-	if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
-		return 0;
-
-	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
-			      MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
-	umrwr.wr.opcode = MLX5_IB_WR_UMR;
-	umrwr.pd = mr_to_mdev(mr)->umrc.pd;
-	umrwr.mkey = mr->mmkey.key;
-	umrwr.ignore_free_state = 1;
-
-	return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr);
-}
-
 /*
  * True if the change in access flags can be done via UMR, only some access
  * flags can be updated.
@@ -1730,7 +1705,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
 	 * with it. This ensure the change is atomic relative to any use of the
 	 * MR.
 	 */
-	err = revoke_mr(mr);
+	err = mlx5r_umr_revoke_mr(mr);
 	if (err)
 		return err;
 
@@ -1808,7 +1783,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		 * Only one active MR can refer to a umem at one time, revoke
 		 * the old MR before assigning the umem to the new one.
 		 */
-		err = revoke_mr(mr);
+		err = mlx5r_umr_revoke_mr(mr);
 		if (err)
 			return ERR_PTR(err);
 		umem = mr->umem;
@@ -1953,7 +1928,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 
 	/* Stop DMA */
 	if (mr->cache_ent) {
-		if (revoke_mr(mr)) {
+		if (mlx5r_umr_revoke_mr(mr)) {
 			spin_lock_irq(&mr->cache_ent->lock);
 			mr->cache_ent->total_mrs--;
 			spin_unlock_irq(&mr->cache_ent->lock);
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index f17f64cb1925..2f14f6ccf9da 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -320,3 +320,32 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
 	up(&umrc->sem);
 	return err;
 }
+
+/**
+ * mlx5r_umr_revoke_mr - Fence all DMA on the MR
+ * @mr: The MR to fence
+ *
+ * Upon return the NIC will not be doing any DMA to the pages under the MR,
+ * and any DMA in progress will be completed. Failure of this function
+ * indicates the HW has failed catastrophically.
+ */
+int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
+{
+	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+	struct mlx5r_umr_wqe wqe = {};
+
+	if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+		return 0;
+
+	wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
+	wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
+	wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
+
+	MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
+	MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
+	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
+	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
+		 mlx5_mkey_variant(mr->mmkey.key));
+
+	return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
+}
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
index d984213caf60..c14072b06ffb 100644
--- a/drivers/infiniband/hw/mlx5/umr.h
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -91,4 +91,6 @@ struct mlx5r_umr_wqe {
 	struct mlx5_wqe_data_seg data_seg;
 };
 
+int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
+
 #endif /* _MLX5_IB_UMR_H */
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH rdma-next 08/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to rereg pd access
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
                   ` (6 preceding siblings ...)
  2022-04-12  7:24 ` [PATCH rdma-next 07/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to revoke MRs Leon Romanovsky
@ 2022-04-12  7:24 ` Leon Romanovsky
  2022-04-12  7:24 ` [PATCH rdma-next 09/12] RDMA/mlx5: Move creation and free of translation tables to umr.c Leon Romanovsky
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:24 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

From: Aharon Landau <aharonl@nvidia.com>

Move rereg_pd_access logic to umr.c, and use mlx5_umr_post_send_wait()
instead of mlx5_ib_post_send_wait().
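
With this, the PD/access-change fast path in mlx5_ib_rereg_user_mr()
reduces to roughly the following (sketch mirroring the hunk below; error
handling trimmed):

	if (can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) {
		err = mlx5r_umr_rereg_pd_access(mr, new_pd, new_access_flags);
		if (err)
			return ERR_PTR(err);
		/* the same ibmr is kept, PD/access updated in place */
		return NULL;
	}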

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mr.c  | 27 ++-------------------
 drivers/infiniband/hw/mlx5/umr.c | 41 ++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/umr.h |  2 ++
 3 files changed, 45 insertions(+), 25 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 32ad93e69a89..50b4ccd38fe2 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1646,30 +1646,6 @@ static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
 				      target_access_flags);
 }
 
-static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
-			       int access_flags)
-{
-	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
-	struct mlx5_umr_wr umrwr = {
-		.wr = {
-			.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
-				      MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS,
-			.opcode = MLX5_IB_WR_UMR,
-		},
-		.mkey = mr->mmkey.key,
-		.pd = pd,
-		.access_flags = access_flags,
-	};
-	int err;
-
-	err = mlx5_ib_post_send_wait(dev, &umrwr);
-	if (err)
-		return err;
-
-	mr->access_flags = access_flags;
-	return 0;
-}
-
 static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
 				  struct ib_umem *new_umem,
 				  int new_access_flags, u64 iova,
@@ -1770,7 +1746,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		/* Fast path for PD/access change */
 		if (can_use_umr_rereg_access(dev, mr->access_flags,
 					     new_access_flags)) {
-			err = umr_rereg_pd_access(mr, new_pd, new_access_flags);
+			err = mlx5r_umr_rereg_pd_access(mr, new_pd,
+							new_access_flags);
 			if (err)
 				return ERR_PTR(err);
 			return NULL;
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 2f14f6ccf9da..716c35258e33 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -349,3 +349,44 @@ int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
 
 	return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
 }
+
+static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
+				       struct mlx5_mkey_seg *seg,
+				       unsigned int access_flags)
+{
+	MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+	MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
+	MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
+	MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
+	MLX5_SET(mkc, seg, lr, 1);
+	MLX5_SET(mkc, seg, relaxed_ordering_write,
+		 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
+	MLX5_SET(mkc, seg, relaxed_ordering_read,
+		 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
+}
+
+int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+			      int access_flags)
+{
+	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+	struct mlx5r_umr_wqe wqe = {};
+	int err;
+
+	wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
+	wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
+	wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
+	wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
+
+	mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
+	MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
+	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
+	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
+		 mlx5_mkey_variant(mr->mmkey.key));
+
+	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
+	if (err)
+		return err;
+
+	mr->access_flags = access_flags;
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
index c14072b06ffb..53816316cb1f 100644
--- a/drivers/infiniband/hw/mlx5/umr.h
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -92,5 +92,7 @@ struct mlx5r_umr_wqe {
 };
 
 int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
+int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+			      int access_flags);
 
 #endif /* _MLX5_IB_UMR_H */
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH rdma-next 09/12] RDMA/mlx5: Move creation and free of translation tables to umr.c
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
                   ` (7 preceding siblings ...)
  2022-04-12  7:24 ` [PATCH rdma-next 08/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to rereg pd access Leon Romanovsky
@ 2022-04-12  7:24 ` Leon Romanovsky
  2022-04-25 15:11   ` Jason Gunthorpe
  2022-04-12  7:24 ` [PATCH rdma-next 10/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to update MR pas Leon Romanovsky
                   ` (3 subsequent siblings)
  12 siblings, 1 reply; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:24 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

From: Aharon Landau <aharonl@nvidia.com>

The only use of the translation tables is to update the mkey translation
by a UMR operation. Move the responsibility of creating and freeing them
to umr.c.
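
The lifecycle of such a table, as consumed by the UMR update helpers later
in this series, is roughly (sketch; dev, ddev, wqe, nents, ent_size and
flags come from the surrounding helper, and error handling is trimmed):

	struct ib_sge sg;
	void *xlt;

	xlt = mlx5r_umr_create_xlt(dev, &sg, nents, ent_size, flags);
	if (!xlt)
		return -ENOMEM;

	/* fill the xlt entries, then hand them to HW */
	dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);

	mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);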

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mr.c  | 103 +----------------------------
 drivers/infiniband/hw/mlx5/umr.c | 109 +++++++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/umr.h |   5 ++
 3 files changed, 117 insertions(+), 100 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 50b4ccd38fe2..e7cc32b46851 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -46,13 +46,6 @@
 #include "mlx5_ib.h"
 #include "umr.h"
 
-/*
- * We can't use an array for xlt_emergency_page because dma_map_single doesn't
- * work on kernel modules memory
- */
-void *xlt_emergency_page;
-static DEFINE_MUTEX(xlt_emergency_page_mutex);
-
 enum {
 	MAX_PENDING_REG_MR = 8,
 };
@@ -966,74 +959,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 	return mr;
 }
 
-#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
-			    MLX5_UMR_MTT_ALIGNMENT)
-#define MLX5_SPARE_UMR_CHUNK 0x10000
-
-/*
- * Allocate a temporary buffer to hold the per-page information to transfer to
- * HW. For efficiency this should be as large as it can be, but buffer
- * allocation failure is not allowed, so try smaller sizes.
- */
-static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
-{
-	const size_t xlt_chunk_align =
-		MLX5_UMR_MTT_ALIGNMENT / ent_size;
-	size_t size;
-	void *res = NULL;
-
-	static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
-
-	/*
-	 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
-	 * allocation can't trigger any kind of reclaim.
-	 */
-	might_sleep();
-
-	gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
-
-	/*
-	 * If the system already has a suitable high order page then just use
-	 * that, but don't try hard to create one. This max is about 1M, so a
-	 * free x86 huge page will satisfy it.
-	 */
-	size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
-		     MLX5_MAX_UMR_CHUNK);
-	*nents = size / ent_size;
-	res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
-				       get_order(size));
-	if (res)
-		return res;
-
-	if (size > MLX5_SPARE_UMR_CHUNK) {
-		size = MLX5_SPARE_UMR_CHUNK;
-		*nents = size / ent_size;
-		res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
-					       get_order(size));
-		if (res)
-			return res;
-	}
-
-	*nents = PAGE_SIZE / ent_size;
-	res = (void *)__get_free_page(gfp_mask);
-	if (res)
-		return res;
-
-	mutex_lock(&xlt_emergency_page_mutex);
-	memset(xlt_emergency_page, 0, PAGE_SIZE);
-	return xlt_emergency_page;
-}
-
-static void mlx5_ib_free_xlt(void *xlt, size_t length)
-{
-	if (xlt == xlt_emergency_page) {
-		mutex_unlock(&xlt_emergency_page_mutex);
-		return;
-	}
-
-	free_pages((unsigned long)xlt, get_order(length));
-}
-
 /*
  * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
  * submission.
@@ -1044,22 +969,9 @@ static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
 				   unsigned int flags)
 {
 	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
-	struct device *ddev = &dev->mdev->pdev->dev;
-	dma_addr_t dma;
 	void *xlt;
 
-	xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
-				flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
-								 GFP_KERNEL);
-	sg->length = nents * ent_size;
-	dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
-	if (dma_mapping_error(ddev, dma)) {
-		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
-		mlx5_ib_free_xlt(xlt, sg->length);
-		return NULL;
-	}
-	sg->addr = dma;
-	sg->lkey = dev->umrc.pd->local_dma_lkey;
+	xlt = mlx5r_umr_create_xlt(dev, sg, nents, ent_size, flags);
 
 	memset(wr, 0, sizeof(*wr));
 	wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
@@ -1078,15 +990,6 @@ static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
 	return xlt;
 }
 
-static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
-				   struct ib_sge *sg)
-{
-	struct device *ddev = &dev->mdev->pdev->dev;
-
-	dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
-	mlx5_ib_free_xlt(xlt, sg->length);
-}
-
 static unsigned int xlt_wr_final_send_flags(unsigned int flags)
 {
 	unsigned int res = 0;
@@ -1175,7 +1078,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 		err = mlx5_ib_post_send_wait(dev, &wr);
 	}
 	sg.length = orig_sg_length;
-	mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
+	mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
 	return err;
 }
 
@@ -1245,7 +1148,7 @@ int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
 
 err:
 	sg.length = orig_sg_length;
-	mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
+	mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
 	return err;
 }
 
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 716c35258e33..ba61e6280cb5 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -5,6 +5,13 @@
 #include "umr.h"
 #include "wr.h"
 
+/*
+ * We can't use an array for xlt_emergency_page because dma_map_single doesn't
+ * work on kernel modules memory
+ */
+void *xlt_emergency_page;
+static DEFINE_MUTEX(xlt_emergency_page_mutex);
+
 static __be64 get_umr_enable_mr_mask(void)
 {
 	u64 result;
@@ -390,3 +397,105 @@ int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
 	mr->access_flags = access_flags;
 	return 0;
 }
+
+#define MLX5_MAX_UMR_CHUNK                                                     \
+	((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_MTT_ALIGNMENT)
+#define MLX5_SPARE_UMR_CHUNK 0x10000
+
+/*
+ * Allocate a temporary buffer to hold the per-page information to transfer to
+ * HW. For efficiency this should be as large as it can be, but buffer
+ * allocation failure is not allowed, so try smaller sizes.
+ */
+static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
+{
+	const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size;
+	size_t size;
+	void *res = NULL;
+
+	static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
+
+	/*
+	 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
+	 * allocation can't trigger any kind of reclaim.
+	 */
+	might_sleep();
+
+	gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
+
+	/*
+	 * If the system already has a suitable high order page then just use
+	 * that, but don't try hard to create one. This max is about 1M, so a
+	 * free x86 huge page will satisfy it.
+	 */
+	size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
+		     MLX5_MAX_UMR_CHUNK);
+	*nents = size / ent_size;
+	res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+				       get_order(size));
+	if (res)
+		return res;
+
+	if (size > MLX5_SPARE_UMR_CHUNK) {
+		size = MLX5_SPARE_UMR_CHUNK;
+		*nents = size / ent_size;
+		res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+					       get_order(size));
+		if (res)
+			return res;
+	}
+
+	*nents = PAGE_SIZE / ent_size;
+	res = (void *)__get_free_page(gfp_mask);
+	if (res)
+		return res;
+
+	mutex_lock(&xlt_emergency_page_mutex);
+	memset(xlt_emergency_page, 0, PAGE_SIZE);
+	return xlt_emergency_page;
+}
+
+void mlx5r_umr_free_xlt(void *xlt, size_t length)
+{
+	if (xlt == xlt_emergency_page) {
+		mutex_unlock(&xlt_emergency_page_mutex);
+		return;
+	}
+
+	free_pages((unsigned long)xlt, get_order(length));
+}
+
+void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
+			     struct ib_sge *sg)
+{
+	struct device *ddev = &dev->mdev->pdev->dev;
+
+	dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
+	mlx5r_umr_free_xlt(xlt, sg->length);
+}
+
+/*
+ * Create an XLT buffer ready for submission.
+ */
+void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
+			  size_t nents, size_t ent_size, unsigned int flags)
+{
+	struct device *ddev = &dev->mdev->pdev->dev;
+	dma_addr_t dma;
+	void *xlt;
+
+	xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
+				 flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
+								  GFP_KERNEL);
+	sg->length = nents * ent_size;
+	dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
+	if (dma_mapping_error(ddev, dma)) {
+		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
+		mlx5r_umr_free_xlt(xlt, sg->length);
+		return NULL;
+	}
+	sg->addr = dma;
+	sg->lkey = dev->umrc.pd->local_dma_lkey;
+
+	return xlt;
+}
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
index 53816316cb1f..2485379fec32 100644
--- a/drivers/infiniband/hw/mlx5/umr.h
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -94,5 +94,10 @@ struct mlx5r_umr_wqe {
 int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
 int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
 			      int access_flags);
+void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
+			   size_t nents, size_t ent_size, unsigned int flags);
+void mlx5r_umr_free_xlt(void *xlt, size_t length);
+void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
+			      struct ib_sge *sg);
 
 #endif /* _MLX5_IB_UMR_H */
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH rdma-next 10/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to update MR pas
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
                   ` (8 preceding siblings ...)
  2022-04-12  7:24 ` [PATCH rdma-next 09/12] RDMA/mlx5: Move creation and free of translation tables to umr.c Leon Romanovsky
@ 2022-04-12  7:24 ` Leon Romanovsky
  2022-04-25 17:43   ` Jason Gunthorpe
  2022-04-12  7:24 ` [PATCH rdma-next 11/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to update xlt Leon Romanovsky
                   ` (2 subsequent siblings)
  12 siblings, 1 reply; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:24 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

From: Aharon Landau <aharonl@nvidia.com>

Move mlx5_ib_update_mr_pas logic to umr.c, and use mlx5_umr_post_send_wait()
instead of mlx5_ib_post_send_wait().
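
Callers keep the same contract; for example, enabling a freshly registered
MR becomes (sketch matching the create_real_mr() hunk below):

	err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
	if (err) {
		mlx5_ib_dereg_mr(&mr->ibmr, NULL);
		return ERR_PTR(err);
	}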

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |   1 -
 drivers/infiniband/hw/mlx5/mr.c      |  76 +------------
 drivers/infiniband/hw/mlx5/odp.c     |   2 +-
 drivers/infiniband/hw/mlx5/umr.c     | 159 +++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/umr.h     |   1 +
 5 files changed, 164 insertions(+), 75 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d77a27503488..ea4d5519df8c 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1289,7 +1289,6 @@ int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
 int mlx5_ib_dealloc_mw(struct ib_mw *mw);
 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 		       int page_shift, int flags);
-int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
 struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 					     int access_flags);
 void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index e7cc32b46851..df79fd5be5f2 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1082,76 +1082,6 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 	return err;
 }
 
-/*
- * Send the DMA list to the HW for a normal MR using UMR.
- * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
- * flag may be used.
- */
-int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
-{
-	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
-	struct device *ddev = &dev->mdev->pdev->dev;
-	struct ib_block_iter biter;
-	struct mlx5_mtt *cur_mtt;
-	struct mlx5_umr_wr wr;
-	size_t orig_sg_length;
-	struct mlx5_mtt *mtt;
-	size_t final_size;
-	struct ib_sge sg;
-	int err = 0;
-
-	if (WARN_ON(mr->umem->is_odp))
-		return -EINVAL;
-
-	mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
-				    ib_umem_num_dma_blocks(mr->umem,
-							   1 << mr->page_shift),
-				    sizeof(*mtt), flags);
-	if (!mtt)
-		return -ENOMEM;
-	orig_sg_length = sg.length;
-
-	cur_mtt = mtt;
-	rdma_for_each_block (mr->umem->sgt_append.sgt.sgl, &biter,
-			     mr->umem->sgt_append.sgt.nents,
-			     BIT(mr->page_shift)) {
-		if (cur_mtt == (void *)mtt + sg.length) {
-			dma_sync_single_for_device(ddev, sg.addr, sg.length,
-						   DMA_TO_DEVICE);
-			err = mlx5_ib_post_send_wait(dev, &wr);
-			if (err)
-				goto err;
-			dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
-						DMA_TO_DEVICE);
-			wr.offset += sg.length;
-			cur_mtt = mtt;
-		}
-
-		cur_mtt->ptag =
-			cpu_to_be64(rdma_block_iter_dma_address(&biter) |
-				    MLX5_IB_MTT_PRESENT);
-
-		if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
-			cur_mtt->ptag = 0;
-
-		cur_mtt++;
-	}
-
-	final_size = (void *)cur_mtt - (void *)mtt;
-	sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
-	memset(cur_mtt, 0, sg.length - final_size);
-	wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
-	wr.xlt_size = sg.length;
-
-	dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
-	err = mlx5_ib_post_send_wait(dev, &wr);
-
-err:
-	sg.length = orig_sg_length;
-	mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
-	return err;
-}
-
 /*
  * If ibmr is NULL it will be allocated by reg_create.
  * Else, the given ibmr will be used.
@@ -1368,7 +1298,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
 		 * configured properly but left disabled. It is safe to go ahead
 		 * and configure it again via UMR while enabling it.
 		 */
-		err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
+		err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
 		if (err) {
 			mlx5_ib_dereg_mr(&mr->ibmr, NULL);
 			return ERR_PTR(err);
@@ -1467,7 +1397,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
 	if (!umem_dmabuf->sgt)
 		return;
 
-	mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
+	mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
 	ib_umem_dmabuf_unmap_pages(umem_dmabuf);
 }
 
@@ -1602,7 +1532,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
 	mr->ibmr.length = new_umem->length;
 	mr->page_shift = order_base_2(page_size);
 	mr->umem = new_umem;
-	err = mlx5_ib_update_mr_pas(mr, upd_flags);
+	err = mlx5r_umr_update_mr_pas(mr, upd_flags);
 	if (err) {
 		/*
 		 * The MR is revoked at this point so there is no issue to free
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 99dc06f589d4..c00e70e74d94 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -715,7 +715,7 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
 		ib_umem_dmabuf_unmap_pages(umem_dmabuf);
 		err = -EINVAL;
 	} else {
-		err = mlx5_ib_update_mr_pas(mr, xlt_flags);
+		err = mlx5r_umr_update_mr_pas(mr, xlt_flags);
 	}
 	dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
 
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index ba61e6280cb5..b56e3569c677 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -499,3 +499,162 @@ void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
 
 	return xlt;
 }
+
+static void
+mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
+				  unsigned int flags, struct ib_sge *sg)
+{
+	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
+		/* fail if free */
+		ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
+	else
+		/* fail if not free */
+		ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
+	ctrl_seg->xlt_octowords =
+		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
+}
+
+static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
+					      struct mlx5_mkey_seg *mkey_seg,
+					      struct mlx5_ib_mr *mr,
+					      unsigned int page_shift)
+{
+	mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
+	MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
+	MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
+	MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
+	MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
+	MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
+	MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
+}
+
+static void
+mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
+				  struct ib_sge *sg)
+{
+	data_seg->byte_count = cpu_to_be32(sg->length);
+	data_seg->lkey = cpu_to_be32(sg->lkey);
+	data_seg->addr = cpu_to_be64(sg->addr);
+}
+
+static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
+				    u64 offset)
+{
+	u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);
+
+	ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
+	ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
+	ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+}
+
+static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
+				       struct mlx5r_umr_wqe *wqe,
+				       struct mlx5_ib_mr *mr, struct ib_sge *sg,
+				       unsigned int flags)
+{
+	bool update_pd_access, update_translation;
+
+	if (flags & MLX5_IB_UPD_XLT_ENABLE)
+		wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();
+
+	update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
+			   flags & MLX5_IB_UPD_XLT_PD ||
+			   flags & MLX5_IB_UPD_XLT_ACCESS;
+
+	if (update_pd_access) {
+		wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
+		wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
+	}
+
+	update_translation =
+		flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
+
+	if (update_translation) {
+		wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
+		if (!mr->ibmr.length)
+			MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
+	}
+
+	wqe->ctrl_seg.xlt_octowords =
+		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
+	wqe->data_seg.byte_count = cpu_to_be32(sg->length);
+}
+
+/*
+ * Send the DMA list to the HW for a normal MR using UMR.
+ * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
+ * flag may be used.
+ */
+int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+{
+	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+	struct device *ddev = &dev->mdev->pdev->dev;
+	struct mlx5r_umr_wqe wqe = {};
+	struct ib_block_iter biter;
+	struct mlx5_mtt *cur_mtt;
+	size_t orig_sg_length;
+	struct mlx5_mtt *mtt;
+	size_t final_size;
+	struct ib_sge sg;
+	u64 offset = 0;
+	int err = 0;
+
+	if (WARN_ON(mr->umem->is_odp))
+		return -EINVAL;
+
+	mtt = mlx5r_umr_create_xlt(
+		dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
+		sizeof(*mtt), flags);
+	if (!mtt)
+		return -ENOMEM;
+
+	orig_sg_length = sg.length;
+
+	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
+	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
+					  mr->page_shift);
+	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
+
+	cur_mtt = mtt;
+	rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter,
+			    mr->umem->sgt_append.sgt.nents,
+			    BIT(mr->page_shift)) {
+		if (cur_mtt == (void *)mtt + sg.length) {
+			dma_sync_single_for_device(ddev, sg.addr, sg.length,
+						   DMA_TO_DEVICE);
+
+			err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
+						       true);
+			if (err)
+				goto err;
+			dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+						DMA_TO_DEVICE);
+			offset += sg.length;
+			mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
+
+			cur_mtt = mtt;
+		}
+
+		cur_mtt->ptag =
+			cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+				    MLX5_IB_MTT_PRESENT);
+
+		if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
+			cur_mtt->ptag = 0;
+
+		cur_mtt++;
+	}
+
+	final_size = (void *)cur_mtt - (void *)mtt;
+	sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
+	memset(cur_mtt, 0, sg.length - final_size);
+	mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
+
+	dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
+	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
+
+err:
+	sg.length = orig_sg_length;
+	mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
+	return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
index 2485379fec32..3bb251f07f4e 100644
--- a/drivers/infiniband/hw/mlx5/umr.h
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -99,5 +99,6 @@ void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
 void mlx5r_umr_free_xlt(void *xlt, size_t length);
 void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
 			      struct ib_sge *sg);
+int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
 
 #endif /* _MLX5_IB_UMR_H */
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH rdma-next 11/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to update xlt
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
                   ` (9 preceding siblings ...)
  2022-04-12  7:24 ` [PATCH rdma-next 10/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to update MR pas Leon Romanovsky
@ 2022-04-12  7:24 ` Leon Romanovsky
  2022-04-12  7:24 ` [PATCH rdma-next 12/12] RDMA/mlx5: Clean UMR QP type flow from mlx5_ib_post_send() Leon Romanovsky
  2022-04-25 18:48 ` [PATCH rdma-next 00/12] Refactor UMR post send logic Jason Gunthorpe
  12 siblings, 0 replies; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:24 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

From: Aharon Landau <aharonl@nvidia.com>

Move mlx5_ib_update_xlt() logic to umr.c, and use mlx5_umr_post_send_wait()
instead of mlx5_ib_post_send_wait().

Since it is the last use of mlx5_ib_post_send_wait(), remove it.
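
ODP callers switch over mechanically; for example (sketches taken from the
odp.c hunks below):

	/* zap a range of an ODP MR without blocking in the notifier */
	mlx5r_umr_update_xlt(mr, blk_start_idx, idx - blk_start_idx, 0,
			     MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC);

	/* enable the KSM table of an implicit ODP MR */
	err = mlx5r_umr_update_xlt(imr, 0, mlx5_imr_ksm_entries,
				   MLX5_KSM_PAGE_SHIFT,
				   MLX5_IB_UPD_XLT_INDIRECT |
				   MLX5_IB_UPD_XLT_ZAP |
				   MLX5_IB_UPD_XLT_ENABLE);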

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |   2 -
 drivers/infiniband/hw/mlx5/mr.c      | 171 ---------------------------
 drivers/infiniband/hw/mlx5/odp.c     |  54 ++++-----
 drivers/infiniband/hw/mlx5/umr.c     |  93 ++++++++++++++-
 drivers/infiniband/hw/mlx5/umr.h     |   7 +-
 5 files changed, 117 insertions(+), 210 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index ea4d5519df8c..10e60c9281b4 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1287,8 +1287,6 @@ int mlx5_ib_advise_mr(struct ib_pd *pd,
 		      struct uverbs_attr_bundle *attrs);
 int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
 int mlx5_ib_dealloc_mw(struct ib_mw *mw);
-int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
-		       int page_shift, int flags);
 struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 					     int access_flags);
 void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index df79fd5be5f2..9bfbd8d199ee 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -122,11 +122,6 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
 
-static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
-{
-	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
-}
-
 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
 	WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
@@ -839,49 +834,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
 	return MLX5_MAX_UMR_SHIFT;
 }
 
-static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct mlx5_ib_umr_context *context =
-		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
-
-	context->status = wc->status;
-	complete(&context->done);
-}
-
-static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
-{
-	context->cqe.done = mlx5_ib_umr_done;
-	context->status = -1;
-	init_completion(&context->done);
-}
-
-static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
-				  struct mlx5_umr_wr *umrwr)
-{
-	struct umr_common *umrc = &dev->umrc;
-	const struct ib_send_wr *bad;
-	int err;
-	struct mlx5_ib_umr_context umr_context;
-
-	mlx5_ib_init_umr_context(&umr_context);
-	umrwr->wr.wr_cqe = &umr_context.cqe;
-
-	down(&umrc->sem);
-	err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
-	if (err) {
-		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
-	} else {
-		wait_for_completion(&umr_context.done);
-		if (umr_context.status != IB_WC_SUCCESS) {
-			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
-				     umr_context.status);
-			err = -EFAULT;
-		}
-	}
-	up(&umrc->sem);
-	return err;
-}
-
 static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
 						      unsigned int order)
 {
@@ -959,129 +911,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 	return mr;
 }
 
-/*
- * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
- * submission.
- */
-static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
-				   struct mlx5_umr_wr *wr, struct ib_sge *sg,
-				   size_t nents, size_t ent_size,
-				   unsigned int flags)
-{
-	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
-	void *xlt;
-
-	xlt = mlx5r_umr_create_xlt(dev, sg, nents, ent_size, flags);
-
-	memset(wr, 0, sizeof(*wr));
-	wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
-	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
-		wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
-	wr->wr.sg_list = sg;
-	wr->wr.num_sge = 1;
-	wr->wr.opcode = MLX5_IB_WR_UMR;
-	wr->pd = mr->ibmr.pd;
-	wr->mkey = mr->mmkey.key;
-	wr->length = mr->ibmr.length;
-	wr->virt_addr = mr->ibmr.iova;
-	wr->access_flags = mr->access_flags;
-	wr->page_shift = mr->page_shift;
-	wr->xlt_size = sg->length;
-	return xlt;
-}
-
-static unsigned int xlt_wr_final_send_flags(unsigned int flags)
-{
-	unsigned int res = 0;
-
-	if (flags & MLX5_IB_UPD_XLT_ENABLE)
-		res |= MLX5_IB_SEND_UMR_ENABLE_MR |
-		       MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
-		       MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
-	if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
-		res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
-	if (flags & MLX5_IB_UPD_XLT_ADDR)
-		res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
-	return res;
-}
-
-int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
-		       int page_shift, int flags)
-{
-	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
-	struct device *ddev = &dev->mdev->pdev->dev;
-	void *xlt;
-	struct mlx5_umr_wr wr;
-	struct ib_sge sg;
-	int err = 0;
-	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
-			       ? sizeof(struct mlx5_klm)
-			       : sizeof(struct mlx5_mtt);
-	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
-	const int page_mask = page_align - 1;
-	size_t pages_mapped = 0;
-	size_t pages_to_map = 0;
-	size_t pages_iter;
-	size_t size_to_map = 0;
-	size_t orig_sg_length;
-
-	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
-	    !umr_can_use_indirect_mkey(dev))
-		return -EPERM;
-
-	if (WARN_ON(!mr->umem->is_odp))
-		return -EINVAL;
-
-	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
-	 * so we need to align the offset and length accordingly
-	 */
-	if (idx & page_mask) {
-		npages += idx & page_mask;
-		idx &= ~page_mask;
-	}
-	pages_to_map = ALIGN(npages, page_align);
-
-	xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
-	if (!xlt)
-		return -ENOMEM;
-	pages_iter = sg.length / desc_size;
-	orig_sg_length = sg.length;
-
-	if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
-		struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
-		size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
-
-		pages_to_map = min_t(size_t, pages_to_map, max_pages);
-	}
-
-	wr.page_shift = page_shift;
-
-	for (pages_mapped = 0;
-	     pages_mapped < pages_to_map && !err;
-	     pages_mapped += pages_iter, idx += pages_iter) {
-		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
-		size_to_map = npages * desc_size;
-		dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
-					DMA_TO_DEVICE);
-		mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
-		dma_sync_single_for_device(ddev, sg.addr, sg.length,
-					   DMA_TO_DEVICE);
-
-		sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
-
-		if (pages_mapped + pages_iter >= pages_to_map)
-			wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
-
-		wr.offset = idx * desc_size;
-		wr.xlt_size = sg.length;
-
-		err = mlx5_ib_post_send_wait(dev, &wr);
-	}
-	sg.length = orig_sg_length;
-	mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
-	return err;
-}
-
 /*
  * If ibmr is NULL it will be allocated by reg_create.
  * Else, the given ibmr will be used.
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index c00e70e74d94..66ac7bdc59f6 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -118,7 +118,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
 	 *
 	 *    xa_store()
 	 *    mutex_lock(umem_mutex)
-	 *     mlx5_ib_update_xlt()
+	 *     mlx5r_umr_update_xlt()
 	 *    mutex_unlock(umem_mutex)
 	 *    destroy lkey
 	 *
@@ -199,9 +199,9 @@ static void free_implicit_child_mr_work(struct work_struct *work)
 	mlx5r_deref_wait_odp_mkey(&mr->mmkey);
 
 	mutex_lock(&odp_imr->umem_mutex);
-	mlx5_ib_update_xlt(mr->parent, ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT,
-			   1, 0,
-			   MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
+	mlx5r_umr_update_xlt(mr->parent,
+			     ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, 1, 0,
+			     MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
 	mutex_unlock(&odp_imr->umem_mutex);
 	mlx5_ib_dereg_mr(&mr->ibmr, NULL);
 
@@ -283,19 +283,19 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
 			u64 umr_offset = idx & umr_block_mask;
 
 			if (in_block && umr_offset == 0) {
-				mlx5_ib_update_xlt(mr, blk_start_idx,
-						   idx - blk_start_idx, 0,
-						   MLX5_IB_UPD_XLT_ZAP |
-						   MLX5_IB_UPD_XLT_ATOMIC);
+				mlx5r_umr_update_xlt(mr, blk_start_idx,
+						     idx - blk_start_idx, 0,
+						     MLX5_IB_UPD_XLT_ZAP |
+						     MLX5_IB_UPD_XLT_ATOMIC);
 				in_block = 0;
 			}
 		}
 	}
 	if (in_block)
-		mlx5_ib_update_xlt(mr, blk_start_idx,
-				   idx - blk_start_idx + 1, 0,
-				   MLX5_IB_UPD_XLT_ZAP |
-				   MLX5_IB_UPD_XLT_ATOMIC);
+		mlx5r_umr_update_xlt(mr, blk_start_idx,
+				     idx - blk_start_idx + 1, 0,
+				     MLX5_IB_UPD_XLT_ZAP |
+				     MLX5_IB_UPD_XLT_ATOMIC);
 
 	mlx5_update_odp_stats(mr, invalidations, invalidations);
 
@@ -442,11 +442,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
 	 */
 	refcount_set(&mr->mmkey.usecount, 2);
 
-	err = mlx5_ib_update_xlt(mr, 0,
-				 MLX5_IMR_MTT_ENTRIES,
-				 PAGE_SHIFT,
-				 MLX5_IB_UPD_XLT_ZAP |
-				 MLX5_IB_UPD_XLT_ENABLE);
+	err = mlx5r_umr_update_xlt(mr, 0,
+				   MLX5_IMR_MTT_ENTRIES,
+				   PAGE_SHIFT,
+				   MLX5_IB_UPD_XLT_ZAP |
+				   MLX5_IB_UPD_XLT_ENABLE);
 	if (err) {
 		ret = ERR_PTR(err);
 		goto out_mr;
@@ -513,12 +513,12 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 	imr->is_odp_implicit = true;
 	xa_init(&imr->implicit_children);
 
-	err = mlx5_ib_update_xlt(imr, 0,
-				 mlx5_imr_ksm_entries,
-				 MLX5_KSM_PAGE_SHIFT,
-				 MLX5_IB_UPD_XLT_INDIRECT |
-				 MLX5_IB_UPD_XLT_ZAP |
-				 MLX5_IB_UPD_XLT_ENABLE);
+	err = mlx5r_umr_update_xlt(imr, 0,
+				   mlx5_imr_ksm_entries,
+				   MLX5_KSM_PAGE_SHIFT,
+				   MLX5_IB_UPD_XLT_INDIRECT |
+				   MLX5_IB_UPD_XLT_ZAP |
+				   MLX5_IB_UPD_XLT_ENABLE);
 	if (err)
 		goto out_mr;
 
@@ -581,7 +581,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
 	 * No need to check whether the MTTs really belong to this MR, since
 	 * ib_umem_odp_map_dma_and_lock already checks this.
 	 */
-	ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
+	ret = mlx5r_umr_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
 	mutex_unlock(&odp->umem_mutex);
 
 	if (ret < 0) {
@@ -679,9 +679,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
 	 * next pagefault handler will see the new information.
 	 */
 	mutex_lock(&odp_imr->umem_mutex);
-	err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0,
-				 MLX5_IB_UPD_XLT_INDIRECT |
-					 MLX5_IB_UPD_XLT_ATOMIC);
+	err = mlx5r_umr_update_xlt(imr, upd_start_idx, upd_len, 0,
+				   MLX5_IB_UPD_XLT_INDIRECT |
+					  MLX5_IB_UPD_XLT_ATOMIC);
 	mutex_unlock(&odp_imr->umem_mutex);
 	if (err) {
 		mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n");
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index b56e3569c677..ad9e31107901 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
 
+#include <rdma/ib_umem_odp.h>
 #include "mlx5_ib.h"
 #include "umr.h"
 #include "wr.h"
@@ -455,7 +456,7 @@ static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
 	return xlt_emergency_page;
 }
 
-void mlx5r_umr_free_xlt(void *xlt, size_t length)
+static void mlx5r_umr_free_xlt(void *xlt, size_t length)
 {
 	if (xlt == xlt_emergency_page) {
 		mutex_unlock(&xlt_emergency_page_mutex);
@@ -465,8 +466,8 @@ void mlx5r_umr_free_xlt(void *xlt, size_t length)
 	free_pages((unsigned long)xlt, get_order(length));
 }
 
-void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
-			     struct ib_sge *sg)
+static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
+				     struct ib_sge *sg)
 {
 	struct device *ddev = &dev->mdev->pdev->dev;
 
@@ -477,8 +478,9 @@ void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
 /*
  * Create an XLT buffer ready for submission.
  */
-void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
-			  size_t nents, size_t ent_size, unsigned int flags)
+static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
+				  size_t nents, size_t ent_size,
+				  unsigned int flags)
 {
 	struct device *ddev = &dev->mdev->pdev->dev;
 	dma_addr_t dma;
@@ -658,3 +660,84 @@ int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
 	mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
 	return err;
 }
+
+static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
+{
+	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
+}
+
+int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
+			 int page_shift, int flags)
+{
+	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
+			       ? sizeof(struct mlx5_klm)
+			       : sizeof(struct mlx5_mtt);
+	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
+	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+	struct device *ddev = &dev->mdev->pdev->dev;
+	const int page_mask = page_align - 1;
+	struct mlx5r_umr_wqe wqe = {};
+	size_t pages_mapped = 0;
+	size_t pages_to_map = 0;
+	size_t size_to_map = 0;
+	size_t orig_sg_length;
+	size_t pages_iter;
+	struct ib_sge sg;
+	int err = 0;
+	void *xlt;
+
+	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
+	    !umr_can_use_indirect_mkey(dev))
+		return -EPERM;
+
+	if (WARN_ON(!mr->umem->is_odp))
+		return -EINVAL;
+
+	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
+	 * so we need to align the offset and length accordingly
+	 */
+	if (idx & page_mask) {
+		npages += idx & page_mask;
+		idx &= ~page_mask;
+	}
+	pages_to_map = ALIGN(npages, page_align);
+
+	xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
+	if (!xlt)
+		return -ENOMEM;
+
+	pages_iter = sg.length / desc_size;
+	orig_sg_length = sg.length;
+
+	if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
+		struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+		size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
+
+		pages_to_map = min_t(size_t, pages_to_map, max_pages);
+	}
+
+	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
+	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
+	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
+
+	for (pages_mapped = 0;
+	     pages_mapped < pages_to_map && !err;
+	     pages_mapped += pages_iter, idx += pages_iter) {
+		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
+		size_to_map = npages * desc_size;
+		dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+					DMA_TO_DEVICE);
+		mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
+		dma_sync_single_for_device(ddev, sg.addr, sg.length,
+					   DMA_TO_DEVICE);
+		sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
+
+		if (pages_mapped + pages_iter >= pages_to_map)
+			mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
+		mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);
+		err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
+	}
+	sg.length = orig_sg_length;
+	mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
+	return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
index 3bb251f07f4e..b42e8c93b385 100644
--- a/drivers/infiniband/hw/mlx5/umr.h
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -94,11 +94,8 @@ struct mlx5r_umr_wqe {
 int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
 int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
 			      int access_flags);
-void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
-			   size_t nents, size_t ent_size, unsigned int flags);
-void mlx5r_umr_free_xlt(void *xlt, size_t length);
-void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
-			      struct ib_sge *sg);
 int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
+int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
+			 int page_shift, int flags);
 
 #endif /* _MLX5_IB_UMR_H */
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH rdma-next 12/12] RDMA/mlx5: Clean UMR QP type flow from mlx5_ib_post_send()
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
                   ` (10 preceding siblings ...)
  2022-04-12  7:24 ` [PATCH rdma-next 11/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to update xlt Leon Romanovsky
@ 2022-04-12  7:24 ` Leon Romanovsky
  2022-04-25 18:48 ` [PATCH rdma-next 00/12] Refactor UMR post send logic Jason Gunthorpe
  12 siblings, 0 replies; 17+ messages in thread
From: Leon Romanovsky @ 2022-04-12  7:24 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

From: Aharon Landau <aharonl@nvidia.com>

No internal UMR operation uses mlx5_ib_post_send() anymore, so remove the
UMR QP type logic from this function.
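
Internal UMR work requests are now built directly by mlx5r_umr_post_send()
in umr.c using the exported WQE helpers, roughly (condensed sketch of that
function, added earlier in this series; variables are its locals):

	err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
			      cpu_to_be32(mkey), false, false);
	if (WARN_ON(err))
		goto out;

	qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
	mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);
	mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
			 MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR);
	mlx5r_ring_db(qp, 1, ctrl);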

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 27 +----------
 drivers/infiniband/hw/mlx5/umr.c     | 43 -----------------
 drivers/infiniband/hw/mlx5/umr.h     |  4 --
 drivers/infiniband/hw/mlx5/wr.c      | 72 ----------------------------
 4 files changed, 1 insertion(+), 145 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 10e60c9281b4..df2b566ad73d 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -291,16 +291,9 @@ struct mlx5_ib_flow_db {
 };
 
 /* Use macros here so that don't have to duplicate
- * enum ib_send_flags and enum ib_qp_type for low-level driver
+ * enum ib_qp_type for low-level driver
  */
 
-#define MLX5_IB_SEND_UMR_ENABLE_MR	       (IB_SEND_RESERVED_START << 0)
-#define MLX5_IB_SEND_UMR_DISABLE_MR	       (IB_SEND_RESERVED_START << 1)
-#define MLX5_IB_SEND_UMR_FAIL_IF_FREE	       (IB_SEND_RESERVED_START << 2)
-#define MLX5_IB_SEND_UMR_UPDATE_XLT	       (IB_SEND_RESERVED_START << 3)
-#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION    (IB_SEND_RESERVED_START << 4)
-#define MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS       IB_SEND_RESERVED_END
-
 #define MLX5_IB_QPT_REG_UMR	IB_QPT_RESERVED1
 /*
  * IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI
@@ -536,24 +529,6 @@ struct mlx5_ib_cq_buf {
 	int			nent;
 };
 
-struct mlx5_umr_wr {
-	struct ib_send_wr		wr;
-	u64				virt_addr;
-	u64				offset;
-	struct ib_pd		       *pd;
-	unsigned int			page_shift;
-	unsigned int			xlt_size;
-	u64				length;
-	int				access_flags;
-	u32				mkey;
-	u8				ignore_free_state:1;
-};
-
-static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr)
-{
-	return container_of(wr, struct mlx5_umr_wr, wr);
-}
-
 enum mlx5_ib_cq_pr_flags {
 	MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD	= 1 << 0,
 	MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS = 1 << 1,
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index ad9e31107901..3a48364c0918 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -94,49 +94,6 @@ static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
 	return 0;
 }
 
-int mlx5r_umr_set_umr_ctrl_seg(struct mlx5_ib_dev *dev,
-			       struct mlx5_wqe_umr_ctrl_seg *umr,
-			       const struct ib_send_wr *wr)
-{
-	const struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
-	memset(umr, 0, sizeof(*umr));
-
-	if (!umrwr->ignore_free_state) {
-		if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
-			 /* fail if free */
-			umr->flags = MLX5_UMR_CHECK_FREE;
-		else
-			/* fail if not free */
-			umr->flags = MLX5_UMR_CHECK_NOT_FREE;
-	}
-
-	umr->xlt_octowords =
-		cpu_to_be16(mlx5r_umr_get_xlt_octo(umrwr->xlt_size));
-	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
-		u64 offset = mlx5r_umr_get_xlt_octo(umrwr->offset);
-
-		umr->xlt_offset = cpu_to_be16(offset & 0xffff);
-		umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
-		umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
-	}
-	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
-		umr->mkey_mask |= get_umr_update_translation_mask();
-	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
-		umr->mkey_mask |= get_umr_update_access_mask(dev);
-		umr->mkey_mask |= get_umr_update_pd_mask();
-	}
-	if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
-		umr->mkey_mask |= get_umr_enable_mr_mask();
-	if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
-		umr->mkey_mask |= get_umr_disable_mr_mask();
-
-	if (!wr->num_sge)
-		umr->flags |= MLX5_UMR_INLINE;
-
-	return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
-}
-
 enum {
 	MAX_UMR_WR = 128,
 };
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
index b42e8c93b385..c9d0021381a2 100644
--- a/drivers/infiniband/hw/mlx5/umr.h
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -75,10 +75,6 @@ static inline u64 mlx5r_umr_get_xlt_octo(u64 bytes)
 	       MLX5_IB_UMR_OCTOWORD;
 }
 
-int mlx5r_umr_set_umr_ctrl_seg(struct mlx5_ib_dev *dev,
-			       struct mlx5_wqe_umr_ctrl_seg *umr,
-			       const struct ib_send_wr *wr);
-
 struct mlx5r_umr_context {
 	struct ib_cqe cqe;
 	enum ib_wc_status status;
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index 7949f83b2cd2..855f3f4fefad 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -214,43 +214,6 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
 	seg->status = MLX5_MKEY_STATUS_FREE;
 }
 
-static void set_reg_mkey_segment(struct mlx5_ib_dev *dev,
-				 struct mlx5_mkey_seg *seg,
-				 const struct ib_send_wr *wr)
-{
-	const struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
-	memset(seg, 0, sizeof(*seg));
-	if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
-		MLX5_SET(mkc, seg, free, 1);
-
-	MLX5_SET(mkc, seg, a,
-		 !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC));
-	MLX5_SET(mkc, seg, rw,
-		 !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE));
-	MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
-	MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
-	MLX5_SET(mkc, seg, lr, 1);
-	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
-		MLX5_SET(mkc, seg, relaxed_ordering_write,
-			 !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
-	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
-		MLX5_SET(mkc, seg, relaxed_ordering_read,
-			 !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
-
-	if (umrwr->pd)
-		MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
-	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
-	    !umrwr->length)
-		MLX5_SET(mkc, seg, length64, 1);
-
-	MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr);
-	MLX5_SET64(mkc, seg, len, umrwr->length);
-	MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift);
-	MLX5_SET(mkc, seg, qpn, 0xffffff);
-	MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey));
-}
-
 static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
 			     struct mlx5_ib_mr *mr,
 			     struct mlx5_ib_pd *pd)
@@ -1059,35 +1022,6 @@ static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
 	}
 }
 
-static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
-			      const struct ib_send_wr *wr,
-			      struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
-			      int *size, void **cur_edge, unsigned int idx)
-{
-	int err = 0;
-
-	if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) {
-		err = -EINVAL;
-		mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode);
-		goto out;
-	}
-
-	qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
-	(*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey);
-	err = mlx5r_umr_set_umr_ctrl_seg(dev, *seg, wr);
-	if (unlikely(err))
-		goto out;
-	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
-	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
-	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-	set_reg_mkey_segment(dev, *seg, wr);
-	*seg += sizeof(struct mlx5_mkey_seg);
-	*size += sizeof(struct mlx5_mkey_seg) / 16;
-	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-out:
-	return err;
-}
-
 void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
 		   struct mlx5_wqe_ctrl_seg *ctrl)
 {
@@ -1220,12 +1154,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 		case IB_QPT_UD:
 			handle_qpt_ud(qp, wr, &seg, &size, &cur_edge);
 			break;
-		case MLX5_IB_QPT_REG_UMR:
-			err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg,
-						       &size, &cur_edge, idx);
-			if (unlikely(err))
-				goto out;
-			break;
 
 		default:
 			break;
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread
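
With the MLX5_IB_QPT_REG_UMR case gone from mlx5_ib_post_send(), UMR work is
no longer expressed as an ib_send_wr at all; mr.c and odp.c build the WQE
segments directly and hand them to the helper in umr.c.  A minimal sketch of
the resulting call shape -- assuming the post-send-wait helper ends up as
mlx5r_umr_post_send_wait(dev, mkey, wqe, with_data) and that mlx5r_umr_wqe is
a container for the ctrl/mkey/data segments; neither is quoted in this
thread, so treat both as assumptions:

/* Sketch only: revoke an MR by setting its "free" bit through the UMR QP. */
static int example_revoke_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5r_umr_wqe wqe = {};

	/* Ask the HW to update only the FREE field of the mkey context. */
	wqe.ctrl_seg.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
	MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));

	/* Post on the internal UMR QP and block until its CQE arrives. */
	return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
}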

* Re: [PATCH rdma-next 09/12] RDMA/mlx5: Move creation and free of translation tables to umr.c
  2022-04-12  7:24 ` [PATCH rdma-next 09/12] RDMA/mlx5: Move creation and free of translation tables to umr.c Leon Romanovsky
@ 2022-04-25 15:11   ` Jason Gunthorpe
  0 siblings, 0 replies; 17+ messages in thread
From: Jason Gunthorpe @ 2022-04-25 15:11 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

On Tue, Apr 12, 2022 at 10:24:04AM +0300, Leon Romanovsky wrote:

> +void mlx5r_umr_free_xlt(void *xlt, size_t length)
> +{

This should be made static in this patch, not in the later one

Jason

^ permalink raw reply	[flat|nested] 17+ messages in thread
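
In other words, the usual convention: a helper with no callers outside its
own file should land as static in the patch that introduces it, and only get
a prototype in umr.h (with the static dropped) in the patch that adds an
external caller.  A minimal illustration of the intended shape, not code
taken from the series:

/* umr.c, patch 09: only umr.c uses it at this point, so keep it file-local. */
static void mlx5r_umr_free_xlt(void *xlt, size_t length)
{
	/* body elided in this sketch */
}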

* Re: [PATCH rdma-next 03/12] RDMA/mlx5: Move mkey ctrl segment logic to umr.c
  2022-04-12  7:23 ` [PATCH rdma-next 03/12] RDMA/mlx5: Move mkey ctrl segment logic to umr.c Leon Romanovsky
@ 2022-04-25 15:22   ` Jason Gunthorpe
  0 siblings, 0 replies; 17+ messages in thread
From: Jason Gunthorpe @ 2022-04-25 15:22 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

On Tue, Apr 12, 2022 at 10:23:58AM +0300, Leon Romanovsky wrote:
> From: Aharon Landau <aharonl@nvidia.com>
> 
> Move set_reg_umr_segment() and its helpers to umr.c.
> 
> Signed-off-by: Aharon Landau <aharonl@nvidia.com>
> Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
>  drivers/infiniband/hw/mlx5/mlx5_ib.h |   3 -
>  drivers/infiniband/hw/mlx5/qp.c      |   1 +
>  drivers/infiniband/hw/mlx5/umr.c     | 129 ++++++++++++++++++++++++
>  drivers/infiniband/hw/mlx5/umr.h     |  13 +++
>  drivers/infiniband/hw/mlx5/wr.c      | 142 +--------------------------
>  5 files changed, 147 insertions(+), 141 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> index 18ba11e4e2e6..d77a27503488 100644
> +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> @@ -311,9 +311,6 @@ struct mlx5_ib_flow_db {
>  #define MLX5_IB_QPT_DCT		IB_QPT_RESERVED4
>  #define MLX5_IB_WR_UMR		IB_WR_RESERVED1
>  
> -#define MLX5_IB_UMR_OCTOWORD	       16
> -#define MLX5_IB_UMR_XLT_ALIGNMENT      64
> -
>  #define MLX5_IB_UPD_XLT_ZAP	      BIT(0)
>  #define MLX5_IB_UPD_XLT_ENABLE	      BIT(1)
>  #define MLX5_IB_UPD_XLT_ATOMIC	      BIT(2)
> diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
> index 3f467557d34e..d2f243d3c4e2 100644
> +++ b/drivers/infiniband/hw/mlx5/qp.c
> @@ -40,6 +40,7 @@
>  #include "ib_rep.h"
>  #include "counters.h"
>  #include "cmd.h"
> +#include "umr.h"
>  #include "qp.h"
>  #include "wr.h"
>  
> diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
> index 46eaf919eb49..d3626a9dc8ab 100644
> +++ b/drivers/infiniband/hw/mlx5/umr.c
> @@ -4,6 +4,135 @@
>  #include "mlx5_ib.h"
>  #include "umr.h"
>  
> +static __be64 get_umr_enable_mr_mask(void)
> +{
> +	u64 result;
> +
> +	result = MLX5_MKEY_MASK_KEY |
> +		 MLX5_MKEY_MASK_FREE;
> +
> +	return cpu_to_be64(result);
> +}
> +
> +static __be64 get_umr_disable_mr_mask(void)
> +{
> +	u64 result;
> +
> +	result = MLX5_MKEY_MASK_FREE;
> +
> +	return cpu_to_be64(result);
> +}
> +
> +static __be64 get_umr_update_translation_mask(void)
> +{
> +	u64 result;
> +
> +	result = MLX5_MKEY_MASK_LEN |
> +		 MLX5_MKEY_MASK_PAGE_SIZE |
> +		 MLX5_MKEY_MASK_START_ADDR;
> +
> +	return cpu_to_be64(result);
> +}

This is pretty ugly. It is fine to copy it for now, but an add-on patch to
remove these function wrappers would be nice:

#define UMR_MR_MASK_DISABLE cpu_to_be64(MLX5_MKEY_MASK_FREE)

At worst (and arguably it can just be open-coded).

Jason

^ permalink raw reply	[flat|nested] 17+ messages in thread
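
For readers following the thread, a sketch of the follow-up being suggested
here: the single-value wrapper functions collapse into constants (or are
simply open-coded where the ctrl segment is built).  The macro names below
are invented for illustration and are not from the driver; the mask values
are the ones in the quoted wrappers:

#define MLX5R_UMR_ENABLE_MR_MASK					\
	cpu_to_be64(MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_FREE)
#define MLX5R_UMR_DISABLE_MR_MASK	cpu_to_be64(MLX5_MKEY_MASK_FREE)
#define MLX5R_UMR_UPDATE_TRANSLATION_MASK				\
	cpu_to_be64(MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_PAGE_SIZE |	\
		    MLX5_MKEY_MASK_START_ADDR)

/* At a call site, replacing e.g. get_umr_disable_mr_mask(): */
static void example_set_disable_mr(struct mlx5_wqe_umr_ctrl_seg *umr)
{
	umr->mkey_mask |= MLX5R_UMR_DISABLE_MR_MASK;
}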

* Re: [PATCH rdma-next 10/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to update MR pas
  2022-04-12  7:24 ` [PATCH rdma-next 10/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to update MR pas Leon Romanovsky
@ 2022-04-25 17:43   ` Jason Gunthorpe
  0 siblings, 0 replies; 17+ messages in thread
From: Jason Gunthorpe @ 2022-04-25 17:43 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: Aharon Landau, linux-rdma, Michael Guralnik

On Tue, Apr 12, 2022 at 10:24:05AM +0300, Leon Romanovsky wrote:

> +/*
> + * Send the DMA list to the HW for a normal MR using UMR.
> + * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
> + * flag may be used.
> + */
> +int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
> +{

I would prefer to see zap split into its own function; it shouldn't
call rdma_for_each_block() at all - the only reason it was structured
this way in the first place was that everything had to be squeezed
into a WR struct.

Then all the places calling

mlx5r_umr_update_xlt(.. MLX5_IB_UPD_XLT_ZAP) 

should call this new function instead as well.

Zapping an ODP mkey and zapping a normal mkey should just be the same
logic.

Also, looking at it, it would be nice if the duplication between
mlx5r_umr_update_xlt() and mlx5r_umr_update_mr_pas() could be removed,
and perhaps reorganized in a way that makes it natural to eliminate
the mlx5_odp_populate_xlt() "callback".

Which would give four entry points:

'umr fill xlt from sgl' (mlx5r_umr_update_mr_pas)
'umr fill xlt from dma_list' (populate_mtt)
'umr fill klm from xarray' (populate_klm)
'umr fill xlt/klm with zero' (zap)

I'd imagine a general construction that consolidates more of this into
mlx5r_umr_create_xlt(), probably having it return a struct and also set
up the initial wqe, so that the entry points above are slimmer.

Maybe in a followup series though

Jason

^ permalink raw reply	[flat|nested] 17+ messages in thread
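
A rough sketch of the interface shape described above; the names and
signatures are invented to illustrate the idea, not taken from what was
eventually merged:

/* One builder allocates the xlt staging buffer and preps the common WQE. */
struct example_umr_xlt_ctx {
	struct mlx5r_umr_wqe wqe;	/* ctrl + mkey + data segments */
	void *xlt;			/* staging buffer for MTT/KLM entries */
	size_t nents;			/* entries the buffer can hold */
};

/*
 * Four thin entry points that differ only in how the staging buffer is
 * filled before each post-send-wait round:
 */
int example_umr_fill_xlt_from_sgl(struct mlx5_ib_mr *mr);      /* update_mr_pas */
int example_umr_fill_xlt_from_dma_list(struct mlx5_ib_mr *mr); /* populate_mtt  */
int example_umr_fill_klm_from_xarray(struct mlx5_ib_mr *imr);  /* populate_klm  */
int example_umr_zap_mkey(struct mlx5_ib_mr *mr);               /* zero-fill/zap */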

* Re: [PATCH rdma-next 00/12] Refactor UMR post send logic
  2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
                   ` (11 preceding siblings ...)
  2022-04-12  7:24 ` [PATCH rdma-next 12/12] RDMA/mlx5: Clean UMR QP type flow from mlx5_ib_post_send() Leon Romanovsky
@ 2022-04-25 18:48 ` Jason Gunthorpe
  12 siblings, 0 replies; 17+ messages in thread
From: Jason Gunthorpe @ 2022-04-25 18:48 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Leon Romanovsky, Aharon Landau, linux-kernel, linux-rdma,
	Michael Guralnik

On Tue, Apr 12, 2022 at 10:23:55AM +0300, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@nvidia.com>
> 
> Hi,
> 
> UMR are special QPs that require slightly different logic than other
> QPs. This series from Aharon refactors the logic around UMR QP to
> separate file and functions to clean the post send flow.
> 
> Thanks
> 
> Aharon Landau (12):
>   RDMA/mlx5: Move init and cleanup of UMR to umr.c
>   RDMA/mlx5: Move umr checks to umr.h
>   RDMA/mlx5: Move mkey ctrl segment logic to umr.c
>   RDMA/mlx5: Simplify get_umr_update_access_mask()
>   RDMA/mlx5: Expose wqe posting helpers outside of wr.c
>   RDMA/mlx5: Introduce mlx5_umr_post_send_wait()
>   RDMA/mlx5: Use mlx5_umr_post_send_wait() to revoke MRs
>   RDMA/mlx5: Use mlx5_umr_post_send_wait() to rereg pd access
>   RDMA/mlx5: Move creation and free of translation tables to umr.c
>   RDMA/mlx5: Use mlx5_umr_post_send_wait() to update MR pas
>   RDMA/mlx5: Use mlx5_umr_post_send_wait() to update xlt
>   RDMA/mlx5: Clean UMR QP type flow from mlx5_ib_post_send()

I moved the static to the earlier patch, but otherwise applied to
for-next, thanks

Jason

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2022-04-25 18:48 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-12  7:23 [PATCH rdma-next 00/12] Refactor UMR post send logic Leon Romanovsky
2022-04-12  7:23 ` [PATCH rdma-next 01/12] RDMA/mlx5: Move init and cleanup of UMR to umr.c Leon Romanovsky
2022-04-12  7:23 ` [PATCH rdma-next 02/12] RDMA/mlx5: Move umr checks to umr.h Leon Romanovsky
2022-04-12  7:23 ` [PATCH rdma-next 03/12] RDMA/mlx5: Move mkey ctrl segment logic to umr.c Leon Romanovsky
2022-04-25 15:22   ` Jason Gunthorpe
2022-04-12  7:23 ` [PATCH rdma-next 04/12] RDMA/mlx5: Simplify get_umr_update_access_mask() Leon Romanovsky
2022-04-12  7:24 ` [PATCH rdma-next 05/12] RDMA/mlx5: Expose wqe posting helpers outside of wr.c Leon Romanovsky
2022-04-12  7:24 ` [PATCH rdma-next 06/12] RDMA/mlx5: Introduce mlx5_umr_post_send_wait() Leon Romanovsky
2022-04-12  7:24 ` [PATCH rdma-next 07/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to revoke MRs Leon Romanovsky
2022-04-12  7:24 ` [PATCH rdma-next 08/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to rereg pd access Leon Romanovsky
2022-04-12  7:24 ` [PATCH rdma-next 09/12] RDMA/mlx5: Move creation and free of translation tables to umr.c Leon Romanovsky
2022-04-25 15:11   ` Jason Gunthorpe
2022-04-12  7:24 ` [PATCH rdma-next 10/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to update MR pas Leon Romanovsky
2022-04-25 17:43   ` Jason Gunthorpe
2022-04-12  7:24 ` [PATCH rdma-next 11/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to update xlt Leon Romanovsky
2022-04-12  7:24 ` [PATCH rdma-next 12/12] RDMA/mlx5: Clean UMR QP type flow from mlx5_ib_post_send() Leon Romanovsky
2022-04-25 18:48 ` [PATCH rdma-next 00/12] Refactor UMR post send logic Jason Gunthorpe
