From: Yishai Hadas <yishaih@nvidia.com>
To: <linux-rdma@vger.kernel.org>
Cc: <jgg@nvidia.com>, <yishaih@nvidia.com>, <maorg@nvidia.com>,
	<markzhang@nvidia.com>, <edwards@nvidia.com>
Subject: [PATCH rdma-core 16/27] mlx5: Support initial DEVX/DV APIs over vfio
Date: Tue, 20 Jul 2021 11:16:36 +0300
Message-ID: <20210720081647.1980-17-yishaih@nvidia.com>
In-Reply-To: <20210720081647.1980-1-yishaih@nvidia.com>

Support the initial DEVX/DV APIs over vfio: UMEM registration, UAR allocation and EQN querying.
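
For illustration only (this is not part of the patch), below is a minimal
sketch of how an application could exercise these APIs on a vfio-opened
mlx5 context. The devx_dv_example() helper, the page-sized buffer and the
assumption that the context was obtained via mlx5dv_get_vfio_device_list()
(introduced earlier in this series) are illustrative:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>

/* 'ctx' is assumed to be an ibv_context opened on an mlx5 vfio device */
static int devx_dv_example(struct ibv_context *ctx)
{
	size_t page_size = sysconf(_SC_PAGESIZE);
	struct mlx5dv_devx_umem *umem;
	struct mlx5dv_devx_uar *uar;
	uint32_t eqn;
	void *buf;
	int ret;

	/* Only the non-cacheable UAR type is supported over vfio */
	uar = mlx5dv_devx_alloc_uar(ctx, MLX5DV_UAR_ALLOC_TYPE_NC);
	if (!uar)
		return errno;

	/* Only vector 0 is valid for now; the singleton async EQ is returned */
	ret = mlx5dv_devx_query_eqn(ctx, 0, &eqn);
	if (ret)
		goto free_uar;

	if (posix_memalign(&buf, page_size, page_size)) {
		ret = ENOMEM;
		goto free_uar;
	}

	/* REMOTE_WRITE also requires LOCAL_WRITE with this provider */
	umem = mlx5dv_devx_umem_reg(ctx, buf, page_size,
				    IBV_ACCESS_LOCAL_WRITE |
				    IBV_ACCESS_REMOTE_WRITE);
	if (!umem) {
		ret = errno;
		goto free_buf;
	}

	printf("uar page_id 0x%x eqn 0x%x umem_id 0x%x\n",
	       uar->page_id, eqn, umem->umem_id);

	ret = mlx5dv_devx_umem_dereg(umem);
free_buf:
	free(buf);
free_uar:
	mlx5dv_devx_free_uar(uar);
	return ret;
}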

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 providers/mlx5/mlx5_ifc.h  |  70 ++++++++++++++
 providers/mlx5/mlx5_vfio.c | 228 ++++++++++++++++++++++++++++++++++++++++++++-
 providers/mlx5/mlx5_vfio.h |  10 ++
 3 files changed, 307 insertions(+), 1 deletion(-)

diff --git a/providers/mlx5/mlx5_ifc.h b/providers/mlx5/mlx5_ifc.h
index 1cbe846..1bd7466 100644
--- a/providers/mlx5/mlx5_ifc.h
+++ b/providers/mlx5/mlx5_ifc.h
@@ -88,6 +88,8 @@ enum {
 	MLX5_CMD_OP_CREATE_GENERAL_OBJECT = 0xa00,
 	MLX5_CMD_OP_MODIFY_GENERAL_OBJECT = 0xa01,
 	MLX5_CMD_OP_QUERY_GENERAL_OBJECT = 0xa02,
+	MLX5_CMD_OP_CREATE_UMEM = 0xa08,
+	MLX5_CMD_OP_DESTROY_UMEM = 0xa0a,
 	MLX5_CMD_OP_SYNC_STEERING = 0xb00,
 };
 
@@ -4656,4 +4658,72 @@ struct mlx5_ifc_dealloc_pd_in_bits {
 	u8         reserved_at_60[0x20];
 };
 
+struct mlx5_ifc_mtt_bits {
+	u8         ptag_63_32[0x20];
+
+	u8         ptag_31_8[0x18];
+	u8         reserved_at_38[0x6];
+	u8         wr_en[0x1];
+	u8         rd_en[0x1];
+};
+
+struct mlx5_ifc_umem_bits {
+	u8         reserved_at_0[0x80];
+
+	u8         reserved_at_80[0x1b];
+	u8         log_page_size[0x5];
+
+	u8         page_offset[0x20];
+
+	u8         num_of_mtt[0x40];
+
+	struct mlx5_ifc_mtt_bits  mtt[];
+};
+
+struct mlx5_ifc_create_umem_in_bits {
+	u8         opcode[0x10];
+	u8         uid[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x40];
+
+	struct mlx5_ifc_umem_bits  umem;
+};
+
+struct mlx5_ifc_create_umem_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x8];
+	u8         umem_id[0x18];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_destroy_umem_in_bits {
+	u8        opcode[0x10];
+	u8        uid[0x10];
+
+	u8        reserved_at_20[0x10];
+	u8        op_mod[0x10];
+
+	u8        reserved_at_40[0x8];
+	u8        umem_id[0x18];
+
+	u8        reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_destroy_umem_out_bits {
+	u8        status[0x8];
+	u8        reserved_at_8[0x18];
+
+	u8        syndrome[0x20];
+
+	u8        reserved_at_40[0x40];
+};
+
 #endif /* MLX5_IFC_H */
diff --git a/providers/mlx5/mlx5_vfio.c b/providers/mlx5/mlx5_vfio.c
index 23c6eeb..5e55697 100644
--- a/providers/mlx5/mlx5_vfio.c
+++ b/providers/mlx5/mlx5_vfio.c
@@ -37,6 +37,8 @@ enum {
 	MLX5_VFIO_SUPP_MR_ACCESS_FLAGS = IBV_ACCESS_LOCAL_WRITE |
 		IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ |
 		IBV_ACCESS_REMOTE_ATOMIC | IBV_ACCESS_RELAXED_ORDERING,
+	MLX5_VFIO_SUPP_UMEM_ACCESS_FLAGS = IBV_ACCESS_LOCAL_WRITE |
+		IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ,
 };
 
 static int mlx5_vfio_give_pages(struct mlx5_vfio_context *ctx, uint16_t func_id,
@@ -173,7 +175,6 @@ static void mlx5_vfio_free_page(struct mlx5_vfio_context *ctx, uint64_t iova)
 		bitmap_set_bit(page_block->free_pages, pg);
 		if (bitmap_full(page_block->free_pages, MLX5_VFIO_BLOCK_NUM_PAGES))
 			mlx5_vfio_free_block(ctx, page_block);
-
 		goto end;
 	}
 
@@ -2467,6 +2468,220 @@ vfio_devx_obj_create(struct ibv_context *context, const void *in,
 	return NULL;
 }
 
+static int vfio_devx_query_eqn(struct ibv_context *ibctx, uint32_t vector,
+			       uint32_t *eqn)
+{
+	struct mlx5_vfio_context *ctx = to_mvfio_ctx(ibctx);
+
+	if (vector > ibctx->num_comp_vectors - 1)
+		return EINVAL;
+
+	/* For now use the singleton EQN created for async events */
+	*eqn = ctx->async_eq.eqn;
+	return 0;
+}
+
+static struct mlx5dv_devx_uar *
+vfio_devx_alloc_uar(struct ibv_context *ibctx, uint32_t flags)
+{
+	struct mlx5_vfio_context *ctx = to_mvfio_ctx(ibctx);
+	struct mlx5_devx_uar *uar;
+
+	if (flags != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC) {
+		errno = EOPNOTSUPP;
+		return NULL;
+	}
+
+	uar = calloc(1, sizeof(*uar));
+	if (!uar) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	uar->dv_devx_uar.page_id = ctx->eqs_uar.uarn;
+	uar->dv_devx_uar.base_addr = (void *)ctx->eqs_uar.iova;
+	uar->dv_devx_uar.reg_addr = uar->dv_devx_uar.base_addr + MLX5_BF_OFFSET;
+	uar->context = ibctx;
+
+	return &uar->dv_devx_uar;
+}
+
+static void vfio_devx_free_uar(struct mlx5dv_devx_uar *dv_devx_uar)
+{
+	free(dv_devx_uar);
+}
+
+static struct mlx5dv_devx_umem *
+_vfio_devx_umem_reg(struct ibv_context *context,
+		    void *addr, size_t size, uint32_t access,
+		    uint64_t pgsz_bitmap)
+{
+	struct mlx5_vfio_context *ctx = to_mvfio_ctx(context);
+	uint32_t out[DEVX_ST_SZ_DW(create_umem_out)] = {};
+	struct mlx5_vfio_devx_umem *vfio_umem;
+	int iova_page_shift;
+	uint64_t iova_size;
+	int ret;
+	void *in;
+	uint32_t inlen;
+	__be64 *mtt;
+	void *umem;
+	bool writeable;
+	void *aligned_va;
+	int num_pas;
+
+	if (!check_comp_mask(access, MLX5_VFIO_SUPP_UMEM_ACCESS_FLAGS)) {
+		errno = EOPNOTSUPP;
+		return NULL;
+	}
+
+	if ((access & IBV_ACCESS_REMOTE_WRITE) &&
+	    !(access & IBV_ACCESS_LOCAL_WRITE)) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	/* Page size that encloses the start and end of the umem range */
+	iova_size = max(roundup_pow_of_two(size + ((uint64_t) addr & (ctx->iova_min_page_size - 1))),
+			ctx->iova_min_page_size);
+
+	if (!(iova_size & pgsz_bitmap)) {
+		/* pgsz_bitmap must include the chosen iova page size */
+		errno = EOPNOTSUPP;
+		return NULL;
+	}
+
+	writeable = access &
+		(IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
+
+	vfio_umem = calloc(1, sizeof(*vfio_umem));
+	if (!vfio_umem) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	vfio_umem->iova_size = iova_size;
+	if (ibv_dontfork_range(addr, size))
+		goto err;
+
+	ret = iset_alloc_range(ctx->iova_alloc, vfio_umem->iova_size, &vfio_umem->iova);
+	if (ret)
+		goto err_alloc;
+
+	/* The registration's arguments have to reflect real VA presently mapped into the process */
+	aligned_va = (void *) ((unsigned long) addr & ~(ctx->iova_min_page_size - 1));
+	vfio_umem->iova_reg_size = align((addr + size) - aligned_va, ctx->iova_min_page_size);
+	ret = mlx5_vfio_register_mem(ctx, aligned_va, vfio_umem->iova, vfio_umem->iova_reg_size);
+	if (ret)
+		goto err_reg;
+
+	iova_page_shift = ilog32(vfio_umem->iova_size - 1);
+	num_pas = 1;
+	if (iova_page_shift > MLX5_MAX_PAGE_SHIFT) {
+		iova_page_shift = MLX5_MAX_PAGE_SHIFT;
+		num_pas = DIV_ROUND_UP(vfio_umem->iova_size, (1ULL << iova_page_shift));
+	}
+
+	inlen = DEVX_ST_SZ_BYTES(create_umem_in) + DEVX_ST_SZ_BYTES(mtt) * num_pas;
+
+	in = calloc(1, inlen);
+	if (!in) {
+		errno = ENOMEM;
+		goto err_in;
+	}
+
+	umem = DEVX_ADDR_OF(create_umem_in, in, umem);
+	mtt = (__be64 *)DEVX_ADDR_OF(umem, umem, mtt);
+
+	DEVX_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
+	DEVX_SET64(umem, umem, num_of_mtt, num_pas);
+	DEVX_SET(umem, umem, log_page_size, iova_page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+	DEVX_SET(umem, umem, page_offset, addr - aligned_va);
+
+	mlx5_vfio_populate_pas(vfio_umem->iova, num_pas, (1ULL << iova_page_shift), mtt,
+			       (writeable ? MLX5_MTT_WRITE : 0) | MLX5_MTT_READ);
+
+	ret = mlx5_vfio_cmd_exec(ctx, in, inlen, out, sizeof(out), 0);
+	if (ret)
+		goto err_exec;
+
+	free(in);
+
+	vfio_umem->dv_devx_umem.umem_id = DEVX_GET(create_umem_out, out, umem_id);
+	vfio_umem->context = context;
+	vfio_umem->addr = addr;
+	vfio_umem->size = size;
+	return &vfio_umem->dv_devx_umem;
+
+err_exec:
+	free(in);
+err_in:
+	mlx5_vfio_unregister_mem(ctx, vfio_umem->iova, vfio_umem->iova_reg_size);
+err_reg:
+	iset_insert_range(ctx->iova_alloc, vfio_umem->iova, vfio_umem->iova_size);
+err_alloc:
+	ibv_dofork_range(addr, size);
+err:
+	free(vfio_umem);
+	return NULL;
+}
+
+static struct mlx5dv_devx_umem *
+vfio_devx_umem_reg(struct ibv_context *context,
+		   void *addr, size_t size, uint32_t access)
+{
+	return _vfio_devx_umem_reg(context, addr, size, access, UINT64_MAX);
+}
+
+static struct mlx5dv_devx_umem *
+vfio_devx_umem_reg_ex(struct ibv_context *ctx, struct mlx5dv_devx_umem_in *in)
+{
+	if (!check_comp_mask(in->comp_mask, 0)) {
+		errno = EOPNOTSUPP;
+		return NULL;
+	}
+
+	return _vfio_devx_umem_reg(ctx, in->addr, in->size, in->access, in->pgsz_bitmap);
+}
+
+static int vfio_devx_umem_dereg(struct mlx5dv_devx_umem *dv_devx_umem)
+{
+	struct mlx5_vfio_devx_umem *vfio_umem =
+		container_of(dv_devx_umem, struct mlx5_vfio_devx_umem,
+			     dv_devx_umem);
+	struct mlx5_vfio_context *ctx = to_mvfio_ctx(vfio_umem->context);
+	uint32_t in[DEVX_ST_SZ_DW(destroy_umem_in)] = {};
+	uint32_t out[DEVX_ST_SZ_DW(destroy_umem_out)] = {};
+	int ret;
+
+	DEVX_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
+	DEVX_SET(destroy_umem_in, in, umem_id, dv_devx_umem->umem_id);
+
+	ret = mlx5_vfio_cmd_exec(ctx, in, sizeof(in), out, sizeof(out), 0);
+	if (ret)
+		return ret;
+
+	mlx5_vfio_unregister_mem(ctx, vfio_umem->iova, vfio_umem->iova_reg_size);
+	iset_insert_range(ctx->iova_alloc, vfio_umem->iova, vfio_umem->iova_size);
+	ibv_dofork_range(vfio_umem->addr, vfio_umem->size);
+	free(vfio_umem);
+	return 0;
+}
+
+static int vfio_init_obj(struct mlx5dv_obj *obj, uint64_t obj_type)
+{
+	struct ibv_pd *pd_in = obj->pd.in;
+	struct mlx5dv_pd *pd_out = obj->pd.out;
+	struct mlx5_pd *mpd = to_mpd(pd_in);
+
+	if (obj_type != MLX5DV_OBJ_PD)
+		return EOPNOTSUPP;
+
+	pd_out->comp_mask = 0;
+	pd_out->pdn = mpd->pdn;
+	return 0;
+}
+
 static int vfio_devx_obj_query(struct mlx5dv_devx_obj *obj, const void *in,
 				size_t inlen, void *out, size_t outlen)
 {
@@ -2476,6 +2691,13 @@ static int vfio_devx_obj_query(struct mlx5dv_devx_obj *obj, const void *in,
 static struct mlx5_dv_context_ops mlx5_vfio_dv_ctx_ops = {
 	.devx_obj_create = vfio_devx_obj_create,
 	.devx_obj_query = vfio_devx_obj_query,
+	.devx_query_eqn = vfio_devx_query_eqn,
+	.devx_alloc_uar = vfio_devx_alloc_uar,
+	.devx_free_uar = vfio_devx_free_uar,
+	.devx_umem_reg = vfio_devx_umem_reg,
+	.devx_umem_reg_ex = vfio_devx_umem_reg_ex,
+	.devx_umem_dereg = vfio_devx_umem_dereg,
+	.init_obj = vfio_init_obj,
 };
 
 static void mlx5_vfio_uninit_context(struct mlx5_vfio_context *ctx)
@@ -2544,6 +2766,10 @@ mlx5_vfio_alloc_context(struct ibv_device *ibdev,
 
 	verbs_set_ops(&mctx->vctx, &mlx5_vfio_common_ops);
 	mctx->dv_ctx_ops = &mlx5_vfio_dv_ctx_ops;
+
+	/* For now only a singleton EQ is supported */
+	mctx->vctx.context.num_comp_vectors = 1;
+
 	return &mctx->vctx;
 
 func_teardown:
diff --git a/providers/mlx5/mlx5_vfio.h b/providers/mlx5/mlx5_vfio.h
index 79b8033..766c48c 100644
--- a/providers/mlx5/mlx5_vfio.h
+++ b/providers/mlx5/mlx5_vfio.h
@@ -47,6 +47,16 @@ struct mlx5_vfio_mr {
 	uint64_t iova_reg_size;
 };
 
+struct mlx5_vfio_devx_umem {
+	struct mlx5dv_devx_umem dv_devx_umem;
+	struct ibv_context *context;
+	void *addr;
+	size_t size;
+	uint64_t iova;
+	uint64_t iova_size;
+	uint64_t iova_reg_size;
+};
+
 struct mlx5_vfio_device {
 	struct verbs_device vdev;
 	char *pci_name;
-- 
1.8.3.1

