All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yishai Hadas <yishaih@nvidia.com>
To: <linux-rdma@vger.kernel.org>
Cc: <jgg@nvidia.com>, <yishaih@nvidia.com>, <maorg@nvidia.com>,
	<markzhang@nvidia.com>, <edwards@nvidia.com>
Subject: [PATCH rdma-core 09/27] mlx5: vfio setup basic caps
Date: Tue, 20 Jul 2021 11:16:29 +0300	[thread overview]
Message-ID: <20210720081647.1980-10-yishaih@nvidia.com> (raw)
In-Reply-To: <20210720081647.1980-1-yishaih@nvidia.com>

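Query the current and maximum HCA capabilities and set the basic ones
(general device and RoCE) that are required to initialize the device
properly. On Ethernet ports, also enable RoCE on the NIC vport.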

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 providers/mlx5/mlx5_ifc.h  |  87 ++++++++++++++++++++-
 providers/mlx5/mlx5_vfio.c | 185 ++++++++++++++++++++++++++++++++++++++++++++-
 providers/mlx5/mlx5_vfio.h |  21 +++++
 3 files changed, 290 insertions(+), 3 deletions(-)

diff --git a/providers/mlx5/mlx5_ifc.h b/providers/mlx5/mlx5_ifc.h
index ac741cd..082ac1f 100644
--- a/providers/mlx5/mlx5_ifc.h
+++ b/providers/mlx5/mlx5_ifc.h
@@ -36,6 +36,7 @@
 #define u8 uint8_t
 
 enum mlx5_cap_mode {
+	HCA_CAP_OPMOD_GET_MAX = 0, /* query maximum values; result is cached in caps.hca_max */
 	HCA_CAP_OPMOD_GET_CUR	= 1,
 };
 
@@ -46,6 +47,7 @@ enum {
 	MLX5_CMD_OP_ENABLE_HCA = 0x104,
 	MLX5_CMD_OP_QUERY_PAGES = 0x107,
 	MLX5_CMD_OP_MANAGE_PAGES = 0x108,
+	MLX5_CMD_OP_SET_HCA_CAP = 0x109,
 	MLX5_CMD_OP_QUERY_ISSI = 0x10a,
 	MLX5_CMD_OP_SET_ISSI = 0x10b,
 	MLX5_CMD_OP_CREATE_MKEY = 0x200,
@@ -61,6 +63,7 @@ enum {
 	MLX5_CMD_OP_QUERY_DCT = 0x713,
 	MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752,
 	MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754,
+	MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT = 0x755,
 	MLX5_CMD_OP_QUERY_ROCE_ADDRESS = 0x760,
 	MLX5_CMD_OP_ACCESS_REG = 0x805,
 	MLX5_CMD_OP_QUERY_LAG = 0x842,
@@ -110,6 +113,11 @@ enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 };
 
+enum { /* values compared against the cmd_hca_cap port_type field */
+	MLX5_CAP_PORT_TYPE_IB  = 0x0, /* presumably InfiniBand - TODO confirm */
+	MLX5_CAP_PORT_TYPE_ETH = 0x1, /* RoCE is enabled on the NIC vport for this type */
+};
+
 struct mlx5_ifc_atomic_caps_bits {
 	u8         reserved_at_0[0x40];
 
@@ -140,7 +148,8 @@ struct mlx5_ifc_atomic_caps_bits {
 };
 
 struct mlx5_ifc_roce_cap_bits {
-	u8         reserved_0[0x5];
+	u8         reserved_0[0x4];
+	u8         sw_r_roce_src_udp_port[0x1];
 	u8         fl_rc_qp_when_roce_disabled[0x1];
 	u8         fl_rc_qp_when_roce_enabled[0x1];
 	u8         reserved_at_7[0x17];
@@ -912,7 +921,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         uar_4k[0x1];
 	u8         reserved_at_241[0x9];
 	u8         uar_sz[0x6];
-	u8         reserved_at_250[0x3];
+	u8         reserved_at_250[0x2];
+	u8         umem_uid_0[0x1];
 	u8         log_max_dc_cnak_qps[0x5];
 	u8         log_pg_sz[0x8];
 
@@ -1339,8 +1349,11 @@ struct mlx5_ifc_query_hca_cap_in_bits {
 };
 
 enum mlx5_cap_type {
+	MLX5_CAP_GENERAL = 0, /* read through the cmd_hca_cap layout */
 	MLX5_CAP_ODP = 2,
 	MLX5_CAP_ATOMIC = 3,
+	MLX5_CAP_ROCE, /* implicitly 4 (follows MLX5_CAP_ATOMIC); read through roce_cap */
+	MLX5_CAP_NUM, /* implicitly 5; first dimension of caps.hca_cur / caps.hca_max */
 };
 
 enum {
@@ -4346,4 +4359,74 @@ struct mlx5_ifc_access_register_in_bits {
 	u8         register_data[][0x20];
 };
 
+struct mlx5_ifc_modify_nic_vport_context_out_bits { /* MODIFY_NIC_VPORT_CONTEXT output */
+	u8         status[0x8];
+	u8         reserved_at_8[0x18]; /* name tracks the running offset, so sizes look like bit widths */
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_modify_nic_vport_field_select_bits { /* field_select member of the MODIFY input */
+	u8         reserved_at_0[0x12];
+	u8         affiliation[0x1]; /* one-bit flags; presumably each selects the like-named context field */
+	u8         reserved_at_13[0x1];
+	u8         disable_uc_local_lb[0x1];
+	u8         disable_mc_local_lb[0x1];
+	u8         node_guid[0x1];
+	u8         port_guid[0x1];
+	u8         min_inline[0x1];
+	u8         mtu[0x1];
+	u8         change_event[0x1];
+	u8         promisc[0x1];
+	u8         permanent_address[0x1];
+	u8         addresses_list[0x1];
+	u8         roce_en[0x1]; /* set to 1 when nic_vport_context.roce_en is being written */
+	u8         reserved_at_1f[0x1];
+};
+
+struct mlx5_ifc_modify_nic_vport_context_in_bits { /* MODIFY_NIC_VPORT_CONTEXT input */
+	u8         opcode[0x10]; /* MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT */
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_vport[0x1];
+	u8         reserved_at_41[0xf];
+	u8         vport_number[0x10];
+
+	struct mlx5_ifc_modify_nic_vport_field_select_bits field_select; /* which fields are modified */
+
+	u8         reserved_at_80[0x780];
+
+	struct mlx5_ifc_nic_vport_context_bits nic_vport_context; /* new values, e.g. roce_en */
+};
+
+struct mlx5_ifc_set_hca_cap_out_bits { /* SET_HCA_CAP output */
+	u8         status[0x8];
+	u8         reserved_at_8[0x18]; /* name tracks the running offset, so sizes look like bit widths */
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_set_hca_cap_in_bits { /* SET_HCA_CAP input */
+	u8         opcode[0x10]; /* MLX5_CMD_OP_SET_HCA_CAP */
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10]; /* one of MLX5_SET_HCA_CAP_OP_MOD_* (GENERAL_DEVICE, ROCE) */
+
+	u8         other_function[0x1];
+	u8         reserved_at_41[0xf];
+	u8         function_id[0x10];
+
+	u8         reserved_at_60[0x20];
+
+	union mlx5_ifc_hca_cap_union_bits capability; /* capability values to apply */
+};
+
 #endif /* MLX5_IFC_H */
diff --git a/providers/mlx5/mlx5_vfio.c b/providers/mlx5/mlx5_vfio.c
index 4d12807..bd128c2 100644
--- a/providers/mlx5/mlx5_vfio.c
+++ b/providers/mlx5/mlx5_vfio.c
@@ -1141,6 +1141,177 @@ out:
 	return err;
 }
 
+/*
+ * Query one HCA capability group in the given mode and cache it in ctx->caps.
+ * op_mod is the capability type shifted left by one, with the mode in bit 0.
+ */
+static int mlx5_vfio_get_caps_mode(struct mlx5_vfio_context *ctx,
+				   enum mlx5_cap_type cap_type,
+				   enum mlx5_cap_mode cap_mode)
+{
+	uint16_t cmd_opmod = (cap_type << 1) | (cap_mode & 0x01);
+	uint8_t cmd_in[DEVX_ST_SZ_BYTES(query_hca_cap_in)] = {};
+	int outlen = DEVX_ST_SZ_BYTES(query_hca_cap_out);
+	void *cmd_out, *dst = NULL;
+	int ret;
+
+	cmd_out = calloc(1, outlen);
+	if (!cmd_out) {
+		errno = ENOMEM;
+		return errno;
+	}
+
+	DEVX_SET(query_hca_cap_in, cmd_in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+	DEVX_SET(query_hca_cap_in, cmd_in, op_mod, cmd_opmod);
+	ret = mlx5_vfio_cmd_exec(ctx, cmd_in, sizeof(cmd_in), cmd_out, outlen, 0);
+	if (ret)
+		goto free_out;
+
+	if (cap_mode == HCA_CAP_OPMOD_GET_MAX) {
+		dst = ctx->caps.hca_max[cap_type];
+	} else if (cap_mode == HCA_CAP_OPMOD_GET_CUR) {
+		dst = ctx->caps.hca_cur[cap_type];
+	} else {
+		ret = EINVAL;
+		assert(false);
+	}
+
+	if (dst)
+		memcpy(dst, DEVX_ADDR_OF(query_hca_cap_out, cmd_out, capability),
+		       DEVX_UN_SZ_BYTES(hca_cap_union));
+
+free_out:
+	free(cmd_out);
+	return ret;
+}
+
+enum mlx5_vport_roce_state { /* value written to nic_vport_context.roce_en */
+	MLX5_VPORT_ROCE_DISABLED = 0,
+	MLX5_VPORT_ROCE_ENABLED  = 1, /* used by mlx5_vfio_setup_function() on Ethernet ports */
+};
+
+/* Write @state to nic_vport_context.roce_en via MODIFY_NIC_VPORT_CONTEXT. */
+static int mlx5_vfio_nic_vport_update_roce_state(struct mlx5_vfio_context *ctx,
+						 enum mlx5_vport_roce_state state)
+{
+	uint32_t cmd_out[DEVX_ST_SZ_DW(modify_nic_vport_context_out)] = {};
+	int cmd_in_sz = DEVX_ST_SZ_BYTES(modify_nic_vport_context_in);
+	void *cmd_in = calloc(1, cmd_in_sz);
+	int ret;
+
+	if (!cmd_in) {
+		errno = ENOMEM;
+		return errno;
+	}
+
+	DEVX_SET(modify_nic_vport_context_in, cmd_in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+	/* Select roce_en in field_select and supply its new value */
+	DEVX_SET(modify_nic_vport_context_in, cmd_in, field_select.roce_en, 1);
+	DEVX_SET(modify_nic_vport_context_in, cmd_in, nic_vport_context.roce_en,
+		 state);
+
+	ret = mlx5_vfio_cmd_exec(ctx, cmd_in, cmd_in_sz, cmd_out,
+				 sizeof(cmd_out), 0);
+	free(cmd_in);
+	return ret;
+}
+
+static int mlx5_vfio_get_caps(struct mlx5_vfio_context *ctx, enum mlx5_cap_type cap_type)
+{
+	/* Cache the current values first, then the maximum values. */
+	int err = mlx5_vfio_get_caps_mode(ctx, cap_type, HCA_CAP_OPMOD_GET_CUR);
+
+	if (!err)
+		err = mlx5_vfio_get_caps_mode(ctx, cap_type, HCA_CAP_OPMOD_GET_MAX);
+
+	return err;
+}
+
+/* Enable sw_r_roce_src_udp_port when max caps advertise it and it is still off. */
+static int handle_hca_cap_roce(struct mlx5_vfio_context *ctx, void *set_ctx,
+			       int ctx_size)
+{
+	void *roce_caps = DEVX_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+	uint32_t cmd_out[DEVX_ST_SZ_DW(set_hca_cap_out)] = {};
+	int ret;
+
+	if (!MLX5_VFIO_CAP_GEN(ctx, roce))
+		return 0;
+
+	ret = mlx5_vfio_get_caps(ctx, MLX5_CAP_ROCE);
+	if (ret)
+		return ret;
+
+	/* No command is issued if max caps lack the bit or it is already set */
+	if (!MLX5_VFIO_CAP_ROCE_MAX(ctx, sw_r_roce_src_udp_port) ||
+	    MLX5_VFIO_CAP_ROCE(ctx, sw_r_roce_src_udp_port))
+		return 0;
+
+	DEVX_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+	DEVX_SET(set_hca_cap_in, set_ctx, op_mod, MLX5_SET_HCA_CAP_OP_MOD_ROCE);
+	memcpy(roce_caps, ctx->caps.hca_cur[MLX5_CAP_ROCE], DEVX_ST_SZ_BYTES(roce_cap));
+	DEVX_SET(roce_cap, roce_caps, sw_r_roce_src_udp_port, 1);
+	return mlx5_vfio_cmd_exec(ctx, set_ctx, ctx_size, cmd_out, sizeof(cmd_out), 0);
+}
+
+/* Set the general device caps: start from the current values, override a few. */
+static int handle_hca_cap(struct mlx5_vfio_context *ctx, void *set_ctx, int set_sz)
+{
+	void *gen_caps = DEVX_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+	struct mlx5_vfio_device *dev = to_mvfio_dev(ctx->vctx.context.device);
+	uint32_t cmd_out[DEVX_ST_SZ_DW(set_hca_cap_out)] = {};
+	int page_shift = ilog32(dev->page_size - 1);
+	int ret;
+
+	ret = mlx5_vfio_get_caps(ctx, MLX5_CAP_GENERAL);
+	if (ret)
+		return ret;
+
+	DEVX_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+	DEVX_SET(set_hca_cap_in, set_ctx, op_mod, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+
+	memcpy(gen_caps, ctx->caps.hca_cur[MLX5_CAP_GENERAL],
+	       DEVX_ST_SZ_BYTES(cmd_hca_cap));
+
+	/* Turn the command interface checksum off */
+	DEVX_SET(cmd_hca_cap, gen_caps, cmdif_checksum, 0);
+	/* Page shift minus 12; presumably relative to a 4 KiB page - TODO confirm */
+	DEVX_SET(cmd_hca_cap, gen_caps, log_uar_page_sz, page_shift - 12);
+
+	if (dev->flags & MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN)
+		DEVX_SET(cmd_hca_cap, gen_caps, disable_link_up_by_init_hca, 1);
+
+	/* mkey_by_name is requested only when the max caps advertise it */
+	if (MLX5_VFIO_CAP_GEN_MAX(ctx, mkey_by_name))
+		DEVX_SET(cmd_hca_cap, gen_caps, mkey_by_name, 1);
+
+	return mlx5_vfio_cmd_exec(ctx, set_ctx, set_sz, cmd_out, sizeof(cmd_out), 0);
+}
+
+/* Apply the general caps, then the RoCE caps, reusing one SET_HCA_CAP buffer. */
+static int set_hca_cap(struct mlx5_vfio_context *ctx)
+{
+	int buf_sz = DEVX_ST_SZ_BYTES(set_hca_cap_in);
+	void *cmd_buf = calloc(1, buf_sz);
+	int ret;
+
+	if (!cmd_buf) {
+		errno = ENOMEM;
+		return errno;
+	}
+
+	ret = handle_hca_cap(ctx, cmd_buf, buf_sz);
+	if (!ret) {
+		/* The RoCE command starts from an all-zero buffer */
+		memset(cmd_buf, 0, buf_sz);
+		ret = handle_hca_cap_roce(ctx, cmd_buf, buf_sz);
+	}
+
+	free(cmd_buf);
+	return ret;
+}
+
 static int mlx5_vfio_set_hca_ctrl(struct mlx5_vfio_context *ctx)
 {
 	struct mlx5_reg_host_endianness he_in = {};
@@ -1217,6 +1388,15 @@ static int mlx5_vfio_setup_function(struct mlx5_vfio_context *ctx)
 	if (err)
 		return err;
 
+	err = set_hca_cap(ctx);
+	if (err)
+		return err;
+
+	if (!MLX5_VFIO_CAP_GEN(ctx, umem_uid_0)) {
+		errno = EOPNOTSUPP;
+		return errno;
+	}
+
 	err = mlx5_vfio_satisfy_startup_pages(ctx, 0);
 	if (err)
 		return err;
@@ -1225,7 +1405,10 @@ static int mlx5_vfio_setup_function(struct mlx5_vfio_context *ctx)
 	if (err)
 		return err;
 
-	return 0;
+	if (MLX5_VFIO_CAP_GEN(ctx, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+		err = mlx5_vfio_nic_vport_update_roce_state(ctx, MLX5_VPORT_ROCE_ENABLED);
+
+	return err;
 }
 
 static void mlx5_vfio_uninit_context(struct mlx5_vfio_context *ctx)
diff --git a/providers/mlx5/mlx5_vfio.h b/providers/mlx5/mlx5_vfio.h
index 36b1f40..225c1b9 100644
--- a/providers/mlx5/mlx5_vfio.h
+++ b/providers/mlx5/mlx5_vfio.h
@@ -12,6 +12,7 @@
 
 #include <infiniband/driver.h>
 #include <util/interval_set.h>
+#include "mlx5_ifc.h"
 
 #define FW_INIT_WAIT_MS 2
 #define FW_PRE_INIT_TIMEOUT_MILI 120000
@@ -43,6 +44,22 @@ struct mlx5_vfio_device {
 #error Host endianness not defined
 #endif
 
+/* GET Dev Caps macros: read field 'cap' from the caps cached in a context pointer */
+#define MLX5_VFIO_CAP_GEN(ctx, cap) \
+	DEVX_GET(cmd_hca_cap, (ctx)->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+/* Same as MLX5_VFIO_CAP_GEN but reads through DEVX_GET64 */
+#define MLX5_VFIO_CAP_GEN_64(mdev, cap) \
+	DEVX_GET64(cmd_hca_cap, (mdev)->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+/* General caps, maximum values (hca_max) instead of current ones */
+#define MLX5_VFIO_CAP_GEN_MAX(ctx, cap) \
+	DEVX_GET(cmd_hca_cap, (ctx)->caps.hca_max[MLX5_CAP_GENERAL], cap)
+/* RoCE caps, current values; the pointer argument is parenthesized in all macros */
+#define MLX5_VFIO_CAP_ROCE(ctx, cap) \
+	DEVX_GET(roce_cap, (ctx)->caps.hca_cur[MLX5_CAP_ROCE], cap)
+/* RoCE caps, maximum values */
+#define MLX5_VFIO_CAP_ROCE_MAX(ctx, cap) \
+	DEVX_GET(roce_cap, (ctx)->caps.hca_max[MLX5_CAP_ROCE], cap)
+
 struct mlx5_reg_host_endianness {
 	uint8_t he;
 	uint8_t rsvd[15];
@@ -162,6 +179,10 @@ struct mlx5_vfio_context {
 	size_t bar_map_size;
 	struct mlx5_vfio_cmd cmd;
 	bool have_eq;
+	struct {
+		uint32_t hca_cur[MLX5_CAP_NUM][DEVX_UN_SZ_DW(hca_cap_union)];
+		uint32_t hca_max[MLX5_CAP_NUM][DEVX_UN_SZ_DW(hca_cap_union)];
+	} caps;
 };
 
 static inline struct mlx5_vfio_device *to_mvfio_dev(struct ibv_device *ibdev)
-- 
1.8.3.1


  parent reply	other threads:[~2021-07-20  8:18 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-20  8:16 [PATCH rdma-core 00/27] Introduce mlx5 user space driver over VFIO Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 01/27] Update kernel headers Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 02/27] mlx5: Introduce mlx5dv_get_vfio_device_list() Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 03/27] mlx5: Enable debug functionality for vfio Yishai Hadas
2021-07-20  8:51   ` Leon Romanovsky
2021-07-20  9:27     ` Yishai Hadas
2021-07-20 12:27       ` Leon Romanovsky
2021-07-20 14:57         ` Yishai Hadas
2021-07-21  7:05           ` Gal Pressman
2021-07-21  7:58             ` Yishai Hadas
2021-07-21  8:51               ` Gal Pressman
2021-07-20  8:16 ` [PATCH rdma-core 04/27] util: Add interval_set support Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 05/27] verbs: Enable verbs_open_device() to work over non sysfs devices Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 06/27] mlx5: Setup mlx5 vfio context Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 07/27] mlx5: Add mlx5_vfio_cmd_exec() support Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 08/27] mlx5: vfio setup function support Yishai Hadas
2021-07-20  8:16 ` Yishai Hadas [this message]
2021-07-20  8:16 ` [PATCH rdma-core 10/27] mlx5: Support fast teardown over vfio Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 11/27] mlx5: Enable interrupt command mode " Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 12/27] mlx5: Introduce vfio APIs to process events Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 13/27] mlx5: VFIO poll_health support Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 14/27] mlx5: Implement basic verbs operation for PD and MR over vfio Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 15/27] mlx5: Set DV context ops Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 16/27] mlx5: Support initial DEVX/DV APIs over vfio Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 17/27] mlx5: Implement mlx5dv devx_obj " Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 18/27] pyverbs: Support DevX UMEM registration Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 19/27] pyverbs/mlx5: Support EQN querying Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 20/27] pyverbs/mlx5: Support more DevX objects Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 21/27] pyverbs: Add auxiliary memory functions Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 22/27] pyverbs/mlx5: Add support to extract mlx5dv objects Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 23/27] pyverbs/mlx5: Wrap mlx5_cqe64 struct and add enums Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 24/27] tests: Add MAC address to the tests' args Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 25/27] tests: Add mlx5 DevX data path test Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 26/27] pyverbs/mlx5: Support mlx5 devices over VFIO Yishai Hadas
2021-07-20  8:16 ` [PATCH rdma-core 27/27] tests: Add a test for mlx5 " Yishai Hadas
2021-08-01  8:00 ` [PATCH rdma-core 00/27] Introduce mlx5 user space driver " Yishai Hadas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210720081647.1980-10-yishaih@nvidia.com \
    --to=yishaih@nvidia.com \
    --cc=edwards@nvidia.com \
    --cc=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=maorg@nvidia.com \
    --cc=markzhang@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.