From: Yishai Hadas <yishaih@nvidia.com>
To: <linux-rdma@vger.kernel.org>
Cc: <jgg@nvidia.com>, <yishaih@nvidia.com>, <maorg@nvidia.com>,
<markzhang@nvidia.com>, <edwards@nvidia.com>
Subject: [PATCH rdma-core 10/27] mlx5: Support fast teardown over vfio
Date: Tue, 20 Jul 2021 11:16:30 +0300 [thread overview]
Message-ID: <20210720081647.1980-11-yishaih@nvidia.com> (raw)
In-Reply-To: <20210720081647.1980-1-yishaih@nvidia.com>
From: Mark Zhang <markzhang@nvidia.com>
Add vfio fast teardown support; if it fails, fall back to the regular teardown.
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
providers/mlx5/mlx5_ifc.h | 5 +++
providers/mlx5/mlx5_vfio.c | 76 +++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 80 insertions(+), 1 deletion(-)
diff --git a/providers/mlx5/mlx5_ifc.h b/providers/mlx5/mlx5_ifc.h
index 082ac1f..4b7a4c2 100644
--- a/providers/mlx5/mlx5_ifc.h
+++ b/providers/mlx5/mlx5_ifc.h
@@ -4286,6 +4286,10 @@ struct mlx5_ifc_manage_pages_in_bits {
u8 pas[][0x40];
};
+enum {
+ MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL = 0x1,
+};
+
struct mlx5_ifc_teardown_hca_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -4299,6 +4303,7 @@ struct mlx5_ifc_teardown_hca_out_bits {
enum {
MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE = 0x0,
+ MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN = 0x2,
};
struct mlx5_ifc_teardown_hca_in_bits {
diff --git a/providers/mlx5/mlx5_vfio.c b/providers/mlx5/mlx5_vfio.c
index bd128c2..97d3ce6 100644
--- a/providers/mlx5/mlx5_vfio.c
+++ b/providers/mlx5/mlx5_vfio.c
@@ -1354,7 +1354,7 @@ static int wait_fw_init(struct mlx5_init_seg *init_seg, uint32_t max_wait_mili)
return 0;
}
-static int mlx5_vfio_teardown_hca(struct mlx5_vfio_context *ctx)
+static int mlx5_vfio_teardown_hca_regular(struct mlx5_vfio_context *ctx)
{
uint32_t in[DEVX_ST_SZ_DW(teardown_hca_in)] = {};
uint32_t out[DEVX_ST_SZ_DW(teardown_hca_out)] = {};
@@ -1364,6 +1364,80 @@ static int mlx5_vfio_teardown_hca(struct mlx5_vfio_context *ctx)
return mlx5_vfio_cmd_exec(ctx, in, sizeof(in), out, sizeof(out), 0);
}
+enum mlx5_cmd_addr_l_sz_offset {
+ MLX5_NIC_IFC_OFFSET = 8,
+};
+
+enum {
+ MLX5_NIC_IFC_DISABLED = 1,
+};
+
+static uint8_t mlx5_vfio_get_nic_state(struct mlx5_vfio_context *ctx)
+{
+ return (be32toh(mmio_read32_be(&ctx->bar_map->cmdq_addr_l_sz)) >> 8) & 7;
+}
+
+static void mlx5_vfio_set_nic_state(struct mlx5_vfio_context *ctx, uint8_t state)
+{
+ uint32_t cur_cmdq_addr_l_sz;
+
+ cur_cmdq_addr_l_sz = be32toh(mmio_read32_be(&ctx->bar_map->cmdq_addr_l_sz));
+ mmio_write32_be(&ctx->bar_map->cmdq_addr_l_sz,
+ htobe32((cur_cmdq_addr_l_sz & 0xFFFFF000) |
+ state << MLX5_NIC_IFC_OFFSET));
+}
+
+#define MLX5_FAST_TEARDOWN_WAIT_MS 3000
+#define MLX5_FAST_TEARDOWN_WAIT_ONCE_MS 1
+static int mlx5_vfio_teardown_hca_fast(struct mlx5_vfio_context *ctx)
+{
+ uint32_t out[DEVX_ST_SZ_DW(teardown_hca_out)] = {};
+ uint32_t in[DEVX_ST_SZ_DW(teardown_hca_in)] = {};
+ int waited = 0, state, ret;
+
+ DEVX_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+ DEVX_SET(teardown_hca_in, in, profile,
+ MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN);
+ ret = mlx5_vfio_cmd_exec(ctx, in, sizeof(in), out, sizeof(out), 0);
+ if (ret)
+ return ret;
+
+ state = DEVX_GET(teardown_hca_out, out, state);
+ if (state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
+ mlx5_err(ctx->dbg_fp, "teardown with fast mode failed\n");
+ return EIO;
+ }
+
+ mlx5_vfio_set_nic_state(ctx, MLX5_NIC_IFC_DISABLED);
+ do {
+ if (mlx5_vfio_get_nic_state(ctx) == MLX5_NIC_IFC_DISABLED)
+ break;
+ usleep(MLX5_FAST_TEARDOWN_WAIT_ONCE_MS * 1000);
+ waited += MLX5_FAST_TEARDOWN_WAIT_ONCE_MS;
+ } while (waited < MLX5_FAST_TEARDOWN_WAIT_MS);
+
+ if (mlx5_vfio_get_nic_state(ctx) != MLX5_NIC_IFC_DISABLED) {
+ mlx5_err(ctx->dbg_fp, "NIC IFC still %d after %ums.\n",
+ mlx5_vfio_get_nic_state(ctx), waited);
+ return EIO;
+ }
+
+ return 0;
+}
+
+static int mlx5_vfio_teardown_hca(struct mlx5_vfio_context *ctx)
+{
+ int err;
+
+ if (MLX5_VFIO_CAP_GEN(ctx, fast_teardown)) {
+ err = mlx5_vfio_teardown_hca_fast(ctx);
+ if (!err)
+ return 0;
+ }
+
+ return mlx5_vfio_teardown_hca_regular(ctx);
+}
+
static int mlx5_vfio_setup_function(struct mlx5_vfio_context *ctx)
{
int err;
--
1.8.3.1
next prev parent reply other threads:[~2021-07-20 8:18 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-20 8:16 [PATCH rdma-core 00/27] Introduce mlx5 user space driver over VFIO Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 01/27] Update kernel headers Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 02/27] mlx5: Introduce mlx5dv_get_vfio_device_list() Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 03/27] mlx5: Enable debug functionality for vfio Yishai Hadas
2021-07-20 8:51 ` Leon Romanovsky
2021-07-20 9:27 ` Yishai Hadas
2021-07-20 12:27 ` Leon Romanovsky
2021-07-20 14:57 ` Yishai Hadas
2021-07-21 7:05 ` Gal Pressman
2021-07-21 7:58 ` Yishai Hadas
2021-07-21 8:51 ` Gal Pressman
2021-07-20 8:16 ` [PATCH rdma-core 04/27] util: Add interval_set support Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 05/27] verbs: Enable verbs_open_device() to work over non sysfs devices Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 06/27] mlx5: Setup mlx5 vfio context Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 07/27] mlx5: Add mlx5_vfio_cmd_exec() support Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 08/27] mlx5: vfio setup function support Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 09/27] mlx5: vfio setup basic caps Yishai Hadas
2021-07-20 8:16 ` Yishai Hadas [this message]
2021-07-20 8:16 ` [PATCH rdma-core 11/27] mlx5: Enable interrupt command mode over vfio Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 12/27] mlx5: Introduce vfio APIs to process events Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 13/27] mlx5: VFIO poll_health support Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 14/27] mlx5: Implement basic verbs operation for PD and MR over vfio Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 15/27] mlx5: Set DV context ops Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 16/27] mlx5: Support initial DEVX/DV APIs over vfio Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 17/27] mlx5: Implement mlx5dv devx_obj " Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 18/27] pyverbs: Support DevX UMEM registration Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 19/27] pyverbs/mlx5: Support EQN querying Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 20/27] pyverbs/mlx5: Support more DevX objects Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 21/27] pyverbs: Add auxiliary memory functions Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 22/27] pyverbs/mlx5: Add support to extract mlx5dv objects Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 23/27] pyverbs/mlx5: Wrap mlx5_cqe64 struct and add enums Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 24/27] tests: Add MAC address to the tests' args Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 25/27] tests: Add mlx5 DevX data path test Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 26/27] pyverbs/mlx5: Support mlx5 devices over VFIO Yishai Hadas
2021-07-20 8:16 ` [PATCH rdma-core 27/27] tests: Add a test for mlx5 " Yishai Hadas
2021-08-01 8:00 ` [PATCH rdma-core 00/27] Introduce mlx5 user space driver " Yishai Hadas
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210720081647.1980-11-yishaih@nvidia.com \
--to=yishaih@nvidia.com \
--cc=edwards@nvidia.com \
--cc=jgg@nvidia.com \
--cc=linux-rdma@vger.kernel.org \
--cc=maorg@nvidia.com \
--cc=markzhang@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).