From: Yishai Hadas <yishaih@nvidia.com>
To: <alex.williamson@redhat.com>, <bhelgaas@google.com>,
<jgg@nvidia.com>, <saeedm@nvidia.com>
Cc: <linux-pci@vger.kernel.org>, <kvm@vger.kernel.org>,
<netdev@vger.kernel.org>, <kuba@kernel.org>, <leonro@nvidia.com>,
<kwankhede@nvidia.com>, <mgurtovoy@nvidia.com>,
<yishaih@nvidia.com>, <maorg@nvidia.com>, <cohuck@redhat.com>,
<ashok.raj@intel.com>, <kevin.tian@intel.com>,
<shameerali.kolothum.thodi@huawei.com>
Subject: [PATCH V9 mlx5-next 12/15] vfio/mlx5: Expose migration commands over mlx5 device
Date: Thu, 24 Feb 2022 16:20:21 +0200 [thread overview]
Message-ID: <20220224142024.147653-13-yishaih@nvidia.com> (raw)
In-Reply-To: <20220224142024.147653-1-yishaih@nvidia.com>
Expose migration commands over the device, it includes: suspend, resume,
get vhca id, query/save/load state.
As part of this adds the APIs and data structure that are needed to manage
the migration data.
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/vfio/pci/mlx5/cmd.c | 259 ++++++++++++++++++++++++++++++++++++
drivers/vfio/pci/mlx5/cmd.h | 35 +++++
2 files changed, 294 insertions(+)
create mode 100644 drivers/vfio/pci/mlx5/cmd.c
create mode 100644 drivers/vfio/pci/mlx5/cmd.h
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
new file mode 100644
index 000000000000..5c9f9218cc1d
--- /dev/null
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include "cmd.h"
+
+int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod)
+{
+ struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
+ u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
+ int ret;
+
+ if (!mdev)
+ return -ENOTCONN;
+
+ MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
+ MLX5_SET(suspend_vhca_in, in, vhca_id, vhca_id);
+ MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);
+
+ ret = mlx5_cmd_exec_inout(mdev, suspend_vhca, in, out);
+ mlx5_vf_put_core_dev(mdev);
+ return ret;
+}
+
+int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod)
+{
+ struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
+ u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};
+ int ret;
+
+ if (!mdev)
+ return -ENOTCONN;
+
+ MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
+ MLX5_SET(resume_vhca_in, in, vhca_id, vhca_id);
+ MLX5_SET(resume_vhca_in, in, op_mod, op_mod);
+
+ ret = mlx5_cmd_exec_inout(mdev, resume_vhca, in, out);
+ mlx5_vf_put_core_dev(mdev);
+ return ret;
+}
+
+int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id,
+ size_t *state_size)
+{
+ struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
+ u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
+ int ret;
+
+ if (!mdev)
+ return -ENOTCONN;
+
+ MLX5_SET(query_vhca_migration_state_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
+ MLX5_SET(query_vhca_migration_state_in, in, vhca_id, vhca_id);
+ MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
+
+ ret = mlx5_cmd_exec_inout(mdev, query_vhca_migration_state, in, out);
+ if (ret)
+ goto end;
+
+ *state_size = MLX5_GET(query_vhca_migration_state_out, out,
+ required_umem_size);
+
+end:
+ mlx5_vf_put_core_dev(mdev);
+ return ret;
+}
+
+int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id)
+{
+ struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
+ u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
+ int out_size;
+ void *out;
+ int ret;
+
+ if (!mdev)
+ return -ENOTCONN;
+
+ out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ out = kzalloc(out_size, GFP_KERNEL);
+ if (!out) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, other_function, 1);
+ MLX5_SET(query_hca_cap_in, in, function_id, function_id);
+ MLX5_SET(query_hca_cap_in, in, op_mod,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
+ HCA_CAP_OPMOD_GET_CUR);
+
+ ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
+ if (ret)
+ goto err_exec;
+
+ *vhca_id = MLX5_GET(query_hca_cap_out, out,
+ capability.cmd_hca_cap.vhca_id);
+
+err_exec:
+ kfree(out);
+end:
+ mlx5_vf_put_core_dev(mdev);
+ return ret;
+}
+
+static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+ struct mlx5_vf_migration_file *migf, u32 *mkey)
+{
+ size_t npages = DIV_ROUND_UP(migf->total_length, PAGE_SIZE);
+ struct sg_dma_page_iter dma_iter;
+ int err = 0, inlen;
+ __be64 *mtt;
+ void *mkc;
+ u32 *in;
+
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+ sizeof(*mtt) * round_up(npages, 2);
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+ DIV_ROUND_UP(npages, 2));
+ mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+
+ for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
+ *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, rr, 1);
+ MLX5_SET(mkc, mkc, rw, 1);
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, bsf_octword_size, 0);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+ MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
+ MLX5_SET64(mkc, mkc, len, migf->total_length);
+ err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+ kvfree(in);
+ return err;
+}
+
+int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+ struct mlx5_vf_migration_file *migf)
+{
+ struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
+ u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
+ u32 pdn, mkey;
+ int err;
+
+ if (!mdev)
+ return -ENOTCONN;
+
+ err = mlx5_core_alloc_pd(mdev, &pdn);
+ if (err)
+ goto end;
+
+ err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
+ 0);
+ if (err)
+ goto err_dma_map;
+
+ err = _create_state_mkey(mdev, pdn, migf, &mkey);
+ if (err)
+ goto err_create_mkey;
+
+ MLX5_SET(save_vhca_state_in, in, opcode,
+ MLX5_CMD_OP_SAVE_VHCA_STATE);
+ MLX5_SET(save_vhca_state_in, in, op_mod, 0);
+ MLX5_SET(save_vhca_state_in, in, vhca_id, vhca_id);
+ MLX5_SET(save_vhca_state_in, in, mkey, mkey);
+ MLX5_SET(save_vhca_state_in, in, size, migf->total_length);
+
+ err = mlx5_cmd_exec_inout(mdev, save_vhca_state, in, out);
+ if (err)
+ goto err_exec;
+
+ migf->total_length =
+ MLX5_GET(save_vhca_state_out, out, actual_image_size);
+
+ mlx5_core_destroy_mkey(mdev, mkey);
+ mlx5_core_dealloc_pd(mdev, pdn);
+ dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
+ mlx5_vf_put_core_dev(mdev);
+
+ return 0;
+
+err_exec:
+ mlx5_core_destroy_mkey(mdev, mkey);
+err_create_mkey:
+ dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
+err_dma_map:
+ mlx5_core_dealloc_pd(mdev, pdn);
+end:
+ mlx5_vf_put_core_dev(mdev);
+ return err;
+}
+
+int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+ struct mlx5_vf_migration_file *migf)
+{
+ struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
+ u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
+ u32 pdn, mkey;
+ int err;
+
+ if (!mdev)
+ return -ENOTCONN;
+
+ mutex_lock(&migf->lock);
+ if (!migf->total_length) {
+ err = -EINVAL;
+ goto end;
+ }
+
+ err = mlx5_core_alloc_pd(mdev, &pdn);
+ if (err)
+ goto end;
+
+ err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
+ if (err)
+ goto err_reg;
+
+ err = _create_state_mkey(mdev, pdn, migf, &mkey);
+ if (err)
+ goto err_mkey;
+
+ MLX5_SET(load_vhca_state_in, in, opcode,
+ MLX5_CMD_OP_LOAD_VHCA_STATE);
+ MLX5_SET(load_vhca_state_in, in, op_mod, 0);
+ MLX5_SET(load_vhca_state_in, in, vhca_id, vhca_id);
+ MLX5_SET(load_vhca_state_in, in, mkey, mkey);
+ MLX5_SET(load_vhca_state_in, in, size, migf->total_length);
+
+ err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out);
+
+ mlx5_core_destroy_mkey(mdev, mkey);
+err_mkey:
+ dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
+err_reg:
+ mlx5_core_dealloc_pd(mdev, pdn);
+end:
+ mlx5_vf_put_core_dev(mdev);
+ mutex_unlock(&migf->lock);
+ return err;
+}
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
new file mode 100644
index 000000000000..69a1481ed953
--- /dev/null
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ */
+
+#ifndef MLX5_VFIO_CMD_H
+#define MLX5_VFIO_CMD_H
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+
+struct mlx5_vf_migration_file {
+ struct file *filp;
+ struct mutex lock;
+
+ struct sg_append_table table;
+ size_t total_length;
+ size_t allocated_length;
+
+ /* Optimize mlx5vf_get_migration_page() for sequential access */
+ struct scatterlist *last_offset_sg;
+ unsigned int sg_last_entry;
+ unsigned long last_offset;
+};
+
+int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod);
+int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod);
+int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id,
+ size_t *state_size);
+int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id);
+int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+ struct mlx5_vf_migration_file *migf);
+int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+ struct mlx5_vf_migration_file *migf);
+#endif /* MLX5_VFIO_CMD_H */
--
2.18.1
next prev parent reply other threads:[~2022-02-24 14:22 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-24 14:20 [PATCH V9 mlx5-next 00/15] Add mlx5 live migration driver and v2 migration protocol Yishai Hadas
2022-02-24 14:20 ` [PATCH V9 mlx5-next 01/15] PCI/IOV: Add pci_iov_vf_id() to get VF index Yishai Hadas
2022-02-24 14:20 ` [PATCH V9 mlx5-next 02/15] net/mlx5: Reuse exported virtfn index function call Yishai Hadas
2022-02-24 14:20 ` [PATCH V9 mlx5-next 03/15] net/mlx5: Disable SRIOV before PF removal Yishai Hadas
2022-02-24 14:20 ` [PATCH V9 mlx5-next 04/15] PCI/IOV: Add pci_iov_get_pf_drvdata() to allow VF reaching the drvdata of a PF Yishai Hadas
2022-02-24 14:20 ` [PATCH V9 mlx5-next 05/15] net/mlx5: Expose APIs to get/put the mlx5 core device Yishai Hadas
2022-02-24 14:20 ` [PATCH V9 mlx5-next 06/15] net/mlx5: Introduce migration bits and structures Yishai Hadas
2022-02-24 14:20 ` [PATCH V9 mlx5-next 07/15] net/mlx5: Add migration commands definitions Yishai Hadas
2022-02-24 14:20 ` [PATCH V9 mlx5-next 08/15] vfio: Have the core code decode the VFIO_DEVICE_FEATURE ioctl Yishai Hadas
2022-03-02 10:00 ` Cornelia Huck
2022-03-02 14:23 ` Jason Gunthorpe
2022-02-24 14:20 ` [PATCH V9 mlx5-next 09/15] vfio: Define device migration protocol v2 Yishai Hadas
2022-03-02 11:19 ` Cornelia Huck
2022-03-02 14:27 ` Jason Gunthorpe
2022-03-02 15:34 ` Alex Williamson
2022-03-02 16:07 ` Cornelia Huck
2022-03-02 16:34 ` Alex Williamson
2022-03-02 16:56 ` Cornelia Huck
2022-03-02 16:34 ` Jason Gunthorpe
2022-03-02 16:57 ` Cornelia Huck
2022-02-24 14:20 ` [PATCH V9 mlx5-next 10/15] vfio: Extend the device migration protocol with RUNNING_P2P Yishai Hadas
2022-02-24 15:21 ` Cornelia Huck
2022-02-24 15:30 ` Alex Williamson
2022-02-24 16:13 ` Jason Gunthorpe
2022-02-24 16:35 ` Alex Williamson
2022-02-24 16:53 ` Cornelia Huck
2022-02-24 20:46 ` Alex Williamson
2022-03-02 11:51 ` Cornelia Huck
2022-02-24 14:20 ` [PATCH V9 mlx5-next 11/15] vfio: Remove migration protocol v1 documentation Yishai Hadas
2022-03-02 10:09 ` Cornelia Huck
2022-02-24 14:20 ` Yishai Hadas [this message]
2022-02-24 14:20 ` [PATCH V9 mlx5-next 13/15] vfio/mlx5: Implement vfio_pci driver for mlx5 devices Yishai Hadas
2022-02-24 14:20 ` [PATCH V9 mlx5-next 14/15] vfio/pci: Expose vfio_pci_core_aer_err_detected() Yishai Hadas
2022-02-24 14:20 ` [PATCH V9 mlx5-next 15/15] vfio/mlx5: Use its own PCI reset_done error handler Yishai Hadas
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220224142024.147653-13-yishaih@nvidia.com \
--to=yishaih@nvidia.com \
--cc=alex.williamson@redhat.com \
--cc=ashok.raj@intel.com \
--cc=bhelgaas@google.com \
--cc=cohuck@redhat.com \
--cc=jgg@nvidia.com \
--cc=kevin.tian@intel.com \
--cc=kuba@kernel.org \
--cc=kvm@vger.kernel.org \
--cc=kwankhede@nvidia.com \
--cc=leonro@nvidia.com \
--cc=linux-pci@vger.kernel.org \
--cc=maorg@nvidia.com \
--cc=mgurtovoy@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=saeedm@nvidia.com \
--cc=shameerali.kolothum.thodi@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).