From: Yishai Hadas <yishaih@nvidia.com>
To: <alex.williamson@redhat.com>, <jgg@nvidia.com>
Cc: <kvm@vger.kernel.org>, <kevin.tian@intel.com>,
	<joao.m.martins@oracle.com>, <leonro@nvidia.com>,
	<shayd@nvidia.com>, <yishaih@nvidia.com>, <maorg@nvidia.com>,
	<avihaih@nvidia.com>, <cohuck@redhat.com>,
	<shameerali.kolothum.thodi@huawei.com>
Subject: [PATCH V4 vfio 10/14] vfio/mlx5: Introduce vfio precopy ioctl implementation
Date: Tue, 6 Dec 2022 10:34:34 +0200
Message-ID: <20221206083438.37807-11-yishaih@nvidia.com>
In-Reply-To: <20221206083438.37807-1-yishaih@nvidia.com>

The vfio precopy ioctl returns an estimate of the data available for
transfer from the device.

Whenever a user issues VFIO_MIG_GET_PRECOPY_INFO, track the current
state of the device and, if needed, append the dirty data to the data
exposed over the transfer FD. This is done by saving a middle state.

As mlx5 runs the SAVE command asynchronously, make sure to query for
incremental data only once there is no active SAVE command.
Running both in parallel might end up with a failure of the incremental
query command on an untracked vhca.
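
In isolation, this serialization is the usual "completion as a one-slot
token" pattern. A rough sketch with made-up names (not the driver code
itself, which is in the cmd.c hunk below):

#include <linux/completion.h>

/* Hypothetical device struct; save_done plays the role of save_comp. */
struct demo_dev {
	struct completion save_done;	/* completed == no SAVE in flight */
};

static void demo_dev_init(struct demo_dev *dev)
{
	init_completion(&dev->save_done);
	complete(&dev->save_done);	/* the token starts out available */
}

static int demo_query_incremental(struct demo_dev *dev)
{
	int ret;

	/* Take the token: wait until any in-flight SAVE has completed. */
	ret = wait_for_completion_interruptible(&dev->save_done);
	if (ret)
		return ret;

	/* ... issue the incremental query command here ... */

	/* Put the token back so the next SAVE or query can proceed. */
	complete(&dev->save_done);
	return 0;
}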

Also, a middle state will be saved only after the previous state has
finished its SAVE command and has been fully transferred; this prevents
unbounded resource usage.
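
For reference, a minimal userspace sketch of querying the estimate. It
assumes "data_fd" is the migration data FD returned when moving the
device to VFIO_DEVICE_STATE_PRE_COPY (a sketch of the uAPI usage, not
code from this series):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int query_precopy(int data_fd)
{
	struct vfio_precopy_info info = {};

	info.argsz = sizeof(info);
	if (ioctl(data_fd, VFIO_MIG_GET_PRECOPY_INFO, &info) < 0)
		return -1;

	/*
	 * initial_bytes: remaining bytes of the initial pre-copy image.
	 * dirty_bytes: estimate of device state dirtied since that image.
	 */
	printf("initial_bytes=%llu dirty_bytes=%llu\n",
	       (unsigned long long)info.initial_bytes,
	       (unsigned long long)info.dirty_bytes);
	return 0;
}

A VMM would typically keep read()ing the data FD and re-issue this ioctl
to decide when the remaining estimate is small enough to move the device
to STOP_COPY.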

Co-developed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 drivers/vfio/pci/mlx5/cmd.c  |  16 +++++
 drivers/vfio/pci/mlx5/main.c | 111 +++++++++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 160fa38fc78d..12e74ecebe64 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -67,12 +67,25 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
 {
 	u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
+	bool inc = query_flags & MLX5VF_QUERY_INC;
 	int ret;
 
 	lockdep_assert_held(&mvdev->state_mutex);
 	if (mvdev->mdev_detach)
 		return -ENOTCONN;
 
+	/*
+	 * In case PRE_COPY is used, saving_migf is exposed while the device is
+	 * running. Make sure to run only once there is no active SAVE command.
+	 * Running both in parallel might end up with a failure of the
+	 * incremental query command on an untracked vhca.
+	 */
+	if (inc) {
+		ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp);
+		if (ret)
+			return ret;
+	}
+
 	MLX5_SET(query_vhca_migration_state_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
 	MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
@@ -82,6 +95,9 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
 
 	ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
 				  out);
+	if (inc)
+		complete(&mvdev->saving_migf->save_comp);
+
 	if (ret)
 		return ret;
 
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 9a36e36ec33b..2c8ac763057c 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -294,10 +294,121 @@ static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf)
 	wake_up_interruptible(&migf->poll_wait);
 }
 
+static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
+				 unsigned long arg)
+{
+	struct mlx5_vf_migration_file *migf = filp->private_data;
+	struct mlx5vf_pci_core_device *mvdev = migf->mvdev;
+	struct mlx5_vhca_data_buffer *buf;
+	struct vfio_precopy_info info = {};
+	loff_t *pos = &filp->f_pos;
+	unsigned long minsz;
+	size_t inc_length = 0;
+	bool end_of_data;
+	int ret;
+
+	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
+		return -ENOTTY;
+
+	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
+
+	if (copy_from_user(&info, (void __user *)arg, minsz))
+		return -EFAULT;
+
+	if (info.argsz < minsz)
+		return -EINVAL;
+
+	mutex_lock(&mvdev->state_mutex);
+	if (mvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
+	    mvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
+		ret = -EINVAL;
+		goto err_state_unlock;
+	}
+
+	/*
+	 * We can't issue a SAVE command when the device is suspended, so in
+	 * VFIO_DEVICE_STATE_PRE_COPY_P2P there is no reason to query for
+	 * extra bytes that can't be read at this point.
+	 */
+	if (mvdev->mig_state == VFIO_DEVICE_STATE_PRE_COPY) {
+		/*
+		 * Once the query returns it's guaranteed that there is no
+		 * active SAVE command.
+		 * As such, the code below is safe with the proper locks held.
+		 */
+		ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &inc_length,
+							    MLX5VF_QUERY_INC);
+		if (ret)
+			goto err_state_unlock;
+	}
+
+	mutex_lock(&migf->lock);
+	if (migf->state == MLX5_MIGF_STATE_ERROR) {
+		ret = -ENODEV;
+		goto err_migf_unlock;
+	}
+
+	buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data);
+	if (buf) {
+		if (buf->start_pos == 0) {
+			info.initial_bytes = buf->header_image_size - *pos;
+		} else if (buf->start_pos ==
+				sizeof(struct mlx5_vf_migration_header)) {
+			/* First data buffer following the header */
+			info.initial_bytes = buf->start_pos +
+						buf->length - *pos;
+		} else {
+			info.dirty_bytes = buf->start_pos + buf->length - *pos;
+		}
+	} else {
+		if (!end_of_data) {
+			ret = -EINVAL;
+			goto err_migf_unlock;
+		}
+
+		info.dirty_bytes = inc_length;
+	}
+
+	if (!end_of_data || !inc_length) {
+		mutex_unlock(&migf->lock);
+		goto done;
+	}
+
+	mutex_unlock(&migf->lock);
+	/*
+	 * We finished transferring the current state and the device has a
+	 * dirty state; save a new state so that it's ready to be read.
+	 */
+	buf = mlx5vf_get_data_buffer(migf, inc_length, DMA_FROM_DEVICE);
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		mlx5vf_mark_err(migf);
+		goto err_state_unlock;
+	}
+
+	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, true);
+	if (ret) {
+		mlx5vf_mark_err(migf);
+		mlx5vf_put_data_buffer(buf);
+		goto err_state_unlock;
+	}
+
+done:
+	mlx5vf_state_mutex_unlock(mvdev);
+	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
+err_migf_unlock:
+	mutex_unlock(&migf->lock);
+err_state_unlock:
+	mlx5vf_state_mutex_unlock(mvdev);
+	return ret;
+}
+
 static const struct file_operations mlx5vf_save_fops = {
 	.owner = THIS_MODULE,
 	.read = mlx5vf_save_read,
 	.poll = mlx5vf_save_poll,
+	.unlocked_ioctl = mlx5vf_precopy_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
 	.release = mlx5vf_release_file,
 	.llseek = no_llseek,
 };
-- 
2.18.1


Thread overview: 16+ messages
2022-12-06  8:34 [PATCH V4 vfio 00/14] Add migration PRE_COPY support for mlx5 driver Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 01/14] net/mlx5: Introduce ifc bits for pre_copy Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 02/14] vfio: Extend the device migration protocol with PRE_COPY Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 03/14] vfio/mlx5: Enforce a single SAVE command at a time Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 04/14] vfio/mlx5: Refactor PD usage Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 05/14] vfio/mlx5: Refactor MKEY usage Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 06/14] vfio/mlx5: Refactor migration file state Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 07/14] vfio/mlx5: Refactor to use queue based data chunks Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 08/14] vfio/mlx5: Introduce device transitions of PRE_COPY Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 09/14] vfio/mlx5: Introduce SW headers for migration states Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 10/14] vfio/mlx5: Introduce vfio precopy ioctl implementation Yishai Hadas [this message]
2022-12-06  8:34 ` [PATCH V4 vfio 11/14] vfio/mlx5: Consider temporary end of stream as part of PRE_COPY Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 12/14] vfio/mlx5: Introduce multiple loads Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 13/14] vfio/mlx5: Fallback to STOP_COPY upon specific PRE_COPY error Yishai Hadas
2022-12-06  8:34 ` [PATCH V4 vfio 14/14] vfio/mlx5: Enable MIGRATION_PRE_COPY flag Yishai Hadas
2022-12-06 21:49 ` [PATCH V4 vfio 00/14] Add migration PRE_COPY support for mlx5 driver Alex Williamson
