* [PATCH RFC 0/3] Support live migration with mlx5_vdpa
@ 2022-06-16 13:27 Eli Cohen
  2022-06-16 13:27 ` [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback Eli Cohen
                   ` (2 more replies)
  0 siblings, 3 replies; 42+ messages in thread
From: Eli Cohen @ 2022-06-16 13:27 UTC (permalink / raw)
  To: eperezma, jasowang, mst, virtualization
  Cc: linux-kernel, si-wei.liu, parav, Eli Cohen

This patchset adds support for the suspend callback, suspending the VQs
to allow shadow VQs to kick in. It also adds address space support,
which is likewise required for live migration. We use two groups: one
for the control virtqueue and one for the data virtqueues.
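
As a reference for how this is consumed, a VMM would pin the control
virtqueue group to its own address space through the vhost-vdpa
VHOST_VDPA_SET_GROUP_ASID ioctl from the in-flight multi-ASID uapi. A
minimal userspace sketch (the device node and the group/ASID numbers
are assumptions for illustration, matching the numbering this series
uses):

        #include <fcntl.h>
        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <linux/vhost.h>

        int main(void)
        {
                /* hypothetical node; use the vhost-vdpa instance bound
                 * to the mlx5_vdpa device */
                int fd = open("/dev/vhost-vdpa-0", O_RDWR);
                /* .index is the VQ group (1 = CVQ group here),
                 * .num is the target ASID (1 = the second AS) */
                struct vhost_vring_state state = { .index = 1, .num = 1 };

                if (fd < 0 || ioctl(fd, VHOST_VDPA_SET_GROUP_ASID, &state))
                        perror("VHOST_VDPA_SET_GROUP_ASID");
                return 0;
        }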

Finally there is a patch that disables VLAN support since it is not
supported currently.

The aim of this patchset is to enable development/testing of live
migration over a real hardware device.

The two patches below must also be applied, but I did not include them
in this series since they were sent separately.

https://lore.kernel.org/all/20220613075958.511064-1-elic@nvidia.com/T/
https://lore.kernel.org/all/20220613075958.511064-2-elic@nvidia.com/T/

Eli Cohen (3):
  vdpa/mlx5: Implement suspend virtqueue callback
  vdpa/mlx5: Support different address spaces for control and data
  vdpa/mlx5: Disable VLAN support to support live migration

 drivers/vdpa/mlx5/core/mlx5_vdpa.h |  11 ++
 drivers/vdpa/mlx5/net/mlx5_vnet.c  | 170 ++++++++++++++++++++++++++---
 include/linux/mlx5/mlx5_ifc_vdpa.h |   8 ++
 3 files changed, 176 insertions(+), 13 deletions(-)

-- 
2.35.1



* [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-06-16 13:27 [PATCH RFC 0/3] Support live migration with mlx5_vdpa Eli Cohen
@ 2022-06-16 13:27 ` Eli Cohen
  2022-06-16 17:12   ` kernel test robot
                     ` (3 more replies)
  2022-06-16 13:27 ` [PATCH RFC 2/3] vdpa/mlx5: Support different address spaces for control and data Eli Cohen
  2022-06-16 13:27 ` [PATCH RFC 3/3] vdpa/mlx5: Disable VLAN support to support live migration Eli Cohen
  2 siblings, 4 replies; 42+ messages in thread
From: Eli Cohen @ 2022-06-16 13:27 UTC (permalink / raw)
  To: eperezma, jasowang, mst, virtualization
  Cc: linux-kernel, si-wei.liu, parav, Eli Cohen

Implement the suspend callback allowing to suspend the virtqueues so
they stop processing descriptors. This is required to allow the shadow
virtqueue to kick in.

Signed-off-by: Eli Cohen <elic@nvidia.com>
---
 drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
 include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index fb0b23e71383..ea4bc8a0cd25 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
 	if (err)
 		goto err_cmd;
 
+	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
 	kfree(in);
 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
 
@@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
 		return;
 	}
+	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
 	umems_destroy(ndev, mvq);
 }
 
@@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
 	return err;
 }
 
+static bool is_valid_state_change(int oldstate, int newstate)
+{
+	switch (oldstate) {
+	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
+		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
+	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
+		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
+	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
+	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
+	default:
+		return false;
+	}
+}
+
 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
 {
 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
@@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
 	void *in;
 	int err;
 
+	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
+		return 0;
+
+	if (!is_valid_state_change(mvq->fw_state, state))
+		return -EINVAL;
+
 	in = kzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
@@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
 	struct mlx5_vdpa_virtqueue *mvq;
+	int err;
 
 	if (!mvdev->actual_features)
 		return;
@@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
 	}
 
 	mvq = &ndev->vqs[idx];
-	if (!ready)
+	if (!ready) {
 		suspend_vq(ndev, mvq);
+	} else {
+		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
+		if (err) {
+			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
+			ready = false;
+		}
+	}
+
 
 	mvq->ready = ready;
 }
@@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
 	return err;
 }
 
+static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
+{
+	struct mlx5_control_vq *cvq;
+
+	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
+		return;
+
+	cvq = &mvdev->cvq;
+	cvq->ready = !suspend;
+}
+
+static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
+{
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	struct mlx5_vdpa_virtqueue *mvq;
+	int i;
+
+	if (!suspend) {
+		mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	down_write(&ndev->reslock);
+	for (i = 0; i < ndev->cur_num_vqs; i++) {
+		mvq = &ndev->vqs[i];
+		suspend_vq(ndev, mvq);
+	}
+	mlx5_vdpa_cvq_suspend(mvdev, suspend);
+	up_write(&ndev->reslock);
+	return 0;
+}
+
 static const struct vdpa_config_ops mlx5_vdpa_ops = {
 	.set_vq_address = mlx5_vdpa_set_vq_address,
 	.set_vq_num = mlx5_vdpa_set_vq_num,
@@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
 	.get_generation = mlx5_vdpa_get_generation,
 	.set_map = mlx5_vdpa_set_map,
 	.free = mlx5_vdpa_free,
+	.suspend = mlx5_vdpa_suspend,
 };
 
 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
@@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
 		mvq->index = i;
 		mvq->ndev = ndev;
 		mvq->fwqp.fw = true;
+		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
 	}
 	for (; i < ndev->mvdev.max_vqs; i++) {
 		mvq = &ndev->vqs[i];
diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
index 4414ed5b6ed2..423562f39d3c 100644
--- a/include/linux/mlx5/mlx5_ifc_vdpa.h
+++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
@@ -150,6 +150,14 @@ enum {
 	MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
 };
 
+/* This indicates that the object was not created or has alreadyi
+ * been desroyed. It is very safe to assume that this object will never
+ * have so many states
+ */
+enum {
+	MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
+};
+
 enum {
 	MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
 	MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
-- 
2.35.1



* [PATCH RFC 2/3] vdpa/mlx5: Support different address spaces for control and data
  2022-06-16 13:27 [PATCH RFC 0/3] Support live migration with mlx5_vdpa Eli Cohen
  2022-06-16 13:27 ` [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback Eli Cohen
@ 2022-06-16 13:27 ` Eli Cohen
  2022-06-20  8:47     ` Jason Wang
  2022-06-20  8:57   ` Eugenio Perez Martin
  2022-06-16 13:27 ` [PATCH RFC 3/3] vdpa/mlx5: Disable VLAN support to support live migration Eli Cohen
  2 siblings, 2 replies; 42+ messages in thread
From: Eli Cohen @ 2022-06-16 13:27 UTC (permalink / raw)
  To: eperezma, jasowang, mst, virtualization
  Cc: linux-kernel, si-wei.liu, parav, Eli Cohen

Partition virtqueues to two different address spaces: oce for control
virtqueue which is implemented in software, and one for data virtqueus.

Signed-off-by: Eli Cohen <elic@nvidia.com>
---
 drivers/vdpa/mlx5/core/mlx5_vdpa.h |  11 ++++
 drivers/vdpa/mlx5/net/mlx5_vnet.c  | 101 +++++++++++++++++++++++++----
 2 files changed, 101 insertions(+), 11 deletions(-)

diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 44104093163b..6af9fdbb86b7 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -70,6 +70,16 @@ struct mlx5_vdpa_wq_ent {
 	struct mlx5_vdpa_dev *mvdev;
 };
 
+enum {
+	MLX5_VDPA_DATAVQ_GROUP,
+	MLX5_VDPA_CVQ_GROUP,
+	MLX5_VDPA_NUMVQ_GROUPS
+};
+
+enum {
+	MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS
+};
+
 struct mlx5_vdpa_dev {
 	struct vdpa_device vdev;
 	struct mlx5_core_dev *mdev;
@@ -85,6 +95,7 @@ struct mlx5_vdpa_dev {
 	struct mlx5_vdpa_mr mr;
 	struct mlx5_control_vq cvq;
 	struct workqueue_struct *wq;
+	unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
 };
 
 int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index ea4bc8a0cd25..34bd81cb697c 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2125,9 +2125,14 @@ static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
 	return PAGE_SIZE;
 }
 
-static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx)
+static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
 {
-	return 0;
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+
+	if (is_ctrl_vq_idx(mvdev, idx))
+		return MLX5_VDPA_CVQ_GROUP;
+
+	return MLX5_VDPA_DATAVQ_GROUP;
 }
 
 enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
@@ -2541,6 +2546,15 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
 	up_write(&ndev->reslock);
 }
 
+static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
+{
+	int i;
+
+	/* default mapping all groups are mapped to asid 0 */
+	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
+		mvdev->group2asid[i] = 0;
+}
+
 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -2559,7 +2573,9 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
 	ndev->mvdev.cvq.completed_desc = 0;
 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
 	ndev->mvdev.actual_features = 0;
+	init_group_to_asid_map(mvdev);
 	++mvdev->generation;
+
 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
 		if (mlx5_vdpa_create_mr(mvdev, NULL))
 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
@@ -2597,26 +2613,76 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
 	return mvdev->generation;
 }
 
-static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
-			     struct vhost_iotlb *iotlb)
+static u32 get_group(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+{
+	u32 group;
+
+	for (group = 0; group < MLX5_VDPA_NUMVQ_GROUPS; group++) {
+		if (mvdev->group2asid[group] == asid)
+			return group;
+	}
+	return -EINVAL;
+}
+
+static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+{
+	u64 start = 0ULL, last = 0ULL - 1;
+	struct vhost_iotlb_map *map;
+	int err = 0;
+
+	spin_lock(&mvdev->cvq.iommu_lock);
+	vhost_iotlb_reset(mvdev->cvq.iotlb);
+
+	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
+	     map = vhost_iotlb_itree_next(map, start, last)) {
+		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
+					    map->last, map->addr, map->perm);
+		if (err)
+			goto out;
+	}
+
+out:
+	spin_unlock(&mvdev->cvq.iommu_lock);
+	return err;
+}
+
+static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
 {
-	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
-	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
 	bool change_map;
 	int err;
 
-	down_write(&ndev->reslock);
-
 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
 	if (err) {
 		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
-		goto err;
+		return err;
 	}
 
 	if (change_map)
 		err = mlx5_vdpa_change_map(mvdev, iotlb);
 
-err:
+	return err;
+}
+
+static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
+			     struct vhost_iotlb *iotlb)
+{
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	u32 group;
+	int err;
+
+	down_write(&ndev->reslock);
+	group = get_group(mvdev, asid);
+	switch (group) {
+	case MLX5_VDPA_DATAVQ_GROUP:
+		err = set_map_data(mvdev, iotlb);
+		break;
+	case MLX5_VDPA_CVQ_GROUP:
+		err = set_map_control(mvdev, iotlb);
+		break;
+	default:
+		err = -EINVAL;
+	}
 	up_write(&ndev->reslock);
 	return err;
 }
@@ -2796,6 +2862,18 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
 	return 0;
 }
 
+static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
+			       unsigned int asid)
+{
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+
+	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
+		return -EINVAL;
+
+	mvdev->group2asid[group] = asid;
+	return 0;
+}
+
 static const struct vdpa_config_ops mlx5_vdpa_ops = {
 	.set_vq_address = mlx5_vdpa_set_vq_address,
 	.set_vq_num = mlx5_vdpa_set_vq_num,
@@ -2825,6 +2903,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
 	.set_config = mlx5_vdpa_set_config,
 	.get_generation = mlx5_vdpa_get_generation,
 	.set_map = mlx5_vdpa_set_map,
+	.set_group_asid = mlx5_set_group_asid,
 	.free = mlx5_vdpa_free,
 	.suspend = mlx5_vdpa_suspend,
 };
@@ -3047,7 +3126,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
 	}
 
 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
-				 1, 1, name, false);
+				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
 	if (IS_ERR(ndev))
 		return PTR_ERR(ndev);
 
-- 
2.35.1



* [PATCH RFC 3/3] vdpa/mlx5: Disable VLAN support to support live migration
  2022-06-16 13:27 [PATCH RFC 0/3] Support live migration with mlx5_vdpa Eli Cohen
  2022-06-16 13:27 ` [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback Eli Cohen
  2022-06-16 13:27 ` [PATCH RFC 2/3] vdpa/mlx5: Support different address spaces for control and data Eli Cohen
@ 2022-06-16 13:27 ` Eli Cohen
  2022-06-20  8:47     ` Jason Wang
  2 siblings, 1 reply; 42+ messages in thread
From: Eli Cohen @ 2022-06-16 13:27 UTC (permalink / raw)
  To: eperezma, jasowang, mst, virtualization
  Cc: linux-kernel, si-wei.liu, parav, Eli Cohen

Current qemu code does not support live migration for devices supporting
VLAN. Disable it.

Note: this patch is provided just to enable testing with current qemu.

Signed-off-by: Eli Cohen <elic@nvidia.com>
---
 drivers/vdpa/mlx5/net/mlx5_vnet.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 34bd81cb697c..1568cfdf07e6 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2172,7 +2172,6 @@ static u64 get_supported_features(struct mlx5_core_dev *mdev)
 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
-	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
 
 	return mlx_vdpa_features;
 }
-- 
2.35.1



* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-06-16 13:27 ` [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback Eli Cohen
@ 2022-06-16 17:12   ` kernel test robot
  2022-06-19 16:33   ` Eugenio Perez Martin
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 42+ messages in thread
From: kernel test robot @ 2022-06-16 17:12 UTC (permalink / raw)
  To: Eli Cohen; +Cc: llvm, kbuild-all

Hi Eli,

[FYI, it's a private test report for your RFC patch.]
[auto build test ERROR on linus/master]
[also build test ERROR on v5.19-rc2 next-20220616]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Eli-Cohen/Support-live-migration-with-mlx5_vdpa/20220616-213010
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 30306f6194cadcc29c77f6ddcd416a75bf5c0232
config: riscv-randconfig-r042-20220616 (https://download.01.org/0day-ci/archive/20220617/202206170119.3hXbEXxz-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project f0e608de27b3d568000046eebf3712ab542979d6)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/intel-lab-lkp/linux/commit/3c2592433495f8e099adf6734416c288e847c8f8
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Eli-Cohen/Support-live-migration-with-mlx5_vdpa/20220616-213010
        git checkout 3c2592433495f8e099adf6734416c288e847c8f8
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash drivers/vdpa/mlx5/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from drivers/vdpa/mlx5/net/mlx5_vnet.c:5:
   In file included from include/linux/vdpa.h:7:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/riscv/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/riscv/include/asm/io.h:136:
   include/asm-generic/io.h:464:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __raw_readb(PCI_IOBASE + addr);
                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:477:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
   #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
                                                     ^
   In file included from drivers/vdpa/mlx5/net/mlx5_vnet.c:5:
   In file included from include/linux/vdpa.h:7:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/riscv/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/riscv/include/asm/io.h:136:
   include/asm-generic/io.h:490:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
   #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
                                                     ^
   In file included from drivers/vdpa/mlx5/net/mlx5_vnet.c:5:
   In file included from include/linux/vdpa.h:7:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/riscv/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/riscv/include/asm/io.h:136:
   include/asm-generic/io.h:501:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writeb(value, PCI_IOBASE + addr);
                               ~~~~~~~~~~ ^
   include/asm-generic/io.h:511:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:521:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:1024:55: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           return (port > MMIO_UPPER_LIMIT) ? NULL : PCI_IOBASE + port;
                                                     ~~~~~~~~~~ ^
>> drivers/vdpa/mlx5/net/mlx5_vnet.c:2819:3: error: field designator 'suspend' does not refer to any field in type 'const struct vdpa_config_ops'
           .suspend = mlx5_vdpa_suspend,
            ^
   7 warnings and 1 error generated.


vim +2819 drivers/vdpa/mlx5/net/mlx5_vnet.c

  2788	
  2789	static const struct vdpa_config_ops mlx5_vdpa_ops = {
  2790		.set_vq_address = mlx5_vdpa_set_vq_address,
  2791		.set_vq_num = mlx5_vdpa_set_vq_num,
  2792		.kick_vq = mlx5_vdpa_kick_vq,
  2793		.set_vq_cb = mlx5_vdpa_set_vq_cb,
  2794		.set_vq_ready = mlx5_vdpa_set_vq_ready,
  2795		.get_vq_ready = mlx5_vdpa_get_vq_ready,
  2796		.set_vq_state = mlx5_vdpa_set_vq_state,
  2797		.get_vq_state = mlx5_vdpa_get_vq_state,
  2798		.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
  2799		.get_vq_notification = mlx5_get_vq_notification,
  2800		.get_vq_irq = mlx5_get_vq_irq,
  2801		.get_vq_align = mlx5_vdpa_get_vq_align,
  2802		.get_vq_group = mlx5_vdpa_get_vq_group,
  2803		.get_device_features = mlx5_vdpa_get_device_features,
  2804		.set_driver_features = mlx5_vdpa_set_driver_features,
  2805		.get_driver_features = mlx5_vdpa_get_driver_features,
  2806		.set_config_cb = mlx5_vdpa_set_config_cb,
  2807		.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
  2808		.get_device_id = mlx5_vdpa_get_device_id,
  2809		.get_vendor_id = mlx5_vdpa_get_vendor_id,
  2810		.get_status = mlx5_vdpa_get_status,
  2811		.set_status = mlx5_vdpa_set_status,
  2812		.reset = mlx5_vdpa_reset,
  2813		.get_config_size = mlx5_vdpa_get_config_size,
  2814		.get_config = mlx5_vdpa_get_config,
  2815		.set_config = mlx5_vdpa_set_config,
  2816		.get_generation = mlx5_vdpa_get_generation,
  2817		.set_map = mlx5_vdpa_set_map,
  2818		.free = mlx5_vdpa_free,
> 2819		.suspend = mlx5_vdpa_suspend,
  2820	};
  2821	

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp


* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-06-16 13:27 ` [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback Eli Cohen
  2022-06-16 17:12   ` kernel test robot
@ 2022-06-19 16:33   ` Eugenio Perez Martin
  2022-06-20  8:56     ` Jason Wang
  2022-06-20 10:07   ` Eugenio Perez Martin
  3 siblings, 0 replies; 42+ messages in thread
From: Eugenio Perez Martin @ 2022-06-19 16:33 UTC (permalink / raw)
  To: Eli Cohen
  Cc: Jason Wang, Michael Tsirkin, virtualization, linux-kernel,
	Si-Wei Liu, Parav Pandit

On Thu, Jun 16, 2022 at 3:27 PM Eli Cohen <elic@nvidia.com> wrote:
>
> Implement the suspend callback allowing to suspend the virtqueues so
> they stop processing descriptors. This is required to allow the shadow
> virtqueue to kick in.
>

Maybe a more general description is "To get a meaningful virtqueue
state in live migration, trusting the device will not modify it from
the moment it is suspended"?

> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
>  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
>  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
>  2 files changed, 75 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index fb0b23e71383..ea4bc8a0cd25 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
>         if (err)
>                 goto err_cmd;
>
> +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
>         kfree(in);
>         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
>
> @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
>                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
>                 return;
>         }
> +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
>         umems_destroy(ndev, mvq);
>  }
>
> @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
>         return err;
>  }
>
> +static bool is_valid_state_change(int oldstate, int newstate)
> +{
> +       switch (oldstate) {
> +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> +       default:
> +               return false;
> +       }
> +}
> +
>  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
>  {
>         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
>         void *in;
>         int err;
>
> +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> +               return 0;
> +
> +       if (!is_valid_state_change(mvq->fw_state, state))
> +               return -EINVAL;
> +
>         in = kzalloc(inlen, GFP_KERNEL);
>         if (!in)
>                 return -ENOMEM;
> @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
>         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>         struct mlx5_vdpa_virtqueue *mvq;
> +       int err;
>
>         if (!mvdev->actual_features)
>                 return;
> @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
>         }
>
>         mvq = &ndev->vqs[idx];
> -       if (!ready)
> +       if (!ready) {
>                 suspend_vq(ndev, mvq);
> +       } else {
> +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> +               if (err) {
> +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> +                       ready = false;
> +               }
> +       }
> +
>
>         mvq->ready = ready;
>  }
> @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
>         return err;
>  }
>
> +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> +{
> +       struct mlx5_control_vq *cvq;
> +
> +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> +               return;
> +
> +       cvq = &mvdev->cvq;
> +       cvq->ready = !suspend;
> +}
> +
> +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> +{
> +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> +       struct mlx5_vdpa_virtqueue *mvq;
> +       int i;
> +
> +       if (!suspend) {
> +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> +               return -EOPNOTSUPP;
> +       }
> +
> +       down_write(&ndev->reslock);
> +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> +               mvq = &ndev->vqs[i];
> +               suspend_vq(ndev, mvq);
> +       }
> +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> +       up_write(&ndev->reslock);
> +       return 0;
> +}
> +
>  static const struct vdpa_config_ops mlx5_vdpa_ops = {
>         .set_vq_address = mlx5_vdpa_set_vq_address,
>         .set_vq_num = mlx5_vdpa_set_vq_num,
> @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
>         .get_generation = mlx5_vdpa_get_generation,
>         .set_map = mlx5_vdpa_set_map,
>         .free = mlx5_vdpa_free,
> +       .suspend = mlx5_vdpa_suspend,
>  };
>
>  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
>                 mvq->index = i;
>                 mvq->ndev = ndev;
>                 mvq->fwqp.fw = true;
> +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
>         }
>         for (; i < ndev->mvdev.max_vqs; i++) {
>                 mvq = &ndev->vqs[i];
> diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> index 4414ed5b6ed2..423562f39d3c 100644
> --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> @@ -150,6 +150,14 @@ enum {
>         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
>  };
>
> +/* This indicates that the object was not created or has alreadyi
> + * been desroyed. It is very safe to assume that this object will never

Small typos: "already been destroyed".

> + * have so many states
> + */
> +enum {
> +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> +};
> +
>  enum {
>         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
>         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> --
> 2.35.1
>



* Re: [PATCH RFC 2/3] vdpa/mlx5: Support different address spaces for control and data
  2022-06-16 13:27 ` [PATCH RFC 2/3] vdpa/mlx5: Support different address spaces for control and data Eli Cohen
@ 2022-06-20  8:47     ` Jason Wang
  2022-06-20  8:57   ` Eugenio Perez Martin
  1 sibling, 0 replies; 42+ messages in thread
From: Jason Wang @ 2022-06-20  8:47 UTC (permalink / raw)
  To: Eli Cohen
  Cc: eperezma, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
>
> Partition virtqueues to two different address spaces: oce for control

Typo, should be "one"

> virtqueue which is implemented in software, and one for data virtqueus.

And should be "virtqueues".

Other than this.

Acked-by: Jason Wang <jasowang@redhat.com>

>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
>  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  11 ++++
>  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 101 +++++++++++++++++++++++++----
>  2 files changed, 101 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> index 44104093163b..6af9fdbb86b7 100644
> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> @@ -70,6 +70,16 @@ struct mlx5_vdpa_wq_ent {
>         struct mlx5_vdpa_dev *mvdev;
>  };
>
> +enum {
> +       MLX5_VDPA_DATAVQ_GROUP,
> +       MLX5_VDPA_CVQ_GROUP,
> +       MLX5_VDPA_NUMVQ_GROUPS
> +};
> +
> +enum {
> +       MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS
> +};
> +
>  struct mlx5_vdpa_dev {
>         struct vdpa_device vdev;
>         struct mlx5_core_dev *mdev;
> @@ -85,6 +95,7 @@ struct mlx5_vdpa_dev {
>         struct mlx5_vdpa_mr mr;
>         struct mlx5_control_vq cvq;
>         struct workqueue_struct *wq;
> +       unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
>  };
>
>  int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index ea4bc8a0cd25..34bd81cb697c 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2125,9 +2125,14 @@ static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
>         return PAGE_SIZE;
>  }
>
> -static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx)
> +static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
>  {
> -       return 0;
> +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> +
> +       if (is_ctrl_vq_idx(mvdev, idx))
> +               return MLX5_VDPA_CVQ_GROUP;
> +
> +       return MLX5_VDPA_DATAVQ_GROUP;
>  }
>
>  enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
> @@ -2541,6 +2546,15 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
>         up_write(&ndev->reslock);
>  }
>
> +static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
> +{
> +       int i;
> +
> +       /* default mapping all groups are mapped to asid 0 */
> +       for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
> +               mvdev->group2asid[i] = 0;
> +}
> +
>  static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>  {
>         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> @@ -2559,7 +2573,9 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>         ndev->mvdev.cvq.completed_desc = 0;
>         memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
>         ndev->mvdev.actual_features = 0;
> +       init_group_to_asid_map(mvdev);
>         ++mvdev->generation;
> +
>         if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
>                 if (mlx5_vdpa_create_mr(mvdev, NULL))
>                         mlx5_vdpa_warn(mvdev, "create MR failed\n");
> @@ -2597,26 +2613,76 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
>         return mvdev->generation;
>  }
>
> -static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
> -                            struct vhost_iotlb *iotlb)
> +static u32 get_group(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> +{
> +       u32 group;
> +
> +       for (group = 0; group < MLX5_VDPA_NUMVQ_GROUPS; group++) {
> +               if (mvdev->group2asid[group] == asid)
> +                       return group;
> +       }
> +       return -EINVAL;
> +}
> +
> +static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> +{
> +       u64 start = 0ULL, last = 0ULL - 1;
> +       struct vhost_iotlb_map *map;
> +       int err = 0;
> +
> +       spin_lock(&mvdev->cvq.iommu_lock);
> +       vhost_iotlb_reset(mvdev->cvq.iotlb);
> +
> +       for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
> +            map = vhost_iotlb_itree_next(map, start, last)) {
> +               err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
> +                                           map->last, map->addr, map->perm);
> +               if (err)
> +                       goto out;
> +       }
> +
> +out:
> +       spin_unlock(&mvdev->cvq.iommu_lock);
> +       return err;
> +}
> +
> +static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
>  {
> -       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> -       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>         bool change_map;
>         int err;
>
> -       down_write(&ndev->reslock);
> -
>         err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
>         if (err) {
>                 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
> -               goto err;
> +               return err;
>         }
>
>         if (change_map)
>                 err = mlx5_vdpa_change_map(mvdev, iotlb);
>
> -err:
> +       return err;
> +}
> +
> +static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
> +                            struct vhost_iotlb *iotlb)
> +{
> +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> +       u32 group;
> +       int err;
> +
> +       down_write(&ndev->reslock);
> +       group = get_group(mvdev, asid);
> +       switch (group) {
> +       case MLX5_VDPA_DATAVQ_GROUP:
> +               err = set_map_data(mvdev, iotlb);
> +               break;
> +       case MLX5_VDPA_CVQ_GROUP:
> +               err = set_map_control(mvdev, iotlb);
> +               break;
> +       default:
> +               err = -EINVAL;
> +       }
>         up_write(&ndev->reslock);
>         return err;
>  }
> @@ -2796,6 +2862,18 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
>         return 0;
>  }
>
> +static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
> +                              unsigned int asid)
> +{
> +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> +
> +       if (group >= MLX5_VDPA_NUMVQ_GROUPS)
> +               return -EINVAL;
> +
> +       mvdev->group2asid[group] = asid;
> +       return 0;
> +}
> +
>  static const struct vdpa_config_ops mlx5_vdpa_ops = {
>         .set_vq_address = mlx5_vdpa_set_vq_address,
>         .set_vq_num = mlx5_vdpa_set_vq_num,
> @@ -2825,6 +2903,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
>         .set_config = mlx5_vdpa_set_config,
>         .get_generation = mlx5_vdpa_get_generation,
>         .set_map = mlx5_vdpa_set_map,
> +       .set_group_asid = mlx5_set_group_asid,
>         .free = mlx5_vdpa_free,
>         .suspend = mlx5_vdpa_suspend,
>  };
> @@ -3047,7 +3126,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>         }
>
>         ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
> -                                1, 1, name, false);
> +                                MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
>         if (IS_ERR(ndev))
>                 return PTR_ERR(ndev);
>
> --
> 2.35.1
>



* Re: [PATCH RFC 3/3] vdpa/mlx5: Disable VLAN support to support live migration
  2022-06-16 13:27 ` [PATCH RFC 3/3] vdpa/mlx5: Disable VLAN support to support live migration Eli Cohen
@ 2022-06-20  8:47     ` Jason Wang
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2022-06-20  8:47 UTC (permalink / raw)
  To: Eli Cohen
  Cc: eperezma, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Thu, Jun 16, 2022 at 9:28 PM Eli Cohen <elic@nvidia.com> wrote:
>
> Current qemu code does not support live migration for devices supporting
> VLAN. Disable it.

This looks like a bug that we need to fix in Qemu.

Thanks

>
> Note: this patch is provided just to enable testing with current qemu.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
>  drivers/vdpa/mlx5/net/mlx5_vnet.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 34bd81cb697c..1568cfdf07e6 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2172,7 +2172,6 @@ static u64 get_supported_features(struct mlx5_core_dev *mdev)
>         mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
>         mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
>         mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
> -       mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
>
>         return mlx_vdpa_features;
>  }
> --
> 2.35.1
>



* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-06-16 13:27 ` [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback Eli Cohen
@ 2022-06-20  8:56     ` Jason Wang
  2022-06-19 16:33   ` Eugenio Perez Martin
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2022-06-20  8:56 UTC (permalink / raw)
  To: Eli Cohen
  Cc: eperezma, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
>
> Implement the suspend callback allowing to suspend the virtqueues so
> they stop processing descriptors. This is required to allow the shadow
> virtqueue to kick in.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
>  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
>  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
>  2 files changed, 75 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index fb0b23e71383..ea4bc8a0cd25 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
>         if (err)
>                 goto err_cmd;
>
> +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
>         kfree(in);
>         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
>
> @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
>                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
>                 return;
>         }
> +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
>         umems_destroy(ndev, mvq);
>  }
>
> @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
>         return err;
>  }
>
> +static bool is_valid_state_change(int oldstate, int newstate)
> +{
> +       switch (oldstate) {
> +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> +       default:
> +               return false;
> +       }
> +}
> +
>  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
>  {
>         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
>         void *in;
>         int err;
>
> +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> +               return 0;
> +
> +       if (!is_valid_state_change(mvq->fw_state, state))
> +               return -EINVAL;
> +
>         in = kzalloc(inlen, GFP_KERNEL);
>         if (!in)
>                 return -ENOMEM;
> @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
>         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>         struct mlx5_vdpa_virtqueue *mvq;
> +       int err;
>
>         if (!mvdev->actual_features)
>                 return;
> @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
>         }
>
>         mvq = &ndev->vqs[idx];
> -       if (!ready)
> +       if (!ready) {
>                 suspend_vq(ndev, mvq);
> +       } else {
> +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> +               if (err) {
> +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> +                       ready = false;
> +               }
> +       }
> +
>
>         mvq->ready = ready;
>  }
> @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
>         return err;
>  }
>
> +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> +{
> +       struct mlx5_control_vq *cvq;
> +
> +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> +               return;
> +
> +       cvq = &mvdev->cvq;
> +       cvq->ready = !suspend;
> +}

It looks to me we need to synchronize this with reslock. And this
probably deserves a dedicated fix.
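
One shape such a fix could take (just a sketch, not code from this
series) is to make the CVQ kick handler take reslock before it tests
cvq->ready, so a suspend holding the lock for write cannot race with
command processing:

        /* sketch: serialize CVQ processing against suspend via reslock */
        static void mlx5_cvq_kick_handler(struct work_struct *work)
        {
                struct mlx5_vdpa_wq_ent *wqent =
                        container_of(work, struct mlx5_vdpa_wq_ent, work);
                struct mlx5_vdpa_dev *mvdev = wqent->mvdev;
                struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

                down_write(&ndev->reslock);
                if (!mvdev->cvq.ready)
                        goto out;
                /* ... existing control command processing ... */
        out:
                up_write(&ndev->reslock);
        }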

> +
> +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> +{
> +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> +       struct mlx5_vdpa_virtqueue *mvq;
> +       int i;
> +
> +       if (!suspend) {
> +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> +               return -EOPNOTSUPP;
> +       }
> +
> +       down_write(&ndev->reslock);
> +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> +               mvq = &ndev->vqs[i];
> +               suspend_vq(ndev, mvq);
> +       }
> +       mlx5_vdpa_cvq_suspend(mvdev, suspend);

Do we need to synchronize with the carrier work here? Otherwise we may
get a config notification after suspending.
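
Again only a sketch of one option: quiesce the port-change notifier and
flush the event workqueue at the start of mlx5_vdpa_suspend(), before
the VQs are suspended, so no queued link work can raise a config
notification afterwards (the helper calls and their placement are
assumptions, not part of this series):

        /* sketch: stop link events, then drain any queued work */
        mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
        flush_workqueue(mvdev->wq);

        down_write(&ndev->reslock);
        /* ... suspend data VQs and the CVQ as in this patch ... */
        up_write(&ndev->reslock);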

> +       up_write(&ndev->reslock);
> +       return 0;
> +}
> +
>  static const struct vdpa_config_ops mlx5_vdpa_ops = {
>         .set_vq_address = mlx5_vdpa_set_vq_address,
>         .set_vq_num = mlx5_vdpa_set_vq_num,
> @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
>         .get_generation = mlx5_vdpa_get_generation,
>         .set_map = mlx5_vdpa_set_map,
>         .free = mlx5_vdpa_free,
> +       .suspend = mlx5_vdpa_suspend,

I don't see the vDPA bus patch to enable this method. Or is there anything I missed here?

Thanks
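
For context, the missing dependency is an addition to struct
vdpa_config_ops in include/linux/vdpa.h. With the signature this RFC
assumes, it would look roughly like the following (a sketch of the
core patch, not its final form):

        /* include/linux/vdpa.h (sketch) */
        struct vdpa_config_ops {
                /* ... existing callbacks ... */

                /* Suspend (or later resume) the device so its
                 * virtqueues stop processing descriptors; signature
                 * inferred from this series. */
                int (*suspend)(struct vdpa_device *vdev, bool suspend);
        };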

>  };
>
>  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
>                 mvq->index = i;
>                 mvq->ndev = ndev;
>                 mvq->fwqp.fw = true;
> +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
>         }
>         for (; i < ndev->mvdev.max_vqs; i++) {
>                 mvq = &ndev->vqs[i];
> diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> index 4414ed5b6ed2..423562f39d3c 100644
> --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> @@ -150,6 +150,14 @@ enum {
>         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
>  };
>
> +/* This indicates that the object was not created or has alreadyi
> + * been desroyed. It is very safe to assume that this object will never
> + * have so many states
> + */
> +enum {
> +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> +};
> +
>  enum {
>         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
>         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> --
> 2.35.1
>



* Re: [PATCH RFC 2/3] vdpa/mlx5: Support different address spaces for control and data
  2022-06-16 13:27 ` [PATCH RFC 2/3] vdpa/mlx5: Support different address spaces for control and data Eli Cohen
  2022-06-20  8:47     ` Jason Wang
@ 2022-06-20  8:57   ` Eugenio Perez Martin
  2022-06-20  9:20       ` Jason Wang
  1 sibling, 1 reply; 42+ messages in thread
From: Eugenio Perez Martin @ 2022-06-20  8:57 UTC (permalink / raw)
  To: Eli Cohen
  Cc: Jason Wang, Michael Tsirkin, virtualization, linux-kernel,
	Si-Wei Liu, Parav Pandit

On Thu, Jun 16, 2022 at 3:27 PM Eli Cohen <elic@nvidia.com> wrote:
>
> Partition virtqueues into two different address spaces: one for the control
> virtqueue, which is implemented in software, and one for the data virtqueues.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
>  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  11 ++++
>  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 101 +++++++++++++++++++++++++----
>  2 files changed, 101 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> index 44104093163b..6af9fdbb86b7 100644
> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> @@ -70,6 +70,16 @@ struct mlx5_vdpa_wq_ent {
>         struct mlx5_vdpa_dev *mvdev;
>  };
>
> +enum {
> +       MLX5_VDPA_DATAVQ_GROUP,
> +       MLX5_VDPA_CVQ_GROUP,
> +       MLX5_VDPA_NUMVQ_GROUPS
> +};
> +
> +enum {
> +       MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS
> +};
> +
>  struct mlx5_vdpa_dev {
>         struct vdpa_device vdev;
>         struct mlx5_core_dev *mdev;
> @@ -85,6 +95,7 @@ struct mlx5_vdpa_dev {
>         struct mlx5_vdpa_mr mr;
>         struct mlx5_control_vq cvq;
>         struct workqueue_struct *wq;
> +       unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
>  };
>
>  int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index ea4bc8a0cd25..34bd81cb697c 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2125,9 +2125,14 @@ static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
>         return PAGE_SIZE;
>  }
>
> -static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx)
> +static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
>  {
> -       return 0;
> +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> +
> +       if (is_ctrl_vq_idx(mvdev, idx))
> +               return MLX5_VDPA_CVQ_GROUP;
> +
> +       return MLX5_VDPA_DATAVQ_GROUP;
>  }
>
>  enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
> @@ -2541,6 +2546,15 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
>         up_write(&ndev->reslock);
>  }
>
> +static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
> +{
> +       int i;
> +
> +       /* default mapping: all groups are mapped to asid 0 */
> +       for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
> +               mvdev->group2asid[i] = 0;
> +}
> +
>  static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>  {
>         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> @@ -2559,7 +2573,9 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>         ndev->mvdev.cvq.completed_desc = 0;
>         memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
>         ndev->mvdev.actual_features = 0;
> +       init_group_to_asid_map(mvdev);
>         ++mvdev->generation;
> +
>         if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
>                 if (mlx5_vdpa_create_mr(mvdev, NULL))
>                         mlx5_vdpa_warn(mvdev, "create MR failed\n");
> @@ -2597,26 +2613,76 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
>         return mvdev->generation;
>  }
>
> -static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
> -                            struct vhost_iotlb *iotlb)
> +static u32 get_group(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> +{
> +       u32 group;
> +
> +       for (group = 0; group < MLX5_VDPA_NUMVQ_GROUPS; group++) {
> +               if (mvdev->group2asid[group] == asid)
> +                       return group;
> +       }
> +       return -EINVAL;
> +}
> +
> +static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> +{
> +       u64 start = 0ULL, last = 0ULL - 1;
> +       struct vhost_iotlb_map *map;
> +       int err = 0;
> +
> +       spin_lock(&mvdev->cvq.iommu_lock);
> +       vhost_iotlb_reset(mvdev->cvq.iotlb);
> +
> +       for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
> +            map = vhost_iotlb_itree_next(map, start, last)) {
> +               err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
> +                                           map->last, map->addr, map->perm);
> +               if (err)
> +                       goto out;
> +       }
> +
> +out:
> +       spin_unlock(&mvdev->cvq.iommu_lock);
> +       return err;
> +}
> +
> +static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
>  {
> -       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> -       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>         bool change_map;
>         int err;
>
> -       down_write(&ndev->reslock);
> -
>         err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
>         if (err) {
>                 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
> -               goto err;
> +               return err;
>         }
>
>         if (change_map)
>                 err = mlx5_vdpa_change_map(mvdev, iotlb);
>
> -err:
> +       return err;
> +}
> +
> +static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
> +                            struct vhost_iotlb *iotlb)
> +{
> +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> +       u32 group;
> +       int err;
> +
> +       down_write(&ndev->reslock);
> +       group = get_group(mvdev, asid);
> +       switch (group) {
> +       case MLX5_VDPA_DATAVQ_GROUP:
> +               err = set_map_data(mvdev, iotlb);
> +               break;
> +       case MLX5_VDPA_CVQ_GROUP:
> +               err = set_map_control(mvdev, iotlb);
> +               break;
> +       default:
> +               err = -EINVAL;
> +       }

This shouldn't be a switch; we should check the asid assigned to each
vq group individually.

In the current qemu version with no ASID support, all vq groups (data
and cvq) are assigned to asid 0 at device reset. In this case, the
emulated cvq also needs to receive the mappings, because the guest's
CVQ commands will be read directly from the guest's ASID.
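
Something along these lines (an illustrative sketch only, error
handling kept minimal):

        down_write(&ndev->reslock);
        err = 0;
        if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid)
                err = set_map_data(mvdev, iotlb);
        if (!err && mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
                err = set_map_control(mvdev, iotlb);
        up_write(&ndev->reslock);

so that when both groups point at the same asid (the no-ASID qemu case)
the hardware map and the emulated cvq map are both updated.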

Thanks!

>         up_write(&ndev->reslock);
>         return err;
>  }
> @@ -2796,6 +2862,18 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
>         return 0;
>  }
>
> +static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
> +                              unsigned int asid)
> +{
> +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> +
> +       if (group >= MLX5_VDPA_NUMVQ_GROUPS)
> +               return -EINVAL;
> +
> +       mvdev->group2asid[group] = asid;
> +       return 0;
> +}
> +
>  static const struct vdpa_config_ops mlx5_vdpa_ops = {
>         .set_vq_address = mlx5_vdpa_set_vq_address,
>         .set_vq_num = mlx5_vdpa_set_vq_num,
> @@ -2825,6 +2903,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
>         .set_config = mlx5_vdpa_set_config,
>         .get_generation = mlx5_vdpa_get_generation,
>         .set_map = mlx5_vdpa_set_map,
> +       .set_group_asid = mlx5_set_group_asid,
>         .free = mlx5_vdpa_free,
>         .suspend = mlx5_vdpa_suspend,
>  };
> @@ -3047,7 +3126,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>         }
>
>         ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
> -                                1, 1, name, false);
> +                                MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
>         if (IS_ERR(ndev))
>                 return PTR_ERR(ndev);
>
> --
> 2.35.1
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 3/3] vdpa/mlx5: Disable VLAN support to support live migration
  2022-06-20  8:47     ` Jason Wang
  (?)
@ 2022-06-20  9:01     ` Eugenio Perez Martin
  2022-06-20  9:25         ` Jason Wang
  -1 siblings, 1 reply; 42+ messages in thread
From: Eugenio Perez Martin @ 2022-06-20  9:01 UTC (permalink / raw)
  To: Jason Wang
  Cc: Eli Cohen, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Mon, Jun 20, 2022 at 10:48 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Thu, Jun 16, 2022 at 9:28 PM Eli Cohen <elic@nvidia.com> wrote:
> >
> > Current qemu code does not support live migration for devices supporting
> > VLAN. Disable it.
>
> This looks like a bug that we need to fix in Qemu.
>

Not a bug, but a lack of a feature :). Each cvq command needs new code
to inject it at the destination, and only the set mac cmd is implemented
at the moment, just to start simple.

Thanks!

> Thanks
>
> >
> > Note: this patch is provided just to enable testing with current qemu.
> >
> > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > ---
> >  drivers/vdpa/mlx5/net/mlx5_vnet.c | 1 -
> >  1 file changed, 1 deletion(-)
> >
> > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > index 34bd81cb697c..1568cfdf07e6 100644
> > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > @@ -2172,7 +2172,6 @@ static u64 get_supported_features(struct mlx5_core_dev *mdev)
> >         mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
> >         mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
> >         mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
> > -       mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
> >
> >         return mlx_vdpa_features;
> >  }
> > --
> > 2.35.1
> >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 2/3] vdpa/mlx5: Support different address spaces for control and data
  2022-06-20  8:57   ` Eugenio Perez Martin
@ 2022-06-20  9:20       ` Jason Wang
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2022-06-20  9:20 UTC (permalink / raw)
  To: Eugenio Perez Martin
  Cc: Michael Tsirkin, linux-kernel, virtualization, Eli Cohen

On Mon, Jun 20, 2022 at 4:58 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Thu, Jun 16, 2022 at 3:27 PM Eli Cohen <elic@nvidia.com> wrote:
> >
> > Partition virtqueues into two different address spaces: one for the control
> > virtqueue, which is implemented in software, and one for the data virtqueues.
> >
> > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > ---
> >  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  11 ++++
> >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 101 +++++++++++++++++++++++++----
> >  2 files changed, 101 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > index 44104093163b..6af9fdbb86b7 100644
> > --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > @@ -70,6 +70,16 @@ struct mlx5_vdpa_wq_ent {
> >         struct mlx5_vdpa_dev *mvdev;
> >  };
> >
> > +enum {
> > +       MLX5_VDPA_DATAVQ_GROUP,
> > +       MLX5_VDPA_CVQ_GROUP,
> > +       MLX5_VDPA_NUMVQ_GROUPS
> > +};
> > +
> > +enum {
> > +       MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS
> > +};
> > +
> >  struct mlx5_vdpa_dev {
> >         struct vdpa_device vdev;
> >         struct mlx5_core_dev *mdev;
> > @@ -85,6 +95,7 @@ struct mlx5_vdpa_dev {
> >         struct mlx5_vdpa_mr mr;
> >         struct mlx5_control_vq cvq;
> >         struct workqueue_struct *wq;
> > +       unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
> >  };
> >
> >  int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
> > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > index ea4bc8a0cd25..34bd81cb697c 100644
> > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > @@ -2125,9 +2125,14 @@ static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
> >         return PAGE_SIZE;
> >  }
> >
> > -static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx)
> > +static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
> >  {
> > -       return 0;
> > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > +
> > +       if (is_ctrl_vq_idx(mvdev, idx))
> > +               return MLX5_VDPA_CVQ_GROUP;
> > +
> > +       return MLX5_VDPA_DATAVQ_GROUP;
> >  }
> >
> >  enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
> > @@ -2541,6 +2546,15 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
> >         up_write(&ndev->reslock);
> >  }
> >
> > +static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
> > +{
> > +       int i;
> > +
> > +       /* default mapping: all groups are mapped to asid 0 */
> > +       for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
> > +               mvdev->group2asid[i] = 0;
> > +}
> > +
> >  static int mlx5_vdpa_reset(struct vdpa_device *vdev)
> >  {
> >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > @@ -2559,7 +2573,9 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
> >         ndev->mvdev.cvq.completed_desc = 0;
> >         memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
> >         ndev->mvdev.actual_features = 0;
> > +       init_group_to_asid_map(mvdev);
> >         ++mvdev->generation;
> > +
> >         if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
> >                 if (mlx5_vdpa_create_mr(mvdev, NULL))
> >                         mlx5_vdpa_warn(mvdev, "create MR failed\n");
> > @@ -2597,26 +2613,76 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
> >         return mvdev->generation;
> >  }
> >
> > -static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
> > -                            struct vhost_iotlb *iotlb)
> > +static u32 get_group(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> > +{
> > +       u32 group;
> > +
> > +       for (group = 0; group < MLX5_VDPA_NUMVQ_GROUPS; group++) {
> > +               if (mvdev->group2asid[group] == asid)
> > +                       return group;
> > +       }
> > +       return -EINVAL;
> > +}
> > +
> > +static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> > +{
> > +       u64 start = 0ULL, last = 0ULL - 1;
> > +       struct vhost_iotlb_map *map;
> > +       int err = 0;
> > +
> > +       spin_lock(&mvdev->cvq.iommu_lock);
> > +       vhost_iotlb_reset(mvdev->cvq.iotlb);
> > +
> > +       for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
> > +            map = vhost_iotlb_itree_next(map, start, last)) {
> > +               err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
> > +                                           map->last, map->addr, map->perm);
> > +               if (err)
> > +                       goto out;
> > +       }
> > +
> > +out:
> > +       spin_unlock(&mvdev->cvq.iommu_lock);
> > +       return err;
> > +}
> > +
> > +static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> >  {
> > -       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > -       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> >         bool change_map;
> >         int err;
> >
> > -       down_write(&ndev->reslock);
> > -
> >         err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
> >         if (err) {
> >                 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
> > -               goto err;
> > +               return err;
> >         }
> >
> >         if (change_map)
> >                 err = mlx5_vdpa_change_map(mvdev, iotlb);
> >
> > -err:
> > +       return err;
> > +}
> > +
> > +static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
> > +                            struct vhost_iotlb *iotlb)
> > +{
> > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > +       u32 group;
> > +       int err;
> > +
> > +       down_write(&ndev->reslock);
> > +       group = get_group(mvdev, asid);
> > +       switch (group) {
> > +       case MLX5_VDPA_DATAVQ_GROUP:
> > +               err = set_map_data(mvdev, iotlb);
> > +               break;
> > +       case MLX5_VDPA_CVQ_GROUP:
> > +               err = set_map_control(mvdev, iotlb);
> > +               break;
> > +       default:
> > +               err = -EINVAL;
> > +       }
>
> This shouldn't be a switch; we should check the asid assigned to each
> vq group individually.
>
> In the current qemu version with no ASID support, all vq groups (data
> and cvq) are assigned to asid 0 at device reset. In this case, the
> emulated cvq also needs to receive the mappings, because the guest's
> CVQ commands will be read directly from the guest's ASID.

Ack.

Thanks

>
> Thanks!
>
> >         up_write(&ndev->reslock);
> >         return err;
> >  }
> > @@ -2796,6 +2862,18 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> >         return 0;
> >  }
> >
> > +static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
> > +                              unsigned int asid)
> > +{
> > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > +
> > +       if (group >= MLX5_VDPA_NUMVQ_GROUPS)
> > +               return -EINVAL;
> > +
> > +       mvdev->group2asid[group] = asid;
> > +       return 0;
> > +}
> > +
> >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> >         .set_vq_address = mlx5_vdpa_set_vq_address,
> >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > @@ -2825,6 +2903,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> >         .set_config = mlx5_vdpa_set_config,
> >         .get_generation = mlx5_vdpa_get_generation,
> >         .set_map = mlx5_vdpa_set_map,
> > +       .set_group_asid = mlx5_set_group_asid,
> >         .free = mlx5_vdpa_free,
> >         .suspend = mlx5_vdpa_suspend,
> >  };
> > @@ -3047,7 +3126,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> >         }
> >
> >         ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
> > -                                1, 1, name, false);
> > +                                MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
> >         if (IS_ERR(ndev))
> >                 return PTR_ERR(ndev);
> >
> > --
> > 2.35.1
> >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 3/3] vdpa/mlx5: Disable VLAN support to support live migration
  2022-06-20  9:01     ` Eugenio Perez Martin
@ 2022-06-20  9:25         ` Jason Wang
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2022-06-20  9:25 UTC (permalink / raw)
  To: Eugenio Perez Martin
  Cc: Eli Cohen, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Mon, Jun 20, 2022 at 5:02 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Mon, Jun 20, 2022 at 10:48 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Thu, Jun 16, 2022 at 9:28 PM Eli Cohen <elic@nvidia.com> wrote:
> > >
> > > Current qemu code does not support live migration for devices supporting
> > > VLAN. Disable it.
> >
> > This looks like a bug that we need to fix in Qemu.
> >
>
> Not a bug, but a lack of a feature :). Each cvq command needs new code
> to inject it at the destination, and only the set mac cmd is implemented
> at the moment, just to start simple.

I think we don't need this in the formal patch? (Anyhow we could
disable ctrl vlan via the command line)
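
For instance, via qemu's virtio-net device properties the feature can
be masked per device; the vhostdev path below is made up and the exact
netdev wiring may differ, so treat this as a sketch:

  -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vdpa0 \
  -device virtio-net-pci,netdev=vdpa0,ctrl_vlan=off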

Thanks

>
> Thanks!
>
> > Thanks
> >
> > >
> > > Note: this patch is provided just to enable testing with current qemu.
> > >
> > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > ---
> > >  drivers/vdpa/mlx5/net/mlx5_vnet.c | 1 -
> > >  1 file changed, 1 deletion(-)
> > >
> > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > index 34bd81cb697c..1568cfdf07e6 100644
> > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > @@ -2172,7 +2172,6 @@ static u64 get_supported_features(struct mlx5_core_dev *mdev)
> > >         mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
> > >         mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
> > >         mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
> > > -       mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
> > >
> > >         return mlx_vdpa_features;
> > >  }
> > > --
> > > 2.35.1
> > >
> >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
  2022-06-20  8:56     ` Jason Wang
  (?)
@ 2022-06-20  9:58     ` Eugenio Perez Martin
  2022-06-20 10:06         ` Michael S. Tsirkin
  2022-06-21  3:04         ` Jason Wang
  -1 siblings, 2 replies; 42+ messages in thread
From: Eugenio Perez Martin @ 2022-06-20  9:58 UTC (permalink / raw)
  To: Jason Wang
  Cc: Eli Cohen, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> >
> > Implement the suspend callback allowing to suspend the virtqueues so
> > they stop processing descriptors. This is required to allow the shadow
> > virtqueue to kick in.
> >
> > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > ---
> >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> >  2 files changed, 75 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > index fb0b23e71383..ea4bc8a0cd25 100644
> > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> >         if (err)
> >                 goto err_cmd;
> >
> > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> >         kfree(in);
> >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> >
> > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> >                 return;
> >         }
> > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> >         umems_destroy(ndev, mvq);
> >  }
> >
> > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> >         return err;
> >  }
> >
> > +static bool is_valid_state_change(int oldstate, int newstate)
> > +{
> > +       switch (oldstate) {
> > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > +       default:
> > +               return false;
> > +       }
> > +}
> > +
> >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> >  {
> >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> >         void *in;
> >         int err;
> >
> > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > +               return 0;
> > +
> > +       if (!is_valid_state_change(mvq->fw_state, state))
> > +               return -EINVAL;
> > +
> >         in = kzalloc(inlen, GFP_KERNEL);
> >         if (!in)
> >                 return -ENOMEM;
> > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> >         struct mlx5_vdpa_virtqueue *mvq;
> > +       int err;
> >
> >         if (!mvdev->actual_features)
> >                 return;
> > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> >         }
> >
> >         mvq = &ndev->vqs[idx];
> > -       if (!ready)
> > +       if (!ready) {
> >                 suspend_vq(ndev, mvq);
> > +       } else {
> > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > +               if (err) {
> > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > +                       ready = false;
> > +               }
> > +       }
> > +
> >
> >         mvq->ready = ready;
> >  }
> > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> >         return err;
> >  }
> >
> > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > +{
> > +       struct mlx5_control_vq *cvq;
> > +
> > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > +               return;
> > +
> > +       cvq = &mvdev->cvq;
> > +       cvq->ready = !suspend;
> > +}
>
> It looks to me like we need to synchronize this with reslock. And this
> probably deserves a dedicated fix.
>
> > +
> > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > +{
> > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > +       struct mlx5_vdpa_virtqueue *mvq;
> > +       int i;
> > +
> > +       if (!suspend) {
> > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > +               return -EOPNOTSUPP;
> > +       }
> > +
> > +       down_write(&ndev->reslock);
> > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > +               mvq = &ndev->vqs[i];
> > +               suspend_vq(ndev, mvq);
> > +       }
> > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
>
> Do we need to synchronize with the carrier work here? Otherwise we may
> get a config notification after suspending.
>
> > +       up_write(&ndev->reslock);
> > +       return 0;
> > +}
> > +
> >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> >         .set_vq_address = mlx5_vdpa_set_vq_address,
> >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> >         .get_generation = mlx5_vdpa_get_generation,
> >         .set_map = mlx5_vdpa_set_map,
> >         .free = mlx5_vdpa_free,
> > +       .suspend = mlx5_vdpa_suspend,
>
> I don't see the vDPA bus patch to enable this method. Or anything I missed here?
>

Should we add
Based-on: <20220526124338.36247-1-eperezma@redhat.com>

To this series?

> Thanks
>
> >  };
> >
> >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> >                 mvq->index = i;
> >                 mvq->ndev = ndev;
> >                 mvq->fwqp.fw = true;
> > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> >         }
> >         for (; i < ndev->mvdev.max_vqs; i++) {
> >                 mvq = &ndev->vqs[i];
> > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > index 4414ed5b6ed2..423562f39d3c 100644
> > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > @@ -150,6 +150,14 @@ enum {
> >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> >  };
> >
> > > +/* This indicates that the object was not created or has already
> > > + * been destroyed. It is very safe to assume that this object will never
> > + * have so many states
> > + */
> > +enum {
> > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > +};
> > +
> >  enum {
> >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > --
> > 2.35.1
> >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
  2022-06-20  9:58     ` Eugenio Perez Martin
@ 2022-06-20 10:06         ` Michael S. Tsirkin
  2022-06-21  3:04         ` Jason Wang
  1 sibling, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2022-06-20 10:06 UTC (permalink / raw)
  To: Eugenio Perez Martin
  Cc: Jason Wang, Eli Cohen, virtualization, linux-kernel, Si-Wei Liu,
	Parav Pandit

On Mon, Jun 20, 2022 at 11:58:33AM +0200, Eugenio Perez Martin wrote:
> On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > >
> > > Implement the suspend callback allowing to suspend the virtqueues so
> > > they stop processing descriptors. This is required to allow the shadow
> > > virtqueue to kick in.
> > >
> > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > ---
> > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > >         if (err)
> > >                 goto err_cmd;
> > >
> > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > >         kfree(in);
> > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > >
> > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > >                 return;
> > >         }
> > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > >         umems_destroy(ndev, mvq);
> > >  }
> > >
> > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > >         return err;
> > >  }
> > >
> > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > +{
> > > +       switch (oldstate) {
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > +       default:
> > > +               return false;
> > > +       }
> > > +}
> > > +
> > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > >  {
> > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > >         void *in;
> > >         int err;
> > >
> > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > +               return 0;
> > > +
> > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > +               return -EINVAL;
> > > +
> > >         in = kzalloc(inlen, GFP_KERNEL);
> > >         if (!in)
> > >                 return -ENOMEM;
> > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > >         struct mlx5_vdpa_virtqueue *mvq;
> > > +       int err;
> > >
> > >         if (!mvdev->actual_features)
> > >                 return;
> > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > >         }
> > >
> > >         mvq = &ndev->vqs[idx];
> > > -       if (!ready)
> > > +       if (!ready) {
> > >                 suspend_vq(ndev, mvq);
> > > +       } else {
> > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > +               if (err) {
> > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > +                       ready = false;
> > > +               }
> > > +       }
> > > +
> > >
> > >         mvq->ready = ready;
> > >  }
> > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > >         return err;
> > >  }
> > >
> > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > +{
> > > +       struct mlx5_control_vq *cvq;
> > > +
> > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > +               return;
> > > +
> > > +       cvq = &mvdev->cvq;
> > > +       cvq->ready = !suspend;
> > > +}
> >
> > It looks to me like we need to synchronize this with reslock. And this
> > probably deserves a dedicated fix.
> >
> > > +
> > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > +{
> > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > +       int i;
> > > +
> > > +       if (!suspend) {
> > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > +               return -EOPNOTSUPP;
> > > +       }
> > > +
> > > +       down_write(&ndev->reslock);
> > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > +               mvq = &ndev->vqs[i];
> > > +               suspend_vq(ndev, mvq);
> > > +       }
> > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> >
> > Do we need to synchronize with the carrier work here? Otherwise we may
> > get a config notification after suspending.
> >
> > > +       up_write(&ndev->reslock);
> > > +       return 0;
> > > +}
> > > +
> > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > >         .get_generation = mlx5_vdpa_get_generation,
> > >         .set_map = mlx5_vdpa_set_map,
> > >         .free = mlx5_vdpa_free,
> > > +       .suspend = mlx5_vdpa_suspend,
> >
> > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> >
> 
> Should we add
> Based-on: <20220526124338.36247-1-eperezma@redhat.com>
> 
> To this series?

If it's based on your patch then mentioning this in the log and
including the S.O.B. is customary. What would this tag add?
Was there relevant discussion?


> > Thanks
> >
> > >  };
> > >
> > >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> > >                 mvq->index = i;
> > >                 mvq->ndev = ndev;
> > >                 mvq->fwqp.fw = true;
> > > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > >         }
> > >         for (; i < ndev->mvdev.max_vqs; i++) {
> > >                 mvq = &ndev->vqs[i];
> > > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > index 4414ed5b6ed2..423562f39d3c 100644
> > > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > @@ -150,6 +150,14 @@ enum {
> > >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> > >  };
> > >
> > > +/* This indicates that the object was not created or has already
> > > + * been destroyed. It is very safe to assume that this object will never
> > > + * have so many states
> > > + */
> > > +enum {
> > > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > > +};
> > > +
> > >  enum {
> > >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> > >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > > --
> > > 2.35.1
> > >
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

> > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > >         .get_generation = mlx5_vdpa_get_generation,
> > >         .set_map = mlx5_vdpa_set_map,
> > >         .free = mlx5_vdpa_free,
> > > +       .suspend = mlx5_vdpa_suspend,
> >
> > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> >
> 
> Should we add
> Based-on: <20220526124338.36247-1-eperezma@redhat.com>
> 
> To this series?

If it's based on your patch, then mentioning this in the log and
including the S.O.B. is customary. What would this tag add?
Was there relevant discussion?


> > Thanks
> >
> > >  };
> > >
> > >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> > >                 mvq->index = i;
> > >                 mvq->ndev = ndev;
> > >                 mvq->fwqp.fw = true;
> > > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > >         }
> > >         for (; i < ndev->mvdev.max_vqs; i++) {
> > >                 mvq = &ndev->vqs[i];
> > > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > index 4414ed5b6ed2..423562f39d3c 100644
> > > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > @@ -150,6 +150,14 @@ enum {
> > >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> > >  };
> > >
> > > +/* This indicates that the object was not created or has already
> > > + * been destroyed. It is very safe to assume that this object will never
> > > + * have so many states
> > > + */
> > > +enum {
> > > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > > +};
> > > +
> > >  enum {
> > >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> > >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > > --
> > > 2.35.1
> > >
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
  2022-06-16 13:27 ` [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback Eli Cohen
                     ` (2 preceding siblings ...)
  2022-06-20  8:56     ` Jason Wang
@ 2022-06-20 10:07   ` Eugenio Perez Martin
  3 siblings, 0 replies; 42+ messages in thread
From: Eugenio Perez Martin @ 2022-06-20 10:07 UTC (permalink / raw)
  To: Eli Cohen
  Cc: Jason Wang, Michael Tsirkin, virtualization, linux-kernel,
	Si-Wei Liu, Parav Pandit

> +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> +{
> +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> +       struct mlx5_vdpa_virtqueue *mvq;
> +       int i;
> +
> +       if (!suspend) {
> +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");

If the resume part is a problem, maybe we can split the vdpa_sim
series so it only adds a callback to suspend() the device. If needed,
we can add a resume() later. Suspend is the only operation we need to
perform live migration.

Thoughts on this?

Thanks!

> +               return -EOPNOTSUPP;
> +       }
> +
> +       down_write(&ndev->reslock);
> +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> +               mvq = &ndev->vqs[i];
> +               suspend_vq(ndev, mvq);
> +       }
> +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> +       up_write(&ndev->reslock);
> +       return 0;
> +}
> +


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
  2022-06-20 10:06         ` Michael S. Tsirkin
  (?)
@ 2022-06-20 11:09         ` Eugenio Perez Martin
  -1 siblings, 0 replies; 42+ messages in thread
From: Eugenio Perez Martin @ 2022-06-20 11:09 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Jason Wang, Eli Cohen, virtualization, linux-kernel, Si-Wei Liu,
	Parav Pandit

On Mon, Jun 20, 2022 at 12:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Jun 20, 2022 at 11:58:33AM +0200, Eugenio Perez Martin wrote:
> > On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > > >
> > > > Implement the suspend callback allowing to suspend the virtqueues so
> > > > they stop processing descriptors. This is required to allow the shadow
> > > > virtqueue to kick in.
> > > >
> > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > ---
> > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > >         if (err)
> > > >                 goto err_cmd;
> > > >
> > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > > >         kfree(in);
> > > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > > >
> > > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > > >                 return;
> > > >         }
> > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > >         umems_destroy(ndev, mvq);
> > > >  }
> > > >
> > > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > > >         return err;
> > > >  }
> > > >
> > > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > > +{
> > > > +       switch (oldstate) {
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > > +       default:
> > > > +               return false;
> > > > +       }
> > > > +}
> > > > +
> > > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > > >  {
> > > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > >         void *in;
> > > >         int err;
> > > >
> > > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > > +               return 0;
> > > > +
> > > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > > +               return -EINVAL;
> > > > +
> > > >         in = kzalloc(inlen, GFP_KERNEL);
> > > >         if (!in)
> > > >                 return -ENOMEM;
> > > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > >         struct mlx5_vdpa_virtqueue *mvq;
> > > > +       int err;
> > > >
> > > >         if (!mvdev->actual_features)
> > > >                 return;
> > > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > >         }
> > > >
> > > >         mvq = &ndev->vqs[idx];
> > > > -       if (!ready)
> > > > +       if (!ready) {
> > > >                 suspend_vq(ndev, mvq);
> > > > +       } else {
> > > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > > +               if (err) {
> > > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > > +                       ready = false;
> > > > +               }
> > > > +       }
> > > > +
> > > >
> > > >         mvq->ready = ready;
> > > >  }
> > > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > > >         return err;
> > > >  }
> > > >
> > > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > > +{
> > > > +       struct mlx5_control_vq *cvq;
> > > > +
> > > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > +               return;
> > > > +
> > > > +       cvq = &mvdev->cvq;
> > > > +       cvq->ready = !suspend;
> > > > +}
> > >
> > > It looks to me we need to synchronize this with reslock. And this
> > > probably deserve a dedicated fix.
> > >
> > > > +
> > > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > > +{
> > > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > +       int i;
> > > > +
> > > > +       if (!suspend) {
> > > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > > +               return -EOPNOTSUPP;
> > > > +       }
> > > > +
> > > > +       down_write(&ndev->reslock);
> > > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > > +               mvq = &ndev->vqs[i];
> > > > +               suspend_vq(ndev, mvq);
> > > > +       }
> > > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> > >
> > > Do we need to synchronize with the carrier work here? Otherwise we may
> > > get config notification after suspending.
> > >
> > > > +       up_write(&ndev->reslock);
> > > > +       return 0;
> > > > +}
> > > > +
> > > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > >         .get_generation = mlx5_vdpa_get_generation,
> > > >         .set_map = mlx5_vdpa_set_map,
> > > >         .free = mlx5_vdpa_free,
> > > > +       .suspend = mlx5_vdpa_suspend,
> > >
> > > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> > >
> >
> > Should we add
> > Based-on: <20220526124338.36247-1-eperezma@redhat.com>
> >
> > To this series?
>
> If it's based on your patch, then mentioning this in the log and
> including the S.O.B. is customary. What would this tag add?
> Was there relevant discussion?
>

Sorry, I think I need to expand on it. I was using the meaning from
QEMU's submitting-patches guide, which says:
"It is also okay to base patches on top of other on-going work that is
not yet part of the git master branch."

So these patches are not modifications of my patches; they should be
applied on top of that series.

My series is the one that adds the "vdpa bus method" Jason is
referring to (.suspend). That series is able to suspend the vdpa net
simulator by itself. If we apply this series *on top* of my previous
series, it gives us the vdpa op to suspend the mlx5 device.

Thanks!

>
> > > Thanks
> > >
> > > >  };
> > > >
> > > >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > > > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> > > >                 mvq->index = i;
> > > >                 mvq->ndev = ndev;
> > > >                 mvq->fwqp.fw = true;
> > > > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > >         }
> > > >         for (; i < ndev->mvdev.max_vqs; i++) {
> > > >                 mvq = &ndev->vqs[i];
> > > > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > index 4414ed5b6ed2..423562f39d3c 100644
> > > > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > @@ -150,6 +150,14 @@ enum {
> > > >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> > > >  };
> > > >
> > > > +/* This indicates that the object was not created or has already
> > > > + * been destroyed. It is very safe to assume that this object will never
> > > > + * have so many states
> > > > + */
> > > > +enum {
> > > > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > > > +};
> > > > +
> > > >  enum {
> > > >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> > > >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > > > --
> > > > 2.35.1
> > > >
> > >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* RE: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
  2022-06-20  8:56     ` Jason Wang
  (?)
  (?)
@ 2022-06-20 13:09     ` Eli Cohen
  2022-06-21  2:58         ` Jason Wang
  -1 siblings, 1 reply; 42+ messages in thread
From: Eli Cohen @ 2022-06-20 13:09 UTC (permalink / raw)
  To: Jason Wang
  Cc: eperezma, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

> -----Original Message-----
> From: Jason Wang <jasowang@redhat.com>
> Sent: Monday, June 20, 2022 11:56 AM
> To: Eli Cohen <elic@nvidia.com>
> Cc: eperezma <eperezma@redhat.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>; linux-
> kernel <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
> 
> On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> >
> > Implement the suspend callback allowing to suspend the virtqueues so
> > they stop processing descriptors. This is required to allow the shadow
> > virtqueue to kick in.
> >
> > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > ---
> >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> >  2 files changed, 75 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > index fb0b23e71383..ea4bc8a0cd25 100644
> > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> >         if (err)
> >                 goto err_cmd;
> >
> > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> >         kfree(in);
> >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> >
> > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> >                 return;
> >         }
> > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> >         umems_destroy(ndev, mvq);
> >  }
> >
> > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> >         return err;
> >  }
> >
> > +static bool is_valid_state_change(int oldstate, int newstate)
> > +{
> > +       switch (oldstate) {
> > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > +       default:
> > +               return false;
> > +       }
> > +}
> > +
> >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> >  {
> >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> >         void *in;
> >         int err;
> >
> > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > +               return 0;
> > +
> > +       if (!is_valid_state_change(mvq->fw_state, state))
> > +               return -EINVAL;
> > +
> >         in = kzalloc(inlen, GFP_KERNEL);
> >         if (!in)
> >                 return -ENOMEM;
> > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> >         struct mlx5_vdpa_virtqueue *mvq;
> > +       int err;
> >
> >         if (!mvdev->actual_features)
> >                 return;
> > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> >         }
> >
> >         mvq = &ndev->vqs[idx];
> > -       if (!ready)
> > +       if (!ready) {
> >                 suspend_vq(ndev, mvq);
> > +       } else {
> > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > +               if (err) {
> > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > +                       ready = false;
> > +               }
> > +       }
> > +
> >
> >         mvq->ready = ready;
> >  }
> > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> >         return err;
> >  }
> >
> > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > +{
> > +       struct mlx5_control_vq *cvq;
> > +
> > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > +               return;
> > +
> > +       cvq = &mvdev->cvq;
> > +       cvq->ready = !suspend;
> > +}
> 
> It looks to me we need to synchronize this with reslock. And this
> probably deserve a dedicated fix.
> 

It's already being held by mlx5_vdpa_suspend

> > +
> > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > +{
> > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > +       struct mlx5_vdpa_virtqueue *mvq;
> > +       int i;
> > +
> > +       if (!suspend) {
> > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > +               return -EOPNOTSUPP;
> > +       }
> > +
> > +       down_write(&ndev->reslock);
> > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > +               mvq = &ndev->vqs[i];
> > +               suspend_vq(ndev, mvq);
> > +       }
> > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> 
> Do we need to synchronize with the carrier work here? Otherwise we may
> get config notification after suspending.
> 

Are you saying we should not allow carrier updates after the VQs have been suspended?
Link state should not be related to suspension of VQs.

> > +       up_write(&ndev->reslock);
> > +       return 0;
> > +}
> > +
> >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> >         .set_vq_address = mlx5_vdpa_set_vq_address,
> >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> >         .get_generation = mlx5_vdpa_get_generation,
> >         .set_map = mlx5_vdpa_set_map,
> >         .free = mlx5_vdpa_free,
> > +       .suspend = mlx5_vdpa_suspend,
> 
> I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> 
> Thanks
> 
> >  };
> >
> >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> >                 mvq->index = i;
> >                 mvq->ndev = ndev;
> >                 mvq->fwqp.fw = true;
> > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> >         }
> >         for (; i < ndev->mvdev.max_vqs; i++) {
> >                 mvq = &ndev->vqs[i];
> > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > index 4414ed5b6ed2..423562f39d3c 100644
> > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > @@ -150,6 +150,14 @@ enum {
> >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> >  };
> >
> > +/* This indicates that the object was not created or has already
> > + * been destroyed. It is very safe to assume that this object will never
> > + * have so many states
> > + */
> > +enum {
> > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > +};
> > +
> >  enum {
> >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > --
> > 2.35.1
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
  2022-06-20 13:09     ` Eli Cohen
@ 2022-06-21  2:58         ` Jason Wang
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2022-06-21  2:58 UTC (permalink / raw)
  To: Eli Cohen
  Cc: eperezma, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Mon, Jun 20, 2022 at 9:09 PM Eli Cohen <elic@nvidia.com> wrote:
>
> > -----Original Message-----
> > From: Jason Wang <jasowang@redhat.com>
> > Sent: Monday, June 20, 2022 11:56 AM
> > To: Eli Cohen <elic@nvidia.com>
> > Cc: eperezma <eperezma@redhat.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>; linux-
> > kernel <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> > Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
> >
> > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > >
> > > Implement the suspend callback allowing to suspend the virtqueues so
> > > they stop processing descriptors. This is required to allow the shadow
> > > virtqueue to kick in.
> > >
> > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > ---
> > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > >         if (err)
> > >                 goto err_cmd;
> > >
> > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > >         kfree(in);
> > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > >
> > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > >                 return;
> > >         }
> > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > >         umems_destroy(ndev, mvq);
> > >  }
> > >
> > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > >         return err;
> > >  }
> > >
> > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > +{
> > > +       switch (oldstate) {
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > +       default:
> > > +               return false;
> > > +       }
> > > +}
> > > +
> > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > >  {
> > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > >         void *in;
> > >         int err;
> > >
> > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > +               return 0;
> > > +
> > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > +               return -EINVAL;
> > > +
> > >         in = kzalloc(inlen, GFP_KERNEL);
> > >         if (!in)
> > >                 return -ENOMEM;
> > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > >         struct mlx5_vdpa_virtqueue *mvq;
> > > +       int err;
> > >
> > >         if (!mvdev->actual_features)
> > >                 return;
> > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > >         }
> > >
> > >         mvq = &ndev->vqs[idx];
> > > -       if (!ready)
> > > +       if (!ready) {
> > >                 suspend_vq(ndev, mvq);
> > > +       } else {
> > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > +               if (err) {
> > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > +                       ready = false;
> > > +               }
> > > +       }
> > > +
> > >
> > >         mvq->ready = ready;
> > >  }
> > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > >         return err;
> > >  }
> > >
> > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > +{
> > > +       struct mlx5_control_vq *cvq;
> > > +
> > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > +               return;
> > > +
> > > +       cvq = &mvdev->cvq;
> > > +       cvq->ready = !suspend;
> > > +}
> >
> > It looks to me we need to synchronize this with reslock. And this
> > probably deserve a dedicated fix.
> >
>
> It's already being held by mlx5_vdpa_suspend

Right, but I meant this seems kind of duplicated with set_cvq_ready();
can we unify them? (We don't hold reslock there.)
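
Something like this common helper, for instance (just a sketch to show
the idea; the helper name is made up, and how set_cvq_ready() would
take reslock still needs to be sorted out):

/* One place to flip CVQ readiness; callers take care of reslock */
static void mlx5_vdpa_cvq_set_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
		return;

	mvdev->cvq.ready = ready;
}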

>
> > > +
> > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > +{
> > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > +       int i;
> > > +
> > > +       if (!suspend) {
> > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > +               return -EOPNOTSUPP;
> > > +       }
> > > +
> > > +       down_write(&ndev->reslock);
> > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > +               mvq = &ndev->vqs[i];
> > > +               suspend_vq(ndev, mvq);
> > > +       }
> > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> >
> > Do we need to synchronize with the carrier work here? Otherwise we may
> > get config notification after suspending.
> >
>
> Are you saying we should not allow carrier updates after the VQs have been suspended?
> Link state should not be related to suspension of VQs.

Yes, it's not related to the VQ but we suspend the device here. So we
probably need to flush the carrier work.
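
I.e. roughly something like this in mlx5_vdpa_suspend() (a sketch; I'm
assuming the link-state update is queued on mvdev->wq, and whether it
is safe to flush while holding reslock would need checking):

	/* make sure no carrier/config notification races with suspend */
	flush_workqueue(mvdev->wq);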

Thanks

>
> > > +       up_write(&ndev->reslock);
> > > +       return 0;
> > > +}
> > > +
> > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > >         .get_generation = mlx5_vdpa_get_generation,
> > >         .set_map = mlx5_vdpa_set_map,
> > >         .free = mlx5_vdpa_free,
> > > +       .suspend = mlx5_vdpa_suspend,
> >
> > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> >
> > Thanks
> >
> > >  };
> > >
> > >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> > >                 mvq->index = i;
> > >                 mvq->ndev = ndev;
> > >                 mvq->fwqp.fw = true;
> > > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > >         }
> > >         for (; i < ndev->mvdev.max_vqs; i++) {
> > >                 mvq = &ndev->vqs[i];
> > > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > index 4414ed5b6ed2..423562f39d3c 100644
> > > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > @@ -150,6 +150,14 @@ enum {
> > >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> > >  };
> > >
> > > +/* This indicates that the object was not created or has already
> > > + * been destroyed. It is very safe to assume that this object will never
> > > + * have so many states
> > > + */
> > > +enum {
> > > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > > +};
> > > +
> > >  enum {
> > >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> > >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > > --
> > > 2.35.1
> > >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
  2022-06-20  9:58     ` Eugenio Perez Martin
@ 2022-06-21  3:04         ` Jason Wang
  2022-06-21  3:04         ` Jason Wang
  1 sibling, 0 replies; 42+ messages in thread
From: Jason Wang @ 2022-06-21  3:04 UTC (permalink / raw)
  To: Eugenio Perez Martin
  Cc: Eli Cohen, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Mon, Jun 20, 2022 at 5:59 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > >
> > > Implement the suspend callback allowing to suspend the virtqueues so
> > > they stop processing descriptors. This is required to allow the shadow
> > > virtqueue to kick in.
> > >
> > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > ---
> > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > >         if (err)
> > >                 goto err_cmd;
> > >
> > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > >         kfree(in);
> > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > >
> > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > >                 return;
> > >         }
> > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > >         umems_destroy(ndev, mvq);
> > >  }
> > >
> > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > >         return err;
> > >  }
> > >
> > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > +{
> > > +       switch (oldstate) {
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > +       default:
> > > +               return false;
> > > +       }
> > > +}
> > > +
> > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > >  {
> > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > >         void *in;
> > >         int err;
> > >
> > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > +               return 0;
> > > +
> > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > +               return -EINVAL;
> > > +
> > >         in = kzalloc(inlen, GFP_KERNEL);
> > >         if (!in)
> > >                 return -ENOMEM;
> > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > >         struct mlx5_vdpa_virtqueue *mvq;
> > > +       int err;
> > >
> > >         if (!mvdev->actual_features)
> > >                 return;
> > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > >         }
> > >
> > >         mvq = &ndev->vqs[idx];
> > > -       if (!ready)
> > > +       if (!ready) {
> > >                 suspend_vq(ndev, mvq);
> > > +       } else {
> > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > +               if (err) {
> > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > +                       ready = false;
> > > +               }
> > > +       }
> > > +
> > >
> > >         mvq->ready = ready;
> > >  }
> > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > >         return err;
> > >  }
> > >
> > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > +{
> > > +       struct mlx5_control_vq *cvq;
> > > +
> > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > +               return;
> > > +
> > > +       cvq = &mvdev->cvq;
> > > +       cvq->ready = !suspend;
> > > +}
> >
> > It looks to me we need to synchronize this with reslock. And this
> > probably deserve a dedicated fix.
> >
> > > +
> > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > +{
> > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > +       int i;
> > > +
> > > +       if (!suspend) {
> > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > +               return -EOPNOTSUPP;
> > > +       }
> > > +
> > > +       down_write(&ndev->reslock);
> > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > +               mvq = &ndev->vqs[i];
> > > +               suspend_vq(ndev, mvq);
> > > +       }
> > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> >
> > Do we need to synchronize with the carrier work here? Otherwise we may
> > get config notification after suspending.
> >
> > > +       up_write(&ndev->reslock);
> > > +       return 0;
> > > +}
> > > +
> > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > >         .get_generation = mlx5_vdpa_get_generation,
> > >         .set_map = mlx5_vdpa_set_map,
> > >         .free = mlx5_vdpa_free,
> > > +       .suspend = mlx5_vdpa_suspend,
> >
> > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> >
>
> Should we add
> Based-on: <20220526124338.36247-1-eperezma@redhat.com>
>
> To this series?

Probably, but that series seems to support resume while this series doesn't.

Any reason for this?

(I don't see any blocker for this, especially considering parents can
choose to do reset + set_vring_state, etc.)
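
For mlx5 that could mean emulating resume by re-creating the queue
object, very roughly (a pure sketch: no error handling, and how the
saved attributes get fed back into create is hand-waved):

	/* "resume": save state, re-create the object, make it ready */
	query_virtqueue(ndev, mvq, &attr);	/* capture avail/used idx */
	destroy_virtqueue(ndev, mvq);
	create_virtqueue(ndev, mvq);		/* would need to take attr */
	modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);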

Thanks

>
> > Thanks
> >
> > >  };
> > >
> > >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> > >                 mvq->index = i;
> > >                 mvq->ndev = ndev;
> > >                 mvq->fwqp.fw = true;
> > > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > >         }
> > >         for (; i < ndev->mvdev.max_vqs; i++) {
> > >                 mvq = &ndev->vqs[i];
> > > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > index 4414ed5b6ed2..423562f39d3c 100644
> > > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > @@ -150,6 +150,14 @@ enum {
> > >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> > >  };
> > >
> > > +/* This indicates that the object was not created or has already
> > > + * been destroyed. It is very safe to assume that this object will never
> > > + * have so many states
> > > + */
> > > +enum {
> > > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > > +};
> > > +
> > >  enum {
> > >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> > >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > > --
> > > 2.35.1
> > >
> >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
@ 2022-06-21  3:04         ` Jason Wang
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2022-06-21  3:04 UTC (permalink / raw)
  To: Eugenio Perez Martin; +Cc: mst, linux-kernel, virtualization, Eli Cohen

On Mon, Jun 20, 2022 at 5:59 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > >
> > > Implement the suspend callback allowing to suspend the virtqueues so
> > > they stop processing descriptors. This is required to allow the shadow
> > > virtqueue to kick in.
> > >
> > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > ---
> > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > >         if (err)
> > >                 goto err_cmd;
> > >
> > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > >         kfree(in);
> > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > >
> > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > >                 return;
> > >         }
> > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > >         umems_destroy(ndev, mvq);
> > >  }
> > >
> > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > >         return err;
> > >  }
> > >
> > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > +{
> > > +       switch (oldstate) {
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > +       default:
> > > +               return false;
> > > +       }
> > > +}
> > > +
> > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > >  {
> > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > >         void *in;
> > >         int err;
> > >
> > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > +               return 0;
> > > +
> > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > +               return -EINVAL;
> > > +
> > >         in = kzalloc(inlen, GFP_KERNEL);
> > >         if (!in)
> > >                 return -ENOMEM;
> > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > >         struct mlx5_vdpa_virtqueue *mvq;
> > > +       int err;
> > >
> > >         if (!mvdev->actual_features)
> > >                 return;
> > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > >         }
> > >
> > >         mvq = &ndev->vqs[idx];
> > > -       if (!ready)
> > > +       if (!ready) {
> > >                 suspend_vq(ndev, mvq);
> > > +       } else {
> > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > +               if (err) {
> > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > +                       ready = false;
> > > +               }
> > > +       }
> > > +
> > >
> > >         mvq->ready = ready;
> > >  }
> > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > >         return err;
> > >  }
> > >
> > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > +{
> > > +       struct mlx5_control_vq *cvq;
> > > +
> > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > +               return;
> > > +
> > > +       cvq = &mvdev->cvq;
> > > +       cvq->ready = !suspend;
> > > +}
> >
> > It looks to me we need to synchronize this with reslock. And this
> > probably deserve a dedicated fix.
> >
> > > +
> > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > +{
> > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > +       int i;
> > > +
> > > +       if (!suspend) {
> > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > +               return -EOPNOTSUPP;
> > > +       }
> > > +
> > > +       down_write(&ndev->reslock);
> > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > +               mvq = &ndev->vqs[i];
> > > +               suspend_vq(ndev, mvq);
> > > +       }
> > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> >
> > Do we need to synchronize with the carrier work here? Otherwise we may
> > get config notification after suspending.
> >
> > > +       up_write(&ndev->reslock);
> > > +       return 0;
> > > +}
> > > +
> > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > >         .get_generation = mlx5_vdpa_get_generation,
> > >         .set_map = mlx5_vdpa_set_map,
> > >         .free = mlx5_vdpa_free,
> > > +       .suspend = mlx5_vdpa_suspend,
> >
> > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> >
>
> Should we add
> Based-on: <20220526124338.36247-1-eperezma@redhat.com>
>
> To this series?

Probably, but that series seems to support resume while this series doesn't.

Any reason for this?

(I don't see any blocker for this especially considering parents can
choose to do reset + set_vring_state etc.)

Thanks

>
> > Thanks
> >
> > >  };
> > >
> > >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> > >                 mvq->index = i;
> > >                 mvq->ndev = ndev;
> > >                 mvq->fwqp.fw = true;
> > > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > >         }
> > >         for (; i < ndev->mvdev.max_vqs; i++) {
> > >                 mvq = &ndev->vqs[i];
> > > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > index 4414ed5b6ed2..423562f39d3c 100644
> > > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > @@ -150,6 +150,14 @@ enum {
> > >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> > >  };
> > >
> > > +/* This indicates that the object was not created or has already
> > > + * been destroyed. It is safe to assume that this object will never
> > > + * have so many states.
> > > + */
> > > +enum {
> > > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > > +};
> > > +
> > >  enum {
> > >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> > >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > > --
> > > 2.35.1
> > >
> >
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-06-21  3:04         ` Jason Wang
  (?)
@ 2022-06-21  7:48         ` Eugenio Perez Martin
  2022-06-21  7:52             ` Jason Wang
  -1 siblings, 1 reply; 42+ messages in thread
From: Eugenio Perez Martin @ 2022-06-21  7:48 UTC (permalink / raw)
  To: Jason Wang
  Cc: Eli Cohen, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Tue, Jun 21, 2022 at 5:05 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Mon, Jun 20, 2022 at 5:59 PM Eugenio Perez Martin
> <eperezma@redhat.com> wrote:
> >
> > On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > > >
> > > > Implement the suspend callback, allowing the virtqueues to be suspended
> > > > so they stop processing descriptors. This is required to allow the shadow
> > > > virtqueue to kick in.
> > > >
> > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > ---
> > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > >         if (err)
> > > >                 goto err_cmd;
> > > >
> > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > > >         kfree(in);
> > > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > > >
> > > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > > >                 return;
> > > >         }
> > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > >         umems_destroy(ndev, mvq);
> > > >  }
> > > >
> > > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > > >         return err;
> > > >  }
> > > >
> > > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > > +{
> > > > +       switch (oldstate) {
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > > +       default:
> > > > +               return false;
> > > > +       }
> > > > +}
> > > > +
> > > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > > >  {
> > > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > >         void *in;
> > > >         int err;
> > > >
> > > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > > +               return 0;
> > > > +
> > > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > > +               return -EINVAL;
> > > > +
> > > >         in = kzalloc(inlen, GFP_KERNEL);
> > > >         if (!in)
> > > >                 return -ENOMEM;
> > > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > >         struct mlx5_vdpa_virtqueue *mvq;
> > > > +       int err;
> > > >
> > > >         if (!mvdev->actual_features)
> > > >                 return;
> > > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > >         }
> > > >
> > > >         mvq = &ndev->vqs[idx];
> > > > -       if (!ready)
> > > > +       if (!ready) {
> > > >                 suspend_vq(ndev, mvq);
> > > > +       } else {
> > > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > > +               if (err) {
> > > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > > +                       ready = false;
> > > > +               }
> > > > +       }
> > > > +
> > > >
> > > >         mvq->ready = ready;
> > > >  }
> > > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > > >         return err;
> > > >  }
> > > >
> > > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > > +{
> > > > +       struct mlx5_control_vq *cvq;
> > > > +
> > > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > +               return;
> > > > +
> > > > +       cvq = &mvdev->cvq;
> > > > +       cvq->ready = !suspend;
> > > > +}
> > >
> > > It looks to me we need to synchronize this with reslock. And this
> > > probably deserve a dedicated fix.
> > >
> > > > +
> > > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > > +{
> > > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > +       int i;
> > > > +
> > > > +       if (!suspend) {
> > > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > > +               return -EOPNOTSUPP;
> > > > +       }
> > > > +
> > > > +       down_write(&ndev->reslock);
> > > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > > +               mvq = &ndev->vqs[i];
> > > > +               suspend_vq(ndev, mvq);
> > > > +       }
> > > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> > >
> > > Do we need to synchronize with the carrier work here? Otherwise we may
> > > get config notification after suspending.
> > >
> > > > +       up_write(&ndev->reslock);
> > > > +       return 0;
> > > > +}
> > > > +
> > > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > >         .get_generation = mlx5_vdpa_get_generation,
> > > >         .set_map = mlx5_vdpa_set_map,
> > > >         .free = mlx5_vdpa_free,
> > > > +       .suspend = mlx5_vdpa_suspend,
> > >
> > > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> > >
> >
> > Should we add
> > Based-on: <20220526124338.36247-1-eperezma@redhat.com>
> >
> > To this series?
>
> Probably, but that series seems to support resume while this series doesn't.
>
> Any reason for this?
>
> (I don't see any blocker for this especially considering parents can
> choose to do reset + set_vring_state etc.)
>

I suggest starting simple and modifying the vdpa_sim series so it only
provides a suspend() operation, with no parameters. We can always add
resume() later if it is needed at all.

Providing reset + set_vring_state, etc., seems simpler if done from userland.
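
A minimal sketch of the shape being suggested, assuming the bus patch is
cut down to a parameterless callback (the vdpasim "running" flag below
is invented for illustration):

/* Sketch: a parameterless suspend op in vdpa_sim. */
static int vdpasim_suspend(struct vdpa_device *vdpa)
{
	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);

	spin_lock(&vdpasim->lock);
	vdpasim->running = false;	/* hypothetical flag the worker checks */
	spin_unlock(&vdpasim->lock);

	return 0;
}

static const struct vdpa_config_ops vdpasim_config_ops = {
	/* ... existing callbacks ... */
	.suspend = vdpasim_suspend,	/* int (*suspend)(struct vdpa_device *) */
};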

Thanks!

> Thanks
>
> >
> > > Thanks
> > >
> > > >  };
> > > >
> > > >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > > > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> > > >                 mvq->index = i;
> > > >                 mvq->ndev = ndev;
> > > >                 mvq->fwqp.fw = true;
> > > > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > >         }
> > > >         for (; i < ndev->mvdev.max_vqs; i++) {
> > > >                 mvq = &ndev->vqs[i];
> > > > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > index 4414ed5b6ed2..423562f39d3c 100644
> > > > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > @@ -150,6 +150,14 @@ enum {
> > > >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> > > >  };
> > > >
> > > > +/* This indicates that the object was not created or has already
> > > > + * been destroyed. It is safe to assume that this object will never
> > > > + * have so many states.
> > > > + */
> > > > +enum {
> > > > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > > > +};
> > > > +
> > > >  enum {
> > > >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> > > >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > > > --
> > > > 2.35.1
> > > >
> > >
> >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-06-21  7:48         ` Eugenio Perez Martin
@ 2022-06-21  7:52             ` Jason Wang
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2022-06-21  7:52 UTC (permalink / raw)
  To: Eugenio Perez Martin
  Cc: Eli Cohen, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Tue, Jun 21, 2022 at 3:49 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Tue, Jun 21, 2022 at 5:05 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Mon, Jun 20, 2022 at 5:59 PM Eugenio Perez Martin
> > <eperezma@redhat.com> wrote:
> > >
> > > On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > > > >
> > > > > Implement the suspend callback, allowing the virtqueues to be suspended
> > > > > so they stop processing descriptors. This is required to allow the shadow
> > > > > virtqueue to kick in.
> > > > >
> > > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > > ---
> > > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > > > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > > > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > >         if (err)
> > > > >                 goto err_cmd;
> > > > >
> > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > > > >         kfree(in);
> > > > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > > > >
> > > > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > > > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > > > >                 return;
> > > > >         }
> > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > > >         umems_destroy(ndev, mvq);
> > > > >  }
> > > > >
> > > > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > > > >         return err;
> > > > >  }
> > > > >
> > > > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > > > +{
> > > > > +       switch (oldstate) {
> > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > > > +       default:
> > > > > +               return false;
> > > > > +       }
> > > > > +}
> > > > > +
> > > > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > > > >  {
> > > > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > >         void *in;
> > > > >         int err;
> > > > >
> > > > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > > > +               return 0;
> > > > > +
> > > > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > > > +               return -EINVAL;
> > > > > +
> > > > >         in = kzalloc(inlen, GFP_KERNEL);
> > > > >         if (!in)
> > > > >                 return -ENOMEM;
> > > > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > >         struct mlx5_vdpa_virtqueue *mvq;
> > > > > +       int err;
> > > > >
> > > > >         if (!mvdev->actual_features)
> > > > >                 return;
> > > > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > >         }
> > > > >
> > > > >         mvq = &ndev->vqs[idx];
> > > > > -       if (!ready)
> > > > > +       if (!ready) {
> > > > >                 suspend_vq(ndev, mvq);
> > > > > +       } else {
> > > > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > > > +               if (err) {
> > > > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > > > +                       ready = false;
> > > > > +               }
> > > > > +       }
> > > > > +
> > > > >
> > > > >         mvq->ready = ready;
> > > > >  }
> > > > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > > > >         return err;
> > > > >  }
> > > > >
> > > > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > > > +{
> > > > > +       struct mlx5_control_vq *cvq;
> > > > > +
> > > > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > > +               return;
> > > > > +
> > > > > +       cvq = &mvdev->cvq;
> > > > > +       cvq->ready = !suspend;
> > > > > +}
> > > >
> > > > It looks to me we need to synchronize this with reslock. And this
> > > > probably deserve a dedicated fix.
> > > >
> > > > > +
> > > > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > > > +{
> > > > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > > +       int i;
> > > > > +
> > > > > +       if (!suspend) {
> > > > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > > > +               return -EOPNOTSUPP;
> > > > > +       }
> > > > > +
> > > > > +       down_write(&ndev->reslock);
> > > > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > > > +               mvq = &ndev->vqs[i];
> > > > > +               suspend_vq(ndev, mvq);
> > > > > +       }
> > > > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> > > >
> > > > Do we need to synchronize with the carrier work here? Otherwise we may
> > > > get config notification after suspending.
> > > >
> > > > > +       up_write(&ndev->reslock);
> > > > > +       return 0;
> > > > > +}
> > > > > +
> > > > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > > > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > >         .get_generation = mlx5_vdpa_get_generation,
> > > > >         .set_map = mlx5_vdpa_set_map,
> > > > >         .free = mlx5_vdpa_free,
> > > > > +       .suspend = mlx5_vdpa_suspend,
> > > >
> > > > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> > > >
> > >
> > > Should we add
> > > Based-on: <20220526124338.36247-1-eperezma@redhat.com>
> > >
> > > To this series?
> >
> > Probably, but that series seems to support resume while this series doesn't.
> >
> > Any reason for this?
> >
> > (I don't see any blocker for this especially considering parents can
> > choose to do reset + set_vring_state etc.)
> >
>
> I suggest starting simple and modifying the vdpa_sim series so it only
> provides a suspend() operation, with no parameters. We can always add
> resume() later if it is needed at all.

This complicates the feature a little bit.

>
> Providing reset + set_vring_state, etc., seems simpler if done from userland.

One issue with the current API is that it only works for networking
devices, since we don't have a way to set device state.

By having stop/resume, we know the device state is preserved.
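
To make the distinction concrete, roughly what the migration flow relies
on (a sketch; get_vq_state() is an existing op, and the suspend()
signature is assumed to be the parameterless one suggested above):

/* Sketch: why suspend, not reset, has to precede saving vq state. */
static int save_vq_state_for_migration(struct vdpa_device *vdev, u16 idx,
				       struct vdpa_vq_state *state)
{
	const struct vdpa_config_ops *ops = vdev->config;
	int err;

	err = ops->suspend(vdev);	/* stop descriptor processing */
	if (err)
		return err;

	/* avail/used indices are still valid here; after a reset they are lost */
	return ops->get_vq_state(vdev, idx, state);
}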

Thanks

>
> Thanks!
>
> > Thanks
> >
> > >
> > > > Thanks
> > > >
> > > > >  };
> > > > >
> > > > >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > > > > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> > > > >                 mvq->index = i;
> > > > >                 mvq->ndev = ndev;
> > > > >                 mvq->fwqp.fw = true;
> > > > > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > > >         }
> > > > >         for (; i < ndev->mvdev.max_vqs; i++) {
> > > > >                 mvq = &ndev->vqs[i];
> > > > > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > > index 4414ed5b6ed2..423562f39d3c 100644
> > > > > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > > @@ -150,6 +150,14 @@ enum {
> > > > >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> > > > >  };
> > > > >
> > > > > +/* This indicates that the object was not created or has already
> > > > > + * been destroyed. It is safe to assume that this object will never
> > > > > + * have so many states.
> > > > > + */
> > > > > +enum {
> > > > > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > > > > +};
> > > > > +
> > > > >  enum {
> > > > >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> > > > >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > > > > --
> > > > > 2.35.1
> > > > >
> > > >
> > >
> >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* RE: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-06-21  2:58         ` Jason Wang
  (?)
@ 2022-07-11  6:10         ` Eli Cohen
  -1 siblings, 0 replies; 42+ messages in thread
From: Eli Cohen @ 2022-07-11  6:10 UTC (permalink / raw)
  To: Jason Wang
  Cc: eperezma, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

> -----Original Message-----
> From: Jason Wang <jasowang@redhat.com>
> Sent: Tuesday, June 21, 2022 5:59 AM
> To: Eli Cohen <elic@nvidia.com>
> Cc: eperezma <eperezma@redhat.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>; linux-
> kernel <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
> 
> On Mon, Jun 20, 2022 at 9:09 PM Eli Cohen <elic@nvidia.com> wrote:
> >
> > > -----Original Message-----
> > > From: Jason Wang <jasowang@redhat.com>
> > > Sent: Monday, June 20, 2022 11:56 AM
> > > To: Eli Cohen <elic@nvidia.com>
> > > Cc: eperezma <eperezma@redhat.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>;
> linux-
> > > kernel <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> > > Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
> > >
> > > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > > >
> > > > Implement the suspend callback, allowing the virtqueues to be suspended
> > > > so they stop processing descriptors. This is required to allow the shadow
> > > > virtqueue to kick in.
> > > >
> > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > ---
> > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > >         if (err)
> > > >                 goto err_cmd;
> > > >
> > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > > >         kfree(in);
> > > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > > >
> > > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > > >                 return;
> > > >         }
> > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > >         umems_destroy(ndev, mvq);
> > > >  }
> > > >
> > > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > > >         return err;
> > > >  }
> > > >
> > > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > > +{
> > > > +       switch (oldstate) {
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > > +       default:
> > > > +               return false;
> > > > +       }
> > > > +}
> > > > +
> > > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > > >  {
> > > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > >         void *in;
> > > >         int err;
> > > >
> > > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > > +               return 0;
> > > > +
> > > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > > +               return -EINVAL;
> > > > +
> > > >         in = kzalloc(inlen, GFP_KERNEL);
> > > >         if (!in)
> > > >                 return -ENOMEM;
> > > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > >         struct mlx5_vdpa_virtqueue *mvq;
> > > > +       int err;
> > > >
> > > >         if (!mvdev->actual_features)
> > > >                 return;
> > > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > >         }
> > > >
> > > >         mvq = &ndev->vqs[idx];
> > > > -       if (!ready)
> > > > +       if (!ready) {
> > > >                 suspend_vq(ndev, mvq);
> > > > +       } else {
> > > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > > +               if (err) {
> > > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > > +                       ready = false;
> > > > +               }
> > > > +       }
> > > > +
> > > >
> > > >         mvq->ready = ready;
> > > >  }
> > > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > > >         return err;
> > > >  }
> > > >
> > > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > > +{
> > > > +       struct mlx5_control_vq *cvq;
> > > > +
> > > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > +               return;
> > > > +
> > > > +       cvq = &mvdev->cvq;
> > > > +       cvq->ready = !suspend;
> > > > +}
> > >
> > > It looks to me we need to synchronize this with reslock. And this
> > > probably deserve a dedicated fix.
> > >
> >
> > It's already being held by mlx5_vdpa_suspend
> 
> Right, but I meant this seems somewhat duplicated with set_cvq_ready();
> can we unify them? (We don't hold reslock there.)

Do you mean calling set_vq_ready(mvdev, !suspend) and abandoning mlx5_vdpa_cvq_suspend()?
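
If so, one possible shape for sharing the logic (sketch only; it assumes
the caller holds ndev->reslock, and the helper name is invented):

/* Sketch: one helper both mlx5_vdpa_suspend() and the set_vq_ready()
 * path could call, with reslock held by the caller.
 */
static void mlx5_vdpa_set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
		return;

	mvdev->cvq.ready = ready;
}

mlx5_vdpa_suspend() would then call mlx5_vdpa_set_cvq_ready(mvdev,
!suspend) under reslock, and the other caller would take the lock first.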

> 
> >
> > > > +
> > > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > > +{
> > > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > +       int i;
> > > > +
> > > > +       if (!suspend) {
> > > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > > +               return -EOPNOTSUPP;
> > > > +       }
> > > > +
> > > > +       down_write(&ndev->reslock);
> > > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > > +               mvq = &ndev->vqs[i];
> > > > +               suspend_vq(ndev, mvq);
> > > > +       }
> > > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> > >
> > > Do we need to synchronize with the carrier work here? Otherwise we may
> > > get config notification after suspending.
> > >
> >
> > Are you saying we should not allow carrier updates after the VQs have been suspended?
> > Link state should not be related to suspension of VQs.
> 
> Yes, it's not related to the VQ but we suspend the device here. So we
> probably need to flush the carrier work.
> 

Right.
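
A sketch of where that flush could land; the assumption (not visible in
the quoted context) is that the link-state update is queued on mvdev->wq
and that it is safe to flush it before taking reslock:

static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	if (!suspend) {
		mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
		return -EOPNOTSUPP;
	}

	/* drain pending link-state work so no config notification can
	 * arrive after the device is reported as suspended
	 */
	flush_workqueue(mvdev->wq);

	down_write(&ndev->reslock);
	for (i = 0; i < ndev->cur_num_vqs; i++) {
		mvq = &ndev->vqs[i];
		suspend_vq(ndev, mvq);
	}
	mlx5_vdpa_cvq_suspend(mvdev, suspend);
	up_write(&ndev->reslock);
	return 0;
}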

> Thanks
> 
> >
> > > > +       up_write(&ndev->reslock);
> > > > +       return 0;
> > > > +}
> > > > +
> > > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > >         .get_generation = mlx5_vdpa_get_generation,
> > > >         .set_map = mlx5_vdpa_set_map,
> > > >         .free = mlx5_vdpa_free,
> > > > +       .suspend = mlx5_vdpa_suspend,
> > >
> > > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> > >
> > > Thanks
> > >
> > > >  };
> > > >
> > > >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > > > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> > > >                 mvq->index = i;
> > > >                 mvq->ndev = ndev;
> > > >                 mvq->fwqp.fw = true;
> > > > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > >         }
> > > >         for (; i < ndev->mvdev.max_vqs; i++) {
> > > >                 mvq = &ndev->vqs[i];
> > > > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > index 4414ed5b6ed2..423562f39d3c 100644
> > > > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > @@ -150,6 +150,14 @@ enum {
> > > >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> > > >  };
> > > >
> > > > +/* This indicates that the object was not created or has already
> > > > + * been destroyed. It is safe to assume that this object will never
> > > > + * have so many states.
> > > > + */
> > > > +enum {
> > > > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > > > +};
> > > > +
> > > >  enum {
> > > >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> > > >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > > > --
> > > > 2.35.1
> > > >
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* RE: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-06-21  3:04         ` Jason Wang
  (?)
  (?)
@ 2022-07-11  6:14         ` Eli Cohen
  2022-07-11 10:43           ` Eugenio Perez Martin
  2022-07-12  8:14             ` Jason Wang
  -1 siblings, 2 replies; 42+ messages in thread
From: Eli Cohen @ 2022-07-11  6:14 UTC (permalink / raw)
  To: Jason Wang, Eugenio Perez Martin
  Cc: mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

> From: Jason Wang <jasowang@redhat.com>
> Sent: Tuesday, June 21, 2022 6:05 AM
> To: Eugenio Perez Martin <eperezma@redhat.com>
> Cc: Eli Cohen <elic@nvidia.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>; linux-kernel
> <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
> 
> On Mon, Jun 20, 2022 at 5:59 PM Eugenio Perez Martin
> <eperezma@redhat.com> wrote:
> >
> > On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > > >
> > > > Implement the suspend callback, allowing the virtqueues to be suspended
> > > > so they stop processing descriptors. This is required to allow the shadow
> > > > virtqueue to kick in.
> > > >
> > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > ---
> > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > >         if (err)
> > > >                 goto err_cmd;
> > > >
> > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > > >         kfree(in);
> > > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > > >
> > > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > > >                 return;
> > > >         }
> > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > >         umems_destroy(ndev, mvq);
> > > >  }
> > > >
> > > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > > >         return err;
> > > >  }
> > > >
> > > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > > +{
> > > > +       switch (oldstate) {
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > > +       default:
> > > > +               return false;
> > > > +       }
> > > > +}
> > > > +
> > > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > > >  {
> > > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > >         void *in;
> > > >         int err;
> > > >
> > > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > > +               return 0;
> > > > +
> > > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > > +               return -EINVAL;
> > > > +
> > > >         in = kzalloc(inlen, GFP_KERNEL);
> > > >         if (!in)
> > > >                 return -ENOMEM;
> > > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > >         struct mlx5_vdpa_virtqueue *mvq;
> > > > +       int err;
> > > >
> > > >         if (!mvdev->actual_features)
> > > >                 return;
> > > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > >         }
> > > >
> > > >         mvq = &ndev->vqs[idx];
> > > > -       if (!ready)
> > > > +       if (!ready) {
> > > >                 suspend_vq(ndev, mvq);
> > > > +       } else {
> > > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > > +               if (err) {
> > > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > > +                       ready = false;
> > > > +               }
> > > > +       }
> > > > +
> > > >
> > > >         mvq->ready = ready;
> > > >  }
> > > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > > >         return err;
> > > >  }
> > > >
> > > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > > +{
> > > > +       struct mlx5_control_vq *cvq;
> > > > +
> > > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > +               return;
> > > > +
> > > > +       cvq = &mvdev->cvq;
> > > > +       cvq->ready = !suspend;
> > > > +}
> > >
> > > It looks to me we need to synchronize this with reslock. And this
> > > probably deserve a dedicated fix.
> > >
> > > > +
> > > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > > +{
> > > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > +       int i;
> > > > +
> > > > +       if (!suspend) {
> > > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > > +               return -EOPNOTSUPP;
> > > > +       }
> > > > +
> > > > +       down_write(&ndev->reslock);
> > > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > > +               mvq = &ndev->vqs[i];
> > > > +               suspend_vq(ndev, mvq);
> > > > +       }
> > > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> > >
> > > Do we need to synchronize with the carrier work here? Otherwise we may
> > > get config notification after suspending.
> > >
> > > > +       up_write(&ndev->reslock);
> > > > +       return 0;
> > > > +}
> > > > +
> > > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > >         .get_generation = mlx5_vdpa_get_generation,
> > > >         .set_map = mlx5_vdpa_set_map,
> > > >         .free = mlx5_vdpa_free,
> > > > +       .suspend = mlx5_vdpa_suspend,
> > >
> > > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> > >
> >
> > Should we add
> > Based-on: <20220526124338.36247-1-eperezma@redhat.com>
> >
> > To this series?
> 
> Probably, but that series seems to support resume while this series doesn't.
> 
> Any reason for this?

I think Eugenio agreed that resume is not really required since we're going to stop using this
instance and migrate. In any case, we don't support resume for the hardware object,
though it could be simulated should it be absolutely necessary.

> 
> (I don't see any blocker for this especially considering parents can
> choose to do reset + set_vring_state etc.)
> 
> Thanks
> 
> >
> > > Thanks
> > >
> > > >  };
> > > >
> > > >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > > > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> > > >                 mvq->index = i;
> > > >                 mvq->ndev = ndev;
> > > >                 mvq->fwqp.fw = true;
> > > > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > >         }
> > > >         for (; i < ndev->mvdev.max_vqs; i++) {
> > > >                 mvq = &ndev->vqs[i];
> > > > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > index 4414ed5b6ed2..423562f39d3c 100644
> > > > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > @@ -150,6 +150,14 @@ enum {
> > > >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> > > >  };
> > > >
> > > > +/* This indicates that the object was not created or has already
> > > > + * been destroyed. It is safe to assume that this object will never
> > > > + * have so many states.
> > > > + */
> > > > +enum {
> > > > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > > > +};
> > > > +
> > > >  enum {
> > > >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> > > >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > > > --
> > > > 2.35.1
> > > >
> > >
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-07-11  6:14         ` Eli Cohen
@ 2022-07-11 10:43           ` Eugenio Perez Martin
  2022-07-12  8:14             ` Jason Wang
  1 sibling, 0 replies; 42+ messages in thread
From: Eugenio Perez Martin @ 2022-07-11 10:43 UTC (permalink / raw)
  To: Eli Cohen
  Cc: Jason Wang, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Mon, Jul 11, 2022 at 8:14 AM Eli Cohen <elic@nvidia.com> wrote:
>
> > From: Jason Wang <jasowang@redhat.com>
> > Sent: Tuesday, June 21, 2022 6:05 AM
> > To: Eugenio Perez Martin <eperezma@redhat.com>
> > Cc: Eli Cohen <elic@nvidia.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>; linux-kernel
> > <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> > Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
> >
> > On Mon, Jun 20, 2022 at 5:59 PM Eugenio Perez Martin
> > <eperezma@redhat.com> wrote:
> > >
> > > On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > > > >
> > > > > Implement the suspend callback, allowing the virtqueues to be suspended
> > > > > so they stop processing descriptors. This is required to allow the shadow
> > > > > virtqueue to kick in.
> > > > >
> > > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > > ---
> > > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > > > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > > > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > >         if (err)
> > > > >                 goto err_cmd;
> > > > >
> > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > > > >         kfree(in);
> > > > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > > > >
> > > > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > > > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > > > >                 return;
> > > > >         }
> > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > > >         umems_destroy(ndev, mvq);
> > > > >  }
> > > > >
> > > > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > > > >         return err;
> > > > >  }
> > > > >
> > > > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > > > +{
> > > > > +       switch (oldstate) {
> > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > > > +       default:
> > > > > +               return false;
> > > > > +       }
> > > > > +}
> > > > > +
> > > > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > > > >  {
> > > > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > >         void *in;
> > > > >         int err;
> > > > >
> > > > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > > > +               return 0;
> > > > > +
> > > > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > > > +               return -EINVAL;
> > > > > +
> > > > >         in = kzalloc(inlen, GFP_KERNEL);
> > > > >         if (!in)
> > > > >                 return -ENOMEM;
> > > > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > >         struct mlx5_vdpa_virtqueue *mvq;
> > > > > +       int err;
> > > > >
> > > > >         if (!mvdev->actual_features)
> > > > >                 return;
> > > > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > >         }
> > > > >
> > > > >         mvq = &ndev->vqs[idx];
> > > > > -       if (!ready)
> > > > > +       if (!ready) {
> > > > >                 suspend_vq(ndev, mvq);
> > > > > +       } else {
> > > > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > > > +               if (err) {
> > > > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > > > +                       ready = false;
> > > > > +               }
> > > > > +       }
> > > > > +
> > > > >
> > > > >         mvq->ready = ready;
> > > > >  }
> > > > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > > > >         return err;
> > > > >  }
> > > > >
> > > > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > > > +{
> > > > > +       struct mlx5_control_vq *cvq;
> > > > > +
> > > > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > > +               return;
> > > > > +
> > > > > +       cvq = &mvdev->cvq;
> > > > > +       cvq->ready = !suspend;
> > > > > +}
> > > >
> > > > It looks to me we need to synchronize this with reslock. And this
> > > > probably deserve a dedicated fix.
> > > >
> > > > > +
> > > > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > > > +{
> > > > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > > +       int i;
> > > > > +
> > > > > +       if (!suspend) {
> > > > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > > > +               return -EOPNOTSUPP;
> > > > > +       }
> > > > > +
> > > > > +       down_write(&ndev->reslock);
> > > > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > > > +               mvq = &ndev->vqs[i];
> > > > > +               suspend_vq(ndev, mvq);
> > > > > +       }
> > > > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> > > >
> > > > Do we need to synchronize with the carrier work here? Otherwise we may
> > > > get config notification after suspending.
> > > >
> > > > > +       up_write(&ndev->reslock);
> > > > > +       return 0;
> > > > > +}
> > > > > +
> > > > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > > > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > >         .get_generation = mlx5_vdpa_get_generation,
> > > > >         .set_map = mlx5_vdpa_set_map,
> > > > >         .free = mlx5_vdpa_free,
> > > > > +       .suspend = mlx5_vdpa_suspend,
> > > >
> > > > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> > > >
> > >
> > > Should we add
> > > Based-on: <20220526124338.36247-1-eperezma@redhat.com>
> > >
> > > To this series?
> >
> > Probably, but that series seems to support resume while this series doesn't.
> >
> > Any reason for this?
>
> I think Eugenio agreed that resume is not really required since we're going to stop using this
> instance and migrate. In any case, we don't support resume for the hardware object,
> though it could be simulated should it be absolutely necessary.
>

That's right: resuming the device is not mandatory to achieve the live
migration use case, and from my point of view we can always add another
backend feature bit to support resuming a device.
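
For illustration only, such a bit could be negotiated like the existing
vhost backend features. The name and value below are hypothetical,
since no resume bit existed at the time of this discussion:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical bit: the parent advertises it can resume a suspended device. */
#define VHOST_BACKEND_F_RESUME 0x5

static bool can_resume(uint64_t backend_features)
{
	return backend_features & (1ULL << VHOST_BACKEND_F_RESUME);
}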

Thanks!


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-07-11  6:14         ` Eli Cohen
@ 2022-07-12  8:14             ` Jason Wang
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2022-07-12  8:14 UTC (permalink / raw)
  To: Eli Cohen
  Cc: Eugenio Perez Martin, mst, virtualization, linux-kernel,
	Si-Wei Liu, Parav Pandit

On Mon, Jul 11, 2022 at 2:14 PM Eli Cohen <elic@nvidia.com> wrote:
>
> > From: Jason Wang <jasowang@redhat.com>
> > Sent: Tuesday, June 21, 2022 6:05 AM
> > To: Eugenio Perez Martin <eperezma@redhat.com>
> > Cc: Eli Cohen <elic@nvidia.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>; linux-kernel
> > <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> > Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
> >
> > On Mon, Jun 20, 2022 at 5:59 PM Eugenio Perez Martin
> > <eperezma@redhat.com> wrote:
> > >
> > > On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > > > >
> > > > > Implement the suspend callback, allowing the virtqueues to be suspended so
> > > > > they stop processing descriptors. This is required to allow the shadow
> > > > > virtqueue to kick in.
> > > > >
> > > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > > ---
> > > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > > > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > > > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > >         if (err)
> > > > >                 goto err_cmd;
> > > > >
> > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > > > >         kfree(in);
> > > > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > > > >
> > > > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > > > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > > > >                 return;
> > > > >         }
> > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > > >         umems_destroy(ndev, mvq);
> > > > >  }
> > > > >
> > > > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > > > >         return err;
> > > > >  }
> > > > >
> > > > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > > > +{
> > > > > +       switch (oldstate) {
> > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > > > +       default:
> > > > > +               return false;
> > > > > +       }
> > > > > +}
> > > > > +
> > > > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > > > >  {
> > > > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > >         void *in;
> > > > >         int err;
> > > > >
> > > > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > > > +               return 0;
> > > > > +
> > > > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > > > +               return -EINVAL;
> > > > > +
> > > > >         in = kzalloc(inlen, GFP_KERNEL);
> > > > >         if (!in)
> > > > >                 return -ENOMEM;
> > > > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > >         struct mlx5_vdpa_virtqueue *mvq;
> > > > > +       int err;
> > > > >
> > > > >         if (!mvdev->actual_features)
> > > > >                 return;
> > > > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > >         }
> > > > >
> > > > >         mvq = &ndev->vqs[idx];
> > > > > -       if (!ready)
> > > > > +       if (!ready) {
> > > > >                 suspend_vq(ndev, mvq);
> > > > > +       } else {
> > > > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > > > +               if (err) {
> > > > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > > > +                       ready = false;
> > > > > +               }
> > > > > +       }
> > > > > +
> > > > >
> > > > >         mvq->ready = ready;
> > > > >  }
> > > > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > > > >         return err;
> > > > >  }
> > > > >
> > > > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > > > +{
> > > > > +       struct mlx5_control_vq *cvq;
> > > > > +
> > > > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > > +               return;
> > > > > +
> > > > > +       cvq = &mvdev->cvq;
> > > > > +       cvq->ready = !suspend;
> > > > > +}
> > > >
> > > > It looks to me we need to synchronize this with reslock. And this
> > > > probably deserves a dedicated fix.
> > > >
> > > > > +
> > > > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > > > +{
> > > > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > > +       int i;
> > > > > +
> > > > > +       if (!suspend) {
> > > > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > > > +               return -EOPNOTSUPP;
> > > > > +       }
> > > > > +
> > > > > +       down_write(&ndev->reslock);
> > > > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > > > +               mvq = &ndev->vqs[i];
> > > > > +               suspend_vq(ndev, mvq);
> > > > > +       }
> > > > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> > > >
> > > > Do we need to synchronize with the carrier work here? Otherwise we may
> > > > get a config notification after suspending.
> > > >
> > > > > +       up_write(&ndev->reslock);
> > > > > +       return 0;
> > > > > +}
> > > > > +
> > > > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > > > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > >         .get_generation = mlx5_vdpa_get_generation,
> > > > >         .set_map = mlx5_vdpa_set_map,
> > > > >         .free = mlx5_vdpa_free,
> > > > > +       .suspend = mlx5_vdpa_suspend,
> > > >
> > > > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> > > >
> > >
> > > Should we add
> > > Based-on: <20220526124338.36247-1-eperezma@redhat.com>
> > >
> > > To this series?
> >
> > Probably, but that series seems to support resume while this series doesn't.
> >
> > Any reason for this?
>
> I think Eugenio agreed that resume is not really required since we're going to stop using this
> instance and migrate. In any case, we don't support resume for the hardware object
> though it could be simulated should it be absolutely necessary.

This is fine as long as everything goes well during live migration. But
when migration fails for some reason, management (libvirt) may choose
to restart the device on the source.

This means we should either

1) support resume in the parent
2) emulate it in qemu (with a lot of state restoring)

And it is not only useful for live migration; it could also be used for vmstop/start.
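
For reference, a minimal sketch of option 1 using the patch's own
helpers, assuming the firmware accepts a SUSPEND to RDY transition
(which is_valid_state_change() would also have to allow):

static int mlx5_vdpa_resume_vqs(struct mlx5_vdpa_net *ndev)
{
	int err, i;

	for (i = 0; i < ndev->cur_num_vqs; i++) {
		err = modify_virtqueue(ndev, &ndev->vqs[i],
				       MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
		if (err)
			return err;
		ndev->vqs[i].ready = true;
	}
	return 0;
}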

Thanks

>
> >
> > (I don't see any blocker for this especially considering parents can
> > choose to do reset + set_vring_state etc.)
> >
> > Thanks
> >
> > >
> > > > Thanks
> > > >
> > > > >  };
> > > > >
> > > > >  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > > > > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> > > > >                 mvq->index = i;
> > > > >                 mvq->ndev = ndev;
> > > > >                 mvq->fwqp.fw = true;
> > > > > +               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > > >         }
> > > > >         for (; i < ndev->mvdev.max_vqs; i++) {
> > > > >                 mvq = &ndev->vqs[i];
> > > > > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > > index 4414ed5b6ed2..423562f39d3c 100644
> > > > > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > > @@ -150,6 +150,14 @@ enum {
> > > > >         MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
> > > > >  };
> > > > >
> > > > > +/* This indicates that the object was not created or has already
> > > > > + * been destroyed. It is safe to assume that this object will never
> > > > > + * have this many states
> > > > > + */
> > > > > +enum {
> > > > > +       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > > > > +};
> > > > > +
> > > > >  enum {
> > > > >         MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
> > > > >         MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
> > > > > --
> > > > > 2.35.1
> > > > >
> > > >
> > >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-07-12  8:14             ` Jason Wang
  (?)
@ 2022-07-12  9:15             ` Eugenio Perez Martin
  2022-07-13  3:29                 ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Eugenio Perez Martin @ 2022-07-12  9:15 UTC (permalink / raw)
  To: Jason Wang
  Cc: Eli Cohen, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Tue, Jul 12, 2022 at 10:14 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Mon, Jul 11, 2022 at 2:14 PM Eli Cohen <elic@nvidia.com> wrote:
> >
> > > From: Jason Wang <jasowang@redhat.com>
> > > Sent: Tuesday, June 21, 2022 6:05 AM
> > > To: Eugenio Perez Martin <eperezma@redhat.com>
> > > Cc: Eli Cohen <elic@nvidia.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>; linux-kernel
> > > <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> > > Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
> > >
> > > On Mon, Jun 20, 2022 at 5:59 PM Eugenio Perez Martin
> > > <eperezma@redhat.com> wrote:
> > > >
> > > > On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > > > > >
> > > > > > Implement the suspend callback, allowing the virtqueues to be suspended so
> > > > > > they stop processing descriptors. This is required to allow the shadow
> > > > > > virtqueue to kick in.
> > > > > >
> > > > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > > > ---
> > > > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > > > > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > > > > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > > > > >
> > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > > >         if (err)
> > > > > >                 goto err_cmd;
> > > > > >
> > > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > > > > >         kfree(in);
> > > > > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > > > > >
> > > > > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > > > > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > > > > >                 return;
> > > > > >         }
> > > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > > > >         umems_destroy(ndev, mvq);
> > > > > >  }
> > > > > >
> > > > > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > > > > >         return err;
> > > > > >  }
> > > > > >
> > > > > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > > > > +{
> > > > > > +       switch (oldstate) {
> > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > > > > +       default:
> > > > > > +               return false;
> > > > > > +       }
> > > > > > +}
> > > > > > +
> > > > > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > > > > >  {
> > > > > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > > > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > > >         void *in;
> > > > > >         int err;
> > > > > >
> > > > > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > > > > +               return 0;
> > > > > > +
> > > > > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > > > > +               return -EINVAL;
> > > > > > +
> > > > > >         in = kzalloc(inlen, GFP_KERNEL);
> > > > > >         if (!in)
> > > > > >                 return -ENOMEM;
> > > > > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > >         struct mlx5_vdpa_virtqueue *mvq;
> > > > > > +       int err;
> > > > > >
> > > > > >         if (!mvdev->actual_features)
> > > > > >                 return;
> > > > > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > > >         }
> > > > > >
> > > > > >         mvq = &ndev->vqs[idx];
> > > > > > -       if (!ready)
> > > > > > +       if (!ready) {
> > > > > >                 suspend_vq(ndev, mvq);
> > > > > > +       } else {
> > > > > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > > > > +               if (err) {
> > > > > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > > > > +                       ready = false;
> > > > > > +               }
> > > > > > +       }
> > > > > > +
> > > > > >
> > > > > >         mvq->ready = ready;
> > > > > >  }
> > > > > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > > > > >         return err;
> > > > > >  }
> > > > > >
> > > > > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > > > > +{
> > > > > > +       struct mlx5_control_vq *cvq;
> > > > > > +
> > > > > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > > > +               return;
> > > > > > +
> > > > > > +       cvq = &mvdev->cvq;
> > > > > > +       cvq->ready = !suspend;
> > > > > > +}
> > > > >
> > > > > It looks to me we need to synchronize this with reslock. And this
> > > > > probably deserves a dedicated fix.
> > > > >
> > > > > > +
> > > > > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > > > > +{
> > > > > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > > > +       int i;
> > > > > > +
> > > > > > +       if (!suspend) {
> > > > > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > > > > +               return -EOPNOTSUPP;
> > > > > > +       }
> > > > > > +
> > > > > > +       down_write(&ndev->reslock);
> > > > > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > > > > +               mvq = &ndev->vqs[i];
> > > > > > +               suspend_vq(ndev, mvq);
> > > > > > +       }
> > > > > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> > > > >
> > > > > Do we need to synchronize with the carrier work here? Otherwise we may
> > > > > get a config notification after suspending.
> > > > >
> > > > > > +       up_write(&ndev->reslock);
> > > > > > +       return 0;
> > > > > > +}
> > > > > > +
> > > > > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > > > > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > > > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > > >         .get_generation = mlx5_vdpa_get_generation,
> > > > > >         .set_map = mlx5_vdpa_set_map,
> > > > > >         .free = mlx5_vdpa_free,
> > > > > > +       .suspend = mlx5_vdpa_suspend,
> > > > >
> > > > > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> > > > >
> > > >
> > > > Should we add
> > > > Based-on: <20220526124338.36247-1-eperezma@redhat.com>
> > > >
> > > > To this series?
> > >
> > > Probably, but that series seems to support resume while this series doesn't.
> > >
> > > Any reason for this?
> >
> > I think Eugenio agreed that resume is not really required since we're going to stop using this
> > instance and migrate. In any case, we don't support resume for the hardware object
> > though it could be simulated should it be absolutely necessary.
>
> This is fine as long as everything goes well during live migration. But
> when migration fails for some reason, management (libvirt) may choose
> to restart the device on the source.
>
> This means we should either
>
> 1) support resume in the parent
> 2) emulate it in qemu (with a lot of state restoring)
>

I think it should be handled in qemu (at least the POC reset the
device), but I didn't exercise a lot of the failure paths there
because, well, it was a POC :).

> And it is not only useful for live migration; it could also be used for vmstop/start.
>

I think it would be easier if we dedicated a feature flag for resuming
the device in the future. Qemu could take advantage of it in some
error paths of live migration, though less than it seems because it
overrides things like ring addresses. And, obviously, in
vmstop/vmstart.

Actually, net devices should be OK to restore with a full reset. The
problem would be filesystems etc. whose state is not part of vdpa at
the moment.
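
As a rough sketch, that full reset restore amounts to replaying the
vdpa_config_ops sequence below (op names as in include/linux/vdpa.h;
ordering simplified, error handling omitted, and the saved values are
assumed to have been captured before the reset):

/* Hypothetical container for per-VQ state captured before the reset. */
struct saved_vq {
	u32 num;
	u64 desc, driver, device;
	struct vdpa_vq_state state;
};

static void restore_after_reset(struct vdpa_device *vdev,
				const struct vdpa_config_ops *ops,
				u64 features, u16 nvqs,
				const struct saved_vq *vq)
{
	u16 i;

	ops->reset(vdev);
	ops->set_driver_features(vdev, features);
	for (i = 0; i < nvqs; i++) {
		ops->set_vq_num(vdev, i, vq[i].num);
		ops->set_vq_address(vdev, i, vq[i].desc, vq[i].driver, vq[i].device);
		ops->set_vq_state(vdev, i, &vq[i].state);
		ops->set_vq_ready(vdev, i, true);
	}
	ops->set_status(vdev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
			      VIRTIO_CONFIG_S_DRIVER |
			      VIRTIO_CONFIG_S_FEATURES_OK |
			      VIRTIO_CONFIG_S_DRIVER_OK);
}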

Thanks!


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-07-12  9:15             ` Eugenio Perez Martin
@ 2022-07-13  3:29                 ` Jason Wang
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2022-07-13  3:29 UTC (permalink / raw)
  To: Eugenio Perez Martin
  Cc: Eli Cohen, mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

On Tue, Jul 12, 2022 at 5:16 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Tue, Jul 12, 2022 at 10:14 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Mon, Jul 11, 2022 at 2:14 PM Eli Cohen <elic@nvidia.com> wrote:
> > >
> > > > From: Jason Wang <jasowang@redhat.com>
> > > > Sent: Tuesday, June 21, 2022 6:05 AM
> > > > To: Eugenio Perez Martin <eperezma@redhat.com>
> > > > Cc: Eli Cohen <elic@nvidia.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>; linux-kernel
> > > > <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> > > > Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
> > > >
> > > > On Mon, Jun 20, 2022 at 5:59 PM Eugenio Perez Martin
> > > > <eperezma@redhat.com> wrote:
> > > > >
> > > > > On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > >
> > > > > > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > > > > > >
> > > > > > > Implement the suspend callback, allowing the virtqueues to be suspended so
> > > > > > > they stop processing descriptors. This is required to allow the shadow
> > > > > > > virtqueue to kick in.
> > > > > > >
> > > > > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > > > > ---
> > > > > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > > > > > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > > > > > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > > > > > >
> > > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > > > >         if (err)
> > > > > > >                 goto err_cmd;
> > > > > > >
> > > > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > > > > > >         kfree(in);
> > > > > > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > > > > > >
> > > > > > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > > > > > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > > > > > >                 return;
> > > > > > >         }
> > > > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > > > > >         umems_destroy(ndev, mvq);
> > > > > > >  }
> > > > > > >
> > > > > > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > > > > > >         return err;
> > > > > > >  }
> > > > > > >
> > > > > > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > > > > > +{
> > > > > > > +       switch (oldstate) {
> > > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > > > > > +       default:
> > > > > > > +               return false;
> > > > > > > +       }
> > > > > > > +}
> > > > > > > +
> > > > > > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > > > > > >  {
> > > > > > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > > > > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > > > >         void *in;
> > > > > > >         int err;
> > > > > > >
> > > > > > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > > > > > +               return 0;
> > > > > > > +
> > > > > > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > > > > > +               return -EINVAL;
> > > > > > > +
> > > > > > >         in = kzalloc(inlen, GFP_KERNEL);
> > > > > > >         if (!in)
> > > > > > >                 return -ENOMEM;
> > > > > > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > > >         struct mlx5_vdpa_virtqueue *mvq;
> > > > > > > +       int err;
> > > > > > >
> > > > > > >         if (!mvdev->actual_features)
> > > > > > >                 return;
> > > > > > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > > > >         }
> > > > > > >
> > > > > > >         mvq = &ndev->vqs[idx];
> > > > > > > -       if (!ready)
> > > > > > > +       if (!ready) {
> > > > > > >                 suspend_vq(ndev, mvq);
> > > > > > > +       } else {
> > > > > > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > > > > > +               if (err) {
> > > > > > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > > > > > +                       ready = false;
> > > > > > > +               }
> > > > > > > +       }
> > > > > > > +
> > > > > > >
> > > > > > >         mvq->ready = ready;
> > > > > > >  }
> > > > > > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > > > > > >         return err;
> > > > > > >  }
> > > > > > >
> > > > > > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > > > > > +{
> > > > > > > +       struct mlx5_control_vq *cvq;
> > > > > > > +
> > > > > > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > > > > +               return;
> > > > > > > +
> > > > > > > +       cvq = &mvdev->cvq;
> > > > > > > +       cvq->ready = !suspend;
> > > > > > > +}
> > > > > >
> > > > > > It looks to me we need to synchronize this with reslock. And this
> > > > > > probably deserves a dedicated fix.
> > > > > >
> > > > > > > +
> > > > > > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > > > > > +{
> > > > > > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > > > > +       int i;
> > > > > > > +
> > > > > > > +       if (!suspend) {
> > > > > > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > > > > > +               return -EOPNOTSUPP;
> > > > > > > +       }
> > > > > > > +
> > > > > > > +       down_write(&ndev->reslock);
> > > > > > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > > > > > +               mvq = &ndev->vqs[i];
> > > > > > > +               suspend_vq(ndev, mvq);
> > > > > > > +       }
> > > > > > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> > > > > >
> > > > > > Do we need to synchronize with the carrier work here? Otherwise we may
> > > > > > get a config notification after suspending.
> > > > > >
> > > > > > > +       up_write(&ndev->reslock);
> > > > > > > +       return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > > > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > > > > > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > > > > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > > > >         .get_generation = mlx5_vdpa_get_generation,
> > > > > > >         .set_map = mlx5_vdpa_set_map,
> > > > > > >         .free = mlx5_vdpa_free,
> > > > > > > +       .suspend = mlx5_vdpa_suspend,
> > > > > >
> > > > > > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> > > > > >
> > > > >
> > > > > Should we add
> > > > > Based-on: <20220526124338.36247-1-eperezma@redhat.com>
> > > > >
> > > > > To this series?
> > > >
> > > > Probably, but that series seems to support resume while this series doesn't.
> > > >
> > > > Any reason for this?
> > >
> > > I think Eugenio agreed that resume is not really required since we're going to stop using this
> > > instance and migrate. In any case, we don't support resume for the hardware object
> > > though it could be simulated should it be absolutely necessary.
> >
> > This is fine as long as everything goes well during live migration. But
> > when migration fails for some reason, management (libvirt) may choose
> > to restart the device on the source.
> >
> > This means we should either
> >
> > 1) support resume in the parent
> > 2) emulate it in qemu (with a lot of state restoring)
> >
>
> I think it should be handled in qemu (at least the POC reset the
> device), but I didn't exercise a lot of the failure paths there
> because, well, it was a POC :).

It looks like a must in a production environment. The failure is not
necessarily related to the shadow virtqueue itself.

Thanks

>
> > And it is not only useful for live migration; it could also be used for vmstop/start.
> >
>
> I think it would be easier if we dedicated a feature flag for resuming
> the device in the future. Qemu could take advantage of it in some
> error paths of live migration, though less than it seems because it
> overrides things like ring addresses. And, obviously, in
> vmstop/vmstart.
>
> Actually, net devices should be OK to restore with a full reset. The
> problem would be filesystems etc. whose state is not part of vdpa at
> the moment.
>
> Thanks!
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* RE: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
  2022-07-13  3:29                 ` Jason Wang
  (?)
@ 2022-07-13  5:18                 ` Eli Cohen
  2022-07-18  9:03                     ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Eli Cohen @ 2022-07-13  5:18 UTC (permalink / raw)
  To: Jason Wang, Eugenio Perez Martin
  Cc: mst, virtualization, linux-kernel, Si-Wei Liu, Parav Pandit

> From: Jason Wang <jasowang@redhat.com>
> Sent: Wednesday, July 13, 2022 6:29 AM
> To: Eugenio Perez Martin <eperezma@redhat.com>
> Cc: Eli Cohen <elic@nvidia.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>; linux-kernel
> <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
> 
> On Tue, Jul 12, 2022 at 5:16 PM Eugenio Perez Martin
> <eperezma@redhat.com> wrote:
> >
> > On Tue, Jul 12, 2022 at 10:14 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Mon, Jul 11, 2022 at 2:14 PM Eli Cohen <elic@nvidia.com> wrote:
> > > >
> > > > > From: Jason Wang <jasowang@redhat.com>
> > > > > Sent: Tuesday, June 21, 2022 6:05 AM
> > > > > To: Eugenio Perez Martin <eperezma@redhat.com>
> > > > > Cc: Eli Cohen <elic@nvidia.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>; linux-
> > > > > kernel
> > > > > <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> > > > > Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement suspend virtqueue callback
> > > > >
> > > > > On Mon, Jun 20, 2022 at 5:59 PM Eugenio Perez Martin
> > > > > <eperezma@redhat.com> wrote:
> > > > > >
> > > > > > On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > >
> > > > > > > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > > > > > > >
> > > > > > > > Implement the suspend callback, allowing the virtqueues to be suspended so
> > > > > > > > they stop processing descriptors. This is required to allow the shadow
> > > > > > > > virtqueue to kick in.
> > > > > > > >
> > > > > > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > > > > > ---
> > > > > > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > > > > > > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > > > > > > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > > > > >         if (err)
> > > > > > > >                 goto err_cmd;
> > > > > > > >
> > > > > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > > > > > > >         kfree(in);
> > > > > > > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > > > > > > >
> > > > > > > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > > > > > > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > > > > > > >                 return;
> > > > > > > >         }
> > > > > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > > > > > >         umems_destroy(ndev, mvq);
> > > > > > > >  }
> > > > > > > >
> > > > > > > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > > > > > > >         return err;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > > > > > > +{
> > > > > > > > +       switch (oldstate) {
> > > > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > > > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > > > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > > > > > > +       default:
> > > > > > > > +               return false;
> > > > > > > > +       }
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > > > > > > >  {
> > > > > > > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > > > > > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > > > > >         void *in;
> > > > > > > >         int err;
> > > > > > > >
> > > > > > > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > > > > > > +               return 0;
> > > > > > > > +
> > > > > > > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > > > > > > +               return -EINVAL;
> > > > > > > > +
> > > > > > > >         in = kzalloc(inlen, GFP_KERNEL);
> > > > > > > >         if (!in)
> > > > > > > >                 return -ENOMEM;
> > > > > > > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > > > >         struct mlx5_vdpa_virtqueue *mvq;
> > > > > > > > +       int err;
> > > > > > > >
> > > > > > > >         if (!mvdev->actual_features)
> > > > > > > >                 return;
> > > > > > > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > > > > >         }
> > > > > > > >
> > > > > > > >         mvq = &ndev->vqs[idx];
> > > > > > > > -       if (!ready)
> > > > > > > > +       if (!ready) {
> > > > > > > >                 suspend_vq(ndev, mvq);
> > > > > > > > +       } else {
> > > > > > > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > > > > > > +               if (err) {
> > > > > > > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > > > > > > +                       ready = false;
> > > > > > > > +               }
> > > > > > > > +       }
> > > > > > > > +
> > > > > > > >
> > > > > > > >         mvq->ready = ready;
> > > > > > > >  }
> > > > > > > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > > > > > > >         return err;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > > > > > > +{
> > > > > > > > +       struct mlx5_control_vq *cvq;
> > > > > > > > +
> > > > > > > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > > > > > +               return;
> > > > > > > > +
> > > > > > > > +       cvq = &mvdev->cvq;
> > > > > > > > +       cvq->ready = !suspend;
> > > > > > > > +}
> > > > > > >
> > > > > > > It looks to me like we need to synchronize this with reslock. And this
> > > > > > > probably deserves a dedicated fix.
> > > > > > >
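For illustration, the kind of dedicated fix being asked for here could
funnel every writer of cvq->ready through one helper that documents the
locking rule. A sketch only; the helper is hypothetical and not part of
this series:

static void mlx5_vdpa_cvq_set_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	/* cvq.ready is also read outside this path (e.g. the CVQ kick
	 * handler), so every writer is expected to hold reslock */
	lockdep_assert_held(&ndev->reslock);
	mvdev->cvq.ready = ready;
}

mlx5_vdpa_suspend() below already holds reslock when it gets here, so it
could call such a helper directly; any other writer would have to take
the lock first.
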
> > > > > > > > +
> > > > > > > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > > > > > > +{
> > > > > > > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > > > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > > > > > +       int i;
> > > > > > > > +
> > > > > > > > +       if (!suspend) {
> > > > > > > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > > > > > > +               return -EOPNOTSUPP;
> > > > > > > > +       }
> > > > > > > > +
> > > > > > > > +       down_write(&ndev->reslock);
> > > > > > > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > > > > > > +               mvq = &ndev->vqs[i];
> > > > > > > > +               suspend_vq(ndev, mvq);
> > > > > > > > +       }
> > > > > > > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> > > > > > >
> > > > > > > Do we need to synchronize with the carrier work here? Otherwise we may
> > > > > > > get a config notification after suspending.
> > > > > > >
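One way to close that race, sketched under the assumption that the
carrier update is queued on mvdev->wq from the mlx5 event notifier (the
nb and wq field names follow the driver's existing plumbing but are
assumptions here):

	down_write(&ndev->reslock);
	/* stop new link events, then drain any queued carrier work so no
	 * config notification can be delivered after suspend returns */
	mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
	flush_workqueue(mvdev->wq);
	for (i = 0; i < ndev->cur_num_vqs; i++)
		suspend_vq(ndev, &ndev->vqs[i]);
	mlx5_vdpa_cvq_suspend(mvdev, suspend);
	up_write(&ndev->reslock);
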
> > > > > > > > +       up_write(&ndev->reslock);
> > > > > > > > +       return 0;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > > > > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > > > > > > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > > > > > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > > > > >         .get_generation = mlx5_vdpa_get_generation,
> > > > > > > >         .set_map = mlx5_vdpa_set_map,
> > > > > > > >         .free = mlx5_vdpa_free,
> > > > > > > > +       .suspend = mlx5_vdpa_suspend,
> > > > > > >
> > > > > > > I don't see the vDPA bus patch to enable this method. Or did I miss something here?
> > > > > > >
> > > > > >
> > > > > > Should we add
> > > > > > Based-on: <20220526124338.36247-1-eperezma@redhat.com>
> > > > > >
> > > > > > To this series?
> > > > >
> > > > > Probably, but that series seems to support resume while this series doesn't.
> > > > >
> > > > > Any reason for this?
> > > >
> > > > I think Eugenio agreed that resume is not really required since we're going to stop using this
> > > > instance and migrate. In any case, we don't support resume for the hardware object,
> > > > though it could be simulated should it be absolutely necessary.
> > >
> > > This is fine if everything goes well during the live migration. But when
> > > migration fails for some reason, management (libvirt) may choose to
> > > restart the device on the source.
> > >
> > > This means we should either
> > >
> > > 1) support resume in the parent
> > > 2) emulate it in the qemu (with a lot of restoring of the states)
> > >
> >
> > I think it should be handled in qemu (at least the POC reset the
> > device), but I didn't exercise a lot of the failure paths there
> > because, well, it was a POC :).
> 
> It looks like a must in a production environment. The failure is not
> necessarily related to the shadow virtqueue itself.
> 

I don't see a specific interface to resume the device after suspend.
Reset, however, could do the job, and we already have it.
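
If firmware ever grows a resume path, supporting it in the driver would
mostly mean allowing one more transition in is_valid_state_change() from
the patch above. A sketch, where firmware accepting SUSPEND -> RDY is the
assumption:

static bool is_valid_state_change(int oldstate, int newstate)
{
	switch (oldstate) {
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
		/* resume: only valid if firmware actually accepts it */
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
	default:
		return false;
	}
}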

> Thanks
> 
> >
> > > And it is not only used for live migration; it could also be used for vmstop/start.
> > >
> >
> > I think it would be easier if we dedicated a feature flag for resuming
> > the device in the future. Qemu could take advantage of it in some
> > error paths of live migration, though less often than it seems, because
> > it overrides things like ring addresses. And, obviously, in
> > vmstop/vmstart.
> >
> > Actually, net devices should be OK to restore with a full reset. The
> > problem would be filesystems etc. that are not part of vdpa at the
> > moment.
> >
> > Thanks!
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
  2022-07-13  5:18                 ` Eli Cohen
@ 2022-07-18  9:03                     ` Jason Wang
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2022-07-18  9:03 UTC (permalink / raw)
  To: Eli Cohen
  Cc: Eugenio Perez Martin, mst, virtualization, linux-kernel,
	Si-Wei Liu, Parav Pandit

On Wed, Jul 13, 2022 at 1:18 PM Eli Cohen <elic@nvidia.com> wrote:
>
> > From: Jason Wang <jasowang@redhat.com>
> > Sent: Wednesday, July 13, 2022 6:29 AM
> > To: Eugenio Perez Martin <eperezma@redhat.com>
> > Cc: Eli Cohen <elic@nvidia.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>; linux-kernel
> > <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> > Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
> >
> > On Tue, Jul 12, 2022 at 5:16 PM Eugenio Perez Martin
> > <eperezma@redhat.com> wrote:
> > >
> > > On Tue, Jul 12, 2022 at 10:14 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Mon, Jul 11, 2022 at 2:14 PM Eli Cohen <elic@nvidia.com> wrote:
> > > > >
> > > > > > From: Jason Wang <jasowang@redhat.com>
> > > > > > Sent: Tuesday, June 21, 2022 6:05 AM
> > > > > > To: Eugenio Perez Martin <eperezma@redhat.com>
> > > > > > Cc: Eli Cohen <elic@nvidia.com>; mst <mst@redhat.com>; virtualization <virtualization@lists.linux-foundation.org>; linux-
> > kernel
> > > > > > <linux-kernel@vger.kernel.org>; Si-Wei Liu <si-wei.liu@oracle.com>; Parav Pandit <parav@nvidia.com>
> > > > > > Subject: Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback
> > > > > >
> > > > > > On Mon, Jun 20, 2022 at 5:59 PM Eugenio Perez Martin
> > > > > > <eperezma@redhat.com> wrote:
> > > > > > >
> > > > > > > On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@nvidia.com> wrote:
> > > > > > > > >
> > > > > > > > > Implement the suspend callback allowing to suspend the virtqueues so
> > > > > > > > > they stop processing descriptors. This is required to allow the shadow
> > > > > > > > > virtqueue to kick in.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > > > > > > ---
> > > > > > > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 68 +++++++++++++++++++++++++++++-
> > > > > > > > >  include/linux/mlx5/mlx5_ifc_vdpa.h |  8 ++++
> > > > > > > > >  2 files changed, 75 insertions(+), 1 deletion(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > > > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > > > > > >         if (err)
> > > > > > > > >                 goto err_cmd;
> > > > > > > > >
> > > > > > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > > > > > > > >         kfree(in);
> > > > > > > > >         mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > > > > > > > >
> > > > > > > > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > > > > > > > >                 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > > > > > > > >                 return;
> > > > > > > > >         }
> > > > > > > > > +       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > > > > > > >         umems_destroy(ndev, mvq);
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > > > > > > > >         return err;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > > > > > > > +{
> > > > > > > > > +       switch (oldstate) {
> > > > > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > > > > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > > > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > > > > > > > +               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > > > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > > > > > > > +       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > > > > > > > +       default:
> > > > > > > > > +               return false;
> > > > > > > > > +       }
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > > > > > > > >  {
> > > > > > > > >         int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > > > > > > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > > > > > >         void *in;
> > > > > > > > >         int err;
> > > > > > > > >
> > > > > > > > > +       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > > > > > > > +               return 0;
> > > > > > > > > +
> > > > > > > > > +       if (!is_valid_state_change(mvq->fw_state, state))
> > > > > > > > > +               return -EINVAL;
> > > > > > > > > +
> > > > > > > > >         in = kzalloc(inlen, GFP_KERNEL);
> > > > > > > > >         if (!in)
> > > > > > > > >                 return -ENOMEM;
> > > > > > > > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > > > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > > > > >         struct mlx5_vdpa_virtqueue *mvq;
> > > > > > > > > +       int err;
> > > > > > > > >
> > > > > > > > >         if (!mvdev->actual_features)
> > > > > > > > >                 return;
> > > > > > > > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > > > > > >         }
> > > > > > > > >
> > > > > > > > >         mvq = &ndev->vqs[idx];
> > > > > > > > > -       if (!ready)
> > > > > > > > > +       if (!ready) {
> > > > > > > > >                 suspend_vq(ndev, mvq);
> > > > > > > > > +       } else {
> > > > > > > > > +               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > > > > > > > +               if (err) {
> > > > > > > > > +                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > > > > > > > +                       ready = false;
> > > > > > > > > +               }
> > > > > > > > > +       }
> > > > > > > > > +
> > > > > > > > >
> > > > > > > > >         mvq->ready = ready;
> > > > > > > > >  }
> > > > > > > > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > > > > > > > >         return err;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > > > > > > > +{
> > > > > > > > > +       struct mlx5_control_vq *cvq;
> > > > > > > > > +
> > > > > > > > > +       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > > > > > > +               return;
> > > > > > > > > +
> > > > > > > > > +       cvq = &mvdev->cvq;
> > > > > > > > > +       cvq->ready = !suspend;
> > > > > > > > > +}
> > > > > > > >
> > > > > > > > It looks to me like we need to synchronize this with reslock. And this
> > > > > > > > probably deserves a dedicated fix.
> > > > > > > >
> > > > > > > > > +
> > > > > > > > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > > > > > > > +{
> > > > > > > > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > > > > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > > > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > > > > > > +       int i;
> > > > > > > > > +
> > > > > > > > > +       if (!suspend) {
> > > > > > > > > +               mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > > > > > > > +               return -EOPNOTSUPP;
> > > > > > > > > +       }
> > > > > > > > > +
> > > > > > > > > +       down_write(&ndev->reslock);
> > > > > > > > > +       for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > > > > > > > +               mvq = &ndev->vqs[i];
> > > > > > > > > +               suspend_vq(ndev, mvq);
> > > > > > > > > +       }
> > > > > > > > > +       mlx5_vdpa_cvq_suspend(mvdev, suspend);
> > > > > > > >
> > > > > > > > Do we need to synchronize with the carrier work here? Otherwise we may
> > > > > > > > get a config notification after suspending.
> > > > > > > >
> > > > > > > > > +       up_write(&ndev->reslock);
> > > > > > > > > +       return 0;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >  static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > > > > > >         .set_vq_address = mlx5_vdpa_set_vq_address,
> > > > > > > > >         .set_vq_num = mlx5_vdpa_set_vq_num,
> > > > > > > > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > > > > > >         .get_generation = mlx5_vdpa_get_generation,
> > > > > > > > >         .set_map = mlx5_vdpa_set_map,
> > > > > > > > >         .free = mlx5_vdpa_free,
> > > > > > > > > +       .suspend = mlx5_vdpa_suspend,
> > > > > > > >
> > > > > > > > I don't see the vDPA bus patch to enable this method. Or did I miss something here?
> > > > > > > >
> > > > > > >
> > > > > > > Should we add
> > > > > > > Based-on: <20220526124338.36247-1-eperezma@redhat.com>
> > > > > > >
> > > > > > > To this series?
> > > > > >
> > > > > > Probably, but that series seems to support resume while this series doesn't.
> > > > > >
> > > > > > Any reason for this?
> > > > >
> > > > > I think Eugenio agreed that resume is not really required since we're going to stop using this
> > > > > instance and migrate. In any case, we don't support resume for the hardware object,
> > > > > though it could be simulated should it be absolutely necessary.
> > > >
> > > > This is fine if everything goes well during the live migration. But when
> > > > migration fails for some reason, management (libvirt) may choose to
> > > > restart the device on the source.
> > > >
> > > > This means we should either
> > > >
> > > > 1) support resume in the parent
> > > > 2) emulate it in the qemu (with a lot of restoring of the states)
> > > >
> > >
> > > I think it should be handled in qemu (at least the POC reset the
> > > device), but I didn't exercise a lot of the failure paths there
> > > because, well, it was a POC :).
> >
> > It looks like a must in a production environment. The failure is not
> > necessarily related to the shadow virtqueue itself.
> >
>
> I don't see a specific interface to resume the device after suspend.
> Reset, however, could do the job, and we already have it.

Yes, this is fine as long as we can emulate it via set_vq_state + reset.
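
For reference, that emulation from userspace could look roughly like the
following against the vhost-vdpa uAPI. This is only a sketch: struct
saved_state, the exact set of things to replay, and all error handling
are assumed or omitted.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>
#include <linux/virtio_config.h>

struct saved_state {
	uint64_t features;
	int nvqs;
	struct vhost_vring_addr *addr;	/* per-vq ring addresses */
	struct vhost_vring_state *base;	/* per-vq next avail index */
};

static void emulate_resume(int fd, const struct saved_state *s)
{
	uint8_t status = 0;

	ioctl(fd, VHOST_VDPA_SET_STATUS, &status);	/* reset */
	status = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
	ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
	ioctl(fd, VHOST_SET_FEATURES, &s->features);
	status |= VIRTIO_CONFIG_S_FEATURES_OK;
	ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
	for (int i = 0; i < s->nvqs; i++) {
		ioctl(fd, VHOST_SET_VRING_ADDR, &s->addr[i]);
		ioctl(fd, VHOST_SET_VRING_BASE, &s->base[i]);	/* set_vq_state */
	}
	status |= VIRTIO_CONFIG_S_DRIVER_OK;
	ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
}

The IOTLB mappings and the config space would need replaying as well; the
point is only that no new kernel interface is strictly required.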

Thanks

>
> > Thanks
> >
> > >
> > > > And it is not only used for live migration; it could also be used for vmstop/start.
> > > >
> > >
> > > I think it would be easier if we dedicated a feature flag for resuming
> > > the device in the future. Qemu could take advantage of it in some
> > > error paths of live migration, though less often than it seems, because
> > > it overrides things like ring addresses. And, obviously, in
> > > vmstop/vmstart.
> > >
> > > Actually, net devices should be OK to restore with a full reset. The
> > > problem would be filesystems etc. that are not part of vdpa at the
> > > moment.
> > >
> > > Thanks!
> > >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2022-07-18  9:04 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-16 13:27 [PATCH RFC 0/3] Support live migration with mlx5_vdpa Eli Cohen
2022-06-16 13:27 ` [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback Eli Cohen
2022-06-16 17:12   ` kernel test robot
2022-06-19 16:33   ` Eugenio Perez Martin
2022-06-20  8:56   ` Jason Wang
2022-06-20  8:56     ` Jason Wang
2022-06-20  9:58     ` Eugenio Perez Martin
2022-06-20 10:06       ` Michael S. Tsirkin
2022-06-20 10:06         ` Michael S. Tsirkin
2022-06-20 11:09         ` Eugenio Perez Martin
2022-06-21  3:04       ` Jason Wang
2022-06-21  3:04         ` Jason Wang
2022-06-21  7:48         ` Eugenio Perez Martin
2022-06-21  7:52           ` Jason Wang
2022-06-21  7:52             ` Jason Wang
2022-07-11  6:14         ` Eli Cohen
2022-07-11 10:43           ` Eugenio Perez Martin
2022-07-12  8:14           ` Jason Wang
2022-07-12  8:14             ` Jason Wang
2022-07-12  9:15             ` Eugenio Perez Martin
2022-07-13  3:29               ` Jason Wang
2022-07-13  3:29                 ` Jason Wang
2022-07-13  5:18                 ` Eli Cohen
2022-07-18  9:03                   ` Jason Wang
2022-07-18  9:03                     ` Jason Wang
2022-06-20 13:09     ` Eli Cohen
2022-06-21  2:58       ` Jason Wang
2022-06-21  2:58         ` Jason Wang
2022-07-11  6:10         ` Eli Cohen
2022-06-20 10:07   ` Eugenio Perez Martin
2022-06-16 13:27 ` [PATCH RFC 2/3] vdpa/mlx5: Support different address spaces for control and data Eli Cohen
2022-06-20  8:47   ` Jason Wang
2022-06-20  8:47     ` Jason Wang
2022-06-20  8:57   ` Eugenio Perez Martin
2022-06-20  9:20     ` Jason Wang
2022-06-20  9:20       ` Jason Wang
2022-06-16 13:27 ` [PATCH RFC 3/3] vdpa/mlx5: Disable VLAN support to support live migration Eli Cohen
2022-06-20  8:47   ` Jason Wang
2022-06-20  8:47     ` Jason Wang
2022-06-20  9:01     ` Eugenio Perez Martin
2022-06-20  9:25       ` Jason Wang
2022-06-20  9:25         ` Jason Wang
