* [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling
@ 2023-08-02 17:12 Dragos Tatulea
  2023-08-02 17:12 ` [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics Dragos Tatulea
                   ` (2 more replies)
  0 siblings, 3 replies; 42+ messages in thread
From: Dragos Tatulea @ 2023-08-02 17:12 UTC (permalink / raw)
  To: Jason Wang, Michael S. Tsirkin, Xuan Zhuo
  Cc: Dragos Tatulea, linux-kernel, virtualization

This patch series is based on Eugenio's fix for handling CVQs in
a different ASID [0].

The second patch is the actual fix.

The first patch addresses a possible issue with the mr->initialized flag
that I found while working on the fix. Patch 2 builds on the ASID-aware
destroy helper introduced in patch 1, so the patches are ordered like
this for clarity.

[0] https://lore.kernel.org/lkml/20230112142218.725622-1-eperezma@redhat.com/

Dragos Tatulea (1):
  vdpa/mlx5: Fix mr->initialized semantics

Eugenio Pérez (1):
  vdpa/mlx5: Delete control vq iotlb in destroy_mr only when necessary

 drivers/vdpa/mlx5/core/mlx5_vdpa.h |  2 +
 drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
 drivers/vdpa/mlx5/net/mlx5_vnet.c  |  4 +-
 3 files changed, 74 insertions(+), 29 deletions(-)

-- 
2.41.0



* [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-02 17:12 [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling Dragos Tatulea
@ 2023-08-02 17:12 ` Dragos Tatulea
  2023-08-03  8:03   ` Jason Wang
  2023-08-02 17:12 ` [PATCH 2/2] vdpa/mlx5: Delete control vq iotlb in destroy_mr only when necessary Dragos Tatulea
  2023-08-10  8:54 ` [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling Michael S. Tsirkin
  2 siblings, 1 reply; 42+ messages in thread
From: Dragos Tatulea @ 2023-08-02 17:12 UTC (permalink / raw)
  To: Michael S. Tsirkin, Jason Wang, Xuan Zhuo
  Cc: Dragos Tatulea, Eugenio Pérez, Gal Pressman, virtualization,
	linux-kernel

The mr->initialized flag is shared between the control vq and data vq
part of the mr init/uninit. But if the control vq and data vq get placed
in different ASIDs, it can happen that initializing the control vq will
prevent the data vq mr from being initialized.
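
To make the failure easier to see, here is a rough userspace model of the
pre-patch flow (the struct and function names are illustrative stand-ins,
not the driver's; only the shared-flag logic is mirrored):

/* build with: cc -o mr_flag_model mr_flag_model.c */
#include <stdbool.h>
#include <stdio.h>

enum { DATAVQ_GROUP, CVQ_GROUP, NGROUPS };

struct model {
	unsigned int group2asid[NGROUPS];
	bool initialized;	/* models the shared mr->initialized flag */
	bool dvq_mr_created;
	bool cvq_iotlb_dupped;
};

/* mirrors the shape of the pre-patch _mlx5_vdpa_create_mr() */
static void create_mr(struct model *m, unsigned int asid)
{
	if (m->initialized)
		return;

	if (m->group2asid[DATAVQ_GROUP] == asid)
		m->dvq_mr_created = true;	/* create_user_mr()/create_dma_mr() */

	if (m->group2asid[CVQ_GROUP] == asid)
		m->cvq_iotlb_dupped = true;	/* dup_iotlb() */

	/* set even when only the cvq branch ran */
	m->initialized = true;
}

int main(void)
{
	/* cvq group placed in ASID 1, data vq group in ASID 0 */
	struct model m = { .group2asid = { [DATAVQ_GROUP] = 0, [CVQ_GROUP] = 1 } };

	create_mr(&m, 1);	/* .set_map() for the cvq ASID arrives first */
	create_mr(&m, 0);	/* later data vq ASID call bails out early */

	/* prints "dvq mr created: 0, cvq iotlb dupped: 1" */
	printf("dvq mr created: %d, cvq iotlb dupped: %d\n",
	       m.dvq_mr_created, m.cvq_iotlb_dupped);
	return 0;
}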

This patch consolidates the control and data vq init parts into their
own init functions. The mr->initialized will now be used for the data vq
only. The control vq currently doesn't need a flag.

The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
split into data and control vq functions which are now also ASID aware.

Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data")
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
---
 drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
 drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
 2 files changed, 71 insertions(+), 27 deletions(-)

diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 25fc4120b618..a0420be5059f 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
 	struct list_head head;
 	unsigned long num_directs;
 	unsigned long num_klms;
+	/* state of dvq mr */
 	bool initialized;
 
 	/* serialize mkey creation and destruction */
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index 03e543229791..4ae14a248a4b 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
 	}
 }
 
-void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
+static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+{
+	if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
+		return;
+
+	prune_iotlb(mvdev);
+}
+
+static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
 {
 	struct mlx5_vdpa_mr *mr = &mvdev->mr;
 
-	mutex_lock(&mr->mkey_mtx);
+	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
+		return;
+
 	if (!mr->initialized)
-		goto out;
+		return;
 
-	prune_iotlb(mvdev);
 	if (mr->user_mr)
 		destroy_user_mr(mvdev, mr);
 	else
 		destroy_dma_mr(mvdev, mr);
 
 	mr->initialized = false;
-out:
+}
+
+static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+{
+	struct mlx5_vdpa_mr *mr = &mvdev->mr;
+
+	mutex_lock(&mr->mkey_mtx);
+
+	_mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
+	_mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
+
 	mutex_unlock(&mr->mkey_mtx);
 }
 
-static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
-				struct vhost_iotlb *iotlb, unsigned int asid)
+void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
+{
+	mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
+	mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
+}
+
+static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
+				    struct vhost_iotlb *iotlb,
+				    unsigned int asid)
+{
+	if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
+		return 0;
+
+	return dup_iotlb(mvdev, iotlb);
+}
+
+static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
+				    struct vhost_iotlb *iotlb,
+				    unsigned int asid)
 {
 	struct mlx5_vdpa_mr *mr = &mvdev->mr;
 	int err;
 
-	if (mr->initialized)
+	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
 		return 0;
 
-	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
-		if (iotlb)
-			err = create_user_mr(mvdev, iotlb);
-		else
-			err = create_dma_mr(mvdev, mr);
+	if (mr->initialized)
+		return 0;
 
-		if (err)
-			return err;
-	}
+	if (iotlb)
+		err = create_user_mr(mvdev, iotlb);
+	else
+		err = create_dma_mr(mvdev, mr);
 
-	if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
-		err = dup_iotlb(mvdev, iotlb);
-		if (err)
-			goto out_err;
-	}
+	if (err)
+		return err;
 
 	mr->initialized = true;
+
+	return 0;
+}
+
+static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
+				struct vhost_iotlb *iotlb, unsigned int asid)
+{
+	int err;
+
+	err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
+	if (err)
+		return err;
+
+	err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
+	if (err)
+		goto out_err;
+
 	return 0;
 
 out_err:
-	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
-		if (iotlb)
-			destroy_user_mr(mvdev, mr);
-		else
-			destroy_dma_mr(mvdev, mr);
-	}
+	_mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
 
 	return err;
 }
-- 
2.41.0



* [PATCH 2/2] vdpa/mlx5: Delete control vq iotlb in destroy_mr only when necessary
  2023-08-02 17:12 [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling Dragos Tatulea
  2023-08-02 17:12 ` [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics Dragos Tatulea
@ 2023-08-02 17:12 ` Dragos Tatulea
  2023-08-10  8:54 ` [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling Michael S. Tsirkin
  2 siblings, 0 replies; 42+ messages in thread
From: Dragos Tatulea @ 2023-08-02 17:12 UTC (permalink / raw)
  To: Michael S. Tsirkin, Jason Wang, Xuan Zhuo
  Cc: Eugenio Pérez, Gal Pressman, virtualization, linux-kernel

From: Eugenio Pérez <eperezma@redhat.com>

mlx5_vdpa_destroy_mr can be called from .set_map with the data ASID after
the control virtqueue ASID iotlb has been populated. The control vq
iotlb must not be cleared, since it will not be populated again.

So call the ASID-aware destroy function, which makes sure that only the
resources tied to the given ASID are destroyed.
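
A rough userspace model of the difference (the names are illustrative
stand-ins, not the driver's; only the ASID checks are mirrored):

/* build with: cc -o destroy_model destroy_model.c */
#include <stdbool.h>
#include <stdio.h>

enum { DATAVQ_GROUP, CVQ_GROUP, NGROUPS };

struct model {
	unsigned int group2asid[NGROUPS];
	bool cvq_iotlb;		/* models the duplicated cvq iotlb */
	bool dvq_mr;
};

/* old behaviour: tears everything down, no matter which ASID changed */
static void destroy_mr(struct model *m)
{
	m->cvq_iotlb = false;	/* prune_iotlb() */
	m->dvq_mr = false;
}

/* new behaviour: only touches the resources owned by @asid */
static void destroy_mr_asid(struct model *m, unsigned int asid)
{
	if (m->group2asid[CVQ_GROUP] == asid)
		m->cvq_iotlb = false;
	if (m->group2asid[DATAVQ_GROUP] == asid)
		m->dvq_mr = false;
}

int main(void)
{
	struct model m = {
		.group2asid = { [DATAVQ_GROUP] = 0, [CVQ_GROUP] = 1 },
		.cvq_iotlb = true,
		.dvq_mr = true,
	};

	destroy_mr_asid(&m, 0);		/* .set_map() on the data ASID only */
	printf("cvq iotlb kept: %d\n", m.cvq_iotlb);	/* prints 1 */

	destroy_mr(&m);			/* the old call pruned it as well */
	printf("cvq iotlb kept: %d\n", m.cvq_iotlb);	/* prints 0 */
	return 0;
}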

Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data")
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
---
 drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 +
 drivers/vdpa/mlx5/core/mr.c        | 2 +-
 drivers/vdpa/mlx5/net/mlx5_vnet.c  | 4 ++--
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index a0420be5059f..b53420e874ac 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -122,6 +122,7 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io
 int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
 			unsigned int asid);
 void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev);
+void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
 
 #define mlx5_vdpa_warn(__dev, format, ...)                                                         \
 	dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__,     \
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index 4ae14a248a4b..5a1971fcd87b 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -515,7 +515,7 @@ static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int
 	mr->initialized = false;
 }
 
-static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
 {
 	struct mlx5_vdpa_mr *mr = &mvdev->mr;
 
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 9138ef2fb2c8..61c10ba5e3f5 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2636,7 +2636,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
 		goto err_mr;
 
 	teardown_driver(ndev);
-	mlx5_vdpa_destroy_mr(mvdev);
+	mlx5_vdpa_destroy_mr_asid(mvdev, asid);
 	err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
 	if (err)
 		goto err_mr;
@@ -2652,7 +2652,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
 	return 0;
 
 err_setup:
-	mlx5_vdpa_destroy_mr(mvdev);
+	mlx5_vdpa_destroy_mr_asid(mvdev, asid);
 err_mr:
 	return err;
 }
-- 
2.41.0



* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-02 17:12 ` [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics Dragos Tatulea
@ 2023-08-03  8:03   ` Jason Wang
  2023-08-03 11:40     ` Dragos Tatulea
  2023-08-03 17:57     ` Si-Wei Liu
  0 siblings, 2 replies; 42+ messages in thread
From: Jason Wang @ 2023-08-03  8:03 UTC (permalink / raw)
  To: Dragos Tatulea
  Cc: Michael S. Tsirkin, Xuan Zhuo, Eugenio Pérez, Gal Pressman,
	virtualization, linux-kernel

On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
> The mr->initialized flag is shared between the control vq and data vq
> part of the mr init/uninit. But if the control vq and data vq get placed
> in different ASIDs, it can happen that initializing the control vq will
> prevent the data vq mr from being initialized.
>
> This patch consolidates the control and data vq init parts into their
> own init functions. The mr->initialized will now be used for the data vq
> only. The control vq currently doesn't need a flag.
>
> The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
> split into data and control vq functions which are now also ASID aware.
>
> Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data")
> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
> Reviewed-by: Gal Pressman <gal@nvidia.com>
> ---
>  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
>  drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
>  2 files changed, 71 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> index 25fc4120b618..a0420be5059f 100644
> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
>         struct list_head head;
>         unsigned long num_directs;
>         unsigned long num_klms;
> +       /* state of dvq mr */
>         bool initialized;
>
>         /* serialize mkey creation and destruction */
> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> index 03e543229791..4ae14a248a4b 100644
> --- a/drivers/vdpa/mlx5/core/mr.c
> +++ b/drivers/vdpa/mlx5/core/mr.c
> @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
>         }
>  }
>
> -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> +{
> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> +               return;
> +
> +       prune_iotlb(mvdev);
> +}
> +
> +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>  {
>         struct mlx5_vdpa_mr *mr = &mvdev->mr;
>
> -       mutex_lock(&mr->mkey_mtx);
> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> +               return;
> +
>         if (!mr->initialized)
> -               goto out;
> +               return;
>
> -       prune_iotlb(mvdev);
>         if (mr->user_mr)
>                 destroy_user_mr(mvdev, mr);
>         else
>                 destroy_dma_mr(mvdev, mr);
>
>         mr->initialized = false;
> -out:
> +}
> +
> +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> +{
> +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> +
> +       mutex_lock(&mr->mkey_mtx);
> +
> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
> +
>         mutex_unlock(&mr->mkey_mtx);
>  }
>
> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> -                               struct vhost_iotlb *iotlb, unsigned int asid)
> +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> +{
> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
> +}
> +
> +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> +                                   struct vhost_iotlb *iotlb,
> +                                   unsigned int asid)
> +{
> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> +               return 0;
> +
> +       return dup_iotlb(mvdev, iotlb);

This worries me as conceptually, there should be no difference between
dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.

One example is that, if we only do dup_iotlb() but not try to create
dma mr here, we will break virtio-vdpa:

commit 6f5312f801836e6af9bcbb0bdb44dc423e129206
Author: Eli Cohen <elic@nvidia.com>
Date:   Wed Jun 2 11:58:54 2021 +0300

    vdpa/mlx5: Add support for running with virtio_vdpa

    In order to support running vdpa using vritio_vdpa driver, we need  to
    create a different kind of MR, one that has 1:1 mapping, since the
    addresses referring to virtqueues are dma addresses.

    We create the 1:1 MR in mlx5_vdpa_dev_add() only in case firmware
    supports the general capability umem_uid_0. The reason for that is that
    1:1 MRs must be created with uid == 0 while virtqueue objects can be
    created with uid == 0 only when the firmware capability is on.

    If the set_map() callback is called with new translations provided
    through iotlb, the driver will destroy the 1:1 MR and create a regular
    one.

    Signed-off-by: Eli Cohen <elic@nvidia.com>
    Link: https://lore.kernel.org/r/20210602085854.62690-1-elic@nvidia.com
    Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
    Acked-by: Jason Wang <jasowang@redhat.com>

Thanks


> +}
> +
> +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
> +                                   struct vhost_iotlb *iotlb,
> +                                   unsigned int asid)
>  {
>         struct mlx5_vdpa_mr *mr = &mvdev->mr;
>         int err;
>
> -       if (mr->initialized)
> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
>                 return 0;
>
> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> -               if (iotlb)
> -                       err = create_user_mr(mvdev, iotlb);
> -               else
> -                       err = create_dma_mr(mvdev, mr);
> +       if (mr->initialized)
> +               return 0;
>
> -               if (err)
> -                       return err;
> -       }
> +       if (iotlb)
> +               err = create_user_mr(mvdev, iotlb);
> +       else
> +               err = create_dma_mr(mvdev, mr);
>
> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
> -               err = dup_iotlb(mvdev, iotlb);
> -               if (err)
> -                       goto out_err;
> -       }
> +       if (err)
> +               return err;
>
>         mr->initialized = true;
> +
> +       return 0;
> +}
> +
> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> +                               struct vhost_iotlb *iotlb, unsigned int asid)
> +{
> +       int err;
> +
> +       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
> +       if (err)
> +               return err;
> +
> +       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
> +       if (err)
> +               goto out_err;
> +
>         return 0;
>
>  out_err:
> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> -               if (iotlb)
> -                       destroy_user_mr(mvdev, mr);
> -               else
> -                       destroy_dma_mr(mvdev, mr);
> -       }
> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
>
>         return err;
>  }
> --
> 2.41.0
>



* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-03  8:03   ` Jason Wang
@ 2023-08-03 11:40     ` Dragos Tatulea
  2023-08-08  2:57       ` Jason Wang
  2023-08-03 17:57     ` Si-Wei Liu
  1 sibling, 1 reply; 42+ messages in thread
From: Dragos Tatulea @ 2023-08-03 11:40 UTC (permalink / raw)
  To: jasowang
  Cc: virtualization, mst, eperezma, linux-kernel, Gal Pressman, xuanzhuo

On Thu, 2023-08-03 at 16:03 +0800, Jason Wang wrote:
> On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > 
> > The mr->initialized flag is shared between the control vq and data vq
> > part of the mr init/uninit. But if the control vq and data vq get placed
> > in different ASIDs, it can happen that initializing the control vq will
> > prevent the data vq mr from being initialized.
> > 
> > This patch consolidates the control and data vq init parts into their
> > own init functions. The mr->initialized will now be used for the data vq
> > only. The control vq currently doesn't need a flag.
> > 
> > The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
> > split into data and control vq functions which are now also ASID aware.
> > 
> > Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for
> > control and data")
> > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
> > Reviewed-by: Gal Pressman <gal@nvidia.com>
> > ---
> >  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
> >  drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
> >  2 files changed, 71 insertions(+), 27 deletions(-)
> > 
> > diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > index 25fc4120b618..a0420be5059f 100644
> > --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
> >         struct list_head head;
> >         unsigned long num_directs;
> >         unsigned long num_klms;
> > +       /* state of dvq mr */
> >         bool initialized;
> > 
> >         /* serialize mkey creation and destruction */
> > diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> > index 03e543229791..4ae14a248a4b 100644
> > --- a/drivers/vdpa/mlx5/core/mr.c
> > +++ b/drivers/vdpa/mlx5/core/mr.c
> > @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev
> > *mvdev, struct mlx5_vdpa_mr *mr
> >         }
> >  }
> > 
> > -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned
> > int asid)
> > +{
> > +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> > +               return;
> > +
> > +       prune_iotlb(mvdev);
> > +}
> > +
> > +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned
> > int asid)
> >  {
> >         struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > 
> > -       mutex_lock(&mr->mkey_mtx);
> > +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> > +               return;
> > +
> >         if (!mr->initialized)
> > -               goto out;
> > +               return;
> > 
> > -       prune_iotlb(mvdev);
> >         if (mr->user_mr)
> >                 destroy_user_mr(mvdev, mr);
> >         else
> >                 destroy_dma_mr(mvdev, mr);
> > 
> >         mr->initialized = false;
> > -out:
> > +}
> > +
> > +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned
> > int asid)
> > +{
> > +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > +
> > +       mutex_lock(&mr->mkey_mtx);
> > +
> > +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> > +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
> > +
> >         mutex_unlock(&mr->mkey_mtx);
> >  }
> > 
> > -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> > -                               struct vhost_iotlb *iotlb, unsigned int
> > asid)
> > +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > +{
> > +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev-
> > >group2asid[MLX5_VDPA_CVQ_GROUP]);
> > +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev-
> > >group2asid[MLX5_VDPA_DATAVQ_GROUP]);
> > +}
> > +
> > +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> > +                                   struct vhost_iotlb *iotlb,
> > +                                   unsigned int asid)
> > +{
> > +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> > +               return 0;
> > +
> > +       return dup_iotlb(mvdev, iotlb);
> 
> This worries me as conceptually, there should be no difference between
> dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
> 
Are you worried by the changes in this patch or about the possibility of having 

The reason for this change is that I noticed if you create one mr in one asid
you could be blocked out from creating another one in a different asid due to
mr->initialized being true. To me that seemed problematic. Is it not?

> One example is that, if we only do dup_iotlb() but not try to create
> dma mr here, we will break virtio-vdpa:
> 
How will that be possible? _mlx5_vdpa_create_mr calls _mlx5_vdpa_create_dvq_mr
and _mlx5_vdpa_create_cvq_mr. The only thing that is different in this patch is
that the cvq is not protected by an init flag. My understanding was that it
would be ok to dup_iotlb again. Is it not? If not I could add an additional
initialized flag for the cvq mr.

Thanks,
Dragos

> commit 6f5312f801836e6af9bcbb0bdb44dc423e129206
> Author: Eli Cohen <elic@nvidia.com>
> Date:   Wed Jun 2 11:58:54 2021 +0300
> 
>     vdpa/mlx5: Add support for running with virtio_vdpa
> 
>     In order to support running vdpa using vritio_vdpa driver, we need  to
>     create a different kind of MR, one that has 1:1 mapping, since the
>     addresses referring to virtqueues are dma addresses.
> 
>     We create the 1:1 MR in mlx5_vdpa_dev_add() only in case firmware
>     supports the general capability umem_uid_0. The reason for that is that
>     1:1 MRs must be created with uid == 0 while virtqueue objects can be
>     created with uid == 0 only when the firmware capability is on.
> 
>     If the set_map() callback is called with new translations provided
>     through iotlb, the driver will destroy the 1:1 MR and create a regular
>     one.
> 
>     Signed-off-by: Eli Cohen <elic@nvidia.com>
>     Link: https://lore.kernel.org/r/20210602085854.62690-1-elic@nvidia.com
>     Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>     Acked-by: Jason Wang <jasowang@redhat.com>
> 
> 

> Thanks
> 
> 

> > +}
> > +
> > +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
> > +                                   struct vhost_iotlb *iotlb,
> > +                                   unsigned int asid)
> >  {
> >         struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >         int err;
> > 
> > -       if (mr->initialized)
> > +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> >                 return 0;
> > 
> > -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> > -               if (iotlb)
> > -                       err = create_user_mr(mvdev, iotlb);
> > -               else
> > -                       err = create_dma_mr(mvdev, mr);
> > +       if (mr->initialized)
> > +               return 0;
> > 
> > -               if (err)
> > -                       return err;
> > -       }
> > +       if (iotlb)
> > +               err = create_user_mr(mvdev, iotlb);
> > +       else
> > +               err = create_dma_mr(mvdev, mr);
> > 
> > -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
> > -               err = dup_iotlb(mvdev, iotlb);
> > -               if (err)
> > -                       goto out_err;
> > -       }
> > +       if (err)
> > +               return err;
> > 
> >         mr->initialized = true;
> > +
> > +       return 0;
> > +}
> > +
> > +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> > +                               struct vhost_iotlb *iotlb, unsigned int
> > asid)
> > +{
> > +       int err;
> > +
> > +       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
> > +       if (err)
> > +               return err;
> > +
> > +       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
> > +       if (err)
> > +               goto out_err;
> > +
> >         return 0;
> > 
> >  out_err:
> > -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> > -               if (iotlb)
> > -                       destroy_user_mr(mvdev, mr);
> > -               else
> > -                       destroy_dma_mr(mvdev, mr);
> > -       }
> > +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> > 
> >         return err;
> >  }
> > --
> > 2.41.0
> > 
> 



* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-03  8:03   ` Jason Wang
  2023-08-03 11:40     ` Dragos Tatulea
@ 2023-08-03 17:57     ` Si-Wei Liu
  2023-08-08  3:00       ` Jason Wang
  1 sibling, 1 reply; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-03 17:57 UTC (permalink / raw)
  To: Jason Wang, Dragos Tatulea
  Cc: Xuan Zhuo, Michael S. Tsirkin, Gal Pressman, linux-kernel,
	virtualization, Eugenio Pérez



On 8/3/2023 1:03 AM, Jason Wang wrote:
> On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>> The mr->initialized flag is shared between the control vq and data vq
>> part of the mr init/uninit. But if the control vq and data vq get placed
>> in different ASIDs, it can happen that initializing the control vq will
>> prevent the data vq mr from being initialized.
>>
>> This patch consolidates the control and data vq init parts into their
>> own init functions. The mr->initialized will now be used for the data vq
>> only. The control vq currently doesn't need a flag.
>>
>> The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
>> split into data and control vq functions which are now also ASID aware.
>>
>> Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data")
>> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
>> Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
>> Reviewed-by: Gal Pressman <gal@nvidia.com>
>> ---
>>   drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
>>   drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
>>   2 files changed, 71 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>> index 25fc4120b618..a0420be5059f 100644
>> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>> @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
>>          struct list_head head;
>>          unsigned long num_directs;
>>          unsigned long num_klms;
>> +       /* state of dvq mr */
>>          bool initialized;
>>
>>          /* serialize mkey creation and destruction */
>> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
>> index 03e543229791..4ae14a248a4b 100644
>> --- a/drivers/vdpa/mlx5/core/mr.c
>> +++ b/drivers/vdpa/mlx5/core/mr.c
>> @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
>>          }
>>   }
>>
>> -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
>> +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>> +{
>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
>> +               return;
>> +
>> +       prune_iotlb(mvdev);
>> +}
>> +
>> +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>>   {
>>          struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>
>> -       mutex_lock(&mr->mkey_mtx);
>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
>> +               return;
>> +
>>          if (!mr->initialized)
>> -               goto out;
>> +               return;
>>
>> -       prune_iotlb(mvdev);
>>          if (mr->user_mr)
>>                  destroy_user_mr(mvdev, mr);
>>          else
>>                  destroy_dma_mr(mvdev, mr);
>>
>>          mr->initialized = false;
>> -out:
>> +}
>> +
>> +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>> +{
>> +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
>> +
>> +       mutex_lock(&mr->mkey_mtx);
>> +
>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
>> +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
>> +
>>          mutex_unlock(&mr->mkey_mtx);
>>   }
>>
>> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
>> -                               struct vhost_iotlb *iotlb, unsigned int asid)
>> +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
>> +{
>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
>> +}
>> +
>> +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
>> +                                   struct vhost_iotlb *iotlb,
>> +                                   unsigned int asid)
>> +{
>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
>> +               return 0;
>> +
>> +       return dup_iotlb(mvdev, iotlb);
> This worries me as conceptually, there should be no difference between
> dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
>
> One example is that, if we only do dup_iotlb() but not try to create
> dma mr here, we will break virtio-vdpa:
For this case, I guess we may need another way to support virtio-vdpa 
1:1 mapping rather than overloading virtio device reset semantics, see:

https://www.mail-archive.com/qemu-devel@nongnu.org/msg953755.html

 > Conceptually, the address mapping is not a part of the abstraction for
 > a virtio device now. So resetting the memory mapping during virtio
 > device reset seems wrong.

where we want to keep memory mapping intact across virtio device reset 
for the best live migration latency/downtime. I wonder whether it would
work to reset the mapping in the vhost-vdpa life cycle, outside of virtio
reset: say, introduce a .reset_map() op to restore the 1:1 mapping within
vhost_vdpa_remove_as(), right after vhost_vdpa_iotlb_unmap()? Then we
could move the iotlb reset logic there without worrying about breaking
virtio-vdpa.
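
A rough sketch of how that could be wired up (the op name, its signature
and the call site below are assumptions drawn from the idea above, not
existing code):

/*
 * Hypothetical member added to struct vdpa_config_ops in
 * include/linux/vdpa.h:
 *
 *	// restore the default 1:1 DMA mapping for @asid
 *	int (*reset_map)(struct vdpa_device *vdev, unsigned int asid);
 *
 * and a hypothetical caller in drivers/vhost/vdpa.c, invoked from
 * vhost_vdpa_remove_as() right after vhost_vdpa_iotlb_unmap():
 */
static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->reset_map)
		ops->reset_map(vdpa, asid);
}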

Thanks,
-Siwei

>
> commit 6f5312f801836e6af9bcbb0bdb44dc423e129206
> Author: Eli Cohen <elic@nvidia.com>
> Date:   Wed Jun 2 11:58:54 2021 +0300
>
>      vdpa/mlx5: Add support for running with virtio_vdpa
>
>      In order to support running vdpa using vritio_vdpa driver, we need  to
>      create a different kind of MR, one that has 1:1 mapping, since the
>      addresses referring to virtqueues are dma addresses.
>
>      We create the 1:1 MR in mlx5_vdpa_dev_add() only in case firmware
>      supports the general capability umem_uid_0. The reason for that is that
>      1:1 MRs must be created with uid == 0 while virtqueue objects can be
>      created with uid == 0 only when the firmware capability is on.
>
>      If the set_map() callback is called with new translations provided
>      through iotlb, the driver will destroy the 1:1 MR and create a regular
>      one.
>
>      Signed-off-by: Eli Cohen <elic@nvidia.com>
>      Link: https://lore.kernel.org/r/20210602085854.62690-1-elic@nvidia.com
>      Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>      Acked-by: Jason Wang <jasowang@redhat.com>
>
> Thanks
>
>
>> +}
>> +
>> +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
>> +                                   struct vhost_iotlb *iotlb,
>> +                                   unsigned int asid)
>>   {
>>          struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>          int err;
>>
>> -       if (mr->initialized)
>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
>>                  return 0;
>>
>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
>> -               if (iotlb)
>> -                       err = create_user_mr(mvdev, iotlb);
>> -               else
>> -                       err = create_dma_mr(mvdev, mr);
>> +       if (mr->initialized)
>> +               return 0;
>>
>> -               if (err)
>> -                       return err;
>> -       }
>> +       if (iotlb)
>> +               err = create_user_mr(mvdev, iotlb);
>> +       else
>> +               err = create_dma_mr(mvdev, mr);
>>
>> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
>> -               err = dup_iotlb(mvdev, iotlb);
>> -               if (err)
>> -                       goto out_err;
>> -       }
>> +       if (err)
>> +               return err;
>>
>>          mr->initialized = true;
>> +
>> +       return 0;
>> +}
>> +
>> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
>> +                               struct vhost_iotlb *iotlb, unsigned int asid)
>> +{
>> +       int err;
>> +
>> +       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
>> +       if (err)
>> +               return err;
>> +
>> +       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
>> +       if (err)
>> +               goto out_err;
>> +
>>          return 0;
>>
>>   out_err:
>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
>> -               if (iotlb)
>> -                       destroy_user_mr(mvdev, mr);
>> -               else
>> -                       destroy_dma_mr(mvdev, mr);
>> -       }
>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
>>
>>          return err;
>>   }
>> --
>> 2.41.0
>>



* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-03 11:40     ` Dragos Tatulea
@ 2023-08-08  2:57       ` Jason Wang
  2023-08-08  7:24         ` Dragos Tatulea
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-08  2:57 UTC (permalink / raw)
  To: Dragos Tatulea
  Cc: virtualization, mst, eperezma, linux-kernel, Gal Pressman, xuanzhuo

On Thu, Aug 3, 2023 at 7:40 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
> On Thu, 2023-08-03 at 16:03 +0800, Jason Wang wrote:
> > On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > >
> > > The mr->initialized flag is shared between the control vq and data vq
> > > part of the mr init/uninit. But if the control vq and data vq get placed
> > > in different ASIDs, it can happen that initializing the control vq will
> > > prevent the data vq mr from being initialized.
> > >
> > > This patch consolidates the control and data vq init parts into their
> > > own init functions. The mr->initialized will now be used for the data vq
> > > only. The control vq currently doesn't need a flag.
> > >
> > > The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
> > > split into data and control vq functions which are now also ASID aware.
> > >
> > > Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for
> > > control and data")
> > > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > > Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
> > > Reviewed-by: Gal Pressman <gal@nvidia.com>
> > > ---
> > >  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
> > >  drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
> > >  2 files changed, 71 insertions(+), 27 deletions(-)
> > >
> > > diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > index 25fc4120b618..a0420be5059f 100644
> > > --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
> > >         struct list_head head;
> > >         unsigned long num_directs;
> > >         unsigned long num_klms;
> > > +       /* state of dvq mr */
> > >         bool initialized;
> > >
> > >         /* serialize mkey creation and destruction */
> > > diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> > > index 03e543229791..4ae14a248a4b 100644
> > > --- a/drivers/vdpa/mlx5/core/mr.c
> > > +++ b/drivers/vdpa/mlx5/core/mr.c
> > > @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev
> > > *mvdev, struct mlx5_vdpa_mr *mr
> > >         }
> > >  }
> > >
> > > -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned
> > > int asid)
> > > +{
> > > +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> > > +               return;
> > > +
> > > +       prune_iotlb(mvdev);
> > > +}
> > > +
> > > +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned
> > > int asid)
> > >  {
> > >         struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > >
> > > -       mutex_lock(&mr->mkey_mtx);
> > > +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> > > +               return;
> > > +
> > >         if (!mr->initialized)
> > > -               goto out;
> > > +               return;
> > >
> > > -       prune_iotlb(mvdev);
> > >         if (mr->user_mr)
> > >                 destroy_user_mr(mvdev, mr);
> > >         else
> > >                 destroy_dma_mr(mvdev, mr);
> > >
> > >         mr->initialized = false;
> > > -out:
> > > +}
> > > +
> > > +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned
> > > int asid)
> > > +{
> > > +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > +
> > > +       mutex_lock(&mr->mkey_mtx);
> > > +
> > > +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> > > +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
> > > +
> > >         mutex_unlock(&mr->mkey_mtx);
> > >  }
> > >
> > > -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> > > -                               struct vhost_iotlb *iotlb, unsigned int
> > > asid)
> > > +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > +{
> > > +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev-
> > > >group2asid[MLX5_VDPA_CVQ_GROUP]);
> > > +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev-
> > > >group2asid[MLX5_VDPA_DATAVQ_GROUP]);
> > > +}
> > > +
> > > +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> > > +                                   struct vhost_iotlb *iotlb,
> > > +                                   unsigned int asid)
> > > +{
> > > +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> > > +               return 0;
> > > +
> > > +       return dup_iotlb(mvdev, iotlb);
> >
> > This worries me as conceptually, there should be no difference between
> > dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
> >
> Are you worried by the changes in this patch or about the possibility of having
>
> The reason for this change is that I noticed if you create one mr in one asid
> you could be blocked out from creating another one in a different asid due to
> mr->initialized being true. To me that seemed problematic. Is it not?

My feeling is that mr.c should be device agnostic. It needs to know
nothing about the device details to work. But this patch seems to
break the layer.

>
> > One example is that, if we only do dup_iotlb() but not try to create
> > dma mr here, we will break virtio-vdpa:
> >
> How will that be possible? _mlx5_vdpa_create_mr calls _mlx5_vdpa_create_dvq_mr
> and _mlx5_vdpa_create_cvq_mr. The only thing that is different in this patch is
> that the cvq is not protected by an init flag. My understanding was that it
> would be ok to dup_iotlb again. Is it not? If not I could add an additional
> initialized flag for the cvq mr.

You are right here.

Thanks


>
> Thanks,
> Dragos
>
> > commit 6f5312f801836e6af9bcbb0bdb44dc423e129206
> > Author: Eli Cohen <elic@nvidia.com>
> > Date:   Wed Jun 2 11:58:54 2021 +0300
> >
> >     vdpa/mlx5: Add support for running with virtio_vdpa
> >
> >     In order to support running vdpa using vritio_vdpa driver, we need  to
> >     create a different kind of MR, one that has 1:1 mapping, since the
> >     addresses referring to virtqueues are dma addresses.
> >
> >     We create the 1:1 MR in mlx5_vdpa_dev_add() only in case firmware
> >     supports the general capability umem_uid_0. The reason for that is that
> >     1:1 MRs must be created with uid == 0 while virtqueue objects can be
> >     created with uid == 0 only when the firmware capability is on.
> >
> >     If the set_map() callback is called with new translations provided
> >     through iotlb, the driver will destroy the 1:1 MR and create a regular
> >     one.
> >
> >     Signed-off-by: Eli Cohen <elic@nvidia.com>
> >     Link: https://lore.kernel.org/r/20210602085854.62690-1-elic@nvidia.com
> >     Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >     Acked-by: Jason Wang <jasowang@redhat.com>
> >
> >
>
> > Thanks
> >
> >
>
> > > +}
> > > +
> > > +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
> > > +                                   struct vhost_iotlb *iotlb,
> > > +                                   unsigned int asid)
> > >  {
> > >         struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > >         int err;
> > >
> > > -       if (mr->initialized)
> > > +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> > >                 return 0;
> > >
> > > -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> > > -               if (iotlb)
> > > -                       err = create_user_mr(mvdev, iotlb);
> > > -               else
> > > -                       err = create_dma_mr(mvdev, mr);
> > > +       if (mr->initialized)
> > > +               return 0;
> > >
> > > -               if (err)
> > > -                       return err;
> > > -       }
> > > +       if (iotlb)
> > > +               err = create_user_mr(mvdev, iotlb);
> > > +       else
> > > +               err = create_dma_mr(mvdev, mr);
> > >
> > > -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
> > > -               err = dup_iotlb(mvdev, iotlb);
> > > -               if (err)
> > > -                       goto out_err;
> > > -       }
> > > +       if (err)
> > > +               return err;
> > >
> > >         mr->initialized = true;
> > > +
> > > +       return 0;
> > > +}
> > > +
> > > +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> > > +                               struct vhost_iotlb *iotlb, unsigned int
> > > asid)
> > > +{
> > > +       int err;
> > > +
> > > +       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
> > > +       if (err)
> > > +               return err;
> > > +
> > > +       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
> > > +       if (err)
> > > +               goto out_err;
> > > +
> > >         return 0;
> > >
> > >  out_err:
> > > -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> > > -               if (iotlb)
> > > -                       destroy_user_mr(mvdev, mr);
> > > -               else
> > > -                       destroy_dma_mr(mvdev, mr);
> > > -       }
> > > +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> > >
> > >         return err;
> > >  }
> > > --
> > > 2.41.0
> > >
> >
>



* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-03 17:57     ` Si-Wei Liu
@ 2023-08-08  3:00       ` Jason Wang
  2023-08-08 22:58         ` Si-Wei Liu
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-08  3:00 UTC (permalink / raw)
  To: Si-Wei Liu
  Cc: Dragos Tatulea, Xuan Zhuo, Michael S. Tsirkin, Gal Pressman,
	linux-kernel, virtualization, Eugenio Pérez

On Fri, Aug 4, 2023 at 1:58 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 8/3/2023 1:03 AM, Jason Wang wrote:
> > On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> >> The mr->initialized flag is shared between the control vq and data vq
> >> part of the mr init/uninit. But if the control vq and data vq get placed
> >> in different ASIDs, it can happen that initializing the control vq will
> >> prevent the data vq mr from being initialized.
> >>
> >> This patch consolidates the control and data vq init parts into their
> >> own init functions. The mr->initialized will now be used for the data vq
> >> only. The control vq currently doesn't need a flag.
> >>
> >> The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
> >> split into data and control vq functions which are now also ASID aware.
> >>
> >> Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data")
> >> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> >> Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
> >> Reviewed-by: Gal Pressman <gal@nvidia.com>
> >> ---
> >>   drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
> >>   drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
> >>   2 files changed, 71 insertions(+), 27 deletions(-)
> >>
> >> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> >> index 25fc4120b618..a0420be5059f 100644
> >> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> >> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> >> @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
> >>          struct list_head head;
> >>          unsigned long num_directs;
> >>          unsigned long num_klms;
> >> +       /* state of dvq mr */
> >>          bool initialized;
> >>
> >>          /* serialize mkey creation and destruction */
> >> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> >> index 03e543229791..4ae14a248a4b 100644
> >> --- a/drivers/vdpa/mlx5/core/mr.c
> >> +++ b/drivers/vdpa/mlx5/core/mr.c
> >> @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
> >>          }
> >>   }
> >>
> >> -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> >> +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> >> +{
> >> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> >> +               return;
> >> +
> >> +       prune_iotlb(mvdev);
> >> +}
> >> +
> >> +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> >>   {
> >>          struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >>
> >> -       mutex_lock(&mr->mkey_mtx);
> >> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> >> +               return;
> >> +
> >>          if (!mr->initialized)
> >> -               goto out;
> >> +               return;
> >>
> >> -       prune_iotlb(mvdev);
> >>          if (mr->user_mr)
> >>                  destroy_user_mr(mvdev, mr);
> >>          else
> >>                  destroy_dma_mr(mvdev, mr);
> >>
> >>          mr->initialized = false;
> >> -out:
> >> +}
> >> +
> >> +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> >> +{
> >> +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >> +
> >> +       mutex_lock(&mr->mkey_mtx);
> >> +
> >> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> >> +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
> >> +
> >>          mutex_unlock(&mr->mkey_mtx);
> >>   }
> >>
> >> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> >> -                               struct vhost_iotlb *iotlb, unsigned int asid)
> >> +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> >> +{
> >> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
> >> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
> >> +}
> >> +
> >> +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> >> +                                   struct vhost_iotlb *iotlb,
> >> +                                   unsigned int asid)
> >> +{
> >> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> >> +               return 0;
> >> +
> >> +       return dup_iotlb(mvdev, iotlb);
> > This worries me as conceptually, there should be no difference between
> > dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
> >
> > One example is that, if we only do dup_iotlb() but not try to create
> > dma mr here, we will break virtio-vdpa:
> For this case, I guess we may need another way to support virtio-vdpa
> 1:1 mapping rather than overloading virtio device reset semantics, see:
>
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg953755.html
>
>  > Conceptually, the address mapping is not a part of the abstraction for
>  > a virtio device now. So resetting the memory mapping during virtio
>  > device reset seems wrong.
>
> where we want to keep memory mapping intact across virtio device reset
> for the best live migration latency/downtime. I wonder whether it would
> work to reset the mapping in the vhost-vdpa life cycle, outside of virtio
> reset: say, introduce a .reset_map() op to restore the 1:1 mapping within
> vhost_vdpa_remove_as(), right after vhost_vdpa_iotlb_unmap()? Then we
> could move the iotlb reset logic there without worrying about breaking
> virtio-vdpa.

It looks to me like we don't need a new op. We can simply do set_map()
twice, or do you mean it would be faster?

Thanks

>
> Thanks,
> -Siwei
>
> >
> > commit 6f5312f801836e6af9bcbb0bdb44dc423e129206
> > Author: Eli Cohen <elic@nvidia.com>
> > Date:   Wed Jun 2 11:58:54 2021 +0300
> >
> >      vdpa/mlx5: Add support for running with virtio_vdpa
> >
> >      In order to support running vdpa using vritio_vdpa driver, we need  to
> >      create a different kind of MR, one that has 1:1 mapping, since the
> >      addresses referring to virtqueues are dma addresses.
> >
> >      We create the 1:1 MR in mlx5_vdpa_dev_add() only in case firmware
> >      supports the general capability umem_uid_0. The reason for that is that
> >      1:1 MRs must be created with uid == 0 while virtqueue objects can be
> >      created with uid == 0 only when the firmware capability is on.
> >
> >      If the set_map() callback is called with new translations provided
> >      through iotlb, the driver will destroy the 1:1 MR and create a regular
> >      one.
> >
> >      Signed-off-by: Eli Cohen <elic@nvidia.com>
> >      Link: https://lore.kernel.org/r/20210602085854.62690-1-elic@nvidia.com
> >      Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >      Acked-by: Jason Wang <jasowang@redhat.com>
> >
> > Thanks
> >
> >
> >> +}
> >> +
> >> +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
> >> +                                   struct vhost_iotlb *iotlb,
> >> +                                   unsigned int asid)
> >>   {
> >>          struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >>          int err;
> >>
> >> -       if (mr->initialized)
> >> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> >>                  return 0;
> >>
> >> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> >> -               if (iotlb)
> >> -                       err = create_user_mr(mvdev, iotlb);
> >> -               else
> >> -                       err = create_dma_mr(mvdev, mr);
> >> +       if (mr->initialized)
> >> +               return 0;
> >>
> >> -               if (err)
> >> -                       return err;
> >> -       }
> >> +       if (iotlb)
> >> +               err = create_user_mr(mvdev, iotlb);
> >> +       else
> >> +               err = create_dma_mr(mvdev, mr);
> >>
> >> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
> >> -               err = dup_iotlb(mvdev, iotlb);
> >> -               if (err)
> >> -                       goto out_err;
> >> -       }
> >> +       if (err)
> >> +               return err;
> >>
> >>          mr->initialized = true;
> >> +
> >> +       return 0;
> >> +}
> >> +
> >> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> >> +                               struct vhost_iotlb *iotlb, unsigned int asid)
> >> +{
> >> +       int err;
> >> +
> >> +       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
> >> +       if (err)
> >> +               return err;
> >> +
> >> +       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
> >> +       if (err)
> >> +               goto out_err;
> >> +
> >>          return 0;
> >>
> >>   out_err:
> >> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> >> -               if (iotlb)
> >> -                       destroy_user_mr(mvdev, mr);
> >> -               else
> >> -                       destroy_dma_mr(mvdev, mr);
> >> -       }
> >> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> >>
> >>          return err;
> >>   }
> >> --
> >> 2.41.0
> >>
>



* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-08  2:57       ` Jason Wang
@ 2023-08-08  7:24         ` Dragos Tatulea
  2023-08-09  1:42           ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Dragos Tatulea @ 2023-08-08  7:24 UTC (permalink / raw)
  To: jasowang
  Cc: virtualization, mst, eperezma, linux-kernel, Gal Pressman, xuanzhuo

On Tue, 2023-08-08 at 10:57 +0800, Jason Wang wrote:
> On Thu, Aug 3, 2023 at 7:40 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > 
> > On Thu, 2023-08-03 at 16:03 +0800, Jason Wang wrote:
> > > On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > > > 
> > > > The mr->initialized flag is shared between the control vq and data vq
> > > > part of the mr init/uninit. But if the control vq and data vq get placed
> > > > in different ASIDs, it can happen that initializing the control vq will
> > > > prevent the data vq mr from being initialized.
> > > > 
> > > > This patch consolidates the control and data vq init parts into their
> > > > own init functions. The mr->initialized will now be used for the data vq
> > > > only. The control vq currently doesn't need a flag.
> > > > 
> > > > The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
> > > > split into data and control vq functions which are now also ASID aware.
> > > > 
> > > > Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for
> > > > control and data")
> > > > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > > > Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
> > > > Reviewed-by: Gal Pressman <gal@nvidia.com>
> > > > ---
> > > >  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
> > > >  drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
> > > >  2 files changed, 71 insertions(+), 27 deletions(-)
> > > > 
> > > > diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > index 25fc4120b618..a0420be5059f 100644
> > > > --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
> > > >         struct list_head head;
> > > >         unsigned long num_directs;
> > > >         unsigned long num_klms;
> > > > +       /* state of dvq mr */
> > > >         bool initialized;
> > > > 
> > > >         /* serialize mkey creation and destruction */
> > > > diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> > > > index 03e543229791..4ae14a248a4b 100644
> > > > --- a/drivers/vdpa/mlx5/core/mr.c
> > > > +++ b/drivers/vdpa/mlx5/core/mr.c
> > > > @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
> > > >         }
> > > >  }
> > > > 
> > > > -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > > +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> > > > +{
> > > > +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> > > > +               return;
> > > > +
> > > > +       prune_iotlb(mvdev);
> > > > +}
> > > > +
> > > > +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> > > >  {
> > > >         struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > 
> > > > -       mutex_lock(&mr->mkey_mtx);
> > > > +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> > > > +               return;
> > > > +
> > > >         if (!mr->initialized)
> > > > -               goto out;
> > > > +               return;
> > > > 
> > > > -       prune_iotlb(mvdev);
> > > >         if (mr->user_mr)
> > > >                 destroy_user_mr(mvdev, mr);
> > > >         else
> > > >                 destroy_dma_mr(mvdev, mr);
> > > > 
> > > >         mr->initialized = false;
> > > > -out:
> > > > +}
> > > > +
> > > > +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> > > > +{
> > > > +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > +
> > > > +       mutex_lock(&mr->mkey_mtx);
> > > > +
> > > > +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> > > > +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
> > > > +
> > > >         mutex_unlock(&mr->mkey_mtx);
> > > >  }
> > > > 
> > > > -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> > > > -                               struct vhost_iotlb *iotlb, unsigned int asid)
> > > > +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > > +{
> > > > +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
> > > > +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
> > > > +}
> > > > +
> > > > +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> > > > +                                   struct vhost_iotlb *iotlb,
> > > > +                                   unsigned int asid)
> > > > +{
> > > > +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> > > > +               return 0;
> > > > +
> > > > +       return dup_iotlb(mvdev, iotlb);
> > > 
> > > This worries me as conceptually, there should be no difference between
> > > dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
> > > 
> > Are you worried by the changes in this patch or about the possibility of having
> >
> > The reason for this change is that I noticed if you create one mr in one asid
> > you could be blocked out from creating another one in a different asid due to
> > mr->initialized being true. To me that seemed problematic. Is it not?
> 
> My feeling is that mr.c should be device agnostic. It needs to know
> nothing about the device details to work. But this patch seems to
> break the layer.
> 
But the same logic was there before (with the exception of cvq not having an
init flag anymore). So what am I missing here?

> > 
> > > One example is that, if we only do dup_iotlb() but not try to create
> > > dma mr here, we will break virtio-vdpa:
> > > 
> > How will that be possible? _mlx5_vdpa_create_mr calls _mlx5_vdpa_create_dvq_mr
> > and _mlx5_vdpa_create_cvq_mr. The only thing that is different in this patch is
> > that the cvq is not protected by an init flag. My understanding was that it
> > would be ok to dup_iotlb again. Is it not? If not I could add an additional
> > initialized flag for the cvq mr.
> 
> You are right here.
> 
> Thanks
> 
> 
> > 
> > Thanks,
> > Dragos
> > 
> > > commit 6f5312f801836e6af9bcbb0bdb44dc423e129206
> > > Author: Eli Cohen <elic@nvidia.com>
> > > Date:   Wed Jun 2 11:58:54 2021 +0300
> > > 
> > >     vdpa/mlx5: Add support for running with virtio_vdpa
> > > 
> > >     In order to support running vdpa using vritio_vdpa driver, we need  to
> > >     create a different kind of MR, one that has 1:1 mapping, since the
> > >     addresses referring to virtqueues are dma addresses.
> > > 
> > >     We create the 1:1 MR in mlx5_vdpa_dev_add() only in case firmware
> > >     supports the general capability umem_uid_0. The reason for that is
> > > that
> > >     1:1 MRs must be created with uid == 0 while virtqueue objects can be
> > >     created with uid == 0 only when the firmware capability is on.
> > > 
> > >     If the set_map() callback is called with new translations provided
> > >     through iotlb, the driver will destroy the 1:1 MR and create a regular
> > >     one.
> > > 
> > >     Signed-off-by: Eli Cohen <elic@nvidia.com>
> > >     Link: https://lore.kernel.org/r/20210602085854.62690-1-elic@nvidia.com
> > >     Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > >     Acked-by: Jason Wang <jasowang@redhat.com>
> > > 
> > > 
> > 
> > > Thanks
> > > 
> > > 
> > 
> > > > +}
> > > > +
> > > > +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
> > > > +                                   struct vhost_iotlb *iotlb,
> > > > +                                   unsigned int asid)
> > > >  {
> > > >         struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > >         int err;
> > > > 
> > > > -       if (mr->initialized)
> > > > +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> > > >                 return 0;
> > > > 
> > > > -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> > > > -               if (iotlb)
> > > > -                       err = create_user_mr(mvdev, iotlb);
> > > > -               else
> > > > -                       err = create_dma_mr(mvdev, mr);
> > > > +       if (mr->initialized)
> > > > +               return 0;
> > > > 
> > > > -               if (err)
> > > > -                       return err;
> > > > -       }
> > > > +       if (iotlb)
> > > > +               err = create_user_mr(mvdev, iotlb);
> > > > +       else
> > > > +               err = create_dma_mr(mvdev, mr);
> > > > 
> > > > -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
> > > > -               err = dup_iotlb(mvdev, iotlb);
> > > > -               if (err)
> > > > -                       goto out_err;
> > > > -       }
> > > > +       if (err)
> > > > +               return err;
> > > > 
> > > >         mr->initialized = true;
> > > > +
> > > > +       return 0;
> > > > +}
> > > > +
> > > > +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> > > > +                               struct vhost_iotlb *iotlb, unsigned int asid)
> > > > +{
> > > > +       int err;
> > > > +
> > > > +       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
> > > > +       if (err)
> > > > +               return err;
> > > > +
> > > > +       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
> > > > +       if (err)
> > > > +               goto out_err;
> > > > +
> > > >         return 0;
> > > > 
> > > >  out_err:
> > > > -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> > > > -               if (iotlb)
> > > > -                       destroy_user_mr(mvdev, mr);
> > > > -               else
> > > > -                       destroy_dma_mr(mvdev, mr);
> > > > -       }
> > > > +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> > > > 
> > > >         return err;
> > > >  }
> > > > --
> > > > 2.41.0
> > > > 
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-08  3:00       ` Jason Wang
@ 2023-08-08 22:58         ` Si-Wei Liu
  2023-08-09  6:52           ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-08 22:58 UTC (permalink / raw)
  To: Jason Wang
  Cc: Dragos Tatulea, Xuan Zhuo, Michael S. Tsirkin, Gal Pressman,
	linux-kernel, virtualization, Eugenio Pérez



On 8/7/2023 8:00 PM, Jason Wang wrote:
> On Fri, Aug 4, 2023 at 1:58 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 8/3/2023 1:03 AM, Jason Wang wrote:
>>> On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>>>> The mr->initialized flag is shared between the control vq and data vq
>>>> part of the mr init/uninit. But if the control vq and data vq get placed
>>>> in different ASIDs, it can happen that initializing the control vq will
>>>> prevent the data vq mr from being initialized.
>>>>
>>>> This patch consolidates the control and data vq init parts into their
>>>> own init functions. The mr->initialized will now be used for the data vq
>>>> only. The control vq currently doesn't need a flag.
>>>>
>>>> The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
>>>> split into data and control vq functions which are now also ASID aware.
>>>>
>>>> Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data")
>>>> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
>>>> Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
>>>> Reviewed-by: Gal Pressman <gal@nvidia.com>
>>>> ---
>>>>    drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
>>>>    drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
>>>>    2 files changed, 71 insertions(+), 27 deletions(-)
>>>>
>>>> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>>> index 25fc4120b618..a0420be5059f 100644
>>>> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>>> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>>> @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
>>>>           struct list_head head;
>>>>           unsigned long num_directs;
>>>>           unsigned long num_klms;
>>>> +       /* state of dvq mr */
>>>>           bool initialized;
>>>>
>>>>           /* serialize mkey creation and destruction */
>>>> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
>>>> index 03e543229791..4ae14a248a4b 100644
>>>> --- a/drivers/vdpa/mlx5/core/mr.c
>>>> +++ b/drivers/vdpa/mlx5/core/mr.c
>>>> @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
>>>>           }
>>>>    }
>>>>
>>>> -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
>>>> +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>>>> +{
>>>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
>>>> +               return;
>>>> +
>>>> +       prune_iotlb(mvdev);
>>>> +}
>>>> +
>>>> +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>>>>    {
>>>>           struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>>>
>>>> -       mutex_lock(&mr->mkey_mtx);
>>>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
>>>> +               return;
>>>> +
>>>>           if (!mr->initialized)
>>>> -               goto out;
>>>> +               return;
>>>>
>>>> -       prune_iotlb(mvdev);
>>>>           if (mr->user_mr)
>>>>                   destroy_user_mr(mvdev, mr);
>>>>           else
>>>>                   destroy_dma_mr(mvdev, mr);
>>>>
>>>>           mr->initialized = false;
>>>> -out:
>>>> +}
>>>> +
>>>> +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>>>> +{
>>>> +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>>> +
>>>> +       mutex_lock(&mr->mkey_mtx);
>>>> +
>>>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
>>>> +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
>>>> +
>>>>           mutex_unlock(&mr->mkey_mtx);
>>>>    }
>>>>
>>>> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
>>>> -                               struct vhost_iotlb *iotlb, unsigned int asid)
>>>> +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
>>>> +{
>>>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
>>>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
>>>> +}
>>>> +
>>>> +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
>>>> +                                   struct vhost_iotlb *iotlb,
>>>> +                                   unsigned int asid)
>>>> +{
>>>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
>>>> +               return 0;
>>>> +
>>>> +       return dup_iotlb(mvdev, iotlb);
>>> This worries me as conceptually, there should be no difference between
>>> dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
>>>
>>> One example is that, if we only do dup_iotlb() but not try to create
>>> dma mr here, we will break virtio-vdpa:
>> For this case, I guess we may need another way to support virtio-vdpa
>> 1:1 mapping rather than overloading virtio device reset semantics, see:
>>
>> https://www.mail-archive.com/qemu-devel@nongnu.org/msg953755.html
>>
>>   > Conceptually, the address mapping is not a part of the abstraction for
>>   > a virtio device now. So resetting the memory mapping during virtio
>>   > device reset seems wrong.
>>
>> where we want to keep memory mapping intact across virtio device reset
>> for best live migration latency/downtime. I wonder would it work to
>> reset the mapping in vhost-vdpa life cycle out of virtio reset, say
>> introduce a .reset_map() op to restore 1:1 mapping within
>> vhost_vdpa_remove_as() right after vhost_vdpa_iotlb_unmap()? Then we can
>> move the iotlb reset logic to there without worry breaking virtio-vdpa.
> It looks to me we don't need a new ops. We can simply do set_map()
> twice
What does that mean: a first set_map(0, -1ULL) with zero iotlb entries
passed in to destroy all previously added iotlb mappings, and a second
set_map(0, -1ULL) to restore the 1:1 DMA MR? Userspace (maybe a buggy
one, but it does no harm) other than vhost-vdpa itself can already do
the unmap twice anyway; that is supported today I think. Why would there
be such an obscure distinction, and what is the benefit of treating the
second .set_map() as recreating the 1:1 mapping?

>   or do you mean it would be faster?
I think with .reset_map() we can at least avoid the indefinite latency
hiccup of destroying and recreating the 1:1 mapping on the unwarranted
2nd unmap call. And .reset_map() would work with both the .dma_map()
and .set_map() APIs with clear semantics.
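
To make this a bit more concrete, on the mlx5 side such an op could be
as small as the sketch below. The op name and signature are placeholders,
locking is elided, and it assumes the destroy-by-ASID helper added by
this patch gets exposed to the net code:

/* Sketch only -- no such op exists today; names are placeholders. */
static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	/* Drop whatever user MR is currently installed for this ASID... */
	mlx5_vdpa_destroy_mr_asid(mvdev, asid);

	/* ...and fall back to the default 1:1 DMA MR (a NULL iotlb takes
	 * the create_dma_mr() path in this patch), without touching any
	 * virtqueue state.
	 */
	return mlx5_vdpa_create_mr(mvdev, NULL, asid);
}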

Regards,
-Siwei
>
> Thanks
>
>> Thanks,
>> -Siwei
>>
>>> commit 6f5312f801836e6af9bcbb0bdb44dc423e129206
>>> Author: Eli Cohen <elic@nvidia.com>
>>> Date:   Wed Jun 2 11:58:54 2021 +0300
>>>
>>>       vdpa/mlx5: Add support for running with virtio_vdpa
>>>
>>>       In order to support running vdpa using vritio_vdpa driver, we need  to
>>>       create a different kind of MR, one that has 1:1 mapping, since the
>>>       addresses referring to virtqueues are dma addresses.
>>>
>>>       We create the 1:1 MR in mlx5_vdpa_dev_add() only in case firmware
>>>       supports the general capability umem_uid_0. The reason for that is that
>>>       1:1 MRs must be created with uid == 0 while virtqueue objects can be
>>>       created with uid == 0 only when the firmware capability is on.
>>>
>>>       If the set_map() callback is called with new translations provided
>>>       through iotlb, the driver will destroy the 1:1 MR and create a regular
>>>       one.
>>>
>>>       Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>       Link: https://lore.kernel.org/r/20210602085854.62690-1-elic@nvidia.com
>>>       Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>>       Acked-by: Jason Wang <jasowang@redhat.com>
>>>
>>> Thanks
>>>
>>>
>>>> +}
>>>> +
>>>> +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
>>>> +                                   struct vhost_iotlb *iotlb,
>>>> +                                   unsigned int asid)
>>>>    {
>>>>           struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>>>           int err;
>>>>
>>>> -       if (mr->initialized)
>>>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
>>>>                   return 0;
>>>>
>>>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
>>>> -               if (iotlb)
>>>> -                       err = create_user_mr(mvdev, iotlb);
>>>> -               else
>>>> -                       err = create_dma_mr(mvdev, mr);
>>>> +       if (mr->initialized)
>>>> +               return 0;
>>>>
>>>> -               if (err)
>>>> -                       return err;
>>>> -       }
>>>> +       if (iotlb)
>>>> +               err = create_user_mr(mvdev, iotlb);
>>>> +       else
>>>> +               err = create_dma_mr(mvdev, mr);
>>>>
>>>> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
>>>> -               err = dup_iotlb(mvdev, iotlb);
>>>> -               if (err)
>>>> -                       goto out_err;
>>>> -       }
>>>> +       if (err)
>>>> +               return err;
>>>>
>>>>           mr->initialized = true;
>>>> +
>>>> +       return 0;
>>>> +}
>>>> +
>>>> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
>>>> +                               struct vhost_iotlb *iotlb, unsigned int asid)
>>>> +{
>>>> +       int err;
>>>> +
>>>> +       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
>>>> +       if (err)
>>>> +               return err;
>>>> +
>>>> +       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
>>>> +       if (err)
>>>> +               goto out_err;
>>>> +
>>>>           return 0;
>>>>
>>>>    out_err:
>>>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
>>>> -               if (iotlb)
>>>> -                       destroy_user_mr(mvdev, mr);
>>>> -               else
>>>> -                       destroy_dma_mr(mvdev, mr);
>>>> -       }
>>>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
>>>>
>>>>           return err;
>>>>    }
>>>> --
>>>> 2.41.0
>>>>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-08  7:24         ` Dragos Tatulea
@ 2023-08-09  1:42           ` Jason Wang
  2023-08-14 14:15             ` Dragos Tatulea
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-09  1:42 UTC (permalink / raw)
  To: Dragos Tatulea
  Cc: virtualization, mst, eperezma, linux-kernel, Gal Pressman, xuanzhuo

On Tue, Aug 8, 2023 at 3:24 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
> On Tue, 2023-08-08 at 10:57 +0800, Jason Wang wrote:
> > On Thu, Aug 3, 2023 at 7:40 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > >
> > > On Thu, 2023-08-03 at 16:03 +0800, Jason Wang wrote:
> > > > On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > > > >
> > > > > The mr->initialized flag is shared between the control vq and data vq
> > > > > part of the mr init/uninit. But if the control vq and data vq get placed
> > > > > in different ASIDs, it can happen that initializing the control vq will
> > > > > prevent the data vq mr from being initialized.
> > > > >
> > > > > This patch consolidates the control and data vq init parts into their
> > > > > own init functions. The mr->initialized will now be used for the data vq
> > > > > only. The control vq currently doesn't need a flag.
> > > > >
> > > > > The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
> > > > > split into data and control vq functions which are now also ASID aware.
> > > > >
> > > > > Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data")
> > > > > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > > > > Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
> > > > > Reviewed-by: Gal Pressman <gal@nvidia.com>
> > > > > ---
> > > > >  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
> > > > >  drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
> > > > >  2 files changed, 71 insertions(+), 27 deletions(-)
> > > > >
> > > > > diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > index 25fc4120b618..a0420be5059f 100644
> > > > > --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
> > > > >         struct list_head head;
> > > > >         unsigned long num_directs;
> > > > >         unsigned long num_klms;
> > > > > +       /* state of dvq mr */
> > > > >         bool initialized;
> > > > >
> > > > >         /* serialize mkey creation and destruction */
> > > > > diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> > > > > index 03e543229791..4ae14a248a4b 100644
> > > > > --- a/drivers/vdpa/mlx5/core/mr.c
> > > > > +++ b/drivers/vdpa/mlx5/core/mr.c
> > > > > @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
> > > > >         }
> > > > >  }
> > > > >
> > > > > -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > > > +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> > > > > +{
> > > > > +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> > > > > +               return;
> > > > > +
> > > > > +       prune_iotlb(mvdev);
> > > > > +}
> > > > > +
> > > > > +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> > > > >  {
> > > > >         struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > >
> > > > > -       mutex_lock(&mr->mkey_mtx);
> > > > > +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> > > > > +               return;
> > > > > +
> > > > >         if (!mr->initialized)
> > > > > -               goto out;
> > > > > +               return;
> > > > >
> > > > > -       prune_iotlb(mvdev);
> > > > >         if (mr->user_mr)
> > > > >                 destroy_user_mr(mvdev, mr);
> > > > >         else
> > > > >                 destroy_dma_mr(mvdev, mr);
> > > > >
> > > > >         mr->initialized = false;
> > > > > -out:
> > > > > +}
> > > > > +
> > > > > +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> > > > > +{
> > > > > +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > > +
> > > > > +       mutex_lock(&mr->mkey_mtx);
> > > > > +
> > > > > +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> > > > > +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
> > > > > +
> > > > >         mutex_unlock(&mr->mkey_mtx);
> > > > >  }
> > > > >
> > > > > -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> > > > > -                               struct vhost_iotlb *iotlb, unsigned int asid)
> > > > > +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > > > +{
> > > > > +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
> > > > > +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
> > > > > +}
> > > > > +
> > > > > +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> > > > > +                                   struct vhost_iotlb *iotlb,
> > > > > +                                   unsigned int asid)
> > > > > +{
> > > > > +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> > > > > +               return 0;
> > > > > +
> > > > > +       return dup_iotlb(mvdev, iotlb);
> > > >
> > > > This worries me as conceptually, there should be no difference between
> > > > dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
> > > >
> > > Are you worried by the changes in this patch or about the possibility of having
> > >
> > > The reason for this change is that I noticed if you create one mr in one asid
> > > you could be blocked out from creating another one in a different asid due to
> > > mr->initialized being true. To me that seemed problematic. Is it not?
> >
> > My feeling is that mr.c should be device agnostic. It needs to know
> > nothing about the device details to work. But this patch seems to
> > break the layer.
> >
> But the same logic was there before (with the exception of cvq not having an
> init flag anymore). So what am I missing here?

Nothing, I think you're right.

I think we can have this patch go in first and tweak on top by moving
the CVQ-aware logic into the net-specific code.
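
Roughly, I'd expect the driver's set_map() callback to end up looking
like the sketch below, with mr.c only dealing with mkeys. Both helpers
here are invented names, just to show where the group2asid checks would
live:

static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
			     struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	int err = 0;

	/* dvq side: (re)build the MR/mkey used by the data virtqueues */
	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid)
		err = mlx5_vdpa_update_dvq_mr(mvdev, iotlb);	/* invented */

	/* cvq side: only refresh the software iotlb copy used by the CVQ */
	if (!err && mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
		err = mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb);	/* invented */

	return err;
}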

Thanks


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-08 22:58         ` Si-Wei Liu
@ 2023-08-09  6:52           ` Jason Wang
  2023-08-10  0:40             ` Si-Wei Liu
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-09  6:52 UTC (permalink / raw)
  To: Si-Wei Liu
  Cc: Dragos Tatulea, Xuan Zhuo, Michael S. Tsirkin, Gal Pressman,
	linux-kernel, virtualization, Eugenio Pérez

On Wed, Aug 9, 2023 at 6:58 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 8/7/2023 8:00 PM, Jason Wang wrote:
> > On Fri, Aug 4, 2023 at 1:58 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>
> >>
> >> On 8/3/2023 1:03 AM, Jason Wang wrote:
> >>> On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> >>>> The mr->initialized flag is shared between the control vq and data vq
> >>>> part of the mr init/uninit. But if the control vq and data vq get placed
> >>>> in different ASIDs, it can happen that initializing the control vq will
> >>>> prevent the data vq mr from being initialized.
> >>>>
> >>>> This patch consolidates the control and data vq init parts into their
> >>>> own init functions. The mr->initialized will now be used for the data vq
> >>>> only. The control vq currently doesn't need a flag.
> >>>>
> >>>> The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
> >>>> split into data and control vq functions which are now also ASID aware.
> >>>>
> >>>> Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data")
> >>>> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> >>>> Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
> >>>> Reviewed-by: Gal Pressman <gal@nvidia.com>
> >>>> ---
> >>>>    drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
> >>>>    drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
> >>>>    2 files changed, 71 insertions(+), 27 deletions(-)
> >>>>
> >>>> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> >>>> index 25fc4120b618..a0420be5059f 100644
> >>>> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> >>>> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> >>>> @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
> >>>>           struct list_head head;
> >>>>           unsigned long num_directs;
> >>>>           unsigned long num_klms;
> >>>> +       /* state of dvq mr */
> >>>>           bool initialized;
> >>>>
> >>>>           /* serialize mkey creation and destruction */
> >>>> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> >>>> index 03e543229791..4ae14a248a4b 100644
> >>>> --- a/drivers/vdpa/mlx5/core/mr.c
> >>>> +++ b/drivers/vdpa/mlx5/core/mr.c
> >>>> @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
> >>>>           }
> >>>>    }
> >>>>
> >>>> -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> >>>> +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> >>>> +{
> >>>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> >>>> +               return;
> >>>> +
> >>>> +       prune_iotlb(mvdev);
> >>>> +}
> >>>> +
> >>>> +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> >>>>    {
> >>>>           struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >>>>
> >>>> -       mutex_lock(&mr->mkey_mtx);
> >>>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> >>>> +               return;
> >>>> +
> >>>>           if (!mr->initialized)
> >>>> -               goto out;
> >>>> +               return;
> >>>>
> >>>> -       prune_iotlb(mvdev);
> >>>>           if (mr->user_mr)
> >>>>                   destroy_user_mr(mvdev, mr);
> >>>>           else
> >>>>                   destroy_dma_mr(mvdev, mr);
> >>>>
> >>>>           mr->initialized = false;
> >>>> -out:
> >>>> +}
> >>>> +
> >>>> +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> >>>> +{
> >>>> +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >>>> +
> >>>> +       mutex_lock(&mr->mkey_mtx);
> >>>> +
> >>>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> >>>> +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
> >>>> +
> >>>>           mutex_unlock(&mr->mkey_mtx);
> >>>>    }
> >>>>
> >>>> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> >>>> -                               struct vhost_iotlb *iotlb, unsigned int asid)
> >>>> +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> >>>> +{
> >>>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
> >>>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
> >>>> +}
> >>>> +
> >>>> +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> >>>> +                                   struct vhost_iotlb *iotlb,
> >>>> +                                   unsigned int asid)
> >>>> +{
> >>>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> >>>> +               return 0;
> >>>> +
> >>>> +       return dup_iotlb(mvdev, iotlb);
> >>> This worries me as conceptually, there should be no difference between
> >>> dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
> >>>
> >>> One example is that, if we only do dup_iotlb() but not try to create
> >>> dma mr here, we will break virtio-vdpa:
> >> For this case, I guess we may need another way to support virtio-vdpa
> >> 1:1 mapping rather than overloading virtio device reset semantics, see:
> >>
> >> https://www.mail-archive.com/qemu-devel@nongnu.org/msg953755.html
> >>
> >>   > Conceptually, the address mapping is not a part of the abstraction for
> >>   > a virtio device now. So resetting the memory mapping during virtio
> >>   > device reset seems wrong.
> >>
> >> where we want to keep memory mapping intact across virtio device reset
> >> for best live migration latency/downtime. I wonder would it work to
> >> reset the mapping in vhost-vdpa life cycle out of virtio reset, say
> >> introduce a .reset_map() op to restore 1:1 mapping within
> >> vhost_vdpa_remove_as() right after vhost_vdpa_iotlb_unmap()? Then we can
> >> move the iotlb reset logic to there without worry breaking virtio-vdpa.
> > It looks to me we don't need a new ops. We can simply do set_map()
> > twice
> What does it mean, first set_map(0, -1ULL) with zero iotlb entry passed
> in to destroy all iotlb mappings previously added, and second set_map(0,
> -1ULL) to restore 1:1 DMA MR? But userspace (maybe a buggy one but
> doesn't do harm) apart from vhost-vdpa itself can do unmap twice anyway,
> this is supported today I think. Why there'll be such obscure
> distinction, or what's the benefit to treat second .set_map() as
> recreating 1:1 mapping?

Ok, I think I missed some context. I agree that it's better to decouple
memory mappings from virtio reset; it helps to reduce unnecessary memory
transactions. It might require a new feature flag.

Regarding the method of restoring the 1:1 DMA MR, it might be dangerous
for (buggy) vhost-vDPA setups: since such a userspace doesn't set up any
mapping, it could poke at kernel memory through that 1:1 mapping via the
CVQ, couldn't it?

Thanks

>
> >   or do you mean it would be faster?
> I think with .reset_map() we at least can avoid indefinite latency
> hiccup from destroying and recreating 1:1 mapping with the unwarranted
> >> 2nd unmap call. And .reset_map() should work with both .dma_map() and
> .set_map() APIs with clear semantics.
>
> Regards,
> -Siwei
> >
> > Thanks
> >
> >> Thanks,
> >> -Siwei
> >>
> >>> commit 6f5312f801836e6af9bcbb0bdb44dc423e129206
> >>> Author: Eli Cohen <elic@nvidia.com>
> >>> Date:   Wed Jun 2 11:58:54 2021 +0300
> >>>
> >>>       vdpa/mlx5: Add support for running with virtio_vdpa
> >>>
> >>>       In order to support running vdpa using vritio_vdpa driver, we need  to
> >>>       create a different kind of MR, one that has 1:1 mapping, since the
> >>>       addresses referring to virtqueues are dma addresses.
> >>>
> >>>       We create the 1:1 MR in mlx5_vdpa_dev_add() only in case firmware
> >>>       supports the general capability umem_uid_0. The reason for that is that
> >>>       1:1 MRs must be created with uid == 0 while virtqueue objects can be
> >>>       created with uid == 0 only when the firmware capability is on.
> >>>
> >>>       If the set_map() callback is called with new translations provided
> >>>       through iotlb, the driver will destroy the 1:1 MR and create a regular
> >>>       one.
> >>>
> >>>       Signed-off-by: Eli Cohen <elic@nvidia.com>
> >>>       Link: https://lore.kernel.org/r/20210602085854.62690-1-elic@nvidia.com
> >>>       Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >>>       Acked-by: Jason Wang <jasowang@redhat.com>
> >>>
> >>> Thanks
> >>>
> >>>
> >>>> +}
> >>>> +
> >>>> +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
> >>>> +                                   struct vhost_iotlb *iotlb,
> >>>> +                                   unsigned int asid)
> >>>>    {
> >>>>           struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >>>>           int err;
> >>>>
> >>>> -       if (mr->initialized)
> >>>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> >>>>                   return 0;
> >>>>
> >>>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> >>>> -               if (iotlb)
> >>>> -                       err = create_user_mr(mvdev, iotlb);
> >>>> -               else
> >>>> -                       err = create_dma_mr(mvdev, mr);
> >>>> +       if (mr->initialized)
> >>>> +               return 0;
> >>>>
> >>>> -               if (err)
> >>>> -                       return err;
> >>>> -       }
> >>>> +       if (iotlb)
> >>>> +               err = create_user_mr(mvdev, iotlb);
> >>>> +       else
> >>>> +               err = create_dma_mr(mvdev, mr);
> >>>>
> >>>> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
> >>>> -               err = dup_iotlb(mvdev, iotlb);
> >>>> -               if (err)
> >>>> -                       goto out_err;
> >>>> -       }
> >>>> +       if (err)
> >>>> +               return err;
> >>>>
> >>>>           mr->initialized = true;
> >>>> +
> >>>> +       return 0;
> >>>> +}
> >>>> +
> >>>> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> >>>> +                               struct vhost_iotlb *iotlb, unsigned int asid)
> >>>> +{
> >>>> +       int err;
> >>>> +
> >>>> +       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
> >>>> +       if (err)
> >>>> +               return err;
> >>>> +
> >>>> +       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
> >>>> +       if (err)
> >>>> +               goto out_err;
> >>>> +
> >>>>           return 0;
> >>>>
> >>>>    out_err:
> >>>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> >>>> -               if (iotlb)
> >>>> -                       destroy_user_mr(mvdev, mr);
> >>>> -               else
> >>>> -                       destroy_dma_mr(mvdev, mr);
> >>>> -       }
> >>>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> >>>>
> >>>>           return err;
> >>>>    }
> >>>> --
> >>>> 2.41.0
> >>>>
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-09  6:52           ` Jason Wang
@ 2023-08-10  0:40             ` Si-Wei Liu
  2023-08-10  3:10               ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-10  0:40 UTC (permalink / raw)
  To: Jason Wang
  Cc: Dragos Tatulea, Xuan Zhuo, Michael S. Tsirkin, Gal Pressman,
	linux-kernel, virtualization, Eugenio Pérez



On 8/8/2023 11:52 PM, Jason Wang wrote:
> On Wed, Aug 9, 2023 at 6:58 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 8/7/2023 8:00 PM, Jason Wang wrote:
>>> On Fri, Aug 4, 2023 at 1:58 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>>
>>>> On 8/3/2023 1:03 AM, Jason Wang wrote:
>>>>> On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>>>>>> The mr->initialized flag is shared between the control vq and data vq
>>>>>> part of the mr init/uninit. But if the control vq and data vq get placed
>>>>>> in different ASIDs, it can happen that initializing the control vq will
>>>>>> prevent the data vq mr from being initialized.
>>>>>>
>>>>>> This patch consolidates the control and data vq init parts into their
>>>>>> own init functions. The mr->initialized will now be used for the data vq
>>>>>> only. The control vq currently doesn't need a flag.
>>>>>>
>>>>>> The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
>>>>>> split into data and control vq functions which are now also ASID aware.
>>>>>>
>>>>>> Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data")
>>>>>> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
>>>>>> Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
>>>>>> Reviewed-by: Gal Pressman <gal@nvidia.com>
>>>>>> ---
>>>>>>     drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
>>>>>>     drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
>>>>>>     2 files changed, 71 insertions(+), 27 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>>>>> index 25fc4120b618..a0420be5059f 100644
>>>>>> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>>>>> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>>>>> @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
>>>>>>            struct list_head head;
>>>>>>            unsigned long num_directs;
>>>>>>            unsigned long num_klms;
>>>>>> +       /* state of dvq mr */
>>>>>>            bool initialized;
>>>>>>
>>>>>>            /* serialize mkey creation and destruction */
>>>>>> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
>>>>>> index 03e543229791..4ae14a248a4b 100644
>>>>>> --- a/drivers/vdpa/mlx5/core/mr.c
>>>>>> +++ b/drivers/vdpa/mlx5/core/mr.c
>>>>>> @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
>>>>>>            }
>>>>>>     }
>>>>>>
>>>>>> -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
>>>>>> +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>>>>>> +{
>>>>>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
>>>>>> +               return;
>>>>>> +
>>>>>> +       prune_iotlb(mvdev);
>>>>>> +}
>>>>>> +
>>>>>> +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>>>>>>     {
>>>>>>            struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>>>>>
>>>>>> -       mutex_lock(&mr->mkey_mtx);
>>>>>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
>>>>>> +               return;
>>>>>> +
>>>>>>            if (!mr->initialized)
>>>>>> -               goto out;
>>>>>> +               return;
>>>>>>
>>>>>> -       prune_iotlb(mvdev);
>>>>>>            if (mr->user_mr)
>>>>>>                    destroy_user_mr(mvdev, mr);
>>>>>>            else
>>>>>>                    destroy_dma_mr(mvdev, mr);
>>>>>>
>>>>>>            mr->initialized = false;
>>>>>> -out:
>>>>>> +}
>>>>>> +
>>>>>> +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>>>>>> +{
>>>>>> +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>>>>> +
>>>>>> +       mutex_lock(&mr->mkey_mtx);
>>>>>> +
>>>>>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
>>>>>> +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
>>>>>> +
>>>>>>            mutex_unlock(&mr->mkey_mtx);
>>>>>>     }
>>>>>>
>>>>>> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
>>>>>> -                               struct vhost_iotlb *iotlb, unsigned int asid)
>>>>>> +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
>>>>>> +{
>>>>>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
>>>>>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
>>>>>> +}
>>>>>> +
>>>>>> +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
>>>>>> +                                   struct vhost_iotlb *iotlb,
>>>>>> +                                   unsigned int asid)
>>>>>> +{
>>>>>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
>>>>>> +               return 0;
>>>>>> +
>>>>>> +       return dup_iotlb(mvdev, iotlb);
>>>>> This worries me as conceptually, there should be no difference between
>>>>> dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
>>>>>
>>>>> One example is that, if we only do dup_iotlb() but not try to create
>>>>> dma mr here, we will break virtio-vdpa:
>>>> For this case, I guess we may need another way to support virtio-vdpa
>>>> 1:1 mapping rather than overloading virtio device reset semantics, see:
>>>>
>>>> https://www.mail-archive.com/qemu-devel@nongnu.org/msg953755.html
>>>>
>>>>    > Conceptually, the address mapping is not a part of the abstraction for
>>>>    > a virtio device now. So resetting the memory mapping during virtio
>>>>    > device reset seems wrong.
>>>>
>>>> where we want to keep memory mapping intact across virtio device reset
>>>> for best live migration latency/downtime. I wonder would it work to
>>>> reset the mapping in vhost-vdpa life cycle out of virtio reset, say
>>>> introduce a .reset_map() op to restore 1:1 mapping within
>>>> vhost_vdpa_remove_as() right after vhost_vdpa_iotlb_unmap()? Then we can
>>>> move the iotlb reset logic to there without worry breaking virtio-vdpa.
>>> It looks to me we don't need a new ops. We can simply do set_map()
>>> twice
>> What does it mean, first set_map(0, -1ULL) with zero iotlb entry passed
>> in to destroy all iotlb mappings previously added, and second set_map(0,
>> -1ULL) to restore 1:1 DMA MR? But userspace (maybe a buggy one but
>> doesn't do harm) apart from vhost-vdpa itself can do unmap twice anyway,
>> this is supported today I think. Why there'll be such obscure
>> distinction, or what's the benefit to treat second .set_map() as
>> recreating 1:1 mapping?
> Ok, I think I miss some context. I agree that it's better to decouple
> memory mappings from the virtio reset. It helps to reduce the
> unnecessary memory transactions. It might require a new feature flag.
Agreed on this. AFAICT QEMU would need to check this new feature flag to
make sure memory mappings are kept intact across reset; otherwise, to
avoid breaking older kernels, it has to recreate all the mappings after
reset as it does today.
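
Something like the pseudo-check below on the QEMU side is what I have in
mind. The backend feature bit name and the remap helper are made up here,
and vhost_fd/dev stand for whatever QEMU already has at hand; this is
only to illustrate the fallback:

	uint64_t features = 0;

	if (ioctl(vhost_fd, VHOST_GET_BACKEND_FEATURES, &features))
		features = 0;

	if (!(features & (1ULL << VHOST_BACKEND_F_MAP_PERSIST))) {
		/* Older kernel: mappings do not survive device reset,
		 * so re-add every region after reset, as is done today.
		 */
		remap_all_guest_memory(dev);	/* placeholder helper */
	}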

> Regarding the method of restoring to 1:1 DMA MR, it might be dangerous
> for (buggy) vhost-vDPA devices. Since its userspace doesn't set up any
> mapping it can explore the kernel with that via CVQ?
I am not sure I understand this proposal. The 1:1 DMA MR is first
created at vdpa device add, and gets destroyed implicitly when the first
.set_map or .dma_map call is made, which is only possible after the
vhost-vdpa module is loaded and bound to vdpa devices. Naturally the DMA
MR should be restored to how it was before when the vhost-vdpa module is
unloaded, or, if anything, the 1:1 DMA MR creation can be deferred until
virtio-vdpa is probed and bound to devices. Today vhost_vdpa_remove_as(),
as part of the vhost-vdpa unload code path, already gets all mappings
purged through vhost_vdpa_iotlb_unmap(0, -1ULL), and it should be pretty
safe to restore the DMA MR via .reset_map() right after. I am not sure
what the concern is here with a buggy vhost-vdpa device.
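
Concretely, something like the sketch below in drivers/vhost/vdpa.c. The
surrounding function is paraphrased from memory, only the reset_map call
is the new part, and the op itself is hypothetical:

static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);
	struct vdpa_device *vdpa = v->vdpa;

	if (!as)
		return -EINVAL;

	hlist_del(&as->hash_link);
	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);

	/* New part: let the parent device restore its default 1:1
	 * mapping here, instead of relying on virtio device reset.
	 */
	if (vdpa->config->reset_map)		/* hypothetical op */
		vdpa->config->reset_map(vdpa, asid);

	kfree(as);

	return 0;
}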

Note that when vhost-vdpa is being unloaded there is no chance to probe
the kernel through the CVQ anyway, as the virtio features are not even
negotiated at that point. And it would be even trickier to wait
indefinitely for a CVQ response from the device while trying to unload a
module.

Regards,
-Siwei
>
> Thanks
>
>>>    or do you mean it would be faster?
>> I think with .reset_map() we at least can avoid indefinite latency
>> hiccup from destroying and recreating 1:1 mapping with the unwarranted
>> 2nd unmap call. And .reset_map() should work with both .dma_map() and
>> .set_map() APIs with clear semantics.
>>
>> Regards,
>> -Siwei
>>> Thanks
>>>
>>>> Thanks,
>>>> -Siwei
>>>>
>>>>> commit 6f5312f801836e6af9bcbb0bdb44dc423e129206
>>>>> Author: Eli Cohen <elic@nvidia.com>
>>>>> Date:   Wed Jun 2 11:58:54 2021 +0300
>>>>>
>>>>>        vdpa/mlx5: Add support for running with virtio_vdpa
>>>>>
>>>>>        In order to support running vdpa using vritio_vdpa driver, we need  to
>>>>>        create a different kind of MR, one that has 1:1 mapping, since the
>>>>>        addresses referring to virtqueues are dma addresses.
>>>>>
>>>>>        We create the 1:1 MR in mlx5_vdpa_dev_add() only in case firmware
>>>>>        supports the general capability umem_uid_0. The reason for that is that
>>>>>        1:1 MRs must be created with uid == 0 while virtqueue objects can be
>>>>>        created with uid == 0 only when the firmware capability is on.
>>>>>
>>>>>        If the set_map() callback is called with new translations provided
>>>>>        through iotlb, the driver will destroy the 1:1 MR and create a regular
>>>>>        one.
>>>>>
>>>>>        Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>>        Link: https://lore.kernel.org/r/20210602085854.62690-1-elic@nvidia.com
>>>>>        Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>>>>        Acked-by: Jason Wang <jasowang@redhat.com>
>>>>>
>>>>> Thanks
>>>>>
>>>>>
>>>>>> +}
>>>>>> +
>>>>>> +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
>>>>>> +                                   struct vhost_iotlb *iotlb,
>>>>>> +                                   unsigned int asid)
>>>>>>     {
>>>>>>            struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>>>>>            int err;
>>>>>>
>>>>>> -       if (mr->initialized)
>>>>>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
>>>>>>                    return 0;
>>>>>>
>>>>>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
>>>>>> -               if (iotlb)
>>>>>> -                       err = create_user_mr(mvdev, iotlb);
>>>>>> -               else
>>>>>> -                       err = create_dma_mr(mvdev, mr);
>>>>>> +       if (mr->initialized)
>>>>>> +               return 0;
>>>>>>
>>>>>> -               if (err)
>>>>>> -                       return err;
>>>>>> -       }
>>>>>> +       if (iotlb)
>>>>>> +               err = create_user_mr(mvdev, iotlb);
>>>>>> +       else
>>>>>> +               err = create_dma_mr(mvdev, mr);
>>>>>>
>>>>>> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
>>>>>> -               err = dup_iotlb(mvdev, iotlb);
>>>>>> -               if (err)
>>>>>> -                       goto out_err;
>>>>>> -       }
>>>>>> +       if (err)
>>>>>> +               return err;
>>>>>>
>>>>>>            mr->initialized = true;
>>>>>> +
>>>>>> +       return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
>>>>>> +                               struct vhost_iotlb *iotlb, unsigned int asid)
>>>>>> +{
>>>>>> +       int err;
>>>>>> +
>>>>>> +       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
>>>>>> +       if (err)
>>>>>> +               return err;
>>>>>> +
>>>>>> +       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
>>>>>> +       if (err)
>>>>>> +               goto out_err;
>>>>>> +
>>>>>>            return 0;
>>>>>>
>>>>>>     out_err:
>>>>>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
>>>>>> -               if (iotlb)
>>>>>> -                       destroy_user_mr(mvdev, mr);
>>>>>> -               else
>>>>>> -                       destroy_dma_mr(mvdev, mr);
>>>>>> -       }
>>>>>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
>>>>>>
>>>>>>            return err;
>>>>>>     }
>>>>>> --
>>>>>> 2.41.0
>>>>>>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-10  0:40             ` Si-Wei Liu
@ 2023-08-10  3:10               ` Jason Wang
  2023-08-10 22:20                 ` Si-Wei Liu
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-10  3:10 UTC (permalink / raw)
  To: Si-Wei Liu
  Cc: Dragos Tatulea, Xuan Zhuo, Michael S. Tsirkin, Gal Pressman,
	linux-kernel, virtualization, Eugenio Pérez

On Thu, Aug 10, 2023 at 8:40 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 8/8/2023 11:52 PM, Jason Wang wrote:
> > On Wed, Aug 9, 2023 at 6:58 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>
> >>
> >> On 8/7/2023 8:00 PM, Jason Wang wrote:
> >>> On Fri, Aug 4, 2023 at 1:58 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>>>
> >>>> On 8/3/2023 1:03 AM, Jason Wang wrote:
> >>>>> On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> >>>>>> The mr->initialized flag is shared between the control vq and data vq
> >>>>>> part of the mr init/uninit. But if the control vq and data vq get placed
> >>>>>> in different ASIDs, it can happen that initializing the control vq will
> >>>>>> prevent the data vq mr from being initialized.
> >>>>>>
> >>>>>> This patch consolidates the control and data vq init parts into their
> >>>>>> own init functions. The mr->initialized will now be used for the data vq
> >>>>>> only. The control vq currently doesn't need a flag.
> >>>>>>
> >>>>>> The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
> >>>>>> split into data and control vq functions which are now also ASID aware.
> >>>>>>
> >>>>>> Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data")
> >>>>>> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> >>>>>> Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
> >>>>>> Reviewed-by: Gal Pressman <gal@nvidia.com>
> >>>>>> ---
> >>>>>>     drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
> >>>>>>     drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
> >>>>>>     2 files changed, 71 insertions(+), 27 deletions(-)
> >>>>>>
> >>>>>> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> >>>>>> index 25fc4120b618..a0420be5059f 100644
> >>>>>> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> >>>>>> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> >>>>>> @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
> >>>>>>            struct list_head head;
> >>>>>>            unsigned long num_directs;
> >>>>>>            unsigned long num_klms;
> >>>>>> +       /* state of dvq mr */
> >>>>>>            bool initialized;
> >>>>>>
> >>>>>>            /* serialize mkey creation and destruction */
> >>>>>> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> >>>>>> index 03e543229791..4ae14a248a4b 100644
> >>>>>> --- a/drivers/vdpa/mlx5/core/mr.c
> >>>>>> +++ b/drivers/vdpa/mlx5/core/mr.c
> >>>>>> @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
> >>>>>>            }
> >>>>>>     }
> >>>>>>
> >>>>>> -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> >>>>>> +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> >>>>>> +{
> >>>>>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> >>>>>> +               return;
> >>>>>> +
> >>>>>> +       prune_iotlb(mvdev);
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> >>>>>>     {
> >>>>>>            struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >>>>>>
> >>>>>> -       mutex_lock(&mr->mkey_mtx);
> >>>>>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> >>>>>> +               return;
> >>>>>> +
> >>>>>>            if (!mr->initialized)
> >>>>>> -               goto out;
> >>>>>> +               return;
> >>>>>>
> >>>>>> -       prune_iotlb(mvdev);
> >>>>>>            if (mr->user_mr)
> >>>>>>                    destroy_user_mr(mvdev, mr);
> >>>>>>            else
> >>>>>>                    destroy_dma_mr(mvdev, mr);
> >>>>>>
> >>>>>>            mr->initialized = false;
> >>>>>> -out:
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> >>>>>> +{
> >>>>>> +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >>>>>> +
> >>>>>> +       mutex_lock(&mr->mkey_mtx);
> >>>>>> +
> >>>>>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> >>>>>> +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
> >>>>>> +
> >>>>>>            mutex_unlock(&mr->mkey_mtx);
> >>>>>>     }
> >>>>>>
> >>>>>> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> >>>>>> -                               struct vhost_iotlb *iotlb, unsigned int asid)
> >>>>>> +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> >>>>>> +{
> >>>>>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
> >>>>>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
> >>>>>> +}
> >>>>>> +
> >>>>>> +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> >>>>>> +                                   struct vhost_iotlb *iotlb,
> >>>>>> +                                   unsigned int asid)
> >>>>>> +{
> >>>>>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> >>>>>> +               return 0;
> >>>>>> +
> >>>>>> +       return dup_iotlb(mvdev, iotlb);
> >>>>> This worries me as conceptually, there should be no difference between
> >>>>> dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
> >>>>>
> >>>>> One example is that, if we only do dup_iotlb() but not try to create
> >>>>> dma mr here, we will break virtio-vdpa:
> >>>> For this case, I guess we may need another way to support virtio-vdpa
> >>>> 1:1 mapping rather than overloading virtio device reset semantics, see:
> >>>>
> >>>> https://www.mail-archive.com/qemu-devel@nongnu.org/msg953755.html
> >>>>
> >>>>    > Conceptually, the address mapping is not a part of the abstraction for
> >>>>    > a virtio device now. So resetting the memory mapping during virtio
> >>>>    > device reset seems wrong.
> >>>>
> >>>> where we want to keep memory mapping intact across virtio device reset
> >>>> for best live migration latency/downtime. I wonder would it work to
> >>>> reset the mapping in vhost-vdpa life cycle out of virtio reset, say
> >>>> introduce a .reset_map() op to restore 1:1 mapping within
> >>>> vhost_vdpa_remove_as() right after vhost_vdpa_iotlb_unmap()? Then we can
> >>>> move the iotlb reset logic to there without worry breaking virtio-vdpa.
> >>> It looks to me we don't need a new ops. We can simply do set_map()
> >>> twice
> >> What does it mean, first set_map(0, -1ULL) with zero iotlb entry passed
> >> in to destroy all iotlb mappings previously added, and second set_map(0,
> >> -1ULL) to restore 1:1 DMA MR? But userspace (maybe a buggy one but
> >> doesn't do harm) apart from vhost-vdpa itself can do unmap twice anyway,
> >> this is supported today I think. Why there'll be such obscure
> >> distinction, or what's the benefit to treat second .set_map() as
> >> recreating 1:1 mapping?
> > Ok, I think I miss some context. I agree that it's better to decouple
> > memory mappings from the virtio reset. It helps to reduce the
> > unnecessary memory transactions. It might require a new feature flag.
> This I agreed. AFAICT QEMU would need to check this new feature flag to
> make sure memory mappings are kept intact across reset, otherwise for
> the sake of avoid breaking older kernels it has to recreate all the
> mappings after reset like how it is done today.
>
> > Regarding the method of restoring to 1:1 DMA MR, it might be dangerous
> > for (buggy) vhost-vDPA devices. Since its userspace doesn't set up any
> > mapping it can explore the kernel with that via CVQ?
> Not sure I understand this proposal. The 1:1 DMA MR is first created at
> vdpa device add, and gets destroyed implicitly when the first .set_map
> or .dma_map call is made, which is only possible after the vhost-vdpa
> module is loaded and bound to vdpa devices.

So what happens if there's a buggy userspace that doesn't do any IOTLB setup?

Thanks

> Naturally the DMA MR should
> be restored to how it was before when vhost-vdpa module is unloaded, or
> if anything the 1:1 DMA MR creation can be deferred to until virtio-vdpa
> is probed and bound to devices. Today vhost_vdpa_remove_as() as part of
> the vhost-vdpa unload code path already gets all mappings purged through
> vhost_vdpa_iotlb_unmap(0, -1ULL), and it should be pretty safe to
> restore DMA MR via .reset_map() right after. Not sure what's the concern
> here with buggy vhost-vdpa device?
>
> Noted when vhost-vdpa is being unloaded there's even no chance to probe
> kernel through CVQ, as the virtio feature is not even negotiated at that
> point. And it is even trickier to wait for CVQ response from device
> indefinitely when trying to unload a module.
>
> Regards,
> -Siwei
> >
> > Thanks
> >
> >>>    or do you mean it would be faster?
> >> I think with .reset_map() we at least can avoid indefinite latency
> >> hiccup from destroying and recreating 1:1 mapping with the unwarranted
> >> 2nd unmap call. And .reset_map() should work with both .dma_map() and
> >> .set_map() APIs with clear semantics.
> >>
> >> Regards,
> >> -Siwei
> >>> Thanks
> >>>
> >>>> Thanks,
> >>>> -Siwei
> >>>>
> >>>>> commit 6f5312f801836e6af9bcbb0bdb44dc423e129206
> >>>>> Author: Eli Cohen <elic@nvidia.com>
> >>>>> Date:   Wed Jun 2 11:58:54 2021 +0300
> >>>>>
> >>>>>        vdpa/mlx5: Add support for running with virtio_vdpa
> >>>>>
> >>>>>        In order to support running vdpa using vritio_vdpa driver, we need  to
> >>>>>        create a different kind of MR, one that has 1:1 mapping, since the
> >>>>>        addresses referring to virtqueues are dma addresses.
> >>>>>
> >>>>>        We create the 1:1 MR in mlx5_vdpa_dev_add() only in case firmware
> >>>>>        supports the general capability umem_uid_0. The reason for that is that
> >>>>>        1:1 MRs must be created with uid == 0 while virtqueue objects can be
> >>>>>        created with uid == 0 only when the firmware capability is on.
> >>>>>
> >>>>>        If the set_map() callback is called with new translations provided
> >>>>>        through iotlb, the driver will destroy the 1:1 MR and create a regular
> >>>>>        one.
> >>>>>
> >>>>>        Signed-off-by: Eli Cohen <elic@nvidia.com>
> >>>>>        Link: https://lore.kernel.org/r/20210602085854.62690-1-elic@nvidia.com
> >>>>>        Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >>>>>        Acked-by: Jason Wang <jasowang@redhat.com>
> >>>>>
> >>>>> Thanks
> >>>>>
> >>>>>
> >>>>>> +}
> >>>>>> +
> >>>>>> +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
> >>>>>> +                                   struct vhost_iotlb *iotlb,
> >>>>>> +                                   unsigned int asid)
> >>>>>>     {
> >>>>>>            struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >>>>>>            int err;
> >>>>>>
> >>>>>> -       if (mr->initialized)
> >>>>>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> >>>>>>                    return 0;
> >>>>>>
> >>>>>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> >>>>>> -               if (iotlb)
> >>>>>> -                       err = create_user_mr(mvdev, iotlb);
> >>>>>> -               else
> >>>>>> -                       err = create_dma_mr(mvdev, mr);
> >>>>>> +       if (mr->initialized)
> >>>>>> +               return 0;
> >>>>>>
> >>>>>> -               if (err)
> >>>>>> -                       return err;
> >>>>>> -       }
> >>>>>> +       if (iotlb)
> >>>>>> +               err = create_user_mr(mvdev, iotlb);
> >>>>>> +       else
> >>>>>> +               err = create_dma_mr(mvdev, mr);
> >>>>>>
> >>>>>> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
> >>>>>> -               err = dup_iotlb(mvdev, iotlb);
> >>>>>> -               if (err)
> >>>>>> -                       goto out_err;
> >>>>>> -       }
> >>>>>> +       if (err)
> >>>>>> +               return err;
> >>>>>>
> >>>>>>            mr->initialized = true;
> >>>>>> +
> >>>>>> +       return 0;
> >>>>>> +}
> >>>>>> +
> >>>>>> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> >>>>>> +                               struct vhost_iotlb *iotlb, unsigned int asid)
> >>>>>> +{
> >>>>>> +       int err;
> >>>>>> +
> >>>>>> +       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
> >>>>>> +       if (err)
> >>>>>> +               return err;
> >>>>>> +
> >>>>>> +       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
> >>>>>> +       if (err)
> >>>>>> +               goto out_err;
> >>>>>> +
> >>>>>>            return 0;
> >>>>>>
> >>>>>>     out_err:
> >>>>>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> >>>>>> -               if (iotlb)
> >>>>>> -                       destroy_user_mr(mvdev, mr);
> >>>>>> -               else
> >>>>>> -                       destroy_dma_mr(mvdev, mr);
> >>>>>> -       }
> >>>>>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> >>>>>>
> >>>>>>            return err;
> >>>>>>     }
> >>>>>> --
> >>>>>> 2.41.0
> >>>>>>
> >>>>> _______________________________________________
> >>>>> Virtualization mailing list
> >>>>> Virtualization@lists.linux-foundation.org
> >>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling
  2023-08-02 17:12 [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling Dragos Tatulea
  2023-08-02 17:12 ` [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics Dragos Tatulea
  2023-08-02 17:12 ` [PATCH 2/2] vdpa/mlx5: Delete control vq iotlb in destroy_mr only when necessary Dragos Tatulea
@ 2023-08-10  8:54 ` Michael S. Tsirkin
  2023-08-10  8:59   ` Jason Wang
  2023-08-10  9:04   ` Dragos Tatulea
  2 siblings, 2 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-08-10  8:54 UTC (permalink / raw)
  To: Dragos Tatulea; +Cc: Jason Wang, Xuan Zhuo, linux-kernel, virtualization

On Wed, Aug 02, 2023 at 08:12:16PM +0300, Dragos Tatulea wrote:
> This patch series is based on Eugenio's fix for handling CVQs in
> a different ASID [0].
> 
> The first patch is the actual fix.
> 
> The next 2 patches are fixing a possible issue that I found while
> implementing patch 1. The patches are ordered like this for clarity.
> 
> [0] https://lore.kernel.org/lkml/20230112142218.725622-1-eperezma@redhat.com/


So what are we doing with this patchset? If we are merging anything
for this release it has to happen now.

> Dragos Tatulea (1):
>   vdpa/mlx5: Fix mr->initialized semantics
> 
> Eugenio Pérez (1):
>   vdpa/mlx5: Delete control vq iotlb in destroy_mr only when necessary
> 
>  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  2 +
>  drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
>  drivers/vdpa/mlx5/net/mlx5_vnet.c  |  4 +-
>  3 files changed, 74 insertions(+), 29 deletions(-)
> 
> -- 
> 2.41.0


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling
  2023-08-10  8:54 ` [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling Michael S. Tsirkin
@ 2023-08-10  8:59   ` Jason Wang
  2023-08-10  9:04   ` Dragos Tatulea
  1 sibling, 0 replies; 42+ messages in thread
From: Jason Wang @ 2023-08-10  8:59 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Dragos Tatulea, Xuan Zhuo, linux-kernel, virtualization

On Thu, Aug 10, 2023 at 4:54 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Aug 02, 2023 at 08:12:16PM +0300, Dragos Tatulea wrote:
> > This patch series is based on Eugenio's fix for handling CVQs in
> > a different ASID [0].
> >
> > The first patch is the actual fix.
> >
> > The next 2 patches are fixing a possible issue that I found while
> > implementing patch 1. The patches are ordered like this for clarity.
> >
> > [0] https://lore.kernel.org/lkml/20230112142218.725622-1-eperezma@redhat.com/
>
>
> So what are we doing with this patchset? If we are merging anything
> for this release it has to happen now.

I think we can merge this and do optimization on top.

Acked-by: Jason Wang <jasowang@redhat.com>

Thanks

>
> > Dragos Tatulea (1):
> >   vdpa/mlx5: Fix mr->initialized semantics
> >
> > Eugenio Pérez (1):
> >   vdpa/mlx5: Delete control vq iotlb in destroy_mr only when necessary
> >
> >  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  2 +
> >  drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
> >  drivers/vdpa/mlx5/net/mlx5_vnet.c  |  4 +-
> >  3 files changed, 74 insertions(+), 29 deletions(-)
> >
> > --
> > 2.41.0
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling
  2023-08-10  8:54 ` [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling Michael S. Tsirkin
  2023-08-10  8:59   ` Jason Wang
@ 2023-08-10  9:04   ` Dragos Tatulea
  1 sibling, 0 replies; 42+ messages in thread
From: Dragos Tatulea @ 2023-08-10  9:04 UTC (permalink / raw)
  To: mst; +Cc: virtualization, jasowang, linux-kernel, xuanzhuo

On Thu, 2023-08-10 at 04:54 -0400, Michael S. Tsirkin wrote:
> On Wed, Aug 02, 2023 at 08:12:16PM +0300, Dragos Tatulea wrote:
> > This patch series is based on Eugenio's fix for handling CVQs in
> > a different ASID [0].
> > 
> > The first patch is the actual fix.
> > 
> > The next 2 patches are fixing a possible issue that I found while
> > implementing patch 1. The patches are ordered like this for clarity.
> > 
> > [0]
> > https://lore.kernel.org/lkml/20230112142218.725622-1-eperezma@redhat.com/
> 
> 
> So what are we doing with this patchset? If we are merging anything
> for this release it has to happen now.
> 
Jason mentioned that he wanted an additional cleanup patch to move the cvq specific
code to the net part of mlx5_vdpa. That's quite a refactoring though and I would
like to take my time to do an RFC for that first.

It would be good if this got merged now as it fixes an actual problem ...

> > Dragos Tatulea (1):
> >   vdpa/mlx5: Fix mr->initialized semantics
> > 
> > Eugenio Pérez (1):
> >   vdpa/mlx5: Delete control vq iotlb in destroy_mr only when necessary
> > 
> >  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  2 +
> >  drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
> >  drivers/vdpa/mlx5/net/mlx5_vnet.c  |  4 +-
> >  3 files changed, 74 insertions(+), 29 deletions(-)
> > 
> > -- 
> > 2.41.0
> 


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-10  3:10               ` Jason Wang
@ 2023-08-10 22:20                 ` Si-Wei Liu
  2023-08-14  2:59                   ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-10 22:20 UTC (permalink / raw)
  To: Jason Wang
  Cc: Dragos Tatulea, Xuan Zhuo, Michael S. Tsirkin, Gal Pressman,
	linux-kernel, virtualization, Eugenio Pérez



On 8/9/2023 8:10 PM, Jason Wang wrote:
> On Thu, Aug 10, 2023 at 8:40 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 8/8/2023 11:52 PM, Jason Wang wrote:
>>> On Wed, Aug 9, 2023 at 6:58 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>>
>>>> On 8/7/2023 8:00 PM, Jason Wang wrote:
>>>>> On Fri, Aug 4, 2023 at 1:58 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>>>> On 8/3/2023 1:03 AM, Jason Wang wrote:
>>>>>>> On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>>>>>>>> The mr->initialized flag is shared between the control vq and data vq
>>>>>>>> part of the mr init/uninit. But if the control vq and data vq get placed
>>>>>>>> in different ASIDs, it can happen that initializing the control vq will
>>>>>>>> prevent the data vq mr from being initialized.
>>>>>>>>
>>>>>>>> This patch consolidates the control and data vq init parts into their
>>>>>>>> own init functions. The mr->initialized will now be used for the data vq
>>>>>>>> only. The control vq currently doesn't need a flag.
>>>>>>>>
>>>>>>>> The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
>>>>>>>> split into data and control vq functions which are now also ASID aware.
>>>>>>>>
>>>>>>>> Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data")
>>>>>>>> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
>>>>>>>> Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
>>>>>>>> Reviewed-by: Gal Pressman <gal@nvidia.com>
>>>>>>>> ---
>>>>>>>>      drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
>>>>>>>>      drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
>>>>>>>>      2 files changed, 71 insertions(+), 27 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>>>>>>> index 25fc4120b618..a0420be5059f 100644
>>>>>>>> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>>>>>>> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>>>>>>> @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
>>>>>>>>             struct list_head head;
>>>>>>>>             unsigned long num_directs;
>>>>>>>>             unsigned long num_klms;
>>>>>>>> +       /* state of dvq mr */
>>>>>>>>             bool initialized;
>>>>>>>>
>>>>>>>>             /* serialize mkey creation and destruction */
>>>>>>>> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
>>>>>>>> index 03e543229791..4ae14a248a4b 100644
>>>>>>>> --- a/drivers/vdpa/mlx5/core/mr.c
>>>>>>>> +++ b/drivers/vdpa/mlx5/core/mr.c
>>>>>>>> @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
>>>>>>>>             }
>>>>>>>>      }
>>>>>>>>
>>>>>>>> -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
>>>>>>>> +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>>>>>>>> +{
>>>>>>>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
>>>>>>>> +               return;
>>>>>>>> +
>>>>>>>> +       prune_iotlb(mvdev);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>>>>>>>>      {
>>>>>>>>             struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>>>>>>>
>>>>>>>> -       mutex_lock(&mr->mkey_mtx);
>>>>>>>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
>>>>>>>> +               return;
>>>>>>>> +
>>>>>>>>             if (!mr->initialized)
>>>>>>>> -               goto out;
>>>>>>>> +               return;
>>>>>>>>
>>>>>>>> -       prune_iotlb(mvdev);
>>>>>>>>             if (mr->user_mr)
>>>>>>>>                     destroy_user_mr(mvdev, mr);
>>>>>>>>             else
>>>>>>>>                     destroy_dma_mr(mvdev, mr);
>>>>>>>>
>>>>>>>>             mr->initialized = false;
>>>>>>>> -out:
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>>>>>>>> +{
>>>>>>>> +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>>>>>>> +
>>>>>>>> +       mutex_lock(&mr->mkey_mtx);
>>>>>>>> +
>>>>>>>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
>>>>>>>> +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
>>>>>>>> +
>>>>>>>>             mutex_unlock(&mr->mkey_mtx);
>>>>>>>>      }
>>>>>>>>
>>>>>>>> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
>>>>>>>> -                               struct vhost_iotlb *iotlb, unsigned int asid)
>>>>>>>> +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
>>>>>>>> +{
>>>>>>>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
>>>>>>>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
>>>>>>>> +                                   struct vhost_iotlb *iotlb,
>>>>>>>> +                                   unsigned int asid)
>>>>>>>> +{
>>>>>>>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
>>>>>>>> +               return 0;
>>>>>>>> +
>>>>>>>> +       return dup_iotlb(mvdev, iotlb);
>>>>>>> This worries me as conceptually, there should be no difference between
>>>>>>> dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
>>>>>>>
>>>>>>> One example is that, if we only do dup_iotlb() but not try to create
>>>>>>> dma mr here, we will break virtio-vdpa:
>>>>>> For this case, I guess we may need another way to support virtio-vdpa
>>>>>> 1:1 mapping rather than overloading virtio device reset semantics, see:
>>>>>>
>>>>>> https://www.mail-archive.com/qemu-devel@nongnu.org/msg953755.html
>>>>>>
>>>>>>     > Conceptually, the address mapping is not a part of the abstraction for
>>>>>>     > a virtio device now. So resetting the memory mapping during virtio
>>>>>>     > device reset seems wrong.
>>>>>>
>>>>>> where we want to keep memory mapping intact across virtio device reset
>>>>>> for best live migration latency/downtime. I wonder whether it would work to
>>>>>> reset the mapping in vhost-vdpa life cycle out of virtio reset, say
>>>>>> introduce a .reset_map() op to restore 1:1 mapping within
>>>>>> vhost_vdpa_remove_as() right after vhost_vdpa_iotlb_unmap()? Then we can
>>>>>> move the iotlb reset logic to there without worry breaking virtio-vdpa.
>>>>> It looks to me we don't need a new ops. We can simply do set_map()
>>>>> twice
>>>> What does it mean, first set_map(0, -1ULL) with zero iotlb entry passed
>>>> in to destroy all iotlb mappings previously added, and second set_map(0,
>>>> -1ULL) to restore 1:1 DMA MR? But userspace (maybe a buggy one but
>>>> doesn't do harm) apart from vhost-vdpa itself can do unmap twice anyway,
>>>> this is supported today I think. Why would there be such an obscure
>>>> distinction, or what's the benefit of treating the second .set_map() as
>>>> recreating the 1:1 mapping?
>>> Ok, I think I miss some context. I agree that it's better to decouple
>>> memory mappings from the virtio reset. It helps to reduce the
>>> unnecessary memory transactions. It might require a new feature flag.
>> This I agreed. AFAICT QEMU would need to check this new feature flag to
>> make sure memory mappings are kept intact across reset, otherwise for
>> the sake of not breaking older kernels it has to recreate all the
>> mappings after reset, as is done today.
>>
>>> Regarding the method of restoring to 1:1 DMA MR, it might be dangerous
>>> for (buggy) vhost-vDPA devices. Since its userspace doesn't set up any
>>> mapping it can explore the kernel with that via CVQ?
>> Not sure I understand this proposal. The 1:1 DMA MR is first created at
>> vdpa device add, and gets destroyed implicitly when the first .set_map
>> or .dma_map call is made, which is only possible after the vhost-vdpa
>> module is loaded and bound to vdpa devices.
> So what happens if there's a buggy userspace that doesn't do any IOTLB setup?
Then the parent driver doesn't do anything in .reset_map() - as the DMA MR
is still there. The parent driver should be able to tell whether the DMA MR
has been destroyed or not by checking its internal state.
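
To illustrate with a rough sketch (not actual driver code; the struct, flag
and helper names below are made up for illustration), a parent with an
on-chip IOMMU could implement .reset_map() along these lines, so that it
stays a no-op for a userspace that never set up any mapping:

static int my_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
{
	struct my_vdpa_dev *dev = to_my_vdpa(vdev);
	int err = 0;

	mutex_lock(&dev->mr_lock);
	if (!dev->user_mr_created) {
		/* the 1:1 DMA MR was never torn down, nothing to restore */
		goto out;
	}

	my_destroy_user_mr(dev, asid);		/* drop the userspace mappings */
	err = my_create_dma_mr(dev, asid);	/* re-create the 1:1 DMA MR */
	if (!err)
		dev->user_mr_created = false;
out:
	mutex_unlock(&dev->mr_lock);
	return err;
}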

-Siwei

>
> Thanks
>
>> Naturally the DMA MR should
>> be restored to how it was before when vhost-vdpa module is unloaded, or
>> if anything the 1:1 DMA MR creation can be deferred to until virtio-vdpa
>> is probed and bound to devices. Today vhost_vdpa_remove_as() as part of
>> the vhost-vdpa unload code path already gets all mappings purged through
>> vhost_vdpa_iotlb_unmap(0, -1ULL), and it should be pretty safe to
>> restore DMA MR via .reset_map() right after. Not sure what's the concern
>> here with buggy vhost-vdpa device?
>>
>> Noted when vhost-vdpa is being unloaded there's even no chance to probe
>> kernel through CVQ, as the virtio feature is not even negotiated at that
>> point. And it is even trickier to wait for CVQ response from device
>> indefinitely when trying to unload a module.
>>
>> Regards,
>> -Siwei
>>> Thanks
>>>
>>>>>     or do you mean it would be faster?
>>>> I think with .reset_map() we at least can avoid indefinite latency
>>>> hiccup from destroying and recreating 1:1 mapping with the unwarranted
>>>> 2nd unmap call. And .reset_map() should work with both .dma_map() and
>>>> .set_map() APIs with clear semantics.
>>>>
>>>> Regards,
>>>> -Siwei
>>>>> Thanks
>>>>>
>>>>>> Thanks,
>>>>>> -Siwei
>>>>>>
>>>>>>> commit 6f5312f801836e6af9bcbb0bdb44dc423e129206
>>>>>>> Author: Eli Cohen <elic@nvidia.com>
>>>>>>> Date:   Wed Jun 2 11:58:54 2021 +0300
>>>>>>>
>>>>>>>         vdpa/mlx5: Add support for running with virtio_vdpa
>>>>>>>
>>>>>>>         In order to support running vdpa using vritio_vdpa driver, we need  to
>>>>>>>         create a different kind of MR, one that has 1:1 mapping, since the
>>>>>>>         addresses referring to virtqueues are dma addresses.
>>>>>>>
>>>>>>>         We create the 1:1 MR in mlx5_vdpa_dev_add() only in case firmware
>>>>>>>         supports the general capability umem_uid_0. The reason for that is that
>>>>>>>         1:1 MRs must be created with uid == 0 while virtqueue objects can be
>>>>>>>         created with uid == 0 only when the firmware capability is on.
>>>>>>>
>>>>>>>         If the set_map() callback is called with new translations provided
>>>>>>>         through iotlb, the driver will destroy the 1:1 MR and create a regular
>>>>>>>         one.
>>>>>>>
>>>>>>>         Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>>>>         Link: https://lore.kernel.org/r/20210602085854.62690-1-elic@nvidia.com
>>>>>>>         Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>>>>>>         Acked-by: Jason Wang <jasowang@redhat.com>
>>>>>>>
>>>>>>> Thanks
>>>>>>>
>>>>>>>
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
>>>>>>>> +                                   struct vhost_iotlb *iotlb,
>>>>>>>> +                                   unsigned int asid)
>>>>>>>>      {
>>>>>>>>             struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>>>>>>>             int err;
>>>>>>>>
>>>>>>>> -       if (mr->initialized)
>>>>>>>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
>>>>>>>>                     return 0;
>>>>>>>>
>>>>>>>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
>>>>>>>> -               if (iotlb)
>>>>>>>> -                       err = create_user_mr(mvdev, iotlb);
>>>>>>>> -               else
>>>>>>>> -                       err = create_dma_mr(mvdev, mr);
>>>>>>>> +       if (mr->initialized)
>>>>>>>> +               return 0;
>>>>>>>>
>>>>>>>> -               if (err)
>>>>>>>> -                       return err;
>>>>>>>> -       }
>>>>>>>> +       if (iotlb)
>>>>>>>> +               err = create_user_mr(mvdev, iotlb);
>>>>>>>> +       else
>>>>>>>> +               err = create_dma_mr(mvdev, mr);
>>>>>>>>
>>>>>>>> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
>>>>>>>> -               err = dup_iotlb(mvdev, iotlb);
>>>>>>>> -               if (err)
>>>>>>>> -                       goto out_err;
>>>>>>>> -       }
>>>>>>>> +       if (err)
>>>>>>>> +               return err;
>>>>>>>>
>>>>>>>>             mr->initialized = true;
>>>>>>>> +
>>>>>>>> +       return 0;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
>>>>>>>> +                               struct vhost_iotlb *iotlb, unsigned int asid)
>>>>>>>> +{
>>>>>>>> +       int err;
>>>>>>>> +
>>>>>>>> +       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
>>>>>>>> +       if (err)
>>>>>>>> +               return err;
>>>>>>>> +
>>>>>>>> +       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
>>>>>>>> +       if (err)
>>>>>>>> +               goto out_err;
>>>>>>>> +
>>>>>>>>             return 0;
>>>>>>>>
>>>>>>>>      out_err:
>>>>>>>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
>>>>>>>> -               if (iotlb)
>>>>>>>> -                       destroy_user_mr(mvdev, mr);
>>>>>>>> -               else
>>>>>>>> -                       destroy_dma_mr(mvdev, mr);
>>>>>>>> -       }
>>>>>>>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
>>>>>>>>
>>>>>>>>             return err;
>>>>>>>>      }
>>>>>>>> --
>>>>>>>> 2.41.0
>>>>>>>>
>>>>>>> _______________________________________________
>>>>>>> Virtualization mailing list
>>>>>>> Virtualization@lists.linux-foundation.org
>>>>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-10 22:20                 ` Si-Wei Liu
@ 2023-08-14  2:59                   ` Jason Wang
  2023-08-15  1:43                     ` [PATCH RFC 0/4] vdpa: decouple reset of iotlb mapping from device reset Si-Wei Liu
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-14  2:59 UTC (permalink / raw)
  To: Si-Wei Liu
  Cc: Dragos Tatulea, Xuan Zhuo, Michael S. Tsirkin, Gal Pressman,
	linux-kernel, virtualization, Eugenio Pérez

On Fri, Aug 11, 2023 at 6:21 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 8/9/2023 8:10 PM, Jason Wang wrote:
> > On Thu, Aug 10, 2023 at 8:40 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>
> >>
> >> On 8/8/2023 11:52 PM, Jason Wang wrote:
> >>> On Wed, Aug 9, 2023 at 6:58 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>>>
> >>>> On 8/7/2023 8:00 PM, Jason Wang wrote:
> >>>>> On Fri, Aug 4, 2023 at 1:58 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>>>>> On 8/3/2023 1:03 AM, Jason Wang wrote:
> >>>>>>> On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> >>>>>>>> The mr->initialized flag is shared between the control vq and data vq
> >>>>>>>> part of the mr init/uninit. But if the control vq and data vq get placed
> >>>>>>>> in different ASIDs, it can happen that initializing the control vq will
> >>>>>>>> prevent the data vq mr from being initialized.
> >>>>>>>>
> >>>>>>>> This patch consolidates the control and data vq init parts into their
> >>>>>>>> own init functions. The mr->initialized will now be used for the data vq
> >>>>>>>> only. The control vq currently doesn't need a flag.
> >>>>>>>>
> >>>>>>>> The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got
> >>>>>>>> split into data and control vq functions which are now also ASID aware.
> >>>>>>>>
> >>>>>>>> Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data")
> >>>>>>>> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> >>>>>>>> Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
> >>>>>>>> Reviewed-by: Gal Pressman <gal@nvidia.com>
> >>>>>>>> ---
> >>>>>>>>      drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
> >>>>>>>>      drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++---------
> >>>>>>>>      2 files changed, 71 insertions(+), 27 deletions(-)
> >>>>>>>>
> >>>>>>>> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> >>>>>>>> index 25fc4120b618..a0420be5059f 100644
> >>>>>>>> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> >>>>>>>> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> >>>>>>>> @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
> >>>>>>>>             struct list_head head;
> >>>>>>>>             unsigned long num_directs;
> >>>>>>>>             unsigned long num_klms;
> >>>>>>>> +       /* state of dvq mr */
> >>>>>>>>             bool initialized;
> >>>>>>>>
> >>>>>>>>             /* serialize mkey creation and destruction */
> >>>>>>>> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> >>>>>>>> index 03e543229791..4ae14a248a4b 100644
> >>>>>>>> --- a/drivers/vdpa/mlx5/core/mr.c
> >>>>>>>> +++ b/drivers/vdpa/mlx5/core/mr.c
> >>>>>>>> @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
> >>>>>>>>             }
> >>>>>>>>      }
> >>>>>>>>
> >>>>>>>> -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> >>>>>>>> +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> >>>>>>>> +{
> >>>>>>>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> >>>>>>>> +               return;
> >>>>>>>> +
> >>>>>>>> +       prune_iotlb(mvdev);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> >>>>>>>>      {
> >>>>>>>>             struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >>>>>>>>
> >>>>>>>> -       mutex_lock(&mr->mkey_mtx);
> >>>>>>>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> >>>>>>>> +               return;
> >>>>>>>> +
> >>>>>>>>             if (!mr->initialized)
> >>>>>>>> -               goto out;
> >>>>>>>> +               return;
> >>>>>>>>
> >>>>>>>> -       prune_iotlb(mvdev);
> >>>>>>>>             if (mr->user_mr)
> >>>>>>>>                     destroy_user_mr(mvdev, mr);
> >>>>>>>>             else
> >>>>>>>>                     destroy_dma_mr(mvdev, mr);
> >>>>>>>>
> >>>>>>>>             mr->initialized = false;
> >>>>>>>> -out:
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> >>>>>>>> +{
> >>>>>>>> +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >>>>>>>> +
> >>>>>>>> +       mutex_lock(&mr->mkey_mtx);
> >>>>>>>> +
> >>>>>>>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> >>>>>>>> +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
> >>>>>>>> +
> >>>>>>>>             mutex_unlock(&mr->mkey_mtx);
> >>>>>>>>      }
> >>>>>>>>
> >>>>>>>> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> >>>>>>>> -                               struct vhost_iotlb *iotlb, unsigned int asid)
> >>>>>>>> +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> >>>>>>>> +{
> >>>>>>>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
> >>>>>>>> +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> >>>>>>>> +                                   struct vhost_iotlb *iotlb,
> >>>>>>>> +                                   unsigned int asid)
> >>>>>>>> +{
> >>>>>>>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> >>>>>>>> +               return 0;
> >>>>>>>> +
> >>>>>>>> +       return dup_iotlb(mvdev, iotlb);
> >>>>>>> This worries me as conceptually, there should be no difference between
> >>>>>>> dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
> >>>>>>>
> >>>>>>> One example is that, if we only do dup_iotlb() but not try to create
> >>>>>>> dma mr here, we will break virtio-vdpa:
> >>>>>> For this case, I guess we may need another way to support virtio-vdpa
> >>>>>> 1:1 mapping rather than overloading virtio device reset semantics, see:
> >>>>>>
> >>>>>> https://www.mail-archive.com/qemu-devel@nongnu.org/msg953755.html
> >>>>>>
> >>>>>>     > Conceptually, the address mapping is not a part of the abstraction for
> >>>>>>     > a virtio device now. So resetting the memory mapping during virtio
> >>>>>>     > device reset seems wrong.
> >>>>>>
> >>>>>> where we want to keep memory mapping intact across virtio device reset
> >>>>>> for best live migration latency/downtime. I wonder whether it would work to
> >>>>>> reset the mapping in vhost-vdpa life cycle out of virtio reset, say
> >>>>>> introduce a .reset_map() op to restore 1:1 mapping within
> >>>>>> vhost_vdpa_remove_as() right after vhost_vdpa_iotlb_unmap()? Then we can
> >>>>>> move the iotlb reset logic to there without worry breaking virtio-vdpa.
> >>>>> It looks to me we don't need a new ops. We can simply do set_map()
> >>>>> twice
> >>>> What does it mean, first set_map(0, -1ULL) with zero iotlb entry passed
> >>>> in to destroy all iotlb mappings previously added, and second set_map(0,
> >>>> -1ULL) to restore 1:1 DMA MR? But userspace (maybe a buggy one but
> >>>> doesn't do harm) apart from vhost-vdpa itself can do unmap twice anyway,
> >>>> this is supported today I think. Why would there be such an obscure
> >>>> distinction, or what's the benefit of treating the second .set_map() as
> >>>> recreating the 1:1 mapping?
> >>> Ok, I think I miss some context. I agree that it's better to decouple
> >>> memory mappings from the virtio reset. It helps to reduce the
> >>> unnecessary memory transactions. It might require a new feature flag.
> >> This I agreed. AFAICT QEMU would need to check this new feature flag to
> >> make sure memory mappings are kept intact across reset, otherwise for
> >> the sake of not breaking older kernels it has to recreate all the
> >> mappings after reset, as is done today.
> >>
> >>> Regarding the method of restoring to 1:1 DMA MR, it might be dangerous
> >>> for (buggy) vhost-vDPA devices. Since its userspace doesn't set up any
> >>> mapping it can explore the kernel with that via CVQ?
> >> Not sure I understand this proposal. The 1:1 DMA MR is first created at
> >> vdpa device add, and gets destroyed implicitly when the first .set_map
> >> or .dma_map call is made, which is only possible after the vhost-vdpa
> >> module is loaded and bound to vdpa devices.
> > So what happens if there's a buggy userspace that doesn't do any IOTLB setup?
> Then parent driver doesn't do anything in .reset_map() - as the DMA MR
> is still there. Parent driver should be able to tell apart if DMA MR has
> been destroyed or not by checking the internal state.

Would you mind posting a patch to demonstrate this?

Thanks

>
> -Siwei
>
> >
> > Thanks
> >
> >> Naturally the DMA MR should
> >> be restored to how it was before when vhost-vdpa module is unloaded, or
> >> if anything the 1:1 DMA MR creation can be deferred to until virtio-vdpa
> >> is probed and bound to devices. Today vhost_vdpa_remove_as() as part of
> >> the vhost-vdpa unload code path already gets all mappings purged through
> >> vhost_vdpa_iotlb_unmap(0, -1ULL), and it should be pretty safe to
> >> restore DMA MR via .reset_map() right after. Not sure what's the concern
> >> here with buggy vhost-vdpa device?
> >>
> >> Noted when vhost-vdpa is being unloaded there's even no chance to probe
> >> kernel through CVQ, as the virtio feature is not even negotiated at that
> >> point. And it is even trickier to wait for CVQ response from device
> >> indefinitely when trying to unload a module.
> >>
> >> Regards,
> >> -Siwei
> >>> Thanks
> >>>
> >>>>>     or do you mean it would be faster?
> >>>> I think with .reset_map() we at least can avoid indefinite latency
> >>>> hiccup from destroying and recreating 1:1 mapping with the unwarranted
> >>>> 2nd unmap call. And .reset_map() should work with both .dma_map() and
> >>>> .set_map() APIs with clear semantics.
> >>>>
> >>>> Regards,
> >>>> -Siwei
> >>>>> Thanks
> >>>>>
> >>>>>> Thanks,
> >>>>>> -Siwei
> >>>>>>
> >>>>>>> commit 6f5312f801836e6af9bcbb0bdb44dc423e129206
> >>>>>>> Author: Eli Cohen <elic@nvidia.com>
> >>>>>>> Date:   Wed Jun 2 11:58:54 2021 +0300
> >>>>>>>
> >>>>>>>         vdpa/mlx5: Add support for running with virtio_vdpa
> >>>>>>>
> >>>>>>>         In order to support running vdpa using vritio_vdpa driver, we need  to
> >>>>>>>         create a different kind of MR, one that has 1:1 mapping, since the
> >>>>>>>         addresses referring to virtqueues are dma addresses.
> >>>>>>>
> >>>>>>>         We create the 1:1 MR in mlx5_vdpa_dev_add() only in case firmware
> >>>>>>>         supports the general capability umem_uid_0. The reason for that is that
> >>>>>>>         1:1 MRs must be created with uid == 0 while virtqueue objects can be
> >>>>>>>         created with uid == 0 only when the firmware capability is on.
> >>>>>>>
> >>>>>>>         If the set_map() callback is called with new translations provided
> >>>>>>>         through iotlb, the driver will destroy the 1:1 MR and create a regular
> >>>>>>>         one.
> >>>>>>>
> >>>>>>>         Signed-off-by: Eli Cohen <elic@nvidia.com>
> >>>>>>>         Link: https://lore.kernel.org/r/20210602085854.62690-1-elic@nvidia.com
> >>>>>>>         Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >>>>>>>         Acked-by: Jason Wang <jasowang@redhat.com>
> >>>>>>>
> >>>>>>> Thanks
> >>>>>>>
> >>>>>>>
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
> >>>>>>>> +                                   struct vhost_iotlb *iotlb,
> >>>>>>>> +                                   unsigned int asid)
> >>>>>>>>      {
> >>>>>>>>             struct mlx5_vdpa_mr *mr = &mvdev->mr;
> >>>>>>>>             int err;
> >>>>>>>>
> >>>>>>>> -       if (mr->initialized)
> >>>>>>>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> >>>>>>>>                     return 0;
> >>>>>>>>
> >>>>>>>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> >>>>>>>> -               if (iotlb)
> >>>>>>>> -                       err = create_user_mr(mvdev, iotlb);
> >>>>>>>> -               else
> >>>>>>>> -                       err = create_dma_mr(mvdev, mr);
> >>>>>>>> +       if (mr->initialized)
> >>>>>>>> +               return 0;
> >>>>>>>>
> >>>>>>>> -               if (err)
> >>>>>>>> -                       return err;
> >>>>>>>> -       }
> >>>>>>>> +       if (iotlb)
> >>>>>>>> +               err = create_user_mr(mvdev, iotlb);
> >>>>>>>> +       else
> >>>>>>>> +               err = create_dma_mr(mvdev, mr);
> >>>>>>>>
> >>>>>>>> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
> >>>>>>>> -               err = dup_iotlb(mvdev, iotlb);
> >>>>>>>> -               if (err)
> >>>>>>>> -                       goto out_err;
> >>>>>>>> -       }
> >>>>>>>> +       if (err)
> >>>>>>>> +               return err;
> >>>>>>>>
> >>>>>>>>             mr->initialized = true;
> >>>>>>>> +
> >>>>>>>> +       return 0;
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> >>>>>>>> +                               struct vhost_iotlb *iotlb, unsigned int asid)
> >>>>>>>> +{
> >>>>>>>> +       int err;
> >>>>>>>> +
> >>>>>>>> +       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
> >>>>>>>> +       if (err)
> >>>>>>>> +               return err;
> >>>>>>>> +
> >>>>>>>> +       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
> >>>>>>>> +       if (err)
> >>>>>>>> +               goto out_err;
> >>>>>>>> +
> >>>>>>>>             return 0;
> >>>>>>>>
> >>>>>>>>      out_err:
> >>>>>>>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> >>>>>>>> -               if (iotlb)
> >>>>>>>> -                       destroy_user_mr(mvdev, mr);
> >>>>>>>> -               else
> >>>>>>>> -                       destroy_dma_mr(mvdev, mr);
> >>>>>>>> -       }
> >>>>>>>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> >>>>>>>>
> >>>>>>>>             return err;
> >>>>>>>>      }
> >>>>>>>> --
> >>>>>>>> 2.41.0
> >>>>>>>>
> >>>>>>> _______________________________________________
> >>>>>>> Virtualization mailing list
> >>>>>>> Virtualization@lists.linux-foundation.org
> >>>>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-09  1:42           ` Jason Wang
@ 2023-08-14 14:15             ` Dragos Tatulea
  2023-08-15  1:28               ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Dragos Tatulea @ 2023-08-14 14:15 UTC (permalink / raw)
  To: jasowang
  Cc: virtualization, mst, eperezma, linux-kernel, Gal Pressman, xuanzhuo

On Wed, 2023-08-09 at 09:42 +0800, Jason Wang wrote:
> On Tue, Aug 8, 2023 at 3:24 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > 
> > On Tue, 2023-08-08 at 10:57 +0800, Jason Wang wrote:
> > > On Thu, Aug 3, 2023 at 7:40 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > > > 
> > > > On Thu, 2023-08-03 at 16:03 +0800, Jason Wang wrote:
> > > > > On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com>
> > > > > wrote:
> > > > > > 
> > > > > > The mr->initialized flag is shared between the control vq and data
> > > > > > vq
> > > > > > part of the mr init/uninit. But if the control vq and data vq get
> > > > > > placed
> > > > > > in different ASIDs, it can happen that initializing the control vq
> > > > > > will
> > > > > > prevent the data vq mr from being initialized.
> > > > > > 
> > > > > > This patch consolidates the control and data vq init parts into
> > > > > > their
> > > > > > own init functions. The mr->initialized will now be used for the
> > > > > > data vq
> > > > > > only. The control vq currently doesn't need a flag.
> > > > > > 
> > > > > > The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr
> > > > > > got
> > > > > > split into data and control vq functions which are now also ASID
> > > > > > aware.
> > > > > > 
> > > > > > Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces
> > > > > > for
> > > > > > control and data")
> > > > > > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > > > > > Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
> > > > > > Reviewed-by: Gal Pressman <gal@nvidia.com>
> > > > > > ---
> > > > > >  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
> > > > > >  drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++------
> > > > > > ---
> > > > > >  2 files changed, 71 insertions(+), 27 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > > b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > > index 25fc4120b618..a0420be5059f 100644
> > > > > > --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > > +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > > @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
> > > > > >         struct list_head head;
> > > > > >         unsigned long num_directs;
> > > > > >         unsigned long num_klms;
> > > > > > +       /* state of dvq mr */
> > > > > >         bool initialized;
> > > > > > 
> > > > > >         /* serialize mkey creation and destruction */
> > > > > > diff --git a/drivers/vdpa/mlx5/core/mr.c
> > > > > > b/drivers/vdpa/mlx5/core/mr.c
> > > > > > index 03e543229791..4ae14a248a4b 100644
> > > > > > --- a/drivers/vdpa/mlx5/core/mr.c
> > > > > > +++ b/drivers/vdpa/mlx5/core/mr.c
> > > > > > @@ -489,60 +489,103 @@ static void destroy_user_mr(struct
> > > > > > mlx5_vdpa_dev
> > > > > > *mvdev, struct mlx5_vdpa_mr *mr
> > > > > >         }
> > > > > >  }
> > > > > > 
> > > > > > -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > > > > +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> > > > > > unsigned
> > > > > > int asid)
> > > > > > +{
> > > > > > +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> > > > > > +               return;
> > > > > > +
> > > > > > +       prune_iotlb(mvdev);
> > > > > > +}
> > > > > > +
> > > > > > +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev,
> > > > > > unsigned
> > > > > > int asid)
> > > > > >  {
> > > > > >         struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > > > 
> > > > > > -       mutex_lock(&mr->mkey_mtx);
> > > > > > +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> > > > > > +               return;
> > > > > > +
> > > > > >         if (!mr->initialized)
> > > > > > -               goto out;
> > > > > > +               return;
> > > > > > 
> > > > > > -       prune_iotlb(mvdev);
> > > > > >         if (mr->user_mr)
> > > > > >                 destroy_user_mr(mvdev, mr);
> > > > > >         else
> > > > > >                 destroy_dma_mr(mvdev, mr);
> > > > > > 
> > > > > >         mr->initialized = false;
> > > > > > -out:
> > > > > > +}
> > > > > > +
> > > > > > +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev,
> > > > > > unsigned
> > > > > > int asid)
> > > > > > +{
> > > > > > +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > > > +
> > > > > > +       mutex_lock(&mr->mkey_mtx);
> > > > > > +
> > > > > > +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> > > > > > +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
> > > > > > +
> > > > > >         mutex_unlock(&mr->mkey_mtx);
> > > > > >  }
> > > > > > 
> > > > > > -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> > > > > > -                               struct vhost_iotlb *iotlb, unsigned
> > > > > > int
> > > > > > asid)
> > > > > > +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > > > > +{
> > > > > > +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev-
> > > > > > > group2asid[MLX5_VDPA_CVQ_GROUP]);
> > > > > > +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev-
> > > > > > > group2asid[MLX5_VDPA_DATAVQ_GROUP]);
> > > > > > +}
> > > > > > +
> > > > > > +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> > > > > > +                                   struct vhost_iotlb *iotlb,
> > > > > > +                                   unsigned int asid)
> > > > > > +{
> > > > > > +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> > > > > > +               return 0;
> > > > > > +
> > > > > > +       return dup_iotlb(mvdev, iotlb);
> > > > > 
> > > > > This worries me as conceptually, there should be no difference between
> > > > > dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
> > > > > 
> > > > Are you worried by the changes in this patch or about the possibility of
> > > > having
> > > > 
> > > > The reason for this change is that I noticed if you create one mr in one
> > > > asid
> > > > you could be blocked out from creating another one in a different asid
> > > > due
> > > > to
> > > > mr->initialized being true. To me that seemed problematic. Is it not?
> > > 
> > > My feeling is that mr.c should be device agnostic. It needs to know
> > > nothing about the device details to work. But this patch seems to
> > > break the layer.
> > > 
> > But the same logic was there before (with the exception of cvq not having an
> > init flag anymore). So what am I missing here?
> 
> Nothing, I think you're right.
> 
> I think we can have this patch go first and tweak on top by moving CVQ
> aware logic into the net specific codes.
> 
Is this anything more than a re-org? My plan is to move the cvq mr part from
mlx5_vdpa_dev into mlx5_vdpa_net. Is there anything else that you were expecting
here?

Thanks,
Dragos

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics
  2023-08-14 14:15             ` Dragos Tatulea
@ 2023-08-15  1:28               ` Jason Wang
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2023-08-15  1:28 UTC (permalink / raw)
  To: Dragos Tatulea
  Cc: virtualization, mst, eperezma, linux-kernel, Gal Pressman, xuanzhuo

On Mon, Aug 14, 2023 at 10:15 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
> On Wed, 2023-08-09 at 09:42 +0800, Jason Wang wrote:
> > On Tue, Aug 8, 2023 at 3:24 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > >
> > > On Tue, 2023-08-08 at 10:57 +0800, Jason Wang wrote:
> > > > On Thu, Aug 3, 2023 at 7:40 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > > > >
> > > > > On Thu, 2023-08-03 at 16:03 +0800, Jason Wang wrote:
> > > > > > On Thu, Aug 3, 2023 at 1:13 AM Dragos Tatulea <dtatulea@nvidia.com>
> > > > > > wrote:
> > > > > > >
> > > > > > > The mr->initialized flag is shared between the control vq and data
> > > > > > > vq
> > > > > > > part of the mr init/uninit. But if the control vq and data vq get
> > > > > > > placed
> > > > > > > in different ASIDs, it can happen that initializing the control vq
> > > > > > > will
> > > > > > > prevent the data vq mr from being initialized.
> > > > > > >
> > > > > > > This patch consolidates the control and data vq init parts into
> > > > > > > their
> > > > > > > own init functions. The mr->initialized will now be used for the
> > > > > > > data vq
> > > > > > > only. The control vq currently doesn't need a flag.
> > > > > > >
> > > > > > > The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr
> > > > > > > got
> > > > > > > split into data and control vq functions which are now also ASID
> > > > > > > aware.
> > > > > > >
> > > > > > > Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces
> > > > > > > for
> > > > > > > control and data")
> > > > > > > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > > > > > > Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
> > > > > > > Reviewed-by: Gal Pressman <gal@nvidia.com>
> > > > > > > ---
> > > > > > >  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
> > > > > > >  drivers/vdpa/mlx5/core/mr.c        | 97 +++++++++++++++++++++------
> > > > > > > ---
> > > > > > >  2 files changed, 71 insertions(+), 27 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > > > b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > > > index 25fc4120b618..a0420be5059f 100644
> > > > > > > --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > > > +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > > > @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
> > > > > > >         struct list_head head;
> > > > > > >         unsigned long num_directs;
> > > > > > >         unsigned long num_klms;
> > > > > > > +       /* state of dvq mr */
> > > > > > >         bool initialized;
> > > > > > >
> > > > > > >         /* serialize mkey creation and destruction */
> > > > > > > diff --git a/drivers/vdpa/mlx5/core/mr.c
> > > > > > > b/drivers/vdpa/mlx5/core/mr.c
> > > > > > > index 03e543229791..4ae14a248a4b 100644
> > > > > > > --- a/drivers/vdpa/mlx5/core/mr.c
> > > > > > > +++ b/drivers/vdpa/mlx5/core/mr.c
> > > > > > > @@ -489,60 +489,103 @@ static void destroy_user_mr(struct
> > > > > > > mlx5_vdpa_dev
> > > > > > > *mvdev, struct mlx5_vdpa_mr *mr
> > > > > > >         }
> > > > > > >  }
> > > > > > >
> > > > > > > -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > > > > > +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> > > > > > > unsigned
> > > > > > > int asid)
> > > > > > > +{
> > > > > > > +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> > > > > > > +               return;
> > > > > > > +
> > > > > > > +       prune_iotlb(mvdev);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev,
> > > > > > > unsigned
> > > > > > > int asid)
> > > > > > >  {
> > > > > > >         struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > > > >
> > > > > > > -       mutex_lock(&mr->mkey_mtx);
> > > > > > > +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> > > > > > > +               return;
> > > > > > > +
> > > > > > >         if (!mr->initialized)
> > > > > > > -               goto out;
> > > > > > > +               return;
> > > > > > >
> > > > > > > -       prune_iotlb(mvdev);
> > > > > > >         if (mr->user_mr)
> > > > > > >                 destroy_user_mr(mvdev, mr);
> > > > > > >         else
> > > > > > >                 destroy_dma_mr(mvdev, mr);
> > > > > > >
> > > > > > >         mr->initialized = false;
> > > > > > > -out:
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev,
> > > > > > > unsigned
> > > > > > > int asid)
> > > > > > > +{
> > > > > > > +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > > > > +
> > > > > > > +       mutex_lock(&mr->mkey_mtx);
> > > > > > > +
> > > > > > > +       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> > > > > > > +       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
> > > > > > > +
> > > > > > >         mutex_unlock(&mr->mkey_mtx);
> > > > > > >  }
> > > > > > >
> > > > > > > -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> > > > > > > -                               struct vhost_iotlb *iotlb, unsigned
> > > > > > > int
> > > > > > > asid)
> > > > > > > +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > > > > > +{
> > > > > > > +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev-
> > > > > > > > group2asid[MLX5_VDPA_CVQ_GROUP]);
> > > > > > > +       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev-
> > > > > > > > group2asid[MLX5_VDPA_DATAVQ_GROUP]);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> > > > > > > +                                   struct vhost_iotlb *iotlb,
> > > > > > > +                                   unsigned int asid)
> > > > > > > +{
> > > > > > > +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> > > > > > > +               return 0;
> > > > > > > +
> > > > > > > +       return dup_iotlb(mvdev, iotlb);
> > > > > >
> > > > > > This worries me as conceptually, there should be no difference between
> > > > > > dvq mr and cvq mr. The virtqueue should be loosely coupled with mr.
> > > > > >
> > > > > Are you worried by the changes in this patch or about the possibility of
> > > > > having
> > > > >
> > > > > The reason for this change is that I noticed if you create one mr in one
> > > > > asid
> > > > > you could be blocked out from creating another one in a different asid
> > > > > due
> > > > > to
> > > > > mr->initialized being true. To me that seemed problematic. Is it not?
> > > >
> > > > My feeling is that mr.c should be device agnostic. It needs to know
> > > > nothing about the device details to work. But this patch seems to
> > > > break the layer.
> > > >
> > > But the same logic was there before (with the exception of cvq not having an
> > > init flag anymore). So what am I missing here?
> >
> > Nothing, I think you're right.
> >
> > I think we can have this patch go first and tweak on top by moving CVQ
> > aware logic into the net specific codes.
> >
> Is this anything more than a re-org?

No.

> My plan is to move the cvq mr part from
> mlx5_vdpa_dev into mlx5_vdpa_net. Is there anything else that you were expecting
> here?
>

That's fine.

Thanks

> Thanks,
> Dragos


^ permalink raw reply	[flat|nested] 42+ messages in thread

* [PATCH RFC 0/4] vdpa: decouple reset of iotlb mapping from device reset
  2023-08-14  2:59                   ` Jason Wang
@ 2023-08-15  1:43                     ` Si-Wei Liu
  2023-08-15  1:43                       ` [PATCH RFC 1/4] vdpa: introduce .reset_map operation callback Si-Wei Liu
                                         ` (3 more replies)
  0 siblings, 4 replies; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-15  1:43 UTC (permalink / raw)
  To: jasowang
  Cc: eperezma, gal, linux-kernel, mst, si-wei.liu, virtualization, xuanzhuo

In order to reduce needlessly high setup and teardown cost
of iotlb mapping during live migration, it's crucial to
decouple the vhost-vdpa iotlb abstraction from the virtio
device life cycle, i.e. iotlb mappings should be left
intact across virtio device reset [1]. For it to work, the
on-chip IOMMU parent device should implement a separate
.reset_map() operation callback to restore 1:1 DMA mapping
without having to resort to the .reset() callback, which
is mainly used to reset virtio specific device state.
This new .reset_map() callback will be invoked only when
the vhost-vdpa driver is to be removed and detached from
the vdpa bus, such that other vdpa bus drivers, e.g.
virtio-vdpa, can get the 1:1 DMA mapping back when they
are attached. For context, those on-chip IOMMU parent
devices create the 1:1 DMA mapping at vdpa device add,
and they implicitly destroy that mapping when the first
.set_map or .dma_map callback is invoked.

[1] Reducing vdpa migration downtime because of memory pin / maps
https://www.mail-archive.com/qemu-devel@nongnu.org/msg953755.html
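
To sketch the intended usage on the vhost-vdpa side (illustrative
pseudo-code only, simplified; see patch 3 for the actual change),
the new op would be invoked right after the iotlb of an address
space is purged, and skipped for parents that don't implement it:

	/* in vhost_vdpa_remove_as(), right after vhost_vdpa_iotlb_unmap() */
	if (ops->reset_map)
		ops->reset_map(vdpa, asid);

where ops is the parent's vdpa_config_ops and vdpa is the vdpa
device owned by the vhost-vdpa instance.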

---

Si-Wei Liu (4):
  vdpa: introduce .reset_map operation callback
  vdpa/mlx5: implement .reset_map driver op
  vhost-vdpa: should restore 1:1 dma mapping before detaching driver
  vhost-vdpa: introduce IOTLB_PERSIST backend feature bit

 drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
 drivers/vdpa/mlx5/core/mr.c        | 72 +++++++++++++++++++++-----------------
 drivers/vdpa/mlx5/net/mlx5_vnet.c  | 18 +++++++---
 drivers/vhost/vdpa.c               | 33 ++++++++++++++++-
 include/linux/vdpa.h               |  7 ++++
 include/uapi/linux/vhost_types.h   |  2 ++
 6 files changed, 95 insertions(+), 38 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 42+ messages in thread

* [PATCH RFC 1/4] vdpa: introduce .reset_map operation callback
  2023-08-15  1:43                     ` [PATCH RFC 0/4] vdpa: decouple reset of iotlb mapping from device reset Si-Wei Liu
@ 2023-08-15  1:43                       ` Si-Wei Liu
  2023-08-15  2:21                         ` Jason Wang
  2023-08-15  1:43                       ` [PATCH RFC 2/4] vdpa/mlx5: implement .reset_map driver op Si-Wei Liu
                                         ` (2 subsequent siblings)
  3 siblings, 1 reply; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-15  1:43 UTC (permalink / raw)
  To: jasowang
  Cc: eperezma, gal, linux-kernel, mst, si-wei.liu, virtualization, xuanzhuo

Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
---
 include/linux/vdpa.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index db1b0ea..3a3878d 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -314,6 +314,12 @@ struct vdpa_map_file {
  *				@iova: iova to be unmapped
  *				@size: size of the area
  *				Returns integer: success (0) or error (< 0)
+ * @reset_map:			Reset device memory mapping (optional)
+ *				Needed for device that using device
+ *				specific DMA translation (on-chip IOMMU)
+ *				@vdev: vdpa device
+ *				@asid: address space identifier
+ *				Returns integer: success (0) or error (< 0)
  * @get_vq_dma_dev:		Get the dma device for a specific
  *				virtqueue (optional)
  *				@vdev: vdpa device
@@ -390,6 +396,7 @@ struct vdpa_config_ops {
 		       u64 iova, u64 size, u64 pa, u32 perm, void *opaque);
 	int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid,
 			 u64 iova, u64 size);
+	int (*reset_map)(struct vdpa_device *vdev, unsigned int asid);
 	int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group,
 			      unsigned int asid);
 	struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH RFC 2/4] vdpa/mlx5: implement .reset_map driver op
  2023-08-15  1:43                     ` [PATCH RFC 0/4] vdpa: decouple reset of iotlb mapping from device reset Si-Wei Liu
  2023-08-15  1:43                       ` [PATCH RFC 1/4] vdpa: introduce .reset_map operation callback Si-Wei Liu
@ 2023-08-15  1:43                       ` Si-Wei Liu
  2023-08-15  8:26                         ` Dragos Tatulea
  2023-08-15  1:43                       ` [PATCH RFC 3/4] vhost-vdpa: should restore 1:1 dma mapping before detaching driver Si-Wei Liu
  2023-08-15  1:43                       ` [PATCH RFC 4/4] vhost-vdpa: introduce IOTLB_PERSIST backend feature bit Si-Wei Liu
  3 siblings, 1 reply; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-15  1:43 UTC (permalink / raw)
  To: jasowang
  Cc: eperezma, gal, linux-kernel, mst, si-wei.liu, virtualization, xuanzhuo

This patch is based on top of the "vdpa/mlx5: Fixes
for ASID handling" series [1].

[1] vdpa/mlx5: Fixes for ASID handling
https://lore.kernel.org/virtualization/20230802171231.11001-1-dtatulea@nvidia.com/

Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
---
 drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
 drivers/vdpa/mlx5/core/mr.c        | 72 +++++++++++++++++++++-----------------
 drivers/vdpa/mlx5/net/mlx5_vnet.c  | 18 +++++++---
 3 files changed, 54 insertions(+), 37 deletions(-)

diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index b53420e..5c9a25a 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -123,6 +123,7 @@ int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
 			unsigned int asid);
 void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev);
 void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
+int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
 
 #define mlx5_vdpa_warn(__dev, format, ...)                                                         \
 	dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__,     \
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index 5a1971fc..c8d64fc 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -489,21 +489,15 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
 	}
 }
 
-static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev)
 {
-	if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
-		return;
-
 	prune_iotlb(mvdev);
 }
 
-static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev)
 {
 	struct mlx5_vdpa_mr *mr = &mvdev->mr;
 
-	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
-		return;
-
 	if (!mr->initialized)
 		return;
 
@@ -521,8 +515,10 @@ void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
 
 	mutex_lock(&mr->mkey_mtx);
 
-	_mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
-	_mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
+	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid)
+		_mlx5_vdpa_destroy_dvq_mr(mvdev);
+	if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
+		_mlx5_vdpa_destroy_cvq_mr(mvdev);
 
 	mutex_unlock(&mr->mkey_mtx);
 }
@@ -534,25 +530,17 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
 }
 
 static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
-				    struct vhost_iotlb *iotlb,
-				    unsigned int asid)
+				    struct vhost_iotlb *iotlb)
 {
-	if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
-		return 0;
-
 	return dup_iotlb(mvdev, iotlb);
 }
 
 static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
-				    struct vhost_iotlb *iotlb,
-				    unsigned int asid)
+				    struct vhost_iotlb *iotlb)
 {
 	struct mlx5_vdpa_mr *mr = &mvdev->mr;
 	int err;
 
-	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
-		return 0;
-
 	if (mr->initialized)
 		return 0;
 
@@ -574,20 +562,18 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
 {
 	int err;
 
-	err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
-	if (err)
-		return err;
-
-	err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
-	if (err)
-		goto out_err;
+	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
+		err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
+		if (err)
+			return err;
+	}
+	if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
+		err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb);
+		if (err)
+			return err;
+	}
 
 	return 0;
-
-out_err:
-	_mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
-
-	return err;
 }
 
 int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
@@ -601,6 +587,28 @@ int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
 	return err;
 }
 
+int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+{
+	struct mlx5_vdpa_mr *mr = &mvdev->mr;
+	int err = 0;
+
+	if (asid != 0)
+		return 0;
+
+	mutex_lock(&mr->mkey_mtx);
+	if (!mr->user_mr)
+		goto out;
+	_mlx5_vdpa_destroy_dvq_mr(mvdev);
+	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
+		err = _mlx5_vdpa_create_dvq_mr(mvdev, NULL, 0);
+		if (err)
+			mlx5_vdpa_warn(mvdev, "create DMA MR failed\n");
+	}
+out:
+	mutex_unlock(&mr->mkey_mtx);
+	return err;
+}
+
 int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
 			     bool *change_map, unsigned int asid)
 {
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 61c10ba..399a690 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2816,7 +2816,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
 	unregister_link_notifier(ndev);
 	teardown_driver(ndev);
 	clear_vqs_ready(ndev);
-	mlx5_vdpa_destroy_mr(&ndev->mvdev);
 	ndev->mvdev.status = 0;
 	ndev->mvdev.suspended = false;
 	ndev->cur_num_vqs = 0;
@@ -2827,10 +2826,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
 	init_group_to_asid_map(mvdev);
 	++mvdev->generation;
 
-	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
-		if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
-			mlx5_vdpa_warn(mvdev, "create MR failed\n");
-	}
 	up_write(&ndev->reslock);
 
 	return 0;
@@ -2895,6 +2890,18 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
 	return err;
 }
 
+static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
+{
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	int err;
+
+	down_write(&ndev->reslock);
+	err = mlx5_vdpa_reset_mr(mvdev, asid);
+	up_write(&ndev->reslock);
+	return err;
+}
+
 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -3154,6 +3161,7 @@ static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
 	.set_config = mlx5_vdpa_set_config,
 	.get_generation = mlx5_vdpa_get_generation,
 	.set_map = mlx5_vdpa_set_map,
+	.reset_map = mlx5_vdpa_reset_map,
 	.set_group_asid = mlx5_set_group_asid,
 	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
 	.free = mlx5_vdpa_free,
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH RFC 3/4] vhost-vdpa: should restore 1:1 dma mapping before detaching driver
  2023-08-15  1:43                     ` [PATCH RFC 0/4] vdpa: decouple reset of iotlb mapping from device reset Si-Wei Liu
  2023-08-15  1:43                       ` [PATCH RFC 1/4] vdpa: introduce .reset_map operation callback Si-Wei Liu
  2023-08-15  1:43                       ` [PATCH RFC 2/4] vdpa/mlx5: implement .reset_map driver op Si-Wei Liu
@ 2023-08-15  1:43                       ` Si-Wei Liu
  2023-08-15  2:32                         ` Jason Wang
  2023-08-15  1:43                       ` [PATCH RFC 4/4] vhost-vdpa: introduce IOTLB_PERSIST backend feature bit Si-Wei Liu
  3 siblings, 1 reply; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-15  1:43 UTC (permalink / raw)
  To: jasowang
  Cc: eperezma, gal, linux-kernel, mst, si-wei.liu, virtualization, xuanzhuo

Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
---
 drivers/vhost/vdpa.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b43e868..62b0a01 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -131,6 +131,15 @@ static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
 	return vhost_vdpa_alloc_as(v, asid);
 }
 
+static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
+{
+	struct vdpa_device *vdpa = v->vdpa;
+	const struct vdpa_config_ops *ops = vdpa->config;
+
+	if (ops->reset_map)
+		ops->reset_map(vdpa, asid);
+}
+
 static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
 {
 	struct vhost_vdpa_as *as = asid_to_as(v, asid);
@@ -140,6 +149,14 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
 
 	hlist_del(&as->hash_link);
 	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
+	/*
+	 * Devices with on-chip IOMMU need to restore iotlb
+	 * to 1:1 identity mapping before vhost-vdpa is going
+	 * to be removed and detached from the device. Give
+	 * them a chance to do so, as this cannot be done
+	 * efficiently via the whole-range unmap call above.
+	 */
+	vhost_vdpa_reset_map(v, asid);
 	kfree(as);
 
 	return 0;
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH RFC 4/4] vhost-vdpa: introduce IOTLB_PERSIST backend feature bit
  2023-08-15  1:43                     ` [PATCH RFC 0/4] vdpa: decouple reset of iotlb mapping from device reset Si-Wei Liu
                                         ` (2 preceding siblings ...)
  2023-08-15  1:43                       ` [PATCH RFC 3/4] vhost-vdpa: should restore 1:1 dma mapping before detaching driver Si-Wei Liu
@ 2023-08-15  1:43                       ` Si-Wei Liu
  2023-08-15  2:25                         ` Jason Wang
  3 siblings, 1 reply; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-15  1:43 UTC (permalink / raw)
  To: jasowang
  Cc: eperezma, gal, linux-kernel, mst, si-wei.liu, virtualization, xuanzhuo

Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
---
 drivers/vhost/vdpa.c             | 16 +++++++++++++++-
 include/uapi/linux/vhost_types.h |  2 ++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 62b0a01..75092a7 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -406,6 +406,14 @@ static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
 	return ops->resume;
 }
 
+static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
+{
+	struct vdpa_device *vdpa = v->vdpa;
+	const struct vdpa_config_ops *ops = vdpa->config;
+
+	return (!ops->set_map && !ops->dma_map) || ops->reset_map;
+}
+
 static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
 {
 	struct vdpa_device *vdpa = v->vdpa;
@@ -697,7 +705,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 			return -EFAULT;
 		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
 				 BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
-				 BIT_ULL(VHOST_BACKEND_F_RESUME)))
+				 BIT_ULL(VHOST_BACKEND_F_RESUME) |
+				 BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)))
 			return -EOPNOTSUPP;
 		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
 		     !vhost_vdpa_can_suspend(v))
@@ -705,6 +714,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 		if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
 		     !vhost_vdpa_can_resume(v))
 			return -EOPNOTSUPP;
+		if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) &&
+		     !vhost_vdpa_has_persistent_map(v))
+			return -EOPNOTSUPP;
 		vhost_set_backend_features(&v->vdev, features);
 		return 0;
 	}
@@ -758,6 +770,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
 		if (vhost_vdpa_can_resume(v))
 			features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
+		if (vhost_vdpa_has_persistent_map(v))
+			features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
 		if (copy_to_user(featurep, &features, sizeof(features)))
 			r = -EFAULT;
 		break;
diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
index d3aad12a..a7f8fc6 100644
--- a/include/uapi/linux/vhost_types.h
+++ b/include/uapi/linux/vhost_types.h
@@ -181,5 +181,7 @@ struct vhost_vdpa_iova_range {
 #define VHOST_BACKEND_F_SUSPEND  0x4
 /* Device can be resumed */
 #define VHOST_BACKEND_F_RESUME  0x5
+/* IOTLB don't flush memory mapping across device reset */
+#define VHOST_BACKEND_F_IOTLB_PERSIST  0x6
 
 #endif
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/4] vdpa: introduce .reset_map operation callback
  2023-08-15  1:43                       ` [PATCH RFC 1/4] vdpa: introduce .reset_map operation callback Si-Wei Liu
@ 2023-08-15  2:21                         ` Jason Wang
  2023-08-15 19:49                           ` Si-Wei Liu
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-15  2:21 UTC (permalink / raw)
  To: Si-Wei Liu; +Cc: eperezma, gal, linux-kernel, mst, virtualization, xuanzhuo

On Tue, Aug 15, 2023 at 9:46 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> ---
>  include/linux/vdpa.h | 7 +++++++
>  1 file changed, 7 insertions(+)
>
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index db1b0ea..3a3878d 100644
> --- a/include/linux/vdpa.h
> +++ b/include/linux/vdpa.h
> @@ -314,6 +314,12 @@ struct vdpa_map_file {
>   *                             @iova: iova to be unmapped
>   *                             @size: size of the area
>   *                             Returns integer: success (0) or error (< 0)
> + * @reset_map:                 Reset device memory mapping (optional)
> + *                             Needed for device that using device
> + *                             specific DMA translation (on-chip IOMMU)

This exposes the device internal to the upper layer which is not optimal.

Btw, what's the difference between this and a simple

set_map(NULL)?

Thanks

> + *                             @vdev: vdpa device
> + *                             @asid: address space identifier
> + *                             Returns integer: success (0) or error (< 0)
>   * @get_vq_dma_dev:            Get the dma device for a specific
>   *                             virtqueue (optional)
>   *                             @vdev: vdpa device
> @@ -390,6 +396,7 @@ struct vdpa_config_ops {
>                        u64 iova, u64 size, u64 pa, u32 perm, void *opaque);
>         int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid,
>                          u64 iova, u64 size);
> +       int (*reset_map)(struct vdpa_device *vdev, unsigned int asid);
>         int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group,
>                               unsigned int asid);
>         struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx);
> --
> 1.8.3.1
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 4/4] vhost-vdpa: introduce IOTLB_PERSIST backend feature bit
  2023-08-15  1:43                       ` [PATCH RFC 4/4] vhost-vdpa: introduce IOTLB_PERSIST backend feature bit Si-Wei Liu
@ 2023-08-15  2:25                         ` Jason Wang
  2023-08-15 22:30                           ` Si-Wei Liu
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-15  2:25 UTC (permalink / raw)
  To: Si-Wei Liu; +Cc: eperezma, gal, linux-kernel, mst, virtualization, xuanzhuo

On Tue, Aug 15, 2023 at 9:45 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> ---
>  drivers/vhost/vdpa.c             | 16 +++++++++++++++-
>  include/uapi/linux/vhost_types.h |  2 ++
>  2 files changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index 62b0a01..75092a7 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -406,6 +406,14 @@ static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
>         return ops->resume;
>  }
>
> +static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
> +{
> +       struct vdpa_device *vdpa = v->vdpa;
> +       const struct vdpa_config_ops *ops = vdpa->config;
> +
> +       return (!ops->set_map && !ops->dma_map) || ops->reset_map;

So this means the IOTLB/IOMMU mappings have already been decoupled
from the vdpa reset. So it should have been noticed by the userspace.
I guess we can just fix the simulator and mlx5 then we are fine?

Thanks


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 3/4] vhost-vdpa: should restore 1:1 dma mapping before detaching driver
  2023-08-15  1:43                       ` [PATCH RFC 3/4] vhost-vdpa: should restore 1:1 dma mapping before detaching driver Si-Wei Liu
@ 2023-08-15  2:32                         ` Jason Wang
  2023-08-15 23:09                           ` Si-Wei Liu
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-15  2:32 UTC (permalink / raw)
  To: Si-Wei Liu; +Cc: eperezma, gal, linux-kernel, mst, virtualization, xuanzhuo

On Tue, Aug 15, 2023 at 9:45 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> ---
>  drivers/vhost/vdpa.c | 17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
>
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index b43e868..62b0a01 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -131,6 +131,15 @@ static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
>         return vhost_vdpa_alloc_as(v, asid);
>  }
>
> +static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
> +{
> +       struct vdpa_device *vdpa = v->vdpa;
> +       const struct vdpa_config_ops *ops = vdpa->config;
> +
> +       if (ops->reset_map)
> +               ops->reset_map(vdpa, asid);
> +}
> +
>  static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
>  {
>         struct vhost_vdpa_as *as = asid_to_as(v, asid);
> @@ -140,6 +149,14 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
>
>         hlist_del(&as->hash_link);
>         vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
> +       /*
> +        * Devices with on-chip IOMMU need to restore iotlb
> +        * to 1:1 identity mapping before vhost-vdpa is going
> +        * to be removed and detached from the device. Give
> +        * them a chance to do so, as this cannot be done
> +        * efficiently via the whole-range unmap call above.
> +        */

Same question as before, if 1:1 is restored and the userspace doesn't
do any IOTLB updating. It looks like a security issue? (Assuming IOVA
is PA)

Thanks

> +       vhost_vdpa_reset_map(v, asid);
>         kfree(as);
>
>         return 0;
> --
> 1.8.3.1
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 2/4] vdpa/mlx5: implement .reset_map driver op
  2023-08-15  1:43                       ` [PATCH RFC 2/4] vdpa/mlx5: implement .reset_map driver op Si-Wei Liu
@ 2023-08-15  8:26                         ` Dragos Tatulea
  2023-08-15 23:11                           ` Si-Wei Liu
  0 siblings, 1 reply; 42+ messages in thread
From: Dragos Tatulea @ 2023-08-15  8:26 UTC (permalink / raw)
  To: si-wei.liu, jasowang
  Cc: virtualization, mst, Gal Pressman, linux-kernel, eperezma, xuanzhuo

On Mon, 2023-08-14 at 18:43 -0700, Si-Wei Liu wrote:
> This patch is based on top of the "vdpa/mlx5: Fixes
> for ASID handling" series [1].
> 
> [1] vdpa/mlx5: Fixes for ASID handling
> https://lore.kernel.org/virtualization/20230802171231.11001-1-dtatulea@nvidia.com/
> 
> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> ---
>  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
>  drivers/vdpa/mlx5/core/mr.c        | 72 +++++++++++++++++++++----------------
> -
>  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 18 +++++++---
>  3 files changed, 54 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> index b53420e..5c9a25a 100644
> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> @@ -123,6 +123,7 @@ int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> struct vhost_iotlb *iotlb,
>                         unsigned int asid);
>  void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev);
>  void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int
> asid);
> +int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
>  
>  #define mlx5_vdpa_warn(__dev, format,
> ...)                                                         \
>         dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format,
> __func__, __LINE__,     \
> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> index 5a1971fc..c8d64fc 100644
> --- a/drivers/vdpa/mlx5/core/mr.c
> +++ b/drivers/vdpa/mlx5/core/mr.c
> @@ -489,21 +489,15 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev,
> struct mlx5_vdpa_mr *mr
>         }
>  }
>  
> -static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned
> int asid)
> +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev)
>  {
> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> -               return;
> -
>         prune_iotlb(mvdev);
>  }
>  
> -static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned
> int asid)
> +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev)
>  {
>         struct mlx5_vdpa_mr *mr = &mvdev->mr;
>  
> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> -               return;
> -
>         if (!mr->initialized)
>                 return;
>  
> @@ -521,8 +515,10 @@ void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev
> *mvdev, unsigned int asid)
>  
>         mutex_lock(&mr->mkey_mtx);
>  
> -       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> -       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid)
> +               _mlx5_vdpa_destroy_dvq_mr(mvdev);
> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
> +               _mlx5_vdpa_destroy_cvq_mr(mvdev);
>  
>         mutex_unlock(&mr->mkey_mtx);
>  }
> @@ -534,25 +530,17 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
>  }
>  
>  static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
> -                                   struct vhost_iotlb *iotlb,
> -                                   unsigned int asid)
> +                                   struct vhost_iotlb *iotlb)
>  {
> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
> -               return 0;
> -
>         return dup_iotlb(mvdev, iotlb);
>  }
>  
>  static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
> -                                   struct vhost_iotlb *iotlb,
> -                                   unsigned int asid)
> +                                   struct vhost_iotlb *iotlb)
>  {
>         struct mlx5_vdpa_mr *mr = &mvdev->mr;
>         int err;
>  
> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
> -               return 0;
> -
>         if (mr->initialized)
>                 return 0;
>  
> @@ -574,20 +562,18 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev
> *mvdev,
>  {
>         int err;
>  
> -       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
> -       if (err)
> -               return err;
> -
> -       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
> -       if (err)
> -               goto out_err;
> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
> +               err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
> +               if (err)
> +                       return err;
> +       }
> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
> +               err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb);
> +               if (err)
> +                       return err;
I think you still need the goto here: when CVQ and DVQ fall in the same asid
and there's a CVQ mr creation error, you are left stuck with the DVQ mr.
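
Something like the below (just a sketch against the new helper
signatures in this patch) would keep that cleanup:

        if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
                err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb);
                if (err)
                        return err;
        }
        if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
                err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb);
                if (err)
                        goto out_err;
        }

        return 0;

out_err:
        /* only undo what this call created */
        if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid)
                _mlx5_vdpa_destroy_dvq_mr(mvdev);

        return err;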

> +       }
>  
>         return 0;
> -
> -out_err:
> -       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
> -
> -       return err;
>  }
>  
>  int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
> *iotlb,
> @@ -601,6 +587,28 @@ int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> struct vhost_iotlb *iotlb,
>         return err;
>  }
>  
> +int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
> +{
> +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> +       int err = 0;
> +
> +       if (asid != 0)
> +               return 0;
> +
> +       mutex_lock(&mr->mkey_mtx);
> +       if (!mr->user_mr)
> +               goto out;
> +       _mlx5_vdpa_destroy_dvq_mr(mvdev);
> +       if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
> +               err = _mlx5_vdpa_create_dvq_mr(mvdev, NULL, 0);
> +               if (err)
> +                       mlx5_vdpa_warn(mvdev, "create DMA MR failed\n");
> +       }
> +out:
> +       mutex_unlock(&mr->mkey_mtx);
> +       return err;
> +}
> +
>  int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
> *iotlb,
>                              bool *change_map, unsigned int asid)
>  {
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 61c10ba..399a690 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2816,7 +2816,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>         unregister_link_notifier(ndev);
>         teardown_driver(ndev);
>         clear_vqs_ready(ndev);
> -       mlx5_vdpa_destroy_mr(&ndev->mvdev);
>         ndev->mvdev.status = 0;
>         ndev->mvdev.suspended = false;
>         ndev->cur_num_vqs = 0;
> @@ -2827,10 +2826,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>         init_group_to_asid_map(mvdev);
>         ++mvdev->generation;
>  
> -       if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
> -               if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
> -                       mlx5_vdpa_warn(mvdev, "create MR failed\n");
> -       }
>         up_write(&ndev->reslock);
>  
>         return 0;
> @@ -2895,6 +2890,18 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev,
> unsigned int asid,
>         return err;
>  }
>  
> +static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
> +{
> +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> +       int err;
> +
> +       down_write(&ndev->reslock);
> +       err = mlx5_vdpa_reset_mr(mvdev, asid);
> +       up_write(&ndev->reslock);
> +       return err;
> +}
> +
>  static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
>  {
>         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> @@ -3154,6 +3161,7 @@ static int mlx5_set_group_asid(struct vdpa_device *vdev,
> u32 group,
>         .set_config = mlx5_vdpa_set_config,
>         .get_generation = mlx5_vdpa_get_generation,
>         .set_map = mlx5_vdpa_set_map,
> +       .reset_map = mlx5_vdpa_reset_map,
>         .set_group_asid = mlx5_set_group_asid,
>         .get_vq_dma_dev = mlx5_get_vq_dma_dev,
>         .free = mlx5_vdpa_free,


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/4] vdpa: introduce .reset_map operation callback
  2023-08-15  2:21                         ` Jason Wang
@ 2023-08-15 19:49                           ` Si-Wei Liu
  2023-08-16  1:55                             ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-15 19:49 UTC (permalink / raw)
  To: Jason Wang; +Cc: eperezma, gal, linux-kernel, mst, virtualization, xuanzhuo



On 8/14/2023 7:21 PM, Jason Wang wrote:
> On Tue, Aug 15, 2023 at 9:46 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>> ---
>>   include/linux/vdpa.h | 7 +++++++
>>   1 file changed, 7 insertions(+)
>>
>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>> index db1b0ea..3a3878d 100644
>> --- a/include/linux/vdpa.h
>> +++ b/include/linux/vdpa.h
>> @@ -314,6 +314,12 @@ struct vdpa_map_file {
>>    *                             @iova: iova to be unmapped
>>    *                             @size: size of the area
>>    *                             Returns integer: success (0) or error (< 0)
>> + * @reset_map:                 Reset device memory mapping (optional)
>> + *                             Needed for device that using device
>> + *                             specific DMA translation (on-chip IOMMU)
> This exposes the device internal to the upper layer which is not optimal.
Not sure what is meant by "device internal", but this op callback
just follows the existing convention to describe which kind of vdpa
parent this API targets.

  * @set_map:                    Set device memory mapping (optional)
  *                              Needed for device that using device
  *                              specific DMA translation (on-chip IOMMU)
:
:
  * @dma_map:                    Map an area of PA to IOVA (optional)
  *                              Needed for device that using device
  *                              specific DMA translation (on-chip IOMMU)
  *                              and preferring incremental map.
:
:
  * @dma_unmap:                  Unmap an area of IOVA (optional but
  *                              must be implemented with dma_map)
  *                              Needed for device that using device
  *                              specific DMA translation (on-chip IOMMU)
  *                              and preferring incremental unmap.


> Btw, what's the difference between this and a simple
>
> set_map(NULL)?
I don't think parent drivers support this today - they can accept a
non-NULL iotlb containing an empty map entry, but not a NULL iotlb.
The behavior is undefined, or it may even cause a panic, when a NULL
iotlb is passed in. Further, this doesn't work with .dma_map parent
drivers.

The reason a new op is needed (or is at least better) is that it
allows userspace to tell the reset behavior apart from that of older
kernels (via the F_IOTLB_PERSIST feature bit in patch 4), while this
behavior can vary between parent drivers.
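
To make the userspace side concrete, a minimal probe could look like
the below (just a sketch: the device node path is an assumption, error
handling is trimmed, and the #define only papers over pre-series uapi
headers):

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

#ifndef VHOST_BACKEND_F_IOTLB_PERSIST
#define VHOST_BACKEND_F_IOTLB_PERSIST 0x6
#endif

int main(void)
{
        uint64_t features = 0;
        int fd = open("/dev/vhost-vdpa-0", O_RDWR);

        if (fd < 0 || ioctl(fd, VHOST_GET_BACKEND_FEATURES, &features))
                return 1;

        if (features & (1ULL << VHOST_BACKEND_F_IOTLB_PERSIST))
                printf("iotlb mappings persist across device reset\n");
        else
                printf("older behavior: re-map after device reset\n");
        return 0;
}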

Regards,
-Siwei

>
> Thanks
>
>> + *                             @vdev: vdpa device
>> + *                             @asid: address space identifier
>> + *                             Returns integer: success (0) or error (< 0)
>>    * @get_vq_dma_dev:            Get the dma device for a specific
>>    *                             virtqueue (optional)
>>    *                             @vdev: vdpa device
>> @@ -390,6 +396,7 @@ struct vdpa_config_ops {
>>                         u64 iova, u64 size, u64 pa, u32 perm, void *opaque);
>>          int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid,
>>                           u64 iova, u64 size);
>> +       int (*reset_map)(struct vdpa_device *vdev, unsigned int asid);
>>          int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group,
>>                                unsigned int asid);
>>          struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx);
>> --
>> 1.8.3.1
>>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 4/4] vhost-vdpa: introduce IOTLB_PERSIST backend feature bit
  2023-08-15  2:25                         ` Jason Wang
@ 2023-08-15 22:30                           ` Si-Wei Liu
  2023-08-16  1:48                             ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-15 22:30 UTC (permalink / raw)
  To: Jason Wang; +Cc: eperezma, gal, linux-kernel, mst, virtualization, xuanzhuo



On 8/14/2023 7:25 PM, Jason Wang wrote:
> On Tue, Aug 15, 2023 at 9:45 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>> ---
>>   drivers/vhost/vdpa.c             | 16 +++++++++++++++-
>>   include/uapi/linux/vhost_types.h |  2 ++
>>   2 files changed, 17 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>> index 62b0a01..75092a7 100644
>> --- a/drivers/vhost/vdpa.c
>> +++ b/drivers/vhost/vdpa.c
>> @@ -406,6 +406,14 @@ static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
>>          return ops->resume;
>>   }
>>
>> +static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
>> +{
>> +       struct vdpa_device *vdpa = v->vdpa;
>> +       const struct vdpa_config_ops *ops = vdpa->config;
>> +
>> +       return (!ops->set_map && !ops->dma_map) || ops->reset_map;
> So this means the IOTLB/IOMMU mappings have already been decoupled
> from the vdpa reset.
Not in the sense of the API; it's been coupled since day one in the 
implementations of every on-chip IOMMU parent driver, namely mlx5_vdpa 
and vdpa_sim. Because of that, the later (improper) support for 
virtio-vdpa, from commit 6f5312f80183 ("vdpa/mlx5: Add support for 
running with virtio_vdpa") and 6c3d329e6486 ("vdpa_sim: get rid of DMA 
ops"), misused the .reset() op to realize the 1:1 mapping, creating a 
strong coupling between device reset and reset of iotlb mappings. This 
series tries to rectify that implementation deficiency, while keeping 
userspace working with the older kernel behavior.

>   So it should have been noticed by the userspace.
Yes, userspace had noticed this on-chip IOMMU discrepancy since day one, 
I suppose. Unfortunately there's already code in userspace with this 
assumption in mind that proactively tears down and sets up iotlb mappings 
around vdpa device reset...
> I guess we can just fix the simulator and mlx5 then we are fine?
Only IF we don't care about running new QEMU on older kernels with 
flawed on-chip iommu behavior around reset. But that's a big IF...

Regards,
-Siwei
>
> Thanks
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 3/4] vhost-vdpa: should restore 1:1 dma mapping before detaching driver
  2023-08-15  2:32                         ` Jason Wang
@ 2023-08-15 23:09                           ` Si-Wei Liu
  0 siblings, 0 replies; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-15 23:09 UTC (permalink / raw)
  To: Jason Wang; +Cc: eperezma, gal, linux-kernel, mst, virtualization, xuanzhuo



On 8/14/2023 7:32 PM, Jason Wang wrote:
> On Tue, Aug 15, 2023 at 9:45 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>> ---
>>   drivers/vhost/vdpa.c | 17 +++++++++++++++++
>>   1 file changed, 17 insertions(+)
>>
>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>> index b43e868..62b0a01 100644
>> --- a/drivers/vhost/vdpa.c
>> +++ b/drivers/vhost/vdpa.c
>> @@ -131,6 +131,15 @@ static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
>>          return vhost_vdpa_alloc_as(v, asid);
>>   }
>>
>> +static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
>> +{
>> +       struct vdpa_device *vdpa = v->vdpa;
>> +       const struct vdpa_config_ops *ops = vdpa->config;
>> +
>> +       if (ops->reset_map)
>> +               ops->reset_map(vdpa, asid);
>> +}
>> +
>>   static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
>>   {
>>          struct vhost_vdpa_as *as = asid_to_as(v, asid);
>> @@ -140,6 +149,14 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
>>
>>          hlist_del(&as->hash_link);
>>          vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
>> +       /*
>> +        * Devices with on-chip IOMMU need to restore iotlb
>> +        * to 1:1 identity mapping before vhost-vdpa is going
>> +        * to be removed and detached from the device. Give
>> +        * them a chance to do so, as this cannot be done
>> +        * efficiently via the whole-range unmap call above.
>> +        */
> Same question as before, if 1:1 is restored and the userspace doesn't
> do any IOTLB updating. It looks like a security issue? (Assuming IOVA
> is PA)
This is already flawed independently of this series. It was introduced 
by the two commits I referenced earlier in the other thread. Today 
userspace is already able to do so with device reset, without doing any 
IOTLB update. This series doesn't make it worse, nor does it make it 
better.

FWIW as said earlier, to address this security issue properly we should 
probably set up the 1:1 DMA mapping on demand in virtio_vdpa_probe() 
and tear it down in virtio_vdpa_release_dev(). The question is whether 
virtio-vdpa is the only vdpa bus user that needs the 1:1 DMA mapping, 
or whether it's the other way around and vhost-vdpa is the only 
exception among all vdpa bus drivers in not wanting to start with 1:1 
by default. Knowing that would help a parent vdpa implementation decide 
what kind of mapping it should start with upon creation.
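
A very rough sketch of that idea (the helper below is hypothetical,
only to show where the hook would sit; a symmetric teardown would go
into virtio_vdpa_release_dev()):

/* hypothetical: called early from virtio_vdpa_probe() */
static int virtio_vdpa_setup_identity_map(struct vdpa_device *vdpa)
{
        const struct vdpa_config_ops *ops = vdpa->config;

        /* only parents with an on-chip IOMMU need an explicit 1:1 map */
        if (!ops->set_map && !ops->dma_map)
                return 0;

        /* with this series, .reset_map restores the default 1:1 mapping */
        return ops->reset_map ? ops->reset_map(vdpa, 0) : -EOPNOTSUPP;
}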

Regards,
-Siwei



>
> Thanks
>
>> +       vhost_vdpa_reset_map(v, asid);
>>          kfree(as);
>>
>>          return 0;
>> --
>> 1.8.3.1
>>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 2/4] vdpa/mlx5: implement .reset_map driver op
  2023-08-15  8:26                         ` Dragos Tatulea
@ 2023-08-15 23:11                           ` Si-Wei Liu
  0 siblings, 0 replies; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-15 23:11 UTC (permalink / raw)
  To: Dragos Tatulea, jasowang
  Cc: virtualization, mst, Gal Pressman, linux-kernel, eperezma, xuanzhuo



On 8/15/2023 1:26 AM, Dragos Tatulea wrote:
> On Mon, 2023-08-14 at 18:43 -0700, Si-Wei Liu wrote:
>> This patch is based on top of the "vdpa/mlx5: Fixes
>> for ASID handling" series [1].
>>
>> [1] vdpa/mlx5: Fixes for ASID handling
>> https://lore.kernel.org/virtualization/20230802171231.11001-1-dtatulea@nvidia.com/
>>
>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>> ---
>>   drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
>>   drivers/vdpa/mlx5/core/mr.c        | 72 +++++++++++++++++++++----------------
>> -
>>   drivers/vdpa/mlx5/net/mlx5_vnet.c  | 18 +++++++---
>>   3 files changed, 54 insertions(+), 37 deletions(-)
>>
>> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>> b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>> index b53420e..5c9a25a 100644
>> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>> @@ -123,6 +123,7 @@ int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
>> struct vhost_iotlb *iotlb,
>>                          unsigned int asid);
>>   void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev);
>>   void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int
>> asid);
>> +int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
>>   
>>   #define mlx5_vdpa_warn(__dev, format,
>> ...)                                                         \
>>          dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format,
>> __func__, __LINE__,     \
>> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
>> index 5a1971fc..c8d64fc 100644
>> --- a/drivers/vdpa/mlx5/core/mr.c
>> +++ b/drivers/vdpa/mlx5/core/mr.c
>> @@ -489,21 +489,15 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev,
>> struct mlx5_vdpa_mr *mr
>>          }
>>   }
>>   
>> -static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned
>> int asid)
>> +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev)
>>   {
>> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
>> -               return;
>> -
>>          prune_iotlb(mvdev);
>>   }
>>   
>> -static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned
>> int asid)
>> +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev)
>>   {
>>          struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>   
>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
>> -               return;
>> -
>>          if (!mr->initialized)
>>                  return;
>>   
>> @@ -521,8 +515,10 @@ void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev
>> *mvdev, unsigned int asid)
>>   
>>          mutex_lock(&mr->mkey_mtx);
>>   
>> -       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
>> -       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid)
>> +               _mlx5_vdpa_destroy_dvq_mr(mvdev);
>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
>> +               _mlx5_vdpa_destroy_cvq_mr(mvdev);
>>   
>>          mutex_unlock(&mr->mkey_mtx);
>>   }
>> @@ -534,25 +530,17 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
>>   }
>>   
>>   static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
>> -                                   struct vhost_iotlb *iotlb,
>> -                                   unsigned int asid)
>> +                                   struct vhost_iotlb *iotlb)
>>   {
>> -       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
>> -               return 0;
>> -
>>          return dup_iotlb(mvdev, iotlb);
>>   }
>>   
>>   static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
>> -                                   struct vhost_iotlb *iotlb,
>> -                                   unsigned int asid)
>> +                                   struct vhost_iotlb *iotlb)
>>   {
>>          struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>          int err;
>>   
>> -       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
>> -               return 0;
>> -
>>          if (mr->initialized)
>>                  return 0;
>>   
>> @@ -574,20 +562,18 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev
>> *mvdev,
>>   {
>>          int err;
>>   
>> -       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
>> -       if (err)
>> -               return err;
>> -
>> -       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
>> -       if (err)
>> -               goto out_err;
>> +       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
>> +               err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
>> +               if (err)
>> +                       return err;
>> +       }
>> +       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
>> +               err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb);
>> +               if (err)
>> +                       return err;
> I think you still need the goto here, when CVQ and DVQ fall in same asid and
> there's a CVQ mr creation error, you are left stuck with the DVQ mr.
Yes, you are right, I will fix this in v2. Thank you for spotting this!

-Siwei

>
>> +       }
>>   
>>          return 0;
>> -
>> -out_err:
>> -       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
>> -
>> -       return err;
>>   }
>>   
>>   int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
>> *iotlb,
>> @@ -601,6 +587,28 @@ int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
>> struct vhost_iotlb *iotlb,
>>          return err;
>>   }
>>   
>> +int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
>> +{
>> +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
>> +       int err = 0;
>> +
>> +       if (asid != 0)
>> +               return 0;
>> +
>> +       mutex_lock(&mr->mkey_mtx);
>> +       if (!mr->user_mr)
>> +               goto out;
>> +       _mlx5_vdpa_destroy_dvq_mr(mvdev);
>> +       if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
>> +               err = _mlx5_vdpa_create_dvq_mr(mvdev, NULL, 0);
>> +               if (err)
>> +                       mlx5_vdpa_warn(mvdev, "create DMA MR failed\n");
>> +       }
>> +out:
>> +       mutex_unlock(&mr->mkey_mtx);
>> +       return err;
>> +}
>> +
>>   int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
>> *iotlb,
>>                               bool *change_map, unsigned int asid)
>>   {
>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> index 61c10ba..399a690 100644
>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> @@ -2816,7 +2816,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>>          unregister_link_notifier(ndev);
>>          teardown_driver(ndev);
>>          clear_vqs_ready(ndev);
>> -       mlx5_vdpa_destroy_mr(&ndev->mvdev);
>>          ndev->mvdev.status = 0;
>>          ndev->mvdev.suspended = false;
>>          ndev->cur_num_vqs = 0;
>> @@ -2827,10 +2826,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>>          init_group_to_asid_map(mvdev);
>>          ++mvdev->generation;
>>   
>> -       if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
>> -               if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
>> -                       mlx5_vdpa_warn(mvdev, "create MR failed\n");
>> -       }
>>          up_write(&ndev->reslock);
>>   
>>          return 0;
>> @@ -2895,6 +2890,18 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev,
>> unsigned int asid,
>>          return err;
>>   }
>>   
>> +static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
>> +{
>> +       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>> +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>> +       int err;
>> +
>> +       down_write(&ndev->reslock);
>> +       err = mlx5_vdpa_reset_mr(mvdev, asid);
>> +       up_write(&ndev->reslock);
>> +       return err;
>> +}
>> +
>>   static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
>>   {
>>          struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>> @@ -3154,6 +3161,7 @@ static int mlx5_set_group_asid(struct vdpa_device *vdev,
>> u32 group,
>>          .set_config = mlx5_vdpa_set_config,
>>          .get_generation = mlx5_vdpa_get_generation,
>>          .set_map = mlx5_vdpa_set_map,
>> +       .reset_map = mlx5_vdpa_reset_map,
>>          .set_group_asid = mlx5_set_group_asid,
>>          .get_vq_dma_dev = mlx5_get_vq_dma_dev,
>>          .free = mlx5_vdpa_free,


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 4/4] vhost-vdpa: introduce IOTLB_PERSIST backend feature bit
  2023-08-15 22:30                           ` Si-Wei Liu
@ 2023-08-16  1:48                             ` Jason Wang
  2023-08-16 23:43                               ` Si-Wei Liu
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-16  1:48 UTC (permalink / raw)
  To: Si-Wei Liu; +Cc: eperezma, gal, linux-kernel, mst, virtualization, xuanzhuo

On Wed, Aug 16, 2023 at 6:31 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 8/14/2023 7:25 PM, Jason Wang wrote:
> > On Tue, Aug 15, 2023 at 9:45 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> >> ---
> >>   drivers/vhost/vdpa.c             | 16 +++++++++++++++-
> >>   include/uapi/linux/vhost_types.h |  2 ++
> >>   2 files changed, 17 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> >> index 62b0a01..75092a7 100644
> >> --- a/drivers/vhost/vdpa.c
> >> +++ b/drivers/vhost/vdpa.c
> >> @@ -406,6 +406,14 @@ static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
> >>          return ops->resume;
> >>   }
> >>
> >> +static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
> >> +{
> >> +       struct vdpa_device *vdpa = v->vdpa;
> >> +       const struct vdpa_config_ops *ops = vdpa->config;
> >> +
> >> +       return (!ops->set_map && !ops->dma_map) || ops->reset_map;
> > So this means the IOTLB/IOMMU mappings have already been decoupled
> > from the vdpa reset.
> Not in the sense of API, it' been coupled since day one from the
> implementations of every on-chip IOMMU parent driver, namely mlx5_vdpa
> and vdpa_sim. Because of that, later on the (improper) support for
> virtio-vdpa, from commit 6f5312f80183 ("vdpa/mlx5: Add support for
> running with virtio_vdpa") and 6c3d329e6486 ("vdpa_sim: get rid of DMA
> ops") misused the .reset() op to realize 1:1 mapping, rendering strong
> coupling between device reset and reset of iotlb mappings. This series
> try to rectify that implementation deficiency, while keep userspace
> continuing to work with older kernel behavior.
>
> >   So it should have been noticed by the userspace.
> Yes, userspace had noticed this no-chip IOMMU discrepancy since day one
> I suppose. Unfortunately there's already code in userspace with this
> assumption in mind that proactively tears down and sets up iotlb mapping
> around vdpa device reset...
> > I guess we can just fix the simulator and mlx5 then we are fine?
> Only IF we don't care about running new QEMU on older kernels with
> flawed on-chip iommu behavior around reset. But that's a big IF...

So what I meant is:

Userspace doesn't know whether the vendor specific mappings (set_map)
are required or not. And in the implementation of vhost_vdpa, if the
platform IOMMU is used, the mappings are decoupled from the reset. So
if QEMU works with parents that use the platform IOMMU, it means QEMU
can work if we just decouple the vendor specific mappings from the
parents that use set_map.

Thanks

>
> Regards,
> -Siwei
> >
> > Thanks
> >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/4] vdpa: introduce .reset_map operation callback
  2023-08-15 19:49                           ` Si-Wei Liu
@ 2023-08-16  1:55                             ` Jason Wang
  2023-08-17  0:05                               ` Si-Wei Liu
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-16  1:55 UTC (permalink / raw)
  To: Si-Wei Liu; +Cc: eperezma, gal, linux-kernel, mst, virtualization, xuanzhuo

On Wed, Aug 16, 2023 at 3:49 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 8/14/2023 7:21 PM, Jason Wang wrote:
> > On Tue, Aug 15, 2023 at 9:46 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> >> ---
> >>   include/linux/vdpa.h | 7 +++++++
> >>   1 file changed, 7 insertions(+)
> >>
> >> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> >> index db1b0ea..3a3878d 100644
> >> --- a/include/linux/vdpa.h
> >> +++ b/include/linux/vdpa.h
> >> @@ -314,6 +314,12 @@ struct vdpa_map_file {
> >>    *                             @iova: iova to be unmapped
> >>    *                             @size: size of the area
> >>    *                             Returns integer: success (0) or error (< 0)
> >> + * @reset_map:                 Reset device memory mapping (optional)
> >> + *                             Needed for device that using device
> >> + *                             specific DMA translation (on-chip IOMMU)
> > This exposes the device internal to the upper layer which is not optimal.
> Not sure what does it mean by "device internal", but this op callback
> just follows existing convention to describe what vdpa parent this API
> targets.

I meant the bus tries to hide the differences among vendors. So it
needs to hide the on-chip IOMMU stuff from the upper layer.

We could expose a two-dimensional IO mapping model, but that looks like
over-engineering for this issue. More below.

>
>   * @set_map:                    Set device memory mapping (optional)
>   *                              Needed for device that using device
>   *                              specific DMA translation (on-chip IOMMU)
> :
> :
>   * @dma_map:                    Map an area of PA to IOVA (optional)
>   *                              Needed for device that using device
>   *                              specific DMA translation (on-chip IOMMU)
>   *                              and preferring incremental map.
> :
> :
>   * @dma_unmap:                  Unmap an area of IOVA (optional but
>   *                              must be implemented with dma_map)
>   *                              Needed for device that using device
>   *                              specific DMA translation (on-chip IOMMU)
>   *                              and preferring incremental unmap.
>
>
> > Btw, what's the difference between this and a simple
> >
> > set_map(NULL)?
> I don't think parent drivers support this today - they can accept
> non-NULL iotlb containing empty map entry, but not a NULL iotlb. The
> behavior is undefined or it even causes panic when a NULL iotlb is
> passed in.

We can do this simple change if it can work.

>  Further this doesn't work with .dma_map parent drivers.

Probably, but I'd remove dma_map as it doesn't have any real users
except for the simulator.

>
> The reason why a new op is needed or better is because it allows
> userspace to tell apart different reset behavior from the older kernel
> (via the F_IOTLB_PERSIST feature bit in patch 4), while this behavior
> could vary between parent drivers.

I'm ok with a new feature flag, but we need to first seek a way to
reuse the existing API.

Thanks

>
> Regards,
> -Siwei
>
> >
> > Thanks
> >
> >> + *                             @vdev: vdpa device
> >> + *                             @asid: address space identifier
> >> + *                             Returns integer: success (0) or error (< 0)
> >>    * @get_vq_dma_dev:            Get the dma device for a specific
> >>    *                             virtqueue (optional)
> >>    *                             @vdev: vdpa device
> >> @@ -390,6 +396,7 @@ struct vdpa_config_ops {
> >>                         u64 iova, u64 size, u64 pa, u32 perm, void *opaque);
> >>          int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid,
> >>                           u64 iova, u64 size);
> >> +       int (*reset_map)(struct vdpa_device *vdev, unsigned int asid);
> >>          int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group,
> >>                                unsigned int asid);
> >>          struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx);
> >> --
> >> 1.8.3.1
> >>
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 4/4] vhost-vdpa: introduce IOTLB_PERSIST backend feature bit
  2023-08-16  1:48                             ` Jason Wang
@ 2023-08-16 23:43                               ` Si-Wei Liu
  2023-08-22  8:54                                 ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-16 23:43 UTC (permalink / raw)
  To: Jason Wang; +Cc: eperezma, gal, linux-kernel, mst, virtualization, xuanzhuo



On 8/15/2023 6:48 PM, Jason Wang wrote:
> On Wed, Aug 16, 2023 at 6:31 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 8/14/2023 7:25 PM, Jason Wang wrote:
>>> On Tue, Aug 15, 2023 at 9:45 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>>>> ---
>>>>    drivers/vhost/vdpa.c             | 16 +++++++++++++++-
>>>>    include/uapi/linux/vhost_types.h |  2 ++
>>>>    2 files changed, 17 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>>> index 62b0a01..75092a7 100644
>>>> --- a/drivers/vhost/vdpa.c
>>>> +++ b/drivers/vhost/vdpa.c
>>>> @@ -406,6 +406,14 @@ static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
>>>>           return ops->resume;
>>>>    }
>>>>
>>>> +static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
>>>> +{
>>>> +       struct vdpa_device *vdpa = v->vdpa;
>>>> +       const struct vdpa_config_ops *ops = vdpa->config;
>>>> +
>>>> +       return (!ops->set_map && !ops->dma_map) || ops->reset_map;
>>> So this means the IOTLB/IOMMU mappings have already been decoupled
>>> from the vdpa reset.
>> Not in the sense of API, it' been coupled since day one from the
>> implementations of every on-chip IOMMU parent driver, namely mlx5_vdpa
>> and vdpa_sim. Because of that, later on the (improper) support for
>> virtio-vdpa, from commit 6f5312f80183 ("vdpa/mlx5: Add support for
>> running with virtio_vdpa") and 6c3d329e6486 ("vdpa_sim: get rid of DMA
>> ops") misused the .reset() op to realize 1:1 mapping, rendering strong
>> coupling between device reset and reset of iotlb mappings. This series
>> try to rectify that implementation deficiency, while keep userspace
>> continuing to work with older kernel behavior.
>>
>>>    So it should have been noticed by the userspace.
>> Yes, userspace had noticed this no-chip IOMMU discrepancy since day one
>> I suppose. Unfortunately there's already code in userspace with this
>> assumption in mind that proactively tears down and sets up iotlb mapping
>> around vdpa device reset...
>>> I guess we can just fix the simulator and mlx5 then we are fine?
>> Only IF we don't care about running new QEMU on older kernels with
>> flawed on-chip iommu behavior around reset. But that's a big IF...
> So what I meant is:
>
> Userspace doesn't know whether the vendor specific mappings (set_map)
> are required or not. And in the implementation of vhost_vdpa, if
> platform IOMMU is used, the mappings are decoupled from the reset. So
> if the Qemu works with parents with platform IOMMU it means Qemu can
> work if we just decouple vendor specific mappings from the parents
> that uses set_map.
I was aware of this, and if you may notice I don't even offer a way 
backward to retain/emulate the flawed vhost-iotlb reset behavior for 
older userspace - I consider it more of a bug in .set_map driver 
implementation of its own rather than what the vhost-vdpa iotlb 
abstraction wishes to expose to userspace in the first place.

If you ever look into QEMU's vhost_vdpa_reset_status() function, you may 
see memory_listener_unregister() will be called to evict all of the 
existing iotlb mappings right after vhost_vdpa_reset_device() across 
device reset, and later on at vhost_vdpa_dev_start(), 
memory_listener_register() will set up all iotlb mappings again. In an 
ideal world without this on-chip iommu deficiency QEMU should not have 
to behave this way - this is what I mentioned earlier that userspace had 
already noticed the discrepancy and it has to "proactively tear down and 
set up iotlb mapping around vdpa device reset". Apparently from 
functionality perspective this trick works completely fine with platform 
IOMMU, however, it's sub-optimal in the performance perspective.

We can't simply fix QEMU by moving this memory_listener_unregister() 
call out of the reset path unconditionally, as we don't want to break 
the already-functioning older kernel even though it's suboptimal in 
performance. Instead, to keep new QEMU continuing to work on top of the 
existing or older kernels, QEMU has to check this IOTLB_PERSIST feature 
flag to decide whether it is safe not to bother flushing and setting up 
iotlb across reset. For the platform IOMMU case, vdpa parent driver 
won't implement either the .set_map or .dma_map op, so it should be 
covered in the vhost_vdpa_has_persistent_map() check I suppose.
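A rough sketch of how the userspace-side check could look (the feature bit
number and the helper names below are purely illustrative, not final):

/* Probe the backend feature once, then decide whether the costly
 * unmap/remap cycle around device reset can be skipped. The bit value
 * used here is a placeholder for whatever the final series defines.
 */
#include <stdbool.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

#ifndef VHOST_BACKEND_F_IOTLB_PERSIST
#define VHOST_BACKEND_F_IOTLB_PERSIST 0x8	/* placeholder bit number */
#endif

static bool vdpa_iotlb_persists(int vhost_vdpa_fd)
{
	uint64_t features = 0;

	if (ioctl(vhost_vdpa_fd, VHOST_GET_BACKEND_FEATURES, &features) < 0)
		return false;	/* old kernel: assume mappings are flushed */

	return features & (1ULL << VHOST_BACKEND_F_IOTLB_PERSIST);
}

/* Around reset, userspace could then do something like:
 *
 *	if (!vdpa_iotlb_persists(fd))
 *		tear_down_and_replay_mappings();  // keep today's behavior
 *	// else: keep mappings across reset, skipping the replay cost
 */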


Thanks,
-Siwei
> Thanks
>
>> Regards,
>> -Siwei
>>> Thanks
>>>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/4] vdpa: introduce .reset_map operation callback
  2023-08-16  1:55                             ` Jason Wang
@ 2023-08-17  0:05                               ` Si-Wei Liu
  2023-08-17 15:28                                 ` Eugenio Perez Martin
  0 siblings, 1 reply; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-17  0:05 UTC (permalink / raw)
  To: Jason Wang; +Cc: eperezma, gal, linux-kernel, mst, virtualization, xuanzhuo



On 8/15/2023 6:55 PM, Jason Wang wrote:
> On Wed, Aug 16, 2023 at 3:49 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 8/14/2023 7:21 PM, Jason Wang wrote:
>>> On Tue, Aug 15, 2023 at 9:46 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>>>> ---
>>>>    include/linux/vdpa.h | 7 +++++++
>>>>    1 file changed, 7 insertions(+)
>>>>
>>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>>>> index db1b0ea..3a3878d 100644
>>>> --- a/include/linux/vdpa.h
>>>> +++ b/include/linux/vdpa.h
>>>> @@ -314,6 +314,12 @@ struct vdpa_map_file {
>>>>     *                             @iova: iova to be unmapped
>>>>     *                             @size: size of the area
>>>>     *                             Returns integer: success (0) or error (< 0)
>>>> + * @reset_map:                 Reset device memory mapping (optional)
>>>> + *                             Needed for device that using device
>>>> + *                             specific DMA translation (on-chip IOMMU)
>>> This exposes the device internal to the upper layer which is not optimal.
>> Not sure what does it mean by "device internal", but this op callback
>> just follows existing convention to describe what vdpa parent this API
>> targets.
> I meant the bus tries to hide the differences among vendors. So it
> needs to hide on-chip IOMMU stuff to the upper layer.
>
> We can expose two dimensional IO mappings models but it looks like
> over engineering for this issue. More below.
>
>>    * @set_map:                    Set device memory mapping (optional)
>>    *                              Needed for device that using device
>>    *                              specific DMA translation (on-chip IOMMU)
>> :
>> :
>>    * @dma_map:                    Map an area of PA to IOVA (optional)
>>    *                              Needed for device that using device
>>    *                              specific DMA translation (on-chip IOMMU)
>>    *                              and preferring incremental map.
>> :
>> :
>>    * @dma_unmap:                  Unmap an area of IOVA (optional but
>>    *                              must be implemented with dma_map)
>>    *                              Needed for device that using device
>>    *                              specific DMA translation (on-chip IOMMU)
>>    *                              and preferring incremental unmap.
>>
>>
>>> Btw, what's the difference between this and a simple
>>>
>>> set_map(NULL)?
>> I don't think parent drivers support this today - they can accept
>> non-NULL iotlb containing empty map entry, but not a NULL iotlb. The
>> behavior is undefined or it even causes panic when a NULL iotlb is
>> passed in.
> We can do this simple change if it can work.
If we go with setting up 1:1 DMA mapping at virtio-vdpa .probe() and 
tearing it down at .release(), perhaps set_map(NULL) is not sufficient.
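To make that concrete, a minimal sketch (the function below is made up for
this mail; the vhost_iotlb_*() helpers and the .set_map signature are the
existing ones): expressing the 1:1 case through set_map means handing the
parent a populated identity iotlb, which a bare set_map(NULL) cannot convey.

#include <linux/limits.h>
#include <linux/vdpa.h>
#include <linux/vhost_iotlb.h>

/* Sketch only: build a single identity entry covering the whole IOVA
 * space and hand it to the parent via .set_map. Whether a parent treats
 * this as "switch to 1:1 DMA mode" is exactly the semantics in question.
 */
static int example_restore_identity_map(struct vdpa_device *vdev,
					unsigned int asid)
{
	struct vhost_iotlb *iotlb;
	int err;

	iotlb = vhost_iotlb_alloc(1, 0);	/* room for one entry */
	if (!iotlb)
		return -ENOMEM;

	/* Map the whole IOVA range onto itself, read/write. */
	err = vhost_iotlb_add_range(iotlb, 0, ULLONG_MAX - 1, 0,
				    VHOST_MAP_RW);
	if (!err)
		err = vdev->config->set_map(vdev, asid, iotlb);

	vhost_iotlb_free(iotlb);
	return err;
}
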
>
>>   Further this doesn't work with .dma_map parent drivers.
> Probably, but I'd remove dma_map as it doesn't have any real users
> except for the simulator.
OK, at one point there was a suggestion to get this incremental API extended 
to support batching, to be on par with or even replace .set_map; not sure 
if it's too soon to conclude. But I'm okay with the removal if need be.
>
>> The reason why a new op is needed or better is because it allows
>> userspace to tell apart different reset behavior from the older kernel
>> (via the F_IOTLB_PERSIST feature bit in patch 4), while this behavior
>> could vary between parent drivers.
> I'm ok with a new feature flag, but we need to first seek a way to
> reuse the existing API.
A feature flag is needed anyway. I'm fine with reusing but guess I'd 
want to converge on the direction first.

Thanks,
-Siwei
>
> Thanks
>
>> Regards,
>> -Siwei
>>
>>> Thanks
>>>
>>>> + *                             @vdev: vdpa device
>>>> + *                             @asid: address space identifier
>>>> + *                             Returns integer: success (0) or error (< 0)
>>>>     * @get_vq_dma_dev:            Get the dma device for a specific
>>>>     *                             virtqueue (optional)
>>>>     *                             @vdev: vdpa device
>>>> @@ -390,6 +396,7 @@ struct vdpa_config_ops {
>>>>                          u64 iova, u64 size, u64 pa, u32 perm, void *opaque);
>>>>           int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid,
>>>>                            u64 iova, u64 size);
>>>> +       int (*reset_map)(struct vdpa_device *vdev, unsigned int asid);
>>>>           int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group,
>>>>                                 unsigned int asid);
>>>>           struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx);
>>>> --
>>>> 1.8.3.1
>>>>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/4] vdpa: introduce .reset_map operation callback
  2023-08-17  0:05                               ` Si-Wei Liu
@ 2023-08-17 15:28                                 ` Eugenio Perez Martin
  2023-08-21 22:31                                   ` Si-Wei Liu
  0 siblings, 1 reply; 42+ messages in thread
From: Eugenio Perez Martin @ 2023-08-17 15:28 UTC (permalink / raw)
  To: Si-Wei Liu; +Cc: Jason Wang, gal, linux-kernel, mst, virtualization, xuanzhuo

On Thu, Aug 17, 2023 at 2:05 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 8/15/2023 6:55 PM, Jason Wang wrote:
> > On Wed, Aug 16, 2023 at 3:49 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>
> >>
> >> On 8/14/2023 7:21 PM, Jason Wang wrote:
> >>> On Tue, Aug 15, 2023 at 9:46 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> >>>> ---
> >>>>    include/linux/vdpa.h | 7 +++++++
> >>>>    1 file changed, 7 insertions(+)
> >>>>
> >>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> >>>> index db1b0ea..3a3878d 100644
> >>>> --- a/include/linux/vdpa.h
> >>>> +++ b/include/linux/vdpa.h
> >>>> @@ -314,6 +314,12 @@ struct vdpa_map_file {
> >>>>     *                             @iova: iova to be unmapped
> >>>>     *                             @size: size of the area
> >>>>     *                             Returns integer: success (0) or error (< 0)
> >>>> + * @reset_map:                 Reset device memory mapping (optional)
> >>>> + *                             Needed for device that using device
> >>>> + *                             specific DMA translation (on-chip IOMMU)
> >>> This exposes the device internal to the upper layer which is not optimal.
> >> Not sure what does it mean by "device internal", but this op callback
> >> just follows existing convention to describe what vdpa parent this API
> >> targets.
> > I meant the bus tries to hide the differences among vendors. So it
> > needs to hide on-chip IOMMU stuff to the upper layer.
> >
> > We can expose two dimensional IO mappings models but it looks like
> > over engineering for this issue. More below.
> >
> >>    * @set_map:                    Set device memory mapping (optional)
> >>    *                              Needed for device that using device
> >>    *                              specific DMA translation (on-chip IOMMU)
> >> :
> >> :
> >>    * @dma_map:                    Map an area of PA to IOVA (optional)
> >>    *                              Needed for device that using device
> >>    *                              specific DMA translation (on-chip IOMMU)
> >>    *                              and preferring incremental map.
> >> :
> >> :
> >>    * @dma_unmap:                  Unmap an area of IOVA (optional but
> >>    *                              must be implemented with dma_map)
> >>    *                              Needed for device that using device
> >>    *                              specific DMA translation (on-chip IOMMU)
> >>    *                              and preferring incremental unmap.
> >>
> >>
> >>> Btw, what's the difference between this and a simple
> >>>
> >>> set_map(NULL)?
> >> I don't think parent drivers support this today - they can accept
> >> non-NULL iotlb containing empty map entry, but not a NULL iotlb. The
> >> behavior is undefined or it even causes panic when a NULL iotlb is
> >> passed in.
> > We can do this simple change if it can work.
> If we go with setting up 1:1 DMA mapping at virtio-vdpa .probe() and
> tearing it down at .release(), perhaps set_map(NULL) is not sufficient.
> >
> >>   Further this doesn't work with .dma_map parent drivers.
> > Probably, but I'd remove dma_map as it doesn't have any real users
> > except for the simulator.
> OK, at one point there was a suggestion to get this incremental API extended
> to support batching, to be on par with or even replace .set_map; not sure
> if it's too soon to conclude. But I'm okay with the removal if need be.

Yes, I think the right move in the long run is to delegate the
batching to the parent driver. This allows drivers like mlx5 to add
memory (like hotplugged memory) without needing to tear down all
the old maps.
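
As a rough sketch of the difference (the function below is illustrative;
the two ops signatures are the existing vdpa_config_ops ones):

#include <linux/vdpa.h>
#include <linux/vhost_iotlb.h>

/* Sketch: mapping a hotplugged region. An incremental parent only sees
 * the new range; a set_map parent gets the whole (extended) iotlb again
 * and has to rebuild its translations from scratch.
 */
static int example_map_hotplugged_region(struct vdpa_device *vdev,
					 unsigned int asid,
					 struct vhost_iotlb *full_iotlb,
					 u64 iova, u64 size, u64 pa)
{
	const struct vdpa_config_ops *ops = vdev->config;
	int err;

	if (ops->dma_map)
		/* Incremental parent: communicate just the new range. */
		return ops->dma_map(vdev, asid, iova, size, pa,
				    VHOST_MAP_RW, NULL);

	/* set_map parent: extend the full iotlb and replay it in one shot. */
	err = vhost_iotlb_add_range(full_iotlb, iova, iova + size - 1,
				    pa, VHOST_MAP_RW);
	if (err)
		return err;

	return ops->set_map(vdev, asid, full_iotlb);
}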

Having said that, maybe we can work on top if we need to remove
.dma_map for now.

> >
> >> The reason why a new op is needed or better is because it allows
> >> userspace to tell apart different reset behavior from the older kernel
> >> (via the F_IOTLB_PERSIST feature bit in patch 4), while this behavior
> >> could vary between parent drivers.
> > I'm ok with a new feature flag, but we need to first seek a way to
> > reuse the existing API.
> A feature flag is needed anyway. I'm fine with reusing but guess I'd
> want to converge on the direction first.
>
> Thanks,
> -Siwei
> >
> > Thanks
> >
> >> Regards,
> >> -Siwei
> >>
> >>> Thanks
> >>>
> >>>> + *                             @vdev: vdpa device
> >>>> + *                             @asid: address space identifier
> >>>> + *                             Returns integer: success (0) or error (< 0)
> >>>>     * @get_vq_dma_dev:            Get the dma device for a specific
> >>>>     *                             virtqueue (optional)
> >>>>     *                             @vdev: vdpa device
> >>>> @@ -390,6 +396,7 @@ struct vdpa_config_ops {
> >>>>                          u64 iova, u64 size, u64 pa, u32 perm, void *opaque);
> >>>>           int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid,
> >>>>                            u64 iova, u64 size);
> >>>> +       int (*reset_map)(struct vdpa_device *vdev, unsigned int asid);
> >>>>           int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group,
> >>>>                                 unsigned int asid);
> >>>>           struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx);
> >>>> --
> >>>> 1.8.3.1
> >>>>
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 1/4] vdpa: introduce .reset_map operation callback
  2023-08-17 15:28                                 ` Eugenio Perez Martin
@ 2023-08-21 22:31                                   ` Si-Wei Liu
  0 siblings, 0 replies; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-21 22:31 UTC (permalink / raw)
  To: Eugenio Perez Martin
  Cc: Jason Wang, gal, linux-kernel, mst, virtualization, xuanzhuo



On 8/17/2023 8:28 AM, Eugenio Perez Martin wrote:
> On Thu, Aug 17, 2023 at 2:05 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 8/15/2023 6:55 PM, Jason Wang wrote:
>>> On Wed, Aug 16, 2023 at 3:49 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>>
>>>> On 8/14/2023 7:21 PM, Jason Wang wrote:
>>>>> On Tue, Aug 15, 2023 at 9:46 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>>>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>>>>>> ---
>>>>>>     include/linux/vdpa.h | 7 +++++++
>>>>>>     1 file changed, 7 insertions(+)
>>>>>>
>>>>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>>>>>> index db1b0ea..3a3878d 100644
>>>>>> --- a/include/linux/vdpa.h
>>>>>> +++ b/include/linux/vdpa.h
>>>>>> @@ -314,6 +314,12 @@ struct vdpa_map_file {
>>>>>>      *                             @iova: iova to be unmapped
>>>>>>      *                             @size: size of the area
>>>>>>      *                             Returns integer: success (0) or error (< 0)
>>>>>> + * @reset_map:                 Reset device memory mapping (optional)
>>>>>> + *                             Needed for device that using device
>>>>>> + *                             specific DMA translation (on-chip IOMMU)
>>>>> This exposes the device internal to the upper layer which is not optimal.
>>>> Not sure what does it mean by "device internal", but this op callback
>>>> just follows existing convention to describe what vdpa parent this API
>>>> targets.
>>> I meant the bus tries to hide the differences among vendors. So it
>>> needs to hide on-chip IOMMU stuff to the upper layer.
>>>
>>> We can expose two dimensional IO mappings models but it looks like
>>> over engineering for this issue. More below.
>>>
>>>>     * @set_map:                    Set device memory mapping (optional)
>>>>     *                              Needed for device that using device
>>>>     *                              specific DMA translation (on-chip IOMMU)
>>>> :
>>>> :
>>>>     * @dma_map:                    Map an area of PA to IOVA (optional)
>>>>     *                              Needed for device that using device
>>>>     *                              specific DMA translation (on-chip IOMMU)
>>>>     *                              and preferring incremental map.
>>>> :
>>>> :
>>>>     * @dma_unmap:                  Unmap an area of IOVA (optional but
>>>>     *                              must be implemented with dma_map)
>>>>     *                              Needed for device that using device
>>>>     *                              specific DMA translation (on-chip IOMMU)
>>>>     *                              and preferring incremental unmap.
>>>>
>>>>
>>>>> Btw, what's the difference between this and a simple
>>>>>
>>>>> set_map(NULL)?
>>>> I don't think parent drivers support this today - they can accept
>>>> non-NULL iotlb containing empty map entry, but not a NULL iotlb. The
>>>> behavior is undefined or it even causes panic when a NULL iotlb is
>>>> passed in.
>>> We can do this simple change if it can work.
>> If we go with setting up 1:1 DMA mapping at virtio-vdpa .probe() and
>> tearing it down at .release(), perhaps set_map(NULL) is not sufficient.
>>>>    Further this doesn't work with .dma_map parent drivers.
>>> Probably, but I'd remove dma_map as it doesn't have any real users
>>> except for the simulator.
>> OK, at one point there was a suggestion to get this incremental API extended
>> to support batching, to be on par with or even replace .set_map; not sure
>> if it's too soon to conclude. But I'm okay with the removal if need be.
> Yes, I think the right move in the long run is to delegate the
> batching to the parent driver. This allows drivers like mlx5 to add
> memory (like hotplugged memory) without needing to tear down all
> the old maps.
Nods.

>
> Having said that, maybe we can work on top if we need to remove
> .dma_map for now.
I guess for that sake I would keep .dma_map unless there's strong 
objection against it.

Thanks,
-Siwei

>
>>>> The reason why a new op is needed or better is because it allows
>>>> userspace to tell apart different reset behavior from the older kernel
>>>> (via the F_IOTLB_PERSIST feature bit in patch 4), while this behavior
>>>> could vary between parent drivers.
>>> I'm ok with a new feature flag, but we need to first seek a way to
>>> reuse the existing API.
>> A feature flag is needed anyway. I'm fine with reusing but guess I'd
>> want to converge on the direction first.
>>
>> Thanks,
>> -Siwei
>>> Thanks
>>>
>>>> Regards,
>>>> -Siwei
>>>>
>>>>> Thanks
>>>>>
>>>>>> + *                             @vdev: vdpa device
>>>>>> + *                             @asid: address space identifier
>>>>>> + *                             Returns integer: success (0) or error (< 0)
>>>>>>      * @get_vq_dma_dev:            Get the dma device for a specific
>>>>>>      *                             virtqueue (optional)
>>>>>>      *                             @vdev: vdpa device
>>>>>> @@ -390,6 +396,7 @@ struct vdpa_config_ops {
>>>>>>                           u64 iova, u64 size, u64 pa, u32 perm, void *opaque);
>>>>>>            int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid,
>>>>>>                             u64 iova, u64 size);
>>>>>> +       int (*reset_map)(struct vdpa_device *vdev, unsigned int asid);
>>>>>>            int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group,
>>>>>>                                  unsigned int asid);
>>>>>>            struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx);
>>>>>> --
>>>>>> 1.8.3.1
>>>>>>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 4/4] vhost-vdpa: introduce IOTLB_PERSIST backend feature bit
  2023-08-16 23:43                               ` Si-Wei Liu
@ 2023-08-22  8:54                                 ` Jason Wang
  2023-08-28 23:46                                   ` Si-Wei Liu
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-22  8:54 UTC (permalink / raw)
  To: Si-Wei Liu; +Cc: eperezma, gal, linux-kernel, mst, virtualization, xuanzhuo

On Thu, Aug 17, 2023 at 7:44 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 8/15/2023 6:48 PM, Jason Wang wrote:
> > On Wed, Aug 16, 2023 at 6:31 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>
> >>
> >> On 8/14/2023 7:25 PM, Jason Wang wrote:
> >>> On Tue, Aug 15, 2023 at 9:45 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> >>>> ---
> >>>>    drivers/vhost/vdpa.c             | 16 +++++++++++++++-
> >>>>    include/uapi/linux/vhost_types.h |  2 ++
> >>>>    2 files changed, 17 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> >>>> index 62b0a01..75092a7 100644
> >>>> --- a/drivers/vhost/vdpa.c
> >>>> +++ b/drivers/vhost/vdpa.c
> >>>> @@ -406,6 +406,14 @@ static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
> >>>>           return ops->resume;
> >>>>    }
> >>>>
> >>>> +static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
> >>>> +{
> >>>> +       struct vdpa_device *vdpa = v->vdpa;
> >>>> +       const struct vdpa_config_ops *ops = vdpa->config;
> >>>> +
> >>>> +       return (!ops->set_map && !ops->dma_map) || ops->reset_map;
> >>> So this means the IOTLB/IOMMU mappings have already been decoupled
> >>> from the vdpa reset.
> >> Not in the sense of API, it' been coupled since day one from the
> >> implementations of every on-chip IOMMU parent driver, namely mlx5_vdpa
> >> and vdpa_sim. Because of that, later on the (improper) support for
> >> virtio-vdpa, from commit 6f5312f80183 ("vdpa/mlx5: Add support for
> >> running with virtio_vdpa") and 6c3d329e6486 ("vdpa_sim: get rid of DMA
> >> ops") misused the .reset() op to realize 1:1 mapping, rendering strong
> >> coupling between device reset and reset of iotlb mappings. This series
> >> try to rectify that implementation deficiency, while keep userspace
> >> continuing to work with older kernel behavior.
> >>
> >>>    So it should have been noticed by the userspace.
> >> Yes, userspace had noticed this no-chip IOMMU discrepancy since day one
> >> I suppose. Unfortunately there's already code in userspace with this
> >> assumption in mind that proactively tears down and sets up iotlb mapping
> >> around vdpa device reset...
> >>> I guess we can just fix the simulator and mlx5 then we are fine?
> >> Only IF we don't care about running new QEMU on older kernels with
> >> flawed on-chip iommu behavior around reset. But that's a big IF...
> > So what I meant is:
> >
> > Userspace doesn't know whether the vendor specific mappings (set_map)
> > are required or not. And in the implementation of vhost_vdpa, if
> > platform IOMMU is used, the mappings are decoupled from the reset. So
> > if the Qemu works with parents with platform IOMMU it means Qemu can
> > work if we just decouple vendor specific mappings from the parents
> > that uses set_map.
> I was aware of this, and if you may notice I don't even offer a way
> backward to retain/emulate the flawed vhost-iotlb reset behavior for
> older userspace - I consider it more of a bug in .set_map driver
> implementation of its own rather than what the vhost-vdpa iotlb
> abstraction wishes to expose to userspace in the first place.

That's my understanding as well.

>
> If you ever look into QEMU's vhost_vdpa_reset_status() function, you may
> see memory_listener_unregister() will be called to evict all of the
> existing iotlb mappings right after vhost_vdpa_reset_device() across
> device reset, and later on at vhost_vdpa_dev_start(),
> memory_listener_register() will set up all iotlb mappings again. In an
> ideal world without this on-chip iommu deficiency QEMU should not have
> to behave this way - this is what I mentioned earlier that userspace had
> already noticed the discrepancy and it has to "proactively tear down and
> set up iotlb mapping around vdpa device reset". Apparently from
> functionality perspective this trick works completely fine with platform
> IOMMU, however, it's sub-optimal in the performance perspective.

Right.

>
> We can't simply fix QEMU by moving this memory_listener_unregister()
> call out of the reset path unconditionally, as we don't want to break
> the already-functioning older kernel even though it's suboptimal in
> performance.

I'm not sure how things can be broken in this case? Or why it is
specific to parent with set_map.

> Instead, to keep new QEMU continuing to work on top of the
> existing or older kernels, QEMU has to check this IOTLB_PERSIST feature
> flag to decide whether it is safe not to bother flushing and setting up
> iotlb across reset. For the platform IOMMU case, vdpa parent driver
> won't implement either the .set_map or .dma_map op, so it should be
> covered in the vhost_vdpa_has_persistent_map() check I suppose.

Just to make sure we are on the same page.

From the userspace point of view, the IOTLB persists and vhost-vDPA
doesn't reset the IOTLB during vDPA reset. But we have two levels
of coupling in other places:

1) Qemu level: memory listener is coupled with DRIVER_OK/reset
2) vDPA parent level: mlx5 build/destroy MR during DRIVER_OK/reset

If I understand you correctly, since we've coupled in 1), Qemu can't
be aware of whether the mapping is coupled with reset or not? If we
simply decouple in 1), memory mappings might be lost during vDPA reset.

Thanks

>
>
> Thanks,
> -Siwei
> > Thanks
> >
> >> Regards,
> >> -Siwei
> >>> Thanks
> >>>
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH RFC 4/4] vhost-vdpa: introduce IOTLB_PERSIST backend feature bit
  2023-08-22  8:54                                 ` Jason Wang
@ 2023-08-28 23:46                                   ` Si-Wei Liu
  0 siblings, 0 replies; 42+ messages in thread
From: Si-Wei Liu @ 2023-08-28 23:46 UTC (permalink / raw)
  To: Jason Wang; +Cc: eperezma, gal, linux-kernel, mst, virtualization, xuanzhuo



On 8/22/2023 1:54 AM, Jason Wang wrote:
> On Thu, Aug 17, 2023 at 7:44 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 8/15/2023 6:48 PM, Jason Wang wrote:
>>> On Wed, Aug 16, 2023 at 6:31 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>>
>>>> On 8/14/2023 7:25 PM, Jason Wang wrote:
>>>>> On Tue, Aug 15, 2023 at 9:45 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>>>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>>>>>> ---
>>>>>>     drivers/vhost/vdpa.c             | 16 +++++++++++++++-
>>>>>>     include/uapi/linux/vhost_types.h |  2 ++
>>>>>>     2 files changed, 17 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>>>>> index 62b0a01..75092a7 100644
>>>>>> --- a/drivers/vhost/vdpa.c
>>>>>> +++ b/drivers/vhost/vdpa.c
>>>>>> @@ -406,6 +406,14 @@ static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
>>>>>>            return ops->resume;
>>>>>>     }
>>>>>>
>>>>>> +static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
>>>>>> +{
>>>>>> +       struct vdpa_device *vdpa = v->vdpa;
>>>>>> +       const struct vdpa_config_ops *ops = vdpa->config;
>>>>>> +
>>>>>> +       return (!ops->set_map && !ops->dma_map) || ops->reset_map;
>>>>> So this means the IOTLB/IOMMU mappings have already been decoupled
>>>>> from the vdpa reset.
>>>> Not in the sense of API, it' been coupled since day one from the
>>>> implementations of every on-chip IOMMU parent driver, namely mlx5_vdpa
>>>> and vdpa_sim. Because of that, later on the (improper) support for
>>>> virtio-vdpa, from commit 6f5312f80183 ("vdpa/mlx5: Add support for
>>>> running with virtio_vdpa") and 6c3d329e6486 ("vdpa_sim: get rid of DMA
>>>> ops") misused the .reset() op to realize 1:1 mapping, rendering strong
>>>> coupling between device reset and reset of iotlb mappings. This series
>>>> try to rectify that implementation deficiency, while keep userspace
>>>> continuing to work with older kernel behavior.
>>>>
>>>>>     So it should have been noticed by the userspace.
>>>> Yes, userspace had noticed this no-chip IOMMU discrepancy since day one
>>>> I suppose. Unfortunately there's already code in userspace with this
>>>> assumption in mind that proactively tears down and sets up iotlb mapping
>>>> around vdpa device reset...
>>>>> I guess we can just fix the simulator and mlx5 then we are fine?
>>>> Only IF we don't care about running new QEMU on older kernels with
>>>> flawed on-chip iommu behavior around reset. But that's a big IF...
>>> So what I meant is:
>>>
>>> Userspace doesn't know whether the vendor specific mappings (set_map)
>>> are required or not. And in the implementation of vhost_vdpa, if
>>> platform IOMMU is used, the mappings are decoupled from the reset. So
>>> if the Qemu works with parents with platform IOMMU it means Qemu can
>>> work if we just decouple vendor specific mappings from the parents
>>> that uses set_map.
>> I was aware of this, and if you may notice I don't even offer a way
>> backward to retain/emulate the flawed vhost-iotlb reset behavior for
>> older userspace - I consider it more of a bug in .set_map driver
>> implementation of its own rather than what the vhost-vdpa iotlb
>> abstraction wishes to expose to userspace in the first place.
> That's my understanding as well.
>
>> If you ever look into QEMU's vhost_vdpa_reset_status() function, you may
>> see memory_listener_unregister() will be called to evict all of the
>> existing iotlb mappings right after vhost_vdpa_reset_device() across
>> device reset, and later on at vhost_vdpa_dev_start(),
>> memory_listener_register() will set up all iotlb mappings again. In an
>> ideal world without this on-chip iommu deficiency QEMU should not have
>> to behave this way - this is what I mentioned earlier that userspace had
>> already noticed the discrepancy and it has to "proactively tear down and
>> set up iotlb mapping around vdpa device reset". Apparently from
>> functionality perspective this trick works completely fine with platform
>> IOMMU, however, it's sub-optimal in the performance perspective.
> Right.
>
>> We can't simply fix QEMU by moving this memory_listener_unregister()
>> call out of the reset path unconditionally, as we don't want to break
>> the already-functioning older kernel even though it's suboptimal in
>> performance.
> I'm not sure how things can be broken in this case?
Things won't be broken if we don't care about performance: for example, 
rebooting a large-memory VM (translated to a device reset internally) will 
freeze the guest and introduce extra reboot delay unnecessarily. But if we 
want to fix the performance by removing the memory_listener_unregister() 
call unconditionally, and we don't have such a flag to distinguish, we will 
break network connectivity entirely after reset - as all mappings are 
purged during reset by older parent drivers.

>   Or why it is specific to parent with set_map.
Because without the .reset_map op and a corresponding (correct) driver 
implementation, there's no appropriate means for an on-chip IOMMU parent 
driver to persist iotlb mappings across reset, is there? And if the driver 
deliberately removes the mapping teardown from .reset, then on the other 
hand it no longer supports the 1:1 DMA mapping needed for virtio-vdpa, for 
instance.
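
For illustration, the intended .reset_map semantics could look roughly
like this in an on-chip IOMMU parent (both helpers and the device struct
are placeholders, not any driver's real functions):

/* Sketch of the semantics only: drop whatever userspace mapped in this
 * address space and fall back to the 1:1 DMA mapping that virtio-vdpa
 * relies on, without requiring a device reset to do so.
 */
static int example_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
{
	struct example_vdpa *dev = example_vdpa_from(vdev);	/* placeholder */

	/* Tear down the user-provided translations for this ASID. */
	example_destroy_user_mappings(dev, asid);

	/* Re-install the identity (1:1) mapping so the device keeps
	 * working when bound to virtio-vdpa after reset.
	 */
	return example_create_dma_mapping(dev, asid);
}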

>
>> Instead, to keep new QEMU continuing to work on top of the
>> existing or older kernels, QEMU has to check this IOTLB_PERSIST feature
>> flag to decide whether it is safe not to bother flushing and setting up
>> iotlb across reset. For the platform IOMMU case, vdpa parent driver
>> won't implement either the .set_map or .dma_map op, so it should be
>> covered in the vhost_vdpa_has_persistent_map() check I suppose.
> Just to make sure we are on the same page.
>
>  From the userspace point of view, the IOTLB persists and vhost-vDPA
> doesn't reset the IOTLB during vDPA reset. But we have two levels
> of coupling in other places:
>
> 1) Qemu level: memory listener is coupled with DRIVER_OK/reset
> 2) vDPA parent level: mlx5 build/destroy MR during DRIVER_OK/reset
>
> If I understand you correctly, since we've coupled in 1), Qemu can't
> be aware of whether the mapping is coupled with reset or not?
I suspect this had already been noticed by whoever wrote this QEMU code, 
since day one - it's just that there's no comment documenting it. Or is 
there any other reason why QEMU had to decouple it in the first place? It 
affects performance across the board for platform IOMMU vdpa providers 
as well.

>   If we
> simply decouple in 1), memory mappings might be lost during vDPA reset.
I would tend to say 1) is an inadvertent artifact or side effect of 2), 
as I do not see memory listeners being used like this in other QEMU 
subsystems, e.g. vhost or vfio. Considering this coupling in 1) has been 
in play since day one, with neither advanced vDPA features like SVQ nor an 
equivalent deficiency in platform IOMMU vdpa providers, the suspicion is 
that mlx5 building/destroying the MR during reset was the culprit back then.

Regards,
-Siwei

>
> Thanks
>
>>
>> Thanks,
>> -Siwei
>>> Thanks
>>>
>>>> Regards,
>>>> -Siwei
>>>>> Thanks
>>>>>


^ permalink raw reply	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2023-08-28 23:47 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-02 17:12 [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling Dragos Tatulea
2023-08-02 17:12 ` [PATCH 1/2] vdpa/mlx5: Fix mr->initialized semantics Dragos Tatulea
2023-08-03  8:03   ` Jason Wang
2023-08-03 11:40     ` Dragos Tatulea
2023-08-08  2:57       ` Jason Wang
2023-08-08  7:24         ` Dragos Tatulea
2023-08-09  1:42           ` Jason Wang
2023-08-14 14:15             ` Dragos Tatulea
2023-08-15  1:28               ` Jason Wang
2023-08-03 17:57     ` Si-Wei Liu
2023-08-08  3:00       ` Jason Wang
2023-08-08 22:58         ` Si-Wei Liu
2023-08-09  6:52           ` Jason Wang
2023-08-10  0:40             ` Si-Wei Liu
2023-08-10  3:10               ` Jason Wang
2023-08-10 22:20                 ` Si-Wei Liu
2023-08-14  2:59                   ` Jason Wang
2023-08-15  1:43                     ` [PATCH RFC 0/4] vdpa: decouple reset of iotlb mapping from device reset Si-Wei Liu
2023-08-15  1:43                       ` [PATCH RFC 1/4] vdpa: introduce .reset_map operation callback Si-Wei Liu
2023-08-15  2:21                         ` Jason Wang
2023-08-15 19:49                           ` Si-Wei Liu
2023-08-16  1:55                             ` Jason Wang
2023-08-17  0:05                               ` Si-Wei Liu
2023-08-17 15:28                                 ` Eugenio Perez Martin
2023-08-21 22:31                                   ` Si-Wei Liu
2023-08-15  1:43                       ` [PATCH RFC 2/4] vdpa/mlx5: implement .reset_map driver op Si-Wei Liu
2023-08-15  8:26                         ` Dragos Tatulea
2023-08-15 23:11                           ` Si-Wei Liu
2023-08-15  1:43                       ` [PATCH RFC 3/4] vhost-vdpa: should restore 1:1 dma mapping before detaching driver Si-Wei Liu
2023-08-15  2:32                         ` Jason Wang
2023-08-15 23:09                           ` Si-Wei Liu
2023-08-15  1:43                       ` [PATCH RFC 4/4] vhost-vdpa: introduce IOTLB_PERSIST backend feature bit Si-Wei Liu
2023-08-15  2:25                         ` Jason Wang
2023-08-15 22:30                           ` Si-Wei Liu
2023-08-16  1:48                             ` Jason Wang
2023-08-16 23:43                               ` Si-Wei Liu
2023-08-22  8:54                                 ` Jason Wang
2023-08-28 23:46                                   ` Si-Wei Liu
2023-08-02 17:12 ` [PATCH 2/2] vdpa/mlx5: Delete control vq iotlb in destroy_mr only when necessary Dragos Tatulea
2023-08-10  8:54 ` [PATCH 0/2] vdpa/mlx5: Fixes for ASID handling Michael S. Tsirkin
2023-08-10  8:59   ` Jason Wang
2023-08-10  9:04   ` Dragos Tatulea

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).