* Re: [PATCH v2 2/6] vdpa/mlx5: function prototype modifications in preparation to control VQ
[not found] ` <20210817060250.188705-3-elic@nvidia.com>
@ 2021-08-19 3:32 ` Jason Wang
0 siblings, 0 replies; 10+ messages in thread
From: Jason Wang @ 2021-08-19 3:32 UTC (permalink / raw)
To: Eli Cohen, mst, virtualization; +Cc: eperezma
在 2021/8/17 下午2:02, Eli Cohen 写道:
> Use struct mlx5_vdpa_dev as an argument to setup_driver() and a few
> others in preparation to control virtqueue support in a subsequent
> patch. The control virtqueue is part of struct mlx5_vdpa_dev so this is
> required.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
Acked-by: Jason Wang <jasowang@redhat.com>
> ---
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 40 ++++++++++++++++---------------
> 1 file changed, 21 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index b1230fa2f5d1..a1cf58a53d42 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -155,7 +155,7 @@ struct mlx5_vdpa_net {
>
> static void free_resources(struct mlx5_vdpa_net *ndev);
> static void init_mvqs(struct mlx5_vdpa_net *ndev);
> -static int setup_driver(struct mlx5_vdpa_net *ndev);
> +static int setup_driver(struct mlx5_vdpa_dev *mvdev);
> static void teardown_driver(struct mlx5_vdpa_net *ndev);
>
> static bool mlx5_vdpa_debug;
> @@ -1508,12 +1508,13 @@ static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
> return 0;
> }
>
> -static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
> +static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
> {
> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> int err;
> int i;
>
> - for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
> + for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) {
> err = setup_vq(ndev, &ndev->vqs[i]);
> if (err)
> goto err_vq;
> @@ -1672,8 +1673,9 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
> }
> }
>
> -static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
> +static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> {
> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> int err;
>
> suspend_vqs(ndev);
> @@ -1682,58 +1684,59 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *
> goto err_mr;
>
> teardown_driver(ndev);
> - mlx5_vdpa_destroy_mr(&ndev->mvdev);
> - err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
> + mlx5_vdpa_destroy_mr(mvdev);
> + err = mlx5_vdpa_create_mr(mvdev, iotlb);
> if (err)
> goto err_mr;
>
> - if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
> + if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
> return 0;
>
> restore_channels_info(ndev);
> - err = setup_driver(ndev);
> + err = setup_driver(mvdev);
> if (err)
> goto err_setup;
>
> return 0;
>
> err_setup:
> - mlx5_vdpa_destroy_mr(&ndev->mvdev);
> + mlx5_vdpa_destroy_mr(mvdev);
> err_mr:
> return err;
> }
>
> -static int setup_driver(struct mlx5_vdpa_net *ndev)
> +static int setup_driver(struct mlx5_vdpa_dev *mvdev)
> {
> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> int err;
>
> mutex_lock(&ndev->reslock);
> if (ndev->setup) {
> - mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n");
> + mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
> err = 0;
> goto out;
> }
> - err = setup_virtqueues(ndev);
> + err = setup_virtqueues(mvdev);
> if (err) {
> - mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n");
> + mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
> goto out;
> }
>
> err = create_rqt(ndev);
> if (err) {
> - mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
> + mlx5_vdpa_warn(mvdev, "create_rqt\n");
> goto err_rqt;
> }
>
> err = create_tir(ndev);
> if (err) {
> - mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
> + mlx5_vdpa_warn(mvdev, "create_tir\n");
> goto err_tir;
> }
>
> err = add_fwd_to_tir(ndev);
> if (err) {
> - mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n");
> + mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n");
> goto err_fwd;
> }
> ndev->setup = true;
> @@ -1799,7 +1802,7 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
>
> if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
> if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
> - err = setup_driver(ndev);
> + err = setup_driver(mvdev);
> if (err) {
> mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> goto err_setup;
> @@ -1849,7 +1852,6 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
> static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
> {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> - struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> bool change_map;
> int err;
>
> @@ -1860,7 +1862,7 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb
> }
>
> if (change_map)
> - return mlx5_vdpa_change_map(ndev, iotlb);
> + return mlx5_vdpa_change_map(mvdev, iotlb);
>
> return 0;
> }
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 3/6] vdpa/mlx5: Decouple virtqueue callback from struct mlx5_vdpa_virtqueue
[not found] ` <20210817060250.188705-4-elic@nvidia.com>
@ 2021-08-19 3:37 ` Jason Wang
0 siblings, 0 replies; 10+ messages in thread
From: Jason Wang @ 2021-08-19 3:37 UTC (permalink / raw)
To: Eli Cohen, mst, virtualization; +Cc: eperezma
在 2021/8/17 下午2:02, Eli Cohen 写道:
> Instead, define an array of struct vdpa_callback on struct mlx5_vdpa_net
> and use it to store callbacks for any virtqueue provided. This is
> required due to the fact that callback configurations arrive before feature
> negotiation. With control VQ and multiqueue introduced next we want to
> save the information until after feature negotiation where we know the
> CVQ index.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
Acked-by: Jason Wang <jasowang@redhat.com>
A question: Do we need to reset the cb during device reset?
Thanks
> ---
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 16 ++++++++--------
> 1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index a1cf58a53d42..222ddfbde116 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -90,7 +90,6 @@ struct mlx5_vq_restore_info {
> u16 avail_index;
> u16 used_index;
> bool ready;
> - struct vdpa_callback cb;
> bool restore;
> };
>
> @@ -100,7 +99,6 @@ struct mlx5_vdpa_virtqueue {
> u64 device_addr;
> u64 driver_addr;
> u32 num_ent;
> - struct vdpa_callback event_cb;
>
> /* Resources for implementing the notification channel from the device
> * to the driver. fwqp is the firmware end of an RC connection; the
> @@ -140,6 +138,7 @@ struct mlx5_vdpa_net {
> struct mlx5_vdpa_net_resources res;
> struct virtio_net_config config;
> struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
> + struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
>
> /* Serialize vq resources creation and destruction. This is required
> * since memory map might change and we need to destroy and create
> @@ -481,6 +480,10 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
>
> static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
> {
> + struct mlx5_vdpa_net *ndev = mvq->ndev;
> + struct vdpa_callback *event_cb;
> +
> + event_cb = &ndev->event_cbs[mvq->index];
> mlx5_cq_set_ci(&mvq->cq.mcq);
>
> /* make sure CQ cosumer update is visible to the hardware before updating
> @@ -488,8 +491,8 @@ static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int nu
> */
> dma_wmb();
> rx_post(&mvq->vqqp, num);
> - if (mvq->event_cb.callback)
> - mvq->event_cb.callback(mvq->event_cb.private);
> + if (event_cb->callback)
> + event_cb->callback(event_cb->private);
> }
>
> static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
> @@ -1385,9 +1388,8 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c
> {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> - struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];
>
> - vq->event_cb = *cb;
> + ndev->event_cbs[idx] = *cb;
> }
>
> static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
> @@ -1624,7 +1626,6 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
> ri->desc_addr = mvq->desc_addr;
> ri->device_addr = mvq->device_addr;
> ri->driver_addr = mvq->driver_addr;
> - ri->cb = mvq->event_cb;
> ri->restore = true;
> return 0;
> }
> @@ -1669,7 +1670,6 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
> mvq->desc_addr = ri->desc_addr;
> mvq->device_addr = ri->device_addr;
> mvq->driver_addr = ri->driver_addr;
> - mvq->event_cb = ri->cb;
> }
> }
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 4/6] vdpa/mlx5: Ensure valid indices are provided
[not found] ` <20210817060250.188705-5-elic@nvidia.com>
@ 2021-08-19 3:40 ` Jason Wang
0 siblings, 0 replies; 10+ messages in thread
From: Jason Wang @ 2021-08-19 3:40 UTC (permalink / raw)
To: Eli Cohen, mst, virtualization; +Cc: eperezma
在 2021/8/17 下午2:02, Eli Cohen 写道:
> Following patches add control virtqueue and multiqueue support. We want
> to verify that the index value to callbacks referencing a virtqueue is
> valid.
>
> The logic defining valid indices is as follows:
> CVQ clear: 0 and 1.
> CVQ set, MQ clear: 0, 1 and 2
> CVQ set, MQ set: 0..nvq where nvq is whatever provided to
> _vdpa_register_device()
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
> drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 +
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 48 ++++++++++++++++++++++++++++++
> 2 files changed, 49 insertions(+)
>
> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> index 8d0a6f2cb3f0..41b20855ed31 100644
> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> @@ -56,6 +56,7 @@ struct mlx5_vdpa_dev {
> u64 actual_features;
> u8 status;
> u32 max_vqs;
> + u16 max_idx;
> u32 generation;
>
> struct mlx5_vdpa_mr mr;
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 222ddfbde116..0fe7cd370e4b 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -45,6 +45,8 @@ MODULE_LICENSE("Dual BSD/GPL");
> (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
> VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
>
> +#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
> +
> struct mlx5_vdpa_net_resources {
> u32 tisn;
> u32 tdn;
> @@ -133,6 +135,14 @@ struct mlx5_vdpa_virtqueue {
> */
> #define MLX5_MAX_SUPPORTED_VQS 16
>
> +static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
> +{
> + if (unlikely(idx > mvdev->max_idx))
> + return false;
> +
> + return true;
> +}
> +
> struct mlx5_vdpa_net {
> struct mlx5_vdpa_dev mvdev;
> struct mlx5_vdpa_net_resources res;
> @@ -1355,6 +1365,9 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>
> + if (!is_index_valid(mvdev, idx))
> + return;
> +
> if (unlikely(!mvq->ready))
> return;
>
> @@ -1368,6 +1381,9 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>
> + if (!is_index_valid(mvdev, idx))
> + return -EINVAL;
> +
> mvq->desc_addr = desc_area;
> mvq->device_addr = device_area;
> mvq->driver_addr = driver_area;
> @@ -1380,6 +1396,9 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> struct mlx5_vdpa_virtqueue *mvq;
>
> + if (!is_index_valid(mvdev, idx))
> + return;
> +
> mvq = &ndev->vqs[idx];
> mvq->num_ent = num;
> }
> @@ -1398,6 +1417,9 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>
> + if (!is_index_valid(mvdev, idx))
> + return;
> +
> if (!ready)
> suspend_vq(ndev, mvq);
>
> @@ -1410,6 +1432,9 @@ static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>
> + if (!is_index_valid(mvdev, idx))
> + return false;
> +
> return mvq->ready;
> }
>
> @@ -1420,6 +1445,9 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>
> + if (!is_index_valid(mvdev, idx))
> + return -EINVAL;
> +
> if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
> mlx5_vdpa_warn(mvdev, "can't modify available index\n");
> return -EINVAL;
> @@ -1438,6 +1466,9 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
> struct mlx5_virtq_attr attr;
> int err;
>
> + if (!is_index_valid(mvdev, idx))
> + return -EINVAL;
> +
> /* If the virtq object was destroyed, use the value saved at
> * the last minute of suspend_vq. This caters for userspace
> * that cares about emulating the index after vq is stopped.
> @@ -1557,6 +1588,18 @@ static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
> return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
> }
>
> +static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
> +{
> + if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
> + if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
> + mvdev->max_idx = mvdev->max_vqs;
> + else
> + mvdev->max_idx = 2;
> + } else {
> + mvdev->max_idx = 1;
> + }
> +}
Nit: it might be better to add a comment to explain the logic here.
E.g. we know that indices 0 and 1 should always be valid.
Other than this:
Acked-by: Jason Wang <jasowang@redhat.com>
> +
> static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
> {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> @@ -1572,6 +1615,7 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
> ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
> ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
> ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
> + update_cvq_info(mvdev);
> return err;
> }
>
> @@ -1792,6 +1836,7 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
> mlx5_vdpa_destroy_mr(&ndev->mvdev);
> ndev->mvdev.status = 0;
> ndev->mvdev.mlx_features = 0;
> + ndev->mvdev.actual_features = 0;
> ++mvdev->generation;
> if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
> if (mlx5_vdpa_create_mr(mvdev, NULL))
> @@ -1892,6 +1937,9 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
> struct mlx5_vdpa_net *ndev;
> phys_addr_t addr;
>
> + if (!is_index_valid(mvdev, idx))
> + return ret;
> +
> /* If SF BAR size is smaller than PAGE_SIZE, do not use direct
> * notification to avoid the risk of mapping pages that contain BAR of more
> * than one SF
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 5/6] vdpa/mlx5: Add support for control VQ and MAC setting
[not found] ` <20210817060250.188705-6-elic@nvidia.com>
@ 2021-08-19 4:04 ` Jason Wang
[not found] ` <20210819060641.GA161591@mtl-vdi-166.wap.labs.mlnx>
0 siblings, 1 reply; 10+ messages in thread
From: Jason Wang @ 2021-08-19 4:04 UTC (permalink / raw)
To: Eli Cohen, mst, virtualization; +Cc: eperezma
在 2021/8/17 下午2:02, Eli Cohen 写道:
> Add support to handle control virtqueue configurations per virtio
> specification. The control virtqueue is implemented in software and no
> hardware offloading is involved.
>
> Control VQ configuration needs task context, therefore all configurations
> are handled in a workqueue created for the purpose.
I think all the current callers are already in the task context (the
caller of virtnet_send_command()).
Any reason for using workqueue here?
I'm not sure if it can work well on UP where the workqueue might not
have a chance to be scheduled (we are doing busy waiting here):
/* Spin for a response, the kick causes an ioport write, trapping
* into the hypervisor, so the request should be handled immediately.
*/
while (!virtqueue_get_buf(vi->cvq, &tmp) &&
!virtqueue_is_broken(vi->cvq))
cpu_relax();
>
> Modifications are made to the memory registration code to allow for
> saving a copy of iotlb to be used by the control VQ to access the vring.
>
> The max number of data virtqueues supported by the driver has been
> updated to 2 since multiqueue is not supported at this stage and we need
> to ensure consistency of VQ indices mapping to either data or control
> VQ.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
> drivers/vdpa/mlx5/core/mlx5_vdpa.h | 23 +++
> drivers/vdpa/mlx5/core/mr.c | 81 +++++++---
> drivers/vdpa/mlx5/core/resources.c | 31 ++++
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 231 +++++++++++++++++++++++++++--
> 4 files changed, 334 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> index 41b20855ed31..6c43476a69cb 100644
> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> @@ -5,6 +5,7 @@
> #define __MLX5_VDPA_H__
>
> #include <linux/etherdevice.h>
> +#include <linux/vringh.h>
> #include <linux/vdpa.h>
> #include <linux/mlx5/driver.h>
>
> @@ -47,6 +48,26 @@ struct mlx5_vdpa_resources {
> bool valid;
> };
>
> +struct mlx5_control_vq {
> + struct vhost_iotlb *iotlb;
> + /* spinlock to synchronize iommu table */
> + spinlock_t iommu_lock;
> + struct vringh vring;
> + bool ready;
> + u64 desc_addr;
> + u64 device_addr;
> + u64 driver_addr;
> + struct vdpa_callback event_cb;
> + struct vringh_kiov riov;
> + struct vringh_kiov wiov;
> + unsigned short head;
> +};
> +
> +struct mlx5_ctrl_wq_ent {
> + struct work_struct work;
> + struct mlx5_vdpa_dev *mvdev;
> +};
> +
> struct mlx5_vdpa_dev {
> struct vdpa_device vdev;
> struct mlx5_core_dev *mdev;
> @@ -60,6 +81,8 @@ struct mlx5_vdpa_dev {
> u32 generation;
>
> struct mlx5_vdpa_mr mr;
> + struct mlx5_control_vq cvq;
> + struct workqueue_struct *wq;
> };
>
> int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> index e59135fa867e..da013b8082bc 100644
> --- a/drivers/vdpa/mlx5/core/mr.c
> +++ b/drivers/vdpa/mlx5/core/mr.c
> @@ -1,6 +1,7 @@
> // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> /* Copyright (c) 2020 Mellanox Technologies Ltd. */
>
> +#include <linux/vhost_types.h>
> #include <linux/vdpa.h>
> #include <linux/gcd.h>
> #include <linux/string.h>
> @@ -451,33 +452,30 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
> mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
> }
>
> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> +static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
> {
> - struct mlx5_vdpa_mr *mr = &mvdev->mr;
> + struct vhost_iotlb_map *map;
> + u64 start = 0ULL, last = 0ULL - 1;
> int err;
>
> - if (mr->initialized)
> - return 0;
> -
> - if (iotlb)
> - err = create_user_mr(mvdev, iotlb);
> - else
> - err = create_dma_mr(mvdev, mr);
> -
> - if (!err)
> - mr->initialized = true;
> + if (!src) {
> + err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
> + return err;
> + }
>
> - return err;
> + for (map = vhost_iotlb_itree_first(src, start, last); map;
> + map = vhost_iotlb_itree_next(map, start, last)) {
> + err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
> + map->addr, map->perm);
> + if (err)
> + return err;
> + }
> + return 0;
> }
>
> -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> +static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
> {
> - int err;
> -
> - mutex_lock(&mvdev->mr.mkey_mtx);
> - err = _mlx5_vdpa_create_mr(mvdev, iotlb);
> - mutex_unlock(&mvdev->mr.mkey_mtx);
> - return err;
> + vhost_iotlb_del_range(mvdev->cvq.iotlb, 0ULL, 0ULL - 1);
It's better to use ULLONG_MAX.
> }
>
> static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
> @@ -501,6 +499,7 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> if (!mr->initialized)
> goto out;
>
> + prune_iotlb(mvdev);
> if (mr->user_mr)
> destroy_user_mr(mvdev, mr);
> else
> @@ -512,6 +511,48 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> mutex_unlock(&mr->mkey_mtx);
> }
>
> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> +{
> + struct mlx5_vdpa_mr *mr = &mvdev->mr;
> + int err;
> +
> + if (mr->initialized)
> + return 0;
> +
> + if (iotlb)
> + err = create_user_mr(mvdev, iotlb);
> + else
> + err = create_dma_mr(mvdev, mr);
> +
> + if (err)
> + return err;
> +
> + err = dup_iotlb(mvdev, iotlb);
> + if (err)
> + goto out_err;
> +
> + mr->initialized = true;
> + return 0;
> +
> +out_err:
> + if (iotlb)
> + destroy_user_mr(mvdev, mr);
> + else
> + destroy_dma_mr(mvdev, mr);
> +
> + return err;
> +}
> +
> +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> +{
> + int err;
> +
> + mutex_lock(&mvdev->mr.mkey_mtx);
> + err = _mlx5_vdpa_create_mr(mvdev, iotlb);
> + mutex_unlock(&mvdev->mr.mkey_mtx);
> + return err;
> +}
> +
> int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
> bool *change_map)
> {
> diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
> index d4606213f88a..d24ae1a85159 100644
> --- a/drivers/vdpa/mlx5/core/resources.c
> +++ b/drivers/vdpa/mlx5/core/resources.c
> @@ -1,6 +1,7 @@
> // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> /* Copyright (c) 2020 Mellanox Technologies Ltd. */
>
> +#include <linux/iova.h>
> #include <linux/mlx5/driver.h>
> #include "mlx5_vdpa.h"
>
> @@ -221,6 +222,28 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *m
> return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in);
> }
>
> +static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
> +{
> + int err;
> +
> + mvdev->cvq.iotlb = vhost_iotlb_alloc(0, 0);
> + if (!mvdev->cvq.iotlb)
> + return -ENOMEM;
> +
> + vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
> + err = iova_cache_get();
Any reason for using iova cache here?
> + if (err)
> + vhost_iotlb_free(mvdev->cvq.iotlb);
> +
> + return err;
> +}
> +
> +static void cleanup_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
> +{
> + iova_cache_put();
> + vhost_iotlb_free(mvdev->cvq.iotlb);
> +}
> +
> int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
> {
> u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset);
> @@ -260,10 +283,17 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
> err = -ENOMEM;
> goto err_key;
> }
> +
> + err = init_ctrl_vq(mvdev);
> + if (err)
> + goto err_ctrl;
> +
> res->valid = true;
>
> return 0;
>
> +err_ctrl:
> + iounmap(res->kick_addr);
> err_key:
> dealloc_pd(mvdev, res->pdn, res->uid);
> err_pd:
> @@ -282,6 +312,7 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
> if (!res->valid)
> return;
>
> + cleanup_ctrl_vq(mvdev);
> iounmap(res->kick_addr);
> res->kick_addr = NULL;
> dealloc_pd(mvdev, res->pdn, res->uid);
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 0fe7cd370e4b..e18665781135 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -133,7 +133,7 @@ struct mlx5_vdpa_virtqueue {
> /* We will remove this limitation once mlx5_vdpa_alloc_resources()
> * provides for driver space allocation
> */
> -#define MLX5_MAX_SUPPORTED_VQS 16
> +#define MLX5_MAX_SUPPORTED_VQS 2
>
> static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
> {
> @@ -160,6 +160,7 @@ struct mlx5_vdpa_net {
> struct mlx5_flow_handle *rx_rule;
> bool setup;
> u16 mtu;
> + u32 cur_num_vqs;
> };
>
> static void free_resources(struct mlx5_vdpa_net *ndev);
> @@ -169,6 +170,8 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev);
>
> static bool mlx5_vdpa_debug;
>
> +#define MLX5_CVQ_MAX_ENT 16
> +
> #define MLX5_LOG_VIO_FLAG(_feature) \
> do { \
> if (features & BIT_ULL(_feature)) \
> @@ -186,6 +189,16 @@ static inline u32 mlx5_vdpa_max_qps(int max_vqs)
> return max_vqs / 2;
> }
>
> +static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
> +{
> + return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
> +}
> +
> +static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
> +{
> + return idx == ctrl_vq_idx(mvdev);
> +}
> +
> static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
> {
> if (status & ~VALID_STATUS_MASK)
> @@ -1359,15 +1372,132 @@ static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
> ndev->rx_rule = NULL;
> }
>
> +virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
> +{
> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> + struct mlx5_control_vq *cvq = &mvdev->cvq;
> + virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> + struct mlx5_core_dev *pfmdev;
> + size_t read;
> + u8 mac[ETH_ALEN];
> +
> + pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
> + switch (cmd) {
> + case VIRTIO_NET_CTRL_MAC_ADDR_SET:
> + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
> + if (read != ETH_ALEN)
> + break;
> +
> + if (!memcmp(ndev->config.mac, mac, 6)) {
> + status = VIRTIO_NET_OK;
> + break;
> + }
> +
> + if (!is_zero_ether_addr(ndev->config.mac)) {
> + if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
> + mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
> + ndev->config.mac);
> + break;
> + }
> + }
> +
> + if (mlx5_mpfs_add_mac(pfmdev, mac)) {
> + mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
> + mac);
> + break;
> + }
> +
> + memcpy(ndev->config.mac, mac, ETH_ALEN);
> + status = VIRTIO_NET_OK;
> + break;
> +
> + default:
> + break;
> + }
> +
> + return status;
> +}
> +
> +static void mlx5_cvq_kick_handler(struct work_struct *work)
> +{
> + virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> + struct virtio_net_ctrl_hdr ctrl;
> + struct mlx5_ctrl_wq_ent *wqent;
> + struct mlx5_vdpa_dev *mvdev;
> + struct mlx5_control_vq *cvq;
> + struct mlx5_vdpa_net *ndev;
> + size_t read, write;
> + int err;
> +
> + wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
> + mvdev = wqent->mvdev;
> + ndev = to_mlx5_vdpa_ndev(mvdev);
> + cvq = &mvdev->cvq;
> + if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> + goto out;
> +
> + if (!cvq->ready)
> + goto out;
> +
> + while (true) {
> + err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
> + GFP_ATOMIC);
> + if (err <= 0)
> + break;
> +
> + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
> + if (read != sizeof(ctrl))
> + break;
> +
> + switch (ctrl.class) {
> + case VIRTIO_NET_CTRL_MAC:
> + status = handle_ctrl_mac(mvdev, ctrl.cmd);
> + break;
> +
> + default:
> + break;
> + }
> +
> + /* Make sure data is written before advancing index */
> + smp_wmb();
> +
> + write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
> + vringh_complete_iotlb(&cvq->vring, cvq->head, write);
> + vringh_kiov_cleanup(&cvq->riov);
> + vringh_kiov_cleanup(&cvq->wiov);
> +
> + if (vringh_need_notify_iotlb(&cvq->vring))
> + vringh_notify(&cvq->vring);
> + }
> +out:
> + kfree(wqent);
> +}
> +
> static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
> {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> + struct mlx5_vdpa_virtqueue *mvq;
> + struct mlx5_ctrl_wq_ent *wqent;
>
> if (!is_index_valid(mvdev, idx))
> return;
>
> + if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
> + if (!mvdev->cvq.ready)
> + return;
> +
> + wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
> + if (!wqent)
> + return;
> +
> + wqent->mvdev = mvdev;
> + INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
> + queue_work(mvdev->wq, &wqent->work);
> + return;
> + }
> +
> + mvq = &ndev->vqs[idx];
> if (unlikely(!mvq->ready))
> return;
>
> @@ -1379,11 +1509,19 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_
> {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> + struct mlx5_vdpa_virtqueue *mvq;
>
> if (!is_index_valid(mvdev, idx))
> return -EINVAL;
>
> + if (is_ctrl_vq_idx(mvdev, idx)) {
> + mvdev->cvq.desc_addr = desc_area;
> + mvdev->cvq.device_addr = device_area;
> + mvdev->cvq.driver_addr = driver_area;
> + return 0;
> + }
> +
> + mvq = &ndev->vqs[idx];
> mvq->desc_addr = desc_area;
> mvq->device_addr = device_area;
> mvq->driver_addr = driver_area;
> @@ -1396,7 +1534,7 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> struct mlx5_vdpa_virtqueue *mvq;
>
> - if (!is_index_valid(mvdev, idx))
> + if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
> return;
>
> mvq = &ndev->vqs[idx];
> @@ -1411,15 +1549,42 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c
> ndev->event_cbs[idx] = *cb;
> }
>
> +static void mlx5_cvq_notify(struct vringh *vring)
> +{
> + struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
> +
> + if (!cvq->event_cb.callback)
> + return;
> +
> + cvq->event_cb.callback(cvq->event_cb.private);
> +}
> +
> +static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
> +{
> + struct mlx5_control_vq *cvq = &mvdev->cvq;
> +
> + cvq->ready = ready;
> + if (!ready)
> + return;
> +
> + cvq->vring.notify = mlx5_cvq_notify;
> +}
> +
> static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
> {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> + struct mlx5_vdpa_virtqueue *mvq;
>
> if (!is_index_valid(mvdev, idx))
> return;
>
> + if (is_ctrl_vq_idx(mvdev, idx)) {
> + set_cvq_ready(mvdev, ready);
> + return;
> + }
> +
> + mvq = &ndev->vqs[idx];
> if (!ready)
> suspend_vq(ndev, mvq);
>
> @@ -1430,12 +1595,14 @@ static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
> {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>
> if (!is_index_valid(mvdev, idx))
> return false;
>
> - return mvq->ready;
> + if (is_ctrl_vq_idx(mvdev, idx))
> + return mvdev->cvq.ready;
> +
> + return ndev->vqs[idx].ready;
> }
>
> static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
> @@ -1443,11 +1610,17 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
> {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> + struct mlx5_vdpa_virtqueue *mvq;
>
> if (!is_index_valid(mvdev, idx))
> return -EINVAL;
>
> + if (is_ctrl_vq_idx(mvdev, idx)) {
> + mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
Question: is packed virtqueue supported by the current mlx5e?
If not, this is fine.
If yes, we should disable packed and re-enable it after vringh supports
packed virtqueues.
Otherwise this looks good.
Thanks
> + return 0;
> + }
> +
> + mvq = &ndev->vqs[idx];
> if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
> mlx5_vdpa_warn(mvdev, "can't modify available index\n");
> return -EINVAL;
> @@ -1462,13 +1635,19 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
> {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> + struct mlx5_vdpa_virtqueue *mvq;
> struct mlx5_virtq_attr attr;
> int err;
>
> if (!is_index_valid(mvdev, idx))
> return -EINVAL;
>
> + if (is_ctrl_vq_idx(mvdev, idx)) {
> + state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
> + return 0;
> + }
> +
> + mvq = &ndev->vqs[idx];
> /* If the virtq object was destroyed, use the value saved at
> * the last minute of suspend_vq. This caters for userspace
> * that cares about emulating the index after vq is stopped.
> @@ -1525,10 +1704,13 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
> u16 dev_features;
>
> dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
> - ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
> + ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
> if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
> + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
> + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
> +
> print_features(mvdev, ndev->mvdev.mlx_features, false);
> return ndev->mvdev.mlx_features;
> }
> @@ -1544,6 +1726,7 @@ static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
> static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
> {
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> + struct mlx5_control_vq *cvq = &mvdev->cvq;
> int err;
> int i;
>
> @@ -1553,6 +1736,16 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
> goto err_vq;
> }
>
> + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
> + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
> + MLX5_CVQ_MAX_ENT, false,
> + (struct vring_desc *)(uintptr_t)cvq->desc_addr,
> + (struct vring_avail *)(uintptr_t)cvq->driver_addr,
> + (struct vring_used *)(uintptr_t)cvq->device_addr);
> + if (err)
> + goto err_vq;
> + }
> +
> return 0;
>
> err_vq:
> @@ -1937,7 +2130,7 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
> struct mlx5_vdpa_net *ndev;
> phys_addr_t addr;
>
> - if (!is_index_valid(mvdev, idx))
> + if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
> return ret;
>
> /* If SF BAR size is smaller than PAGE_SIZE, do not use direct
> @@ -2114,8 +2307,11 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> err = mlx5_mpfs_add_mac(pfmdev, config->mac);
> if (err)
> goto err_mtu;
> +
> + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
> }
>
> + config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
> mvdev->vdev.dma_dev = &mdev->pdev->dev;
> err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
> if (err)
> @@ -2131,8 +2327,15 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> if (err)
> goto err_mr;
>
> + mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
> + if (!mvdev->wq) {
> + err = -ENOMEM;
> + goto err_res2;
> + }
> +
> + ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
> mvdev->vdev.mdev = &mgtdev->mgtdev;
> - err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
> + err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
> if (err)
> goto err_reg;
>
> @@ -2140,6 +2343,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> return 0;
>
> err_reg:
> + destroy_workqueue(mvdev->wq);
> +err_res2:
> free_resources(ndev);
> err_mr:
> mlx5_vdpa_destroy_mr(mvdev);
> @@ -2157,7 +2362,9 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
> {
> struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
> + struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
>
> + destroy_workqueue(mvdev->wq);
> _vdpa_unregister_device(dev);
> mgtdev->ndev = NULL;
> }
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 6/6] vdpa/mlx5: Add multiqueue support
[not found] ` <20210817060250.188705-7-elic@nvidia.com>
@ 2021-08-19 4:07 ` Jason Wang
[not found] ` <20210819061547.GD161591@mtl-vdi-166.wap.labs.mlnx>
0 siblings, 1 reply; 10+ messages in thread
From: Jason Wang @ 2021-08-19 4:07 UTC (permalink / raw)
To: Eli Cohen, mst, virtualization; +Cc: eperezma
在 2021/8/17 下午2:02, Eli Cohen 写道:
> Multiqueue support requires additional virtio_net_q objects to be added
> or removed per the configured number of queue pairs. In addition, the RQ
> table needs to be modified to match the number of configured receive
> queues so the packets are dispatched to the right virtqueue according to
> the hash result.
>
> Note: qemu v6.0.0 is broken when the device requests more than two data
> queues; no net device will be created for the vdpa device. To avoid
> this, one should specify mq=off to qemu. In this case it will end up
> with a single queue.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
> drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 +
> drivers/vdpa/mlx5/core/resources.c | 10 ++
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 189 ++++++++++++++++++++++++-----
> 3 files changed, 169 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> index 6c43476a69cb..01a848adf590 100644
> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> @@ -91,6 +91,7 @@ int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey);
> int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn);
> void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn);
> int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn);
> +int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn);
> void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn);
> int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn);
> void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn);
> diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
> index d24ae1a85159..bbdcf9a01a6d 100644
> --- a/drivers/vdpa/mlx5/core/resources.c
> +++ b/drivers/vdpa/mlx5/core/resources.c
> @@ -129,6 +129,16 @@ int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *
> return err;
> }
>
> +int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn)
> +{
> + u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {};
> +
> + MLX5_SET(modify_rqt_in, in, uid, mvdev->res.uid);
> + MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
> + MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
> + return mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out));
> +}
> +
> void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn)
> {
> u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index e18665781135..9cff3a49552f 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -133,7 +133,7 @@ struct mlx5_vdpa_virtqueue {
> /* We will remove this limitation once mlx5_vdpa_alloc_resources()
> * provides for driver space allocation
> */
> -#define MLX5_MAX_SUPPORTED_VQS 2
> +#define MLX5_MAX_SUPPORTED_VQS 16
I wonder if we can leave this unchanged, since the previous patch changed it
from 16 to 2.
Other than this.
Acked-by: Jason Wang <jasowang@redhat.com>
>
> static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
> {
> @@ -184,6 +184,23 @@ static bool mlx5_vdpa_debug;
> mlx5_vdpa_info(mvdev, "%s\n", #_status); \
> } while (0)
>
> +/* TODO: cross-endian support */
> +static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
> +{
> + return virtio_legacy_is_little_endian() ||
> + (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
> +}
> +
> +static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
> +{
> + return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
> +}
> +
> +static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
> +{
> + return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
> +}
> +
> static inline u32 mlx5_vdpa_max_qps(int max_vqs)
> {
> return max_vqs / 2;
> @@ -191,6 +208,9 @@ static inline u32 mlx5_vdpa_max_qps(int max_vqs)
>
> static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
> {
> + if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
> + return 2;
> +
> return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
> }
>
> @@ -1127,10 +1147,8 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
> if (!mvq->num_ent)
> return 0;
>
> - if (mvq->initialized) {
> - mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
> - return -EINVAL;
> - }
> + if (mvq->initialized)
> + return 0;
>
> err = cq_create(ndev, idx, mvq->num_ent);
> if (err)
> @@ -1217,19 +1235,20 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
>
> static int create_rqt(struct mlx5_vdpa_net *ndev)
> {
> - int log_max_rqt;
> __be32 *list;
> + int max_rqt;
> void *rqtc;
> int inlen;
> void *in;
> int i, j;
> int err;
>
> - log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
> - if (log_max_rqt < 1)
> + max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
> + 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
> + if (max_rqt < 1)
> return -EOPNOTSUPP;
>
> - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
> + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
> in = kzalloc(inlen, GFP_KERNEL);
> if (!in)
> return -ENOMEM;
> @@ -1238,10 +1257,9 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
> rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
>
> MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
> - MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
> - MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
> + MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
> list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
> - for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
> + for (i = 0, j = 0; j < max_rqt; j++) {
> if (!ndev->vqs[j].initialized)
> continue;
>
> @@ -1250,6 +1268,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
> i++;
> }
> }
> + MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
>
> err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
> kfree(in);
> @@ -1259,6 +1278,52 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
> return 0;
> }
>
> +#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
> +
> +int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
> +{
> + __be32 *list;
> + int max_rqt;
> + void *rqtc;
> + int inlen;
> + void *in;
> + int i, j;
> + int err;
> +
> + max_rqt = min_t(int, ndev->cur_num_vqs / 2,
> + 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
> + if (max_rqt < 1)
> + return -EOPNOTSUPP;
> +
> + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
> + in = kzalloc(inlen, GFP_KERNEL);
> + if (!in)
> + return -ENOMEM;
> +
> + MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
> + MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
> + rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
> + MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
> +
> + list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
> + for (i = 0, j = 0; j < num; j++) {
> + if (!ndev->vqs[j].initialized)
> + continue;
> +
> + if (!vq_is_tx(ndev->vqs[j].index)) {
> + list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
> + i++;
> + }
> + }
> + MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
> + err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
> + kfree(in);
> + if (err)
> + return err;
> +
> + return 0;
> +}
> +
> static void destroy_rqt(struct mlx5_vdpa_net *ndev)
> {
> mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
> @@ -1418,6 +1483,77 @@ virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
> return status;
> }
>
> +static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
> +{
> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> + int cur_qps = ndev->cur_num_vqs / 2;
> + int err;
> + int i;
> +
> + if (cur_qps > newqps) {
> + err = modify_rqt(ndev, 2 * newqps);
> + if (err)
> + return err;
> +
> + for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
> + teardown_vq(ndev, &ndev->vqs[i]);
> +
> + ndev->cur_num_vqs = 2 * newqps;
> + } else {
> + ndev->cur_num_vqs = 2 * newqps;
> + for (i = cur_qps * 2; i < 2 * newqps; i++) {
> + err = setup_vq(ndev, &ndev->vqs[i]);
> + if (err)
> + goto clean_added;
> + }
> + err = modify_rqt(ndev, 2 * newqps);
> + if (err)
> + goto clean_added;
> + }
> + return 0;
> +
> +clean_added:
> + for (--i; i >= cur_qps; --i)
> + teardown_vq(ndev, &ndev->vqs[i]);
> +
> + return err;
> +}
> +
> +virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
> +{
> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> + virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> + struct mlx5_control_vq *cvq = &mvdev->cvq;
> + struct virtio_net_ctrl_mq mq;
> + size_t read;
> + u16 newqps;
> +
> + switch (cmd) {
> + case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
> + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
> + if (read != sizeof(mq))
> + break;
> +
> + newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
> + if (ndev->cur_num_vqs == 2 * newqps) {
> + status = VIRTIO_NET_OK;
> + break;
> + }
> +
> + if (newqps & (newqps - 1))
> + break;
> +
> + if (!change_num_qps(mvdev, newqps))
> + status = VIRTIO_NET_OK;
> +
> + break;
> + default:
> + break;
> + }
> +
> + return status;
> +}
> +
> static void mlx5_cvq_kick_handler(struct work_struct *work)
> {
> virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> @@ -1453,6 +1589,9 @@ static void mlx5_cvq_kick_handler(struct work_struct *work)
> case VIRTIO_NET_CTRL_MAC:
> status = handle_ctrl_mac(mvdev, ctrl.cmd);
> break;
> + case VIRTIO_NET_CTRL_MQ:
> + status = handle_ctrl_mq(mvdev, ctrl.cmd);
> + break;
>
> default:
> break;
> @@ -1710,6 +1849,7 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
> + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
>
> print_features(mvdev, ndev->mvdev.mlx_features, false);
> return ndev->mvdev.mlx_features;
> @@ -1769,18 +1909,6 @@ static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
> }
> }
>
> -/* TODO: cross-endian support */
> -static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
> -{
> - return virtio_legacy_is_little_endian() ||
> - (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
> -}
> -
> -static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
> -{
> - return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
> -}
> -
> static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
> {
> if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
> @@ -1846,15 +1974,14 @@ static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
> static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
> {
> struct mlx5_vq_restore_info *ri = &mvq->ri;
> - struct mlx5_virtq_attr attr;
> + struct mlx5_virtq_attr attr = {};
> int err;
>
> - if (!mvq->initialized)
> - return 0;
> -
> - err = query_virtqueue(ndev, mvq, &attr);
> - if (err)
> - return err;
> + if (mvq->initialized) {
> + err = query_virtqueue(ndev, mvq, &attr);
> + if (err)
> + return err;
> + }
>
> ri->avail_index = attr.available_index;
> ri->used_index = attr.used_index;
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 5/6] vdpa/mlx5: Add support for control VQ and MAC setting
[not found] ` <20210819060641.GA161591@mtl-vdi-166.wap.labs.mlnx>
@ 2021-08-19 7:12 ` Jason Wang
[not found] ` <20210819072145.GA166182@mtl-vdi-166.wap.labs.mlnx>
0 siblings, 1 reply; 10+ messages in thread
From: Jason Wang @ 2021-08-19 7:12 UTC (permalink / raw)
To: Eli Cohen; +Cc: eperezma, virtualization, mst
在 2021/8/19 下午2:06, Eli Cohen 写道:
> On Thu, Aug 19, 2021 at 12:04:10PM +0800, Jason Wang wrote:
>> 在 2021/8/17 下午2:02, Eli Cohen 写道:
>>> Add support to handle control virtqueue configurations per virtio
>>> specification. The control virtqueue is implemented in software and no
>>> hardware offloading is involved.
>>>
>>> Control VQ configuration needs task context, therefore all configurations
>>> are handled in a workqueue created for the purpose.
>>
>> I think all the current callers are already in the task context (the
>> caller of virtnet_send_command()).
>>
>> Any reason for using workqueue here?
>>
> I am running code that might sleep and the call has, IIRC, irqs disabled. The
> kernel complained about this.
I see.
>
>> I'm not sure if it can work well on UP where the workqueue might not have a
>> chance to be scheduled (we are doing busy waiting here):
>>
>> /* Spin for a response, the kick causes an ioport write, trapping
>> * into the hypervisor, so the request should be handled
>> immediately.
>> */
>> while (!virtqueue_get_buf(vi->cvq, &tmp) &&
>> !virtqueue_is_broken(vi->cvq))
>> cpu_relax();
>>
> I think we have two different cases here:
> 1. When the net device is running in a VM. In this case we do not have
> any issue since the loop is running at the guest kernel and the work is
> done at the host kernel and would end at a finite time.
>
> 2. When we're using virtio_vdpa and the device is on the host we have an
> issue if we're using a UP processor. Maybe we should avoid supporting CVQ
> in this case?
Maybe we can switch to using an interrupt and sleeping here; I will have a look.
>
>>> Modifications are made to the memory registration code to allow for
>>> saving a copy of iotlb to be used by the control VQ to access the vring.
>>>
>>> The max number of data virtqueues supported by the driver has been
>>> updated to 2 since multiqueue is not supported at this stage and we need
>>> to ensure consistency of VQ indices mapping to either data or control
>>> VQ.
>>>
>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>> ---
>>> drivers/vdpa/mlx5/core/mlx5_vdpa.h | 23 +++
>>> drivers/vdpa/mlx5/core/mr.c | 81 +++++++---
>>> drivers/vdpa/mlx5/core/resources.c | 31 ++++
>>> drivers/vdpa/mlx5/net/mlx5_vnet.c | 231 +++++++++++++++++++++++++++--
>>> 4 files changed, 334 insertions(+), 32 deletions(-)
>>>
>>> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>> index 41b20855ed31..6c43476a69cb 100644
>>> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>> @@ -5,6 +5,7 @@
>>> #define __MLX5_VDPA_H__
>>> #include <linux/etherdevice.h>
>>> +#include <linux/vringh.h>
>>> #include <linux/vdpa.h>
>>> #include <linux/mlx5/driver.h>
>>> @@ -47,6 +48,26 @@ struct mlx5_vdpa_resources {
>>> bool valid;
>>> };
>>> +struct mlx5_control_vq {
>>> + struct vhost_iotlb *iotlb;
>>> + /* spinlock to synchronize iommu table */
>>> + spinlock_t iommu_lock;
>>> + struct vringh vring;
>>> + bool ready;
>>> + u64 desc_addr;
>>> + u64 device_addr;
>>> + u64 driver_addr;
>>> + struct vdpa_callback event_cb;
>>> + struct vringh_kiov riov;
>>> + struct vringh_kiov wiov;
>>> + unsigned short head;
>>> +};
>>> +
>>> +struct mlx5_ctrl_wq_ent {
>>> + struct work_struct work;
>>> + struct mlx5_vdpa_dev *mvdev;
>>> +};
>>> +
>>> struct mlx5_vdpa_dev {
>>> struct vdpa_device vdev;
>>> struct mlx5_core_dev *mdev;
>>> @@ -60,6 +81,8 @@ struct mlx5_vdpa_dev {
>>> u32 generation;
>>> struct mlx5_vdpa_mr mr;
>>> + struct mlx5_control_vq cvq;
>>> + struct workqueue_struct *wq;
>>> };
>>> int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
>>> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
>>> index e59135fa867e..da013b8082bc 100644
>>> --- a/drivers/vdpa/mlx5/core/mr.c
>>> +++ b/drivers/vdpa/mlx5/core/mr.c
>>> @@ -1,6 +1,7 @@
>>> // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>>> /* Copyright (c) 2020 Mellanox Technologies Ltd. */
>>> +#include <linux/vhost_types.h>
>>> #include <linux/vdpa.h>
>>> #include <linux/gcd.h>
>>> #include <linux/string.h>
>>> @@ -451,33 +452,30 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
>>> mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
>>> }
>>> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
>>> +static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
>>> {
>>> - struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>> + struct vhost_iotlb_map *map;
>>> + u64 start = 0ULL, last = 0ULL - 1;
>>> int err;
>>> - if (mr->initialized)
>>> - return 0;
>>> -
>>> - if (iotlb)
>>> - err = create_user_mr(mvdev, iotlb);
>>> - else
>>> - err = create_dma_mr(mvdev, mr);
>>> -
>>> - if (!err)
>>> - mr->initialized = true;
>>> + if (!src) {
>>> + err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
>>> + return err;
>>> + }
>>> - return err;
>>> + for (map = vhost_iotlb_itree_first(src, start, last); map;
>>> + map = vhost_iotlb_itree_next(map, start, last)) {
>>> + err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
>>> + map->addr, map->perm);
>>> + if (err)
>>> + return err;
>>> + }
>>> + return 0;
>>> }
>>> -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
>>> +static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
>>> {
>>> - int err;
>>> -
>>> - mutex_lock(&mvdev->mr.mkey_mtx);
>>> - err = _mlx5_vdpa_create_mr(mvdev, iotlb);
>>> - mutex_unlock(&mvdev->mr.mkey_mtx);
>>> - return err;
>>> + vhost_iotlb_del_range(mvdev->cvq.iotlb, 0ULL, 0ULL - 1);
>>
>> It's better to use ULLONG_MAX.
> Will change.
>
>>
>>> }
>>> static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
>>> @@ -501,6 +499,7 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
>>> if (!mr->initialized)
>>> goto out;
>>> + prune_iotlb(mvdev);
>>> if (mr->user_mr)
>>> destroy_user_mr(mvdev, mr);
>>> else
>>> @@ -512,6 +511,48 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
>>> mutex_unlock(&mr->mkey_mtx);
>>> }
>>> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
>>> +{
>>> + struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>> + int err;
>>> +
>>> + if (mr->initialized)
>>> + return 0;
>>> +
>>> + if (iotlb)
>>> + err = create_user_mr(mvdev, iotlb);
>>> + else
>>> + err = create_dma_mr(mvdev, mr);
>>> +
>>> + if (err)
>>> + return err;
>>> +
>>> + err = dup_iotlb(mvdev, iotlb);
>>> + if (err)
>>> + goto out_err;
>>> +
>>> + mr->initialized = true;
>>> + return 0;
>>> +
>>> +out_err:
>>> + if (iotlb)
>>> + destroy_user_mr(mvdev, mr);
>>> + else
>>> + destroy_dma_mr(mvdev, mr);
>>> +
>>> + return err;
>>> +}
>>> +
>>> +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
>>> +{
>>> + int err;
>>> +
>>> + mutex_lock(&mvdev->mr.mkey_mtx);
>>> + err = _mlx5_vdpa_create_mr(mvdev, iotlb);
>>> + mutex_unlock(&mvdev->mr.mkey_mtx);
>>> + return err;
>>> +}
>>> +
>>> int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
>>> bool *change_map)
>>> {
>>> diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
>>> index d4606213f88a..d24ae1a85159 100644
>>> --- a/drivers/vdpa/mlx5/core/resources.c
>>> +++ b/drivers/vdpa/mlx5/core/resources.c
>>> @@ -1,6 +1,7 @@
>>> // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>>> /* Copyright (c) 2020 Mellanox Technologies Ltd. */
>>> +#include <linux/iova.h>
>>> #include <linux/mlx5/driver.h>
>>> #include "mlx5_vdpa.h"
>>> @@ -221,6 +222,28 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *m
>>> return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in);
>>> }
>>> +static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
>>> +{
>>> + int err;
>>> +
>>> + mvdev->cvq.iotlb = vhost_iotlb_alloc(0, 0);
>>> + if (!mvdev->cvq.iotlb)
>>> + return -ENOMEM;
>>> +
>>> + vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
>>> + err = iova_cache_get();
>>
>> Any reason for using iova cache here?
> Isn't it required? Aren't we allocating buffers for the CVQ from
> iommu_iova kmem cache?
I may be missing something here, but which buffer are you referring to?
>
>>
>>> + if (err)
>>> + vhost_iotlb_free(mvdev->cvq.iotlb);
>>> +
>>> + return err;
>>> +}
>>> +
>>> +static void cleanup_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
>>> +{
>>> + iova_cache_put();
>>> + vhost_iotlb_free(mvdev->cvq.iotlb);
>>> +}
>>> +
>>> int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
>>> {
>>> u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset);
>>> @@ -260,10 +283,17 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
>>> err = -ENOMEM;
>>> goto err_key;
>>> }
>>> +
>>> + err = init_ctrl_vq(mvdev);
>>> + if (err)
>>> + goto err_ctrl;
>>> +
>>> res->valid = true;
>>> return 0;
>>> +err_ctrl:
>>> + iounmap(res->kick_addr);
>>> err_key:
>>> dealloc_pd(mvdev, res->pdn, res->uid);
>>> err_pd:
>>> @@ -282,6 +312,7 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
>>> if (!res->valid)
>>> return;
>>> + cleanup_ctrl_vq(mvdev);
>>> iounmap(res->kick_addr);
>>> res->kick_addr = NULL;
>>> dealloc_pd(mvdev, res->pdn, res->uid);
>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> index 0fe7cd370e4b..e18665781135 100644
>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> @@ -133,7 +133,7 @@ struct mlx5_vdpa_virtqueue {
>>> /* We will remove this limitation once mlx5_vdpa_alloc_resources()
>>> * provides for driver space allocation
>>> */
>>> -#define MLX5_MAX_SUPPORTED_VQS 16
>>> +#define MLX5_MAX_SUPPORTED_VQS 2
>>> static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
>>> {
>>> @@ -160,6 +160,7 @@ struct mlx5_vdpa_net {
>>> struct mlx5_flow_handle *rx_rule;
>>> bool setup;
>>> u16 mtu;
>>> + u32 cur_num_vqs;
>>> };
>>> static void free_resources(struct mlx5_vdpa_net *ndev);
>>> @@ -169,6 +170,8 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev);
>>> static bool mlx5_vdpa_debug;
>>> +#define MLX5_CVQ_MAX_ENT 16
>>> +
>>> #define MLX5_LOG_VIO_FLAG(_feature) \
>>> do { \
>>> if (features & BIT_ULL(_feature)) \
>>> @@ -186,6 +189,16 @@ static inline u32 mlx5_vdpa_max_qps(int max_vqs)
>>> return max_vqs / 2;
>>> }
>>> +static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
>>> +{
>>> + return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
>>> +}
>>> +
>>> +static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
>>> +{
>>> + return idx == ctrl_vq_idx(mvdev);
>>> +}
>>> +
>>> static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
>>> {
>>> if (status & ~VALID_STATUS_MASK)
>>> @@ -1359,15 +1372,132 @@ static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
>>> ndev->rx_rule = NULL;
>>> }
>>> +virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
>>> +{
>>> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> + struct mlx5_control_vq *cvq = &mvdev->cvq;
>>> + virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
>>> + struct mlx5_core_dev *pfmdev;
>>> + size_t read;
>>> + u8 mac[ETH_ALEN];
>>> +
>>> + pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
>>> + switch (cmd) {
>>> + case VIRTIO_NET_CTRL_MAC_ADDR_SET:
>>> + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
>>> + if (read != ETH_ALEN)
>>> + break;
>>> +
>>> + if (!memcmp(ndev->config.mac, mac, 6)) {
>>> + status = VIRTIO_NET_OK;
>>> + break;
>>> + }
>>> +
>>> + if (!is_zero_ether_addr(ndev->config.mac)) {
>>> + if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
>>> + mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
>>> + ndev->config.mac);
>>> + break;
>>> + }
>>> + }
>>> +
>>> + if (mlx5_mpfs_add_mac(pfmdev, mac)) {
>>> + mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
>>> + mac);
>>> + break;
>>> + }
>>> +
>>> + memcpy(ndev->config.mac, mac, ETH_ALEN);
>>> + status = VIRTIO_NET_OK;
>>> + break;
>>> +
>>> + default:
>>> + break;
>>> + }
>>> +
>>> + return status;
>>> +}
>>> +
>>> +static void mlx5_cvq_kick_handler(struct work_struct *work)
>>> +{
>>> + virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
>>> + struct virtio_net_ctrl_hdr ctrl;
>>> + struct mlx5_ctrl_wq_ent *wqent;
>>> + struct mlx5_vdpa_dev *mvdev;
>>> + struct mlx5_control_vq *cvq;
>>> + struct mlx5_vdpa_net *ndev;
>>> + size_t read, write;
>>> + int err;
>>> +
>>> + wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
>>> + mvdev = wqent->mvdev;
>>> + ndev = to_mlx5_vdpa_ndev(mvdev);
>>> + cvq = &mvdev->cvq;
>>> + if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
>>> + goto out;
>>> +
>>> + if (!cvq->ready)
>>> + goto out;
>>> +
>>> + while (true) {
>>> + err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
>>> + GFP_ATOMIC);
>>> + if (err <= 0)
>>> + break;
>>> +
>>> + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
>>> + if (read != sizeof(ctrl))
>>> + break;
>>> +
>>> + switch (ctrl.class) {
>>> + case VIRTIO_NET_CTRL_MAC:
>>> + status = handle_ctrl_mac(mvdev, ctrl.cmd);
>>> + break;
>>> +
>>> + default:
>>> + break;
>>> + }
>>> +
>>> + /* Make sure data is written before advancing index */
>>> + smp_wmb();
>>> +
>>> + write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
>>> + vringh_complete_iotlb(&cvq->vring, cvq->head, write);
>>> + vringh_kiov_cleanup(&cvq->riov);
>>> + vringh_kiov_cleanup(&cvq->wiov);
>>> +
>>> + if (vringh_need_notify_iotlb(&cvq->vring))
>>> + vringh_notify(&cvq->vring);
>>> + }
>>> +out:
>>> + kfree(wqent);
>>> +}
>>> +
>>> static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
>>> {
>>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>>> + struct mlx5_vdpa_virtqueue *mvq;
>>> + struct mlx5_ctrl_wq_ent *wqent;
>>> if (!is_index_valid(mvdev, idx))
>>> return;
>>> + if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
>>> + if (!mvdev->cvq.ready)
>>> + return;
>>> +
>>> + wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
>>> + if (!wqent)
>>> + return;
>>> +
>>> + wqent->mvdev = mvdev;
>>> + INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
>>> + queue_work(mvdev->wq, &wqent->work);
>>> + return;
>>> + }
>>> +
>>> + mvq = &ndev->vqs[idx];
>>> if (unlikely(!mvq->ready))
>>> return;
>>> @@ -1379,11 +1509,19 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_
>>> {
>>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>>> + struct mlx5_vdpa_virtqueue *mvq;
>>> if (!is_index_valid(mvdev, idx))
>>> return -EINVAL;
>>> + if (is_ctrl_vq_idx(mvdev, idx)) {
>>> + mvdev->cvq.desc_addr = desc_area;
>>> + mvdev->cvq.device_addr = device_area;
>>> + mvdev->cvq.driver_addr = driver_area;
>>> + return 0;
>>> + }
>>> +
>>> + mvq = &ndev->vqs[idx];
>>> mvq->desc_addr = desc_area;
>>> mvq->device_addr = device_area;
>>> mvq->driver_addr = driver_area;
>>> @@ -1396,7 +1534,7 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
>>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> struct mlx5_vdpa_virtqueue *mvq;
>>> - if (!is_index_valid(mvdev, idx))
>>> + if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
>>> return;
>>> mvq = &ndev->vqs[idx];
>>> @@ -1411,15 +1549,42 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c
>>> ndev->event_cbs[idx] = *cb;
>>> }
>>> +static void mlx5_cvq_notify(struct vringh *vring)
>>> +{
>>> + struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
>>> +
>>> + if (!cvq->event_cb.callback)
>>> + return;
>>> +
>>> + cvq->event_cb.callback(cvq->event_cb.private);
>>> +}
>>> +
>>> +static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
>>> +{
>>> + struct mlx5_control_vq *cvq = &mvdev->cvq;
>>> +
>>> + cvq->ready = ready;
>>> + if (!ready)
>>> + return;
>>> +
>>> + cvq->vring.notify = mlx5_cvq_notify;
>>> +}
>>> +
>>> static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
>>> {
>>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>>> + struct mlx5_vdpa_virtqueue *mvq;
>>> if (!is_index_valid(mvdev, idx))
>>> return;
>>> + if (is_ctrl_vq_idx(mvdev, idx)) {
>>> + set_cvq_ready(mvdev, ready);
>>> + return;
>>> + }
>>> +
>>> + mvq = &ndev->vqs[idx];
>>> if (!ready)
>>> suspend_vq(ndev, mvq);
>>> @@ -1430,12 +1595,14 @@ static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
>>> {
>>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>>> if (!is_index_valid(mvdev, idx))
>>> return false;
>>> - return mvq->ready;
>>> + if (is_ctrl_vq_idx(mvdev, idx))
>>> + return mvdev->cvq.ready;
>>> +
>>> + return ndev->vqs[idx].ready;
>>> }
>>> static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
>>> @@ -1443,11 +1610,17 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
>>> {
>>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>>> + struct mlx5_vdpa_virtqueue *mvq;
>>> if (!is_index_valid(mvdev, idx))
>>> return -EINVAL;
>>> + if (is_ctrl_vq_idx(mvdev, idx)) {
>>> + mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
>>
>> Question, is packed virtqueue supported by current mlx5e?
>>
>> If no, this is fine.
> We don't. The hardware might support it, but the device driver does not
> advertise packed virtqueue support.
Good to know this. So we're fine.
Thanks
>
>> If yes, we should disable packed and re-enable it after vringh supports
>> packed virtqueue.
>>
>> Other looks good.
>>
>> Thanks
>>
>>
>>> + return 0;
>>> + }
>>> +
>>> + mvq = &ndev->vqs[idx];
>>> if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
>>> mlx5_vdpa_warn(mvdev, "can't modify available index\n");
>>> return -EINVAL;
>>> @@ -1462,13 +1635,19 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
>>> {
>>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>>> + struct mlx5_vdpa_virtqueue *mvq;
>>> struct mlx5_virtq_attr attr;
>>> int err;
>>> if (!is_index_valid(mvdev, idx))
>>> return -EINVAL;
>>> + if (is_ctrl_vq_idx(mvdev, idx)) {
>>> + state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
>>> + return 0;
>>> + }
>>> +
>>> + mvq = &ndev->vqs[idx];
>>> /* If the virtq object was destroyed, use the value saved at
>>> * the last minute of suspend_vq. This caters for userspace
>>> * that cares about emulating the index after vq is stopped.
>>> @@ -1525,10 +1704,13 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
>>> u16 dev_features;
>>> dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
>>> - ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
>>> + ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
>>> if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
>>> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
>>> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
>>> + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
>>> + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
>>> +
>>> print_features(mvdev, ndev->mvdev.mlx_features, false);
>>> return ndev->mvdev.mlx_features;
>>> }
>>> @@ -1544,6 +1726,7 @@ static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
>>> static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
>>> {
>>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> + struct mlx5_control_vq *cvq = &mvdev->cvq;
>>> int err;
>>> int i;
>>> @@ -1553,6 +1736,16 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
>>> goto err_vq;
>>> }
>>> + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
>>> + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
>>> + MLX5_CVQ_MAX_ENT, false,
>>> + (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>>> + (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>>> + (struct vring_used *)(uintptr_t)cvq->device_addr);
>>> + if (err)
>>> + goto err_vq;
>>> + }
>>> +
>>> return 0;
>>> err_vq:
>>> @@ -1937,7 +2130,7 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
>>> struct mlx5_vdpa_net *ndev;
>>> phys_addr_t addr;
>>> - if (!is_index_valid(mvdev, idx))
>>> + if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
>>> return ret;
>>> /* If SF BAR size is smaller than PAGE_SIZE, do not use direct
>>> @@ -2114,8 +2307,11 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
>>> err = mlx5_mpfs_add_mac(pfmdev, config->mac);
>>> if (err)
>>> goto err_mtu;
>>> +
>>> + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
>>> }
>>> + config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
>>> mvdev->vdev.dma_dev = &mdev->pdev->dev;
>>> err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
>>> if (err)
>>> @@ -2131,8 +2327,15 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
>>> if (err)
>>> goto err_mr;
>>> + mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
>>> + if (!mvdev->wq) {
>>> + err = -ENOMEM;
>>> + goto err_res2;
>>> + }
>>> +
>>> + ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
>>> mvdev->vdev.mdev = &mgtdev->mgtdev;
>>> - err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
>>> + err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
>>> if (err)
>>> goto err_reg;
>>> @@ -2140,6 +2343,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
>>> return 0;
>>> err_reg:
>>> + destroy_workqueue(mvdev->wq);
>>> +err_res2:
>>> free_resources(ndev);
>>> err_mr:
>>> mlx5_vdpa_destroy_mr(mvdev);
>>> @@ -2157,7 +2362,9 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
>>> static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
>>> {
>>> struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
>>> + struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
>>> + destroy_workqueue(mvdev->wq);
>>> _vdpa_unregister_device(dev);
>>> mgtdev->ndev = NULL;
>>> }
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 6/6] vdpa/mlx5: Add multiqueue support
[not found] ` <20210819061547.GD161591@mtl-vdi-166.wap.labs.mlnx>
@ 2021-08-19 7:12 ` Jason Wang
0 siblings, 0 replies; 10+ messages in thread
From: Jason Wang @ 2021-08-19 7:12 UTC (permalink / raw)
To: Eli Cohen; +Cc: eperezma, virtualization, mst
在 2021/8/19 下午2:15, Eli Cohen 写道:
> On Thu, Aug 19, 2021 at 12:07:03PM +0800, Jason Wang wrote:
>> 在 2021/8/17 下午2:02, Eli Cohen 写道:
>>> Multiqueue support requires additional virtio_net_q objects to be added
>>> or removed per the configured number of queue pairs. In addition the RQ
>>> table needs to be modified to match the number of configured receive
>>> queues so the packets are dispatched to the right virtqueue according to
>>> the hash result.
>>>
>>> Note: qemu v6.0.0 is broken when the device requests more than two data
>>> queues; no net device will be created for the vdpa device. To avoid
>>> this, one should specify mq=off to qemu. In this case it will end up
>>> with a single queue.
>>>
>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>> ---
>>> drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 +
>>> drivers/vdpa/mlx5/core/resources.c | 10 ++
>>> drivers/vdpa/mlx5/net/mlx5_vnet.c | 189 ++++++++++++++++++++++++-----
>>> 3 files changed, 169 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>> index 6c43476a69cb..01a848adf590 100644
>>> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>> @@ -91,6 +91,7 @@ int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey);
>>> int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn);
>>> void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn);
>>> int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn);
>>> +int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn);
>>> void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn);
>>> int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn);
>>> void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn);
>>> diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
>>> index d24ae1a85159..bbdcf9a01a6d 100644
>>> --- a/drivers/vdpa/mlx5/core/resources.c
>>> +++ b/drivers/vdpa/mlx5/core/resources.c
>>> @@ -129,6 +129,16 @@ int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *
>>> return err;
>>> }
>>> +int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn)
>>> +{
>>> + u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {};
>>> +
>>> + MLX5_SET(modify_rqt_in, in, uid, mvdev->res.uid);
>>> + MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
>>> + MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
>>> + return mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out));
>>> +}
>>> +
>>> void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn)
>>> {
>>> u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> index e18665781135..9cff3a49552f 100644
>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> @@ -133,7 +133,7 @@ struct mlx5_vdpa_virtqueue {
>>> /* We will remove this limitation once mlx5_vdpa_alloc_resources()
>>> * provides for driver space allocation
>>> */
>>> -#define MLX5_MAX_SUPPORTED_VQS 2
>>> +#define MLX5_MAX_SUPPORTED_VQS 16
>>
>> I wonder if we can keep this unchanged, since the previous patch changed it from
>> 16 to 2.
>>
> I prefer to leave it like this since in the previous patch I introduced
> control VQ but not MQ so I preferred to have only 2 data queues.
Ok. That's fine.
Thanks
>
>> Other than this.
>>
>> Acked-by: Jason Wang <jasowang@redhat.com>
>>
>>
>>> static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
>>> {
>>> @@ -184,6 +184,23 @@ static bool mlx5_vdpa_debug;
>>> mlx5_vdpa_info(mvdev, "%s\n", #_status); \
>>> } while (0)
>>> +/* TODO: cross-endian support */
>>> +static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
>>> +{
>>> + return virtio_legacy_is_little_endian() ||
>>> + (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
>>> +}
>>> +
>>> +static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
>>> +{
>>> + return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
>>> +}
>>> +
>>> +static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
>>> +{
>>> + return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
>>> +}
>>> +
>>> static inline u32 mlx5_vdpa_max_qps(int max_vqs)
>>> {
>>> return max_vqs / 2;
>>> @@ -191,6 +208,9 @@ static inline u32 mlx5_vdpa_max_qps(int max_vqs)
>>> static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
>>> {
>>> + if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
>>> + return 2;
>>> +
>>> return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
>>> }
>>> @@ -1127,10 +1147,8 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
>>> if (!mvq->num_ent)
>>> return 0;
>>> - if (mvq->initialized) {
>>> - mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
>>> - return -EINVAL;
>>> - }
>>> + if (mvq->initialized)
>>> + return 0;
>>> err = cq_create(ndev, idx, mvq->num_ent);
>>> if (err)
>>> @@ -1217,19 +1235,20 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
>>> static int create_rqt(struct mlx5_vdpa_net *ndev)
>>> {
>>> - int log_max_rqt;
>>> __be32 *list;
>>> + int max_rqt;
>>> void *rqtc;
>>> int inlen;
>>> void *in;
>>> int i, j;
>>> int err;
>>> - log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
>>> - if (log_max_rqt < 1)
>>> + max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
>>> + 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
>>> + if (max_rqt < 1)
>>> return -EOPNOTSUPP;
>>> - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
>>> + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
>>> in = kzalloc(inlen, GFP_KERNEL);
>>> if (!in)
>>> return -ENOMEM;
>>> @@ -1238,10 +1257,9 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>>> rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
>>> MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
>>> - MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
>>> - MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
>>> + MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
>>> list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
>>> - for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
>>> + for (i = 0, j = 0; j < max_rqt; j++) {
>>> if (!ndev->vqs[j].initialized)
>>> continue;
>>> @@ -1250,6 +1268,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>>> i++;
>>> }
>>> }
>>> + MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
>>> err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
>>> kfree(in);
>>> @@ -1259,6 +1278,52 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>>> return 0;
>>> }
>>> +#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
>>> +
>>> +int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
>>> +{
>>> + __be32 *list;
>>> + int max_rqt;
>>> + void *rqtc;
>>> + int inlen;
>>> + void *in;
>>> + int i, j;
>>> + int err;
>>> +
>>> + max_rqt = min_t(int, ndev->cur_num_vqs / 2,
>>> + 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
>>> + if (max_rqt < 1)
>>> + return -EOPNOTSUPP;
>>> +
>>> + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
>>> + in = kzalloc(inlen, GFP_KERNEL);
>>> + if (!in)
>>> + return -ENOMEM;
>>> +
>>> + MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
>>> + MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
>>> + rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
>>> + MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
>>> +
>>> + list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
>>> + for (i = 0, j = 0; j < num; j++) {
>>> + if (!ndev->vqs[j].initialized)
>>> + continue;
>>> +
>>> + if (!vq_is_tx(ndev->vqs[j].index)) {
>>> + list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
>>> + i++;
>>> + }
>>> + }
>>> + MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
>>> + err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
>>> + kfree(in);
>>> + if (err)
>>> + return err;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static void destroy_rqt(struct mlx5_vdpa_net *ndev)
>>> {
>>> mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
>>> @@ -1418,6 +1483,77 @@ virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
>>> return status;
>>> }
>>> +static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
>>> +{
>>> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> + int cur_qps = ndev->cur_num_vqs / 2;
>>> + int err;
>>> + int i;
>>> +
>>> + if (cur_qps > newqps) {
>>> + err = modify_rqt(ndev, 2 * newqps);
>>> + if (err)
>>> + return err;
>>> +
>>> + for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
>>> + teardown_vq(ndev, &ndev->vqs[i]);
>>> +
>>> + ndev->cur_num_vqs = 2 * newqps;
>>> + } else {
>>> + ndev->cur_num_vqs = 2 * newqps;
>>> + for (i = cur_qps * 2; i < 2 * newqps; i++) {
>>> + err = setup_vq(ndev, &ndev->vqs[i]);
>>> + if (err)
>>> + goto clean_added;
>>> + }
>>> + err = modify_rqt(ndev, 2 * newqps);
>>> + if (err)
>>> + goto clean_added;
>>> + }
>>> + return 0;
>>> +
>>> +clean_added:
>>> + for (--i; i >= cur_qps; --i)
>>> + teardown_vq(ndev, &ndev->vqs[i]);
>>> +
>>> + return err;
>>> +}
>>> +
>>> +virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
>>> +{
>>> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> + virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
>>> + struct mlx5_control_vq *cvq = &mvdev->cvq;
>>> + struct virtio_net_ctrl_mq mq;
>>> + size_t read;
>>> + u16 newqps;
>>> +
>>> + switch (cmd) {
>>> + case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
>>> + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
>>> + if (read != sizeof(mq))
>>> + break;
>>> +
>>> + newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
>>> + if (ndev->cur_num_vqs == 2 * newqps) {
>>> + status = VIRTIO_NET_OK;
>>> + break;
>>> + }
>>> +
>>> + if (newqps & (newqps - 1))
>>> + break;
>>> +
>>> + if (!change_num_qps(mvdev, newqps))
>>> + status = VIRTIO_NET_OK;
>>> +
>>> + break;
>>> + default:
>>> + break;
>>> + }
>>> +
>>> + return status;
>>> +}
>>> +
>>> static void mlx5_cvq_kick_handler(struct work_struct *work)
>>> {
>>> virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
>>> @@ -1453,6 +1589,9 @@ static void mlx5_cvq_kick_handler(struct work_struct *work)
>>> case VIRTIO_NET_CTRL_MAC:
>>> status = handle_ctrl_mac(mvdev, ctrl.cmd);
>>> break;
>>> + case VIRTIO_NET_CTRL_MQ:
>>> + status = handle_ctrl_mq(mvdev, ctrl.cmd);
>>> + break;
>>> default:
>>> break;
>>> @@ -1710,6 +1849,7 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
>>> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
>>> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
>>> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
>>> + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
>>> print_features(mvdev, ndev->mvdev.mlx_features, false);
>>> return ndev->mvdev.mlx_features;
>>> @@ -1769,18 +1909,6 @@ static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
>>> }
>>> }
>>> -/* TODO: cross-endian support */
>>> -static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
>>> -{
>>> - return virtio_legacy_is_little_endian() ||
>>> - (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
>>> -}
>>> -
>>> -static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
>>> -{
>>> - return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
>>> -}
>>> -
>>> static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
>>> {
>>> if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
>>> @@ -1846,15 +1974,14 @@ static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
>>> static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
>>> {
>>> struct mlx5_vq_restore_info *ri = &mvq->ri;
>>> - struct mlx5_virtq_attr attr;
>>> + struct mlx5_virtq_attr attr = {};
>>> int err;
>>> - if (!mvq->initialized)
>>> - return 0;
>>> -
>>> - err = query_virtqueue(ndev, mvq, &attr);
>>> - if (err)
>>> - return err;
>>> + if (mvq->initialized) {
>>> + err = query_virtqueue(ndev, mvq, &attr);
>>> + if (err)
>>> + return err;
>>> + }
>>> ri->avail_index = attr.available_index;
>>> ri->used_index = attr.used_index;
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 5/6] vdpa/mlx5: Add support for control VQ and MAC setting
[not found] ` <20210819072145.GA166182@mtl-vdi-166.wap.labs.mlnx>
@ 2021-08-19 7:37 ` Jason Wang
[not found] ` <20210819072438.GB166182@mtl-vdi-166.wap.labs.mlnx>
1 sibling, 0 replies; 10+ messages in thread
From: Jason Wang @ 2021-08-19 7:37 UTC (permalink / raw)
To: Eli Cohen; +Cc: eperezma, virtualization, mst
On Thu, Aug 19, 2021 at 3:22 PM Eli Cohen <elic@nvidia.com> wrote:
>
> On Thu, Aug 19, 2021 at 03:12:14PM +0800, Jason Wang wrote:
> >
> > 在 2021/8/19 下午2:06, Eli Cohen 写道:
> > > On Thu, Aug 19, 2021 at 12:04:10PM +0800, Jason Wang wrote:
> > > > 在 2021/8/17 下午2:02, Eli Cohen 写道:
> > > > > Add support to handle control virtqueue configurations per virtio
> > > > > specification. The control virtqueue is implemented in software and no
> > > > > hardware offloading is involved.
> > > > >
> > > > > Control VQ configuration needs task context, therefore all configurations
> > > > > are handled in a workqueue created for the purpose.
> > > >
> > > > I think all the current callers are already in the task context (the
> > > > caller of virtnet_send_command()).
> > > >
> > > > Any reason for using workqueue here?
> > > >
> > > I am running code that might sleep and the call has, IIRC, irqs disabled. The
> > > kernel complained about this.
> >
> >
> > I see.
> >
> >
> > >
> > > > I'm not sure if it can work well on UP where the workqueue might not have a
> > > > chance to be scheduled (we are doing busy waiting here):
> > > >
> > > > /* Spin for a response, the kick causes an ioport write, trapping
> > > > * into the hypervisor, so the request should be handled
> > > > immediately.
> > > > */
> > > > while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > !virtqueue_is_broken(vi->cvq))
> > > > cpu_relax();
> > > >
> > > I think we have two different cases here:
> > > 1. When the net device is running in a VM. In this case we do not have
> > > any issue since the loop is running at the guest kernel and the work is
> > > done at the host kernel and would end at a finite time.
> > >
> > > 2. When we're using virtio_vdpa and the device is on the host we have an
> > > issue if we're using UP processor. Maybe we should avoid supporting CVQ
> > > in this case?
> >
> >
> > Maybe we can switch to use interrupt and sleep here, will have a look.
> >
>
> Won't it hurt latency?
I'm not sure, but usually we don't care about the performance of control vq.
Thanks
>
> >
> >
> > >
> > > > > Modifications are made to the memory registration code to allow for
> > > > > saving a copy of iotlb to be used by the control VQ to access the vring.
> > > > >
> > > > > The max number of data virtqueues supported by the driver has been
> > > > > updated to 2 since multiqueue is not supported at this stage and we need
> > > > > to ensure consistency of VQ indices mapping to either data or control
> > > > > VQ.
> > > > >
> > > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > > ---
> > > > > drivers/vdpa/mlx5/core/mlx5_vdpa.h | 23 +++
> > > > > drivers/vdpa/mlx5/core/mr.c | 81 +++++++---
> > > > > drivers/vdpa/mlx5/core/resources.c | 31 ++++
> > > > > drivers/vdpa/mlx5/net/mlx5_vnet.c | 231 +++++++++++++++++++++++++++--
> > > > > 4 files changed, 334 insertions(+), 32 deletions(-)
> > > > >
> > > > > diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > index 41b20855ed31..6c43476a69cb 100644
> > > > > --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > @@ -5,6 +5,7 @@
> > > > > #define __MLX5_VDPA_H__
> > > > > #include <linux/etherdevice.h>
> > > > > +#include <linux/vringh.h>
> > > > > #include <linux/vdpa.h>
> > > > > #include <linux/mlx5/driver.h>
> > > > > @@ -47,6 +48,26 @@ struct mlx5_vdpa_resources {
> > > > > bool valid;
> > > > > };
> > > > > +struct mlx5_control_vq {
> > > > > + struct vhost_iotlb *iotlb;
> > > > > + /* spinlock to synchronize iommu table */
> > > > > + spinlock_t iommu_lock;
> > > > > + struct vringh vring;
> > > > > + bool ready;
> > > > > + u64 desc_addr;
> > > > > + u64 device_addr;
> > > > > + u64 driver_addr;
> > > > > + struct vdpa_callback event_cb;
> > > > > + struct vringh_kiov riov;
> > > > > + struct vringh_kiov wiov;
> > > > > + unsigned short head;
> > > > > +};
> > > > > +
> > > > > +struct mlx5_ctrl_wq_ent {
> > > > > + struct work_struct work;
> > > > > + struct mlx5_vdpa_dev *mvdev;
> > > > > +};
> > > > > +
> > > > > struct mlx5_vdpa_dev {
> > > > > struct vdpa_device vdev;
> > > > > struct mlx5_core_dev *mdev;
> > > > > @@ -60,6 +81,8 @@ struct mlx5_vdpa_dev {
> > > > > u32 generation;
> > > > > struct mlx5_vdpa_mr mr;
> > > > > + struct mlx5_control_vq cvq;
> > > > > + struct workqueue_struct *wq;
> > > > > };
> > > > > int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
> > > > > diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> > > > > index e59135fa867e..da013b8082bc 100644
> > > > > --- a/drivers/vdpa/mlx5/core/mr.c
> > > > > +++ b/drivers/vdpa/mlx5/core/mr.c
> > > > > @@ -1,6 +1,7 @@
> > > > > // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> > > > > /* Copyright (c) 2020 Mellanox Technologies Ltd. */
> > > > > +#include <linux/vhost_types.h>
> > > > > #include <linux/vdpa.h>
> > > > > #include <linux/gcd.h>
> > > > > #include <linux/string.h>
> > > > > @@ -451,33 +452,30 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
> > > > > mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
> > > > > }
> > > > > -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> > > > > +static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
> > > > > {
> > > > > - struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > > + struct vhost_iotlb_map *map;
> > > > > + u64 start = 0ULL, last = 0ULL - 1;
> > > > > int err;
> > > > > - if (mr->initialized)
> > > > > - return 0;
> > > > > -
> > > > > - if (iotlb)
> > > > > - err = create_user_mr(mvdev, iotlb);
> > > > > - else
> > > > > - err = create_dma_mr(mvdev, mr);
> > > > > -
> > > > > - if (!err)
> > > > > - mr->initialized = true;
> > > > > + if (!src) {
> > > > > + err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
> > > > > + return err;
> > > > > + }
> > > > > - return err;
> > > > > + for (map = vhost_iotlb_itree_first(src, start, last); map;
> > > > > + map = vhost_iotlb_itree_next(map, start, last)) {
> > > > > + err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
> > > > > + map->addr, map->perm);
> > > > > + if (err)
> > > > > + return err;
> > > > > + }
> > > > > + return 0;
> > > > > }
> > > > > -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> > > > > +static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
> > > > > {
> > > > > - int err;
> > > > > -
> > > > > - mutex_lock(&mvdev->mr.mkey_mtx);
> > > > > - err = _mlx5_vdpa_create_mr(mvdev, iotlb);
> > > > > - mutex_unlock(&mvdev->mr.mkey_mtx);
> > > > > - return err;
> > > > > + vhost_iotlb_del_range(mvdev->cvq.iotlb, 0ULL, 0ULL - 1);
> > > >
> > > > It's better to use ULLONG_MAX.
> > > Will change.
> > >
> > > >
> > > > > }
> > > > > static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
> > > > > @@ -501,6 +499,7 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > > > if (!mr->initialized)
> > > > > goto out;
> > > > > + prune_iotlb(mvdev);
> > > > > if (mr->user_mr)
> > > > > destroy_user_mr(mvdev, mr);
> > > > > else
> > > > > @@ -512,6 +511,48 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > > > mutex_unlock(&mr->mkey_mtx);
> > > > > }
> > > > > +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> > > > > +{
> > > > > + struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > > + int err;
> > > > > +
> > > > > + if (mr->initialized)
> > > > > + return 0;
> > > > > +
> > > > > + if (iotlb)
> > > > > + err = create_user_mr(mvdev, iotlb);
> > > > > + else
> > > > > + err = create_dma_mr(mvdev, mr);
> > > > > +
> > > > > + if (err)
> > > > > + return err;
> > > > > +
> > > > > + err = dup_iotlb(mvdev, iotlb);
> > > > > + if (err)
> > > > > + goto out_err;
> > > > > +
> > > > > + mr->initialized = true;
> > > > > + return 0;
> > > > > +
> > > > > +out_err:
> > > > > + if (iotlb)
> > > > > + destroy_user_mr(mvdev, mr);
> > > > > + else
> > > > > + destroy_dma_mr(mvdev, mr);
> > > > > +
> > > > > + return err;
> > > > > +}
> > > > > +
> > > > > +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> > > > > +{
> > > > > + int err;
> > > > > +
> > > > > + mutex_lock(&mvdev->mr.mkey_mtx);
> > > > > + err = _mlx5_vdpa_create_mr(mvdev, iotlb);
> > > > > + mutex_unlock(&mvdev->mr.mkey_mtx);
> > > > > + return err;
> > > > > +}
> > > > > +
> > > > > int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
> > > > > bool *change_map)
> > > > > {
> > > > > diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
> > > > > index d4606213f88a..d24ae1a85159 100644
> > > > > --- a/drivers/vdpa/mlx5/core/resources.c
> > > > > +++ b/drivers/vdpa/mlx5/core/resources.c
> > > > > @@ -1,6 +1,7 @@
> > > > > // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> > > > > /* Copyright (c) 2020 Mellanox Technologies Ltd. */
> > > > > +#include <linux/iova.h>
> > > > > #include <linux/mlx5/driver.h>
> > > > > #include "mlx5_vdpa.h"
> > > > > @@ -221,6 +222,28 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *m
> > > > > return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in);
> > > > > }
> > > > > +static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
> > > > > +{
> > > > > + int err;
> > > > > +
> > > > > + mvdev->cvq.iotlb = vhost_iotlb_alloc(0, 0);
> > > > > + if (!mvdev->cvq.iotlb)
> > > > > + return -ENOMEM;
> > > > > +
> > > > > + vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
> > > > > + err = iova_cache_get();
> > > >
> > > > Any reason for using iova cache here?
> > > Isn't it required? Aren't we allocating buffers for the CVQ from
> > > iommu_iova kmem cache?
> >
> >
> > I may be missing something here, but which buffer are you referring to?
> >
>
> Aren't the data buffers for the control VQ allocated from this cache?
>
> >
> > >
> > > >
> > > > > + if (err)
> > > > > + vhost_iotlb_free(mvdev->cvq.iotlb);
> > > > > +
> > > > > + return err;
> > > > > +}
> > > > > +
> > > > > +static void cleanup_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
> > > > > +{
> > > > > + iova_cache_put();
> > > > > + vhost_iotlb_free(mvdev->cvq.iotlb);
> > > > > +}
> > > > > +
> > > > > int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
> > > > > {
> > > > > u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset);
> > > > > @@ -260,10 +283,17 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
> > > > > err = -ENOMEM;
> > > > > goto err_key;
> > > > > }
> > > > > +
> > > > > + err = init_ctrl_vq(mvdev);
> > > > > + if (err)
> > > > > + goto err_ctrl;
> > > > > +
> > > > > res->valid = true;
> > > > > return 0;
> > > > > +err_ctrl:
> > > > > + iounmap(res->kick_addr);
> > > > > err_key:
> > > > > dealloc_pd(mvdev, res->pdn, res->uid);
> > > > > err_pd:
> > > > > @@ -282,6 +312,7 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
> > > > > if (!res->valid)
> > > > > return;
> > > > > + cleanup_ctrl_vq(mvdev);
> > > > > iounmap(res->kick_addr);
> > > > > res->kick_addr = NULL;
> > > > > dealloc_pd(mvdev, res->pdn, res->uid);
> > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > index 0fe7cd370e4b..e18665781135 100644
> > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > @@ -133,7 +133,7 @@ struct mlx5_vdpa_virtqueue {
> > > > > /* We will remove this limitation once mlx5_vdpa_alloc_resources()
> > > > > * provides for driver space allocation
> > > > > */
> > > > > -#define MLX5_MAX_SUPPORTED_VQS 16
> > > > > +#define MLX5_MAX_SUPPORTED_VQS 2
> > > > > static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
> > > > > {
> > > > > @@ -160,6 +160,7 @@ struct mlx5_vdpa_net {
> > > > > struct mlx5_flow_handle *rx_rule;
> > > > > bool setup;
> > > > > u16 mtu;
> > > > > + u32 cur_num_vqs;
> > > > > };
> > > > > static void free_resources(struct mlx5_vdpa_net *ndev);
> > > > > @@ -169,6 +170,8 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev);
> > > > > static bool mlx5_vdpa_debug;
> > > > > +#define MLX5_CVQ_MAX_ENT 16
> > > > > +
> > > > > #define MLX5_LOG_VIO_FLAG(_feature) \
> > > > > do { \
> > > > > if (features & BIT_ULL(_feature)) \
> > > > > @@ -186,6 +189,16 @@ static inline u32 mlx5_vdpa_max_qps(int max_vqs)
> > > > > return max_vqs / 2;
> > > > > }
> > > > > +static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
> > > > > +{
> > > > > + return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
> > > > > +}
> > > > > +
> > > > > +static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
> > > > > +{
> > > > > + return idx == ctrl_vq_idx(mvdev);
> > > > > +}
> > > > > +
> > > > > static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
> > > > > {
> > > > > if (status & ~VALID_STATUS_MASK)
> > > > > @@ -1359,15 +1372,132 @@ static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
> > > > > ndev->rx_rule = NULL;
> > > > > }
> > > > > +virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
> > > > > +{
> > > > > + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > + struct mlx5_control_vq *cvq = &mvdev->cvq;
> > > > > + virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > > > > + struct mlx5_core_dev *pfmdev;
> > > > > + size_t read;
> > > > > + u8 mac[ETH_ALEN];
> > > > > +
> > > > > + pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
> > > > > + switch (cmd) {
> > > > > + case VIRTIO_NET_CTRL_MAC_ADDR_SET:
> > > > > + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
> > > > > + if (read != ETH_ALEN)
> > > > > + break;
> > > > > +
> > > > > + if (!memcmp(ndev->config.mac, mac, 6)) {
> > > > > + status = VIRTIO_NET_OK;
> > > > > + break;
> > > > > + }
> > > > > +
> > > > > + if (!is_zero_ether_addr(ndev->config.mac)) {
> > > > > + if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
> > > > > + mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
> > > > > + ndev->config.mac);
> > > > > + break;
> > > > > + }
> > > > > + }
> > > > > +
> > > > > + if (mlx5_mpfs_add_mac(pfmdev, mac)) {
> > > > > + mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
> > > > > + mac);
> > > > > + break;
> > > > > + }
> > > > > +
> > > > > + memcpy(ndev->config.mac, mac, ETH_ALEN);
> > > > > + status = VIRTIO_NET_OK;
> > > > > + break;
> > > > > +
> > > > > + default:
> > > > > + break;
> > > > > + }
> > > > > +
> > > > > + return status;
> > > > > +}
> > > > > +
> > > > > +static void mlx5_cvq_kick_handler(struct work_struct *work)
> > > > > +{
> > > > > + virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > > > > + struct virtio_net_ctrl_hdr ctrl;
> > > > > + struct mlx5_ctrl_wq_ent *wqent;
> > > > > + struct mlx5_vdpa_dev *mvdev;
> > > > > + struct mlx5_control_vq *cvq;
> > > > > + struct mlx5_vdpa_net *ndev;
> > > > > + size_t read, write;
> > > > > + int err;
> > > > > +
> > > > > + wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
> > > > > + mvdev = wqent->mvdev;
> > > > > + ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > + cvq = &mvdev->cvq;
> > > > > + if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > > + goto out;
> > > > > +
> > > > > + if (!cvq->ready)
> > > > > + goto out;
> > > > > +
> > > > > + while (true) {
> > > > > + err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
> > > > > + GFP_ATOMIC);
> > > > > + if (err <= 0)
> > > > > + break;
> > > > > +
> > > > > + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
> > > > > + if (read != sizeof(ctrl))
> > > > > + break;
> > > > > +
> > > > > + switch (ctrl.class) {
> > > > > + case VIRTIO_NET_CTRL_MAC:
> > > > > + status = handle_ctrl_mac(mvdev, ctrl.cmd);
> > > > > + break;
> > > > > +
> > > > > + default:
> > > > > + break;
> > > > > + }
> > > > > +
> > > > > + /* Make sure data is written before advancing index */
> > > > > + smp_wmb();
> > > > > +
> > > > > + write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
> > > > > + vringh_complete_iotlb(&cvq->vring, cvq->head, write);
> > > > > + vringh_kiov_cleanup(&cvq->riov);
> > > > > + vringh_kiov_cleanup(&cvq->wiov);
> > > > > +
> > > > > + if (vringh_need_notify_iotlb(&cvq->vring))
> > > > > + vringh_notify(&cvq->vring);
> > > > > + }
> > > > > +out:
> > > > > + kfree(wqent);
> > > > > +}
> > > > > +
> > > > > static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
> > > > > {
> > > > > struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > + struct mlx5_vdpa_virtqueue *mvq;
> > > > > + struct mlx5_ctrl_wq_ent *wqent;
> > > > > if (!is_index_valid(mvdev, idx))
> > > > > return;
> > > > > + if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
> > > > > + if (!mvdev->cvq.ready)
> > > > > + return;
> > > > > +
> > > > > + wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
> > > > > + if (!wqent)
> > > > > + return;
> > > > > +
> > > > > + wqent->mvdev = mvdev;
> > > > > + INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
> > > > > + queue_work(mvdev->wq, &wqent->work);
> > > > > + return;
> > > > > + }
> > > > > +
> > > > > + mvq = &ndev->vqs[idx];
> > > > > if (unlikely(!mvq->ready))
> > > > > return;
> > > > > @@ -1379,11 +1509,19 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_
> > > > > {
> > > > > struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > + struct mlx5_vdpa_virtqueue *mvq;
> > > > > if (!is_index_valid(mvdev, idx))
> > > > > return -EINVAL;
> > > > > + if (is_ctrl_vq_idx(mvdev, idx)) {
> > > > > + mvdev->cvq.desc_addr = desc_area;
> > > > > + mvdev->cvq.device_addr = device_area;
> > > > > + mvdev->cvq.driver_addr = driver_area;
> > > > > + return 0;
> > > > > + }
> > > > > +
> > > > > + mvq = &ndev->vqs[idx];
> > > > > mvq->desc_addr = desc_area;
> > > > > mvq->device_addr = device_area;
> > > > > mvq->driver_addr = driver_area;
> > > > > @@ -1396,7 +1534,7 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
> > > > > struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > struct mlx5_vdpa_virtqueue *mvq;
> > > > > - if (!is_index_valid(mvdev, idx))
> > > > > + if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
> > > > > return;
> > > > > mvq = &ndev->vqs[idx];
> > > > > @@ -1411,15 +1549,42 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c
> > > > > ndev->event_cbs[idx] = *cb;
> > > > > }
> > > > > +static void mlx5_cvq_notify(struct vringh *vring)
> > > > > +{
> > > > > + struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
> > > > > +
> > > > > + if (!cvq->event_cb.callback)
> > > > > + return;
> > > > > +
> > > > > + cvq->event_cb.callback(cvq->event_cb.private);
> > > > > +}
> > > > > +
> > > > > +static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
> > > > > +{
> > > > > + struct mlx5_control_vq *cvq = &mvdev->cvq;
> > > > > +
> > > > > + cvq->ready = ready;
> > > > > + if (!ready)
> > > > > + return;
> > > > > +
> > > > > + cvq->vring.notify = mlx5_cvq_notify;
> > > > > +}
> > > > > +
> > > > > static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
> > > > > {
> > > > > struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > + struct mlx5_vdpa_virtqueue *mvq;
> > > > > if (!is_index_valid(mvdev, idx))
> > > > > return;
> > > > > + if (is_ctrl_vq_idx(mvdev, idx)) {
> > > > > + set_cvq_ready(mvdev, ready);
> > > > > + return;
> > > > > + }
> > > > > +
> > > > > + mvq = &ndev->vqs[idx];
> > > > > if (!ready)
> > > > > suspend_vq(ndev, mvq);
> > > > > @@ -1430,12 +1595,14 @@ static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
> > > > > {
> > > > > struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > if (!is_index_valid(mvdev, idx))
> > > > > return false;
> > > > > - return mvq->ready;
> > > > > + if (is_ctrl_vq_idx(mvdev, idx))
> > > > > + return mvdev->cvq.ready;
> > > > > +
> > > > > + return ndev->vqs[idx].ready;
> > > > > }
> > > > > static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
> > > > > @@ -1443,11 +1610,17 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
> > > > > {
> > > > > struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > + struct mlx5_vdpa_virtqueue *mvq;
> > > > > if (!is_index_valid(mvdev, idx))
> > > > > return -EINVAL;
> > > > > + if (is_ctrl_vq_idx(mvdev, idx)) {
> > > > > + mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
> > > >
> > > > Question, is packed virtqueue supported by current mlx5e?
> > > >
> > > > If no, this is fine.
> > > We don't. The hardware might support it, but the device driver does not
> > > advertise packed virtqueue support.
> >
> >
> > Good to know this. So we're fine.
> >
> > Thanks
> >
> >
> > >
> > > > If yes, we should disable packed and re-enable it after vringh supports
> > > > packed virtqueue.
> > > >
> > > > Otherwise looks good.
> > > >
> > > > Thanks
> > > >
> > > >
> > > > > + return 0;
> > > > > + }
> > > > > +
> > > > > + mvq = &ndev->vqs[idx];
> > > > > if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
> > > > > mlx5_vdpa_warn(mvdev, "can't modify available index\n");
> > > > > return -EINVAL;
> > > > > @@ -1462,13 +1635,19 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
> > > > > {
> > > > > struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > > struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > + struct mlx5_vdpa_virtqueue *mvq;
> > > > > struct mlx5_virtq_attr attr;
> > > > > int err;
> > > > > if (!is_index_valid(mvdev, idx))
> > > > > return -EINVAL;
> > > > > + if (is_ctrl_vq_idx(mvdev, idx)) {
> > > > > + state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
> > > > > + return 0;
> > > > > + }
> > > > > +
> > > > > + mvq = &ndev->vqs[idx];
> > > > > /* If the virtq object was destroyed, use the value saved at
> > > > > * the last minute of suspend_vq. This caters for userspace
> > > > > * that cares about emulating the index after vq is stopped.
> > > > > @@ -1525,10 +1704,13 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
> > > > > u16 dev_features;
> > > > > dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
> > > > > - ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
> > > > > + ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
> > > > > if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
> > > > > ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
> > > > > ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
> > > > > + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
> > > > > + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
> > > > > +
> > > > > print_features(mvdev, ndev->mvdev.mlx_features, false);
> > > > > return ndev->mvdev.mlx_features;
> > > > > }
> > > > > @@ -1544,6 +1726,7 @@ static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
> > > > > static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
> > > > > {
> > > > > struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > + struct mlx5_control_vq *cvq = &mvdev->cvq;
> > > > > int err;
> > > > > int i;
> > > > > @@ -1553,6 +1736,16 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
> > > > > goto err_vq;
> > > > > }
> > > > > + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
> > > > > + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
> > > > > + MLX5_CVQ_MAX_ENT, false,
> > > > > + (struct vring_desc *)(uintptr_t)cvq->desc_addr,
> > > > > + (struct vring_avail *)(uintptr_t)cvq->driver_addr,
> > > > > + (struct vring_used *)(uintptr_t)cvq->device_addr);
> > > > > + if (err)
> > > > > + goto err_vq;
> > > > > + }
> > > > > +
> > > > > return 0;
> > > > > err_vq:
> > > > > @@ -1937,7 +2130,7 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
> > > > > struct mlx5_vdpa_net *ndev;
> > > > > phys_addr_t addr;
> > > > > - if (!is_index_valid(mvdev, idx))
> > > > > + if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
> > > > > return ret;
> > > > > /* If SF BAR size is smaller than PAGE_SIZE, do not use direct
> > > > > @@ -2114,8 +2307,11 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> > > > > err = mlx5_mpfs_add_mac(pfmdev, config->mac);
> > > > > if (err)
> > > > > goto err_mtu;
> > > > > +
> > > > > + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
> > > > > }
> > > > > + config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
> > > > > mvdev->vdev.dma_dev = &mdev->pdev->dev;
> > > > > err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
> > > > > if (err)
> > > > > @@ -2131,8 +2327,15 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> > > > > if (err)
> > > > > goto err_mr;
> > > > > + mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
> > > > > + if (!mvdev->wq) {
> > > > > + err = -ENOMEM;
> > > > > + goto err_res2;
> > > > > + }
> > > > > +
> > > > > + ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
> > > > > mvdev->vdev.mdev = &mgtdev->mgtdev;
> > > > > - err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
> > > > > + err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
> > > > > if (err)
> > > > > goto err_reg;
> > > > > @@ -2140,6 +2343,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> > > > > return 0;
> > > > > err_reg:
> > > > > + destroy_workqueue(mvdev->wq);
> > > > > +err_res2:
> > > > > free_resources(ndev);
> > > > > err_mr:
> > > > > mlx5_vdpa_destroy_mr(mvdev);
> > > > > @@ -2157,7 +2362,9 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> > > > > static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
> > > > > {
> > > > > struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
> > > > > + struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
> > > > > + destroy_workqueue(mvdev->wq);
> > > > > _vdpa_unregister_device(dev);
> > > > > mgtdev->ndev = NULL;
> > > > > }
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 5/6] vdpa/mlx5: Add support for control VQ and MAC setting
[not found] ` <20210819072438.GB166182@mtl-vdi-166.wap.labs.mlnx>
@ 2021-08-19 7:41 ` Jason Wang
[not found] ` <20210819074819.GA167253@mtl-vdi-166.wap.labs.mlnx>
0 siblings, 1 reply; 10+ messages in thread
From: Jason Wang @ 2021-08-19 7:41 UTC (permalink / raw)
To: Eli Cohen; +Cc: eperezma, virtualization, mst
在 2021/8/19 下午3:24, Eli Cohen 写道:
>>>>> + vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
>>>>> + err = iova_cache_get();
>>>> Any reason for using iova cache here?
>>> Isn't it required? Aren't we allocating buffers for the CVQ from
>>> iommu_iova kmem cache?
>> I may be missing something here, but which buffer are you referring to?
>>
> Aren't the data buffers for the control VQ allocated from this cache?
Sorry, but do you mean the buffer managed by virtio-net or by mlx5e here?
Thanks
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 5/6] vdpa/mlx5: Add support for control VQ and MAC setting
[not found] ` <20210819081535.GB167253@mtl-vdi-166.wap.labs.mlnx>
@ 2021-08-19 8:19 ` Jason Wang
0 siblings, 0 replies; 10+ messages in thread
From: Jason Wang @ 2021-08-19 8:19 UTC (permalink / raw)
To: Eli Cohen; +Cc: eperezma, virtualization, mst
On Thu, Aug 19, 2021 at 4:15 PM Eli Cohen <elic@nvidia.com> wrote:
>
> On Thu, Aug 19, 2021 at 10:48:19AM +0300, Eli Cohen wrote:
> > On Thu, Aug 19, 2021 at 03:41:52PM +0800, Jason Wang wrote:
> > >
> > > 在 2021/8/19 下午3:24, Eli Cohen 写道:
> > > > > > > > + vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
> > > > > > > > + err = iova_cache_get();
> > > > > > > Any reason for using iova cache here?
> > > > > > Isn't it required? Aren't we allocating buffers for the CVQ from
> > > > > > iommu_iova kmem cache?
> > > > > I may be missing something here, but which buffer are you referring to?
> > > > >
> > > > Aren't the data buffers for the control VQ allocated from this cache?
> > >
> > >
> > > Sorry, but do you mean the buffer managed by virtio-net or by mlx5e here?
> >
> > I assume both directions: virtio_net sends a control message and uses
> > a buffer to put information in. mlx5_vdpa responds and uses a data buffer
> > for the response.
> >
> > Is it not so?
> >
>
> Seems like removing it does not hurt.
> Will remove.
Yes, my understanding is that it is only required if you're using the
iova allocator.
Thanks
>
> > >
> > > Thanks
> > >
> > >
> > > >
> > >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2021-08-19 8:19 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20210817060250.188705-1-elic@nvidia.com>
[not found] ` <20210817060250.188705-3-elic@nvidia.com>
2021-08-19 3:32 ` [PATCH v2 2/6] vdpa/mlx5: function prototype modifications in preparation to control VQ Jason Wang
[not found] ` <20210817060250.188705-4-elic@nvidia.com>
2021-08-19 3:37 ` [PATCH v2 3/6] vdpa/mlx5: Decouple virtqueue callback from struct mlx5_vdpa_virtqueue Jason Wang
[not found] ` <20210817060250.188705-5-elic@nvidia.com>
2021-08-19 3:40 ` [PATCH v2 4/6] vdpa/mlx5: Ensure valid indices are provided Jason Wang
[not found] ` <20210817060250.188705-6-elic@nvidia.com>
2021-08-19 4:04 ` [PATCH v2 5/6] vdpa/mlx5: Add support for control VQ and MAC setting Jason Wang
[not found] ` <20210819060641.GA161591@mtl-vdi-166.wap.labs.mlnx>
2021-08-19 7:12 ` Jason Wang
[not found] ` <20210819072145.GA166182@mtl-vdi-166.wap.labs.mlnx>
2021-08-19 7:37 ` Jason Wang
[not found] ` <20210819072438.GB166182@mtl-vdi-166.wap.labs.mlnx>
2021-08-19 7:41 ` Jason Wang
[not found] ` <20210819074819.GA167253@mtl-vdi-166.wap.labs.mlnx>
[not found] ` <20210819081535.GB167253@mtl-vdi-166.wap.labs.mlnx>
2021-08-19 8:19 ` Jason Wang
[not found] ` <20210817060250.188705-7-elic@nvidia.com>
2021-08-19 4:07 ` [PATCH v2 6/6] vdpa/mlx5: Add multiqueue support Jason Wang
[not found] ` <20210819061547.GD161591@mtl-vdi-166.wap.labs.mlnx>
2021-08-19 7:12 ` Jason Wang
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.