* Re: [PATCH v7 03/14] vdpa: Sync calls set/get config/status with cf_mutex
From: Si-Wei Liu @ 2022-01-07  0:33 UTC
  To: Eli Cohen, mst, jasowang, virtualization; +Cc: lvivier, eperezma



On 1/5/2022 3:46 AM, Eli Cohen wrote:
> Add wrappers to get/set status and protect these operations with
> cf_mutex to serialize these operations with respect to get/set config
> operations.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
>   drivers/vdpa/vdpa.c          | 19 +++++++++++++++++++
>   drivers/vhost/vdpa.c         |  7 +++----
>   drivers/virtio/virtio_vdpa.c |  3 +--
>   include/linux/vdpa.h         |  3 +++
>   4 files changed, 26 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> index 42d71d60d5dc..5134c83c4a22 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -21,6 +21,25 @@ static LIST_HEAD(mdev_head);
>   static DEFINE_MUTEX(vdpa_dev_mutex);
>   static DEFINE_IDA(vdpa_index_ida);
>   
> +u8 vdpa_get_status(struct vdpa_device *vdev)
> +{
> +	u8 status;
> +
> +	mutex_lock(&vdev->cf_mutex);
> +	status = vdev->config->get_status(vdev);
> +	mutex_unlock(&vdev->cf_mutex);
> +	return status;
> +}
> +EXPORT_SYMBOL(vdpa_get_status);
> +
> +void vdpa_set_status(struct vdpa_device *vdev, u8 status)
> +{
> +	mutex_lock(&vdev->cf_mutex);
> +	vdev->config->set_status(vdev, status);
> +	mutex_unlock(&vdev->cf_mutex);
> +}
> +EXPORT_SYMBOL(vdpa_set_status);
> +
>   static struct genl_family vdpa_nl_family;
>   
>   static int vdpa_dev_probe(struct device *d)
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index ebaa373e9b82..d9d499465e2e 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -142,10 +142,9 @@ static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
>   static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
>   {
>   	struct vdpa_device *vdpa = v->vdpa;
> -	const struct vdpa_config_ops *ops = vdpa->config;
>   	u8 status;
>   
> -	status = ops->get_status(vdpa);
> +	status = vdpa_get_status(vdpa);
Not sure why we need to take cf_mutex here. Appears to me only setters 
(set_status and reset) need to take the lock in this function.
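I.e. (just a sketch) the getter could simply keep the direct call it has
today:

    status = ops->get_status(vdpa);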

>   
>   	if (copy_to_user(statusp, &status, sizeof(status)))
>   		return -EFAULT;
> @@ -164,7 +163,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
>   	if (copy_from_user(&status, statusp, sizeof(status)))
>   		return -EFAULT;
>   
> -	status_old = ops->get_status(vdpa);
> +	status_old = vdpa_get_status(vdpa);
Ditto.

>   
>   	/*
>   	 * Userspace shouldn't remove status bits unless reset the
> @@ -182,7 +181,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
>   		if (ret)
>   			return ret;
>   	} else
> -		ops->set_status(vdpa, status);
> +		vdpa_set_status(vdpa, status);
The reset() call in the if branch above needs to take the cf_mutex, the 
same way set_status() does. The reset() is effectively 
set_status(vdpa, 0).
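E.g. a locked variant along these lines (just a sketch; it mirrors the
vdpa_reset() change that appears in patch #5 of this series):

    static inline int vdpa_reset(struct vdpa_device *vdev)
    {
            const struct vdpa_config_ops *ops = vdev->config;
            int ret;

            mutex_lock(&vdev->cf_mutex);
            vdev->features_valid = false;
            ret = ops->reset(vdev);
            mutex_unlock(&vdev->cf_mutex);
            return ret;
    }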

Thanks,
-Siwei

>   
>   	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
>   		for (i = 0; i < nvqs; i++)
> diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
> index a84b04ba3195..76504559bc25 100644
> --- a/drivers/virtio/virtio_vdpa.c
> +++ b/drivers/virtio/virtio_vdpa.c
> @@ -91,9 +91,8 @@ static u8 virtio_vdpa_get_status(struct virtio_device *vdev)
>   static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 status)
>   {
>   	struct vdpa_device *vdpa = vd_get_vdpa(vdev);
> -	const struct vdpa_config_ops *ops = vdpa->config;
>   
> -	return ops->set_status(vdpa, status);
> +	return vdpa_set_status(vdpa, status);
>   }
>   
>   static void virtio_vdpa_reset(struct virtio_device *vdev)
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index 9cc4291a79b3..ae047fae2603 100644
> --- a/include/linux/vdpa.h
> +++ b/include/linux/vdpa.h
> @@ -408,6 +408,9 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
>   		     void *buf, unsigned int len);
>   void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
>   		     const void *buf, unsigned int length);
> +u8 vdpa_get_status(struct vdpa_device *vdev);
> +void vdpa_set_status(struct vdpa_device *vdev, u8 status);
> +
>   /**
>    * struct vdpa_mgmtdev_ops - vdpa device ops
>    * @dev_add: Add a vdpa device using alloc and register


* Re: [PATCH v7 05/14] vdpa: Allow to configure max data virtqueues
From: Si-Wei Liu @ 2022-01-07  1:25 UTC
  To: Eli Cohen, mst, jasowang, virtualization; +Cc: lvivier, eperezma



On 1/5/2022 3:46 AM, Eli Cohen wrote:
> Add netlink support to configure the max number of virtqueue pairs for a device.
> At least one pair is required. The maximum is dictated by the device.
>
> Example:
> $ vdpa dev add name vdpa-a mgmtdev auxiliary/mlx5_core.sf.1 max_vqp 4
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
> v6->v7:
> 1.Serialize set_features and reset using cf_mutex to ensure consistency
> with netlink set/get
>
>   drivers/vdpa/vdpa.c          | 15 +++++++++++++--
>   drivers/vhost/vdpa.c         |  2 +-
>   drivers/virtio/virtio_vdpa.c |  2 +-
>   include/linux/vdpa.h         | 19 ++++++++++++++++---
>   4 files changed, 31 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> index 4494325cae91..96d31b80fdce 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -404,7 +404,7 @@ static void vdpa_get_config_unlocked(struct vdpa_device *vdev,
>   	 * If it does happen we assume a legacy guest.
>   	 */
>   	if (!vdev->features_valid)
> -		vdpa_set_features(vdev, 0);
> +		vdpa_set_features(vdev, 0, true);
Can we do it here with an internal unlocked version, 
vdpa_set_features_unlocked(), that doesn't take the cf_mutex? That way 
the vdpa_set_features() prototype wouldn't have to change for all of its 
API users. It looks to me that the only place needing the unlocked API 
is the vdpa core itself, so the internal helper wouldn't have to be 
exposed to other modules. Is my understanding correct?

In addition, it seems more appropriate to move the vdpa_set_features() 
related changes to a separate patch, like patch #3. It's not obvious to 
me how they are logically connected to the code change for the max_vqp 
feature here (if anything, they may be more relevant to patch #8 of 
this series).

>   	ops->get_config(vdev, offset, buf, len);
>   }
>   
> @@ -581,7 +581,8 @@ vdpa_nl_cmd_mgmtdev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
>   }
>   
>   #define VDPA_DEV_NET_ATTRS_MASK ((1 << VDPA_ATTR_DEV_NET_CFG_MACADDR) | \
> -				 (1 << VDPA_ATTR_DEV_NET_CFG_MTU))
> +				 (1 << VDPA_ATTR_DEV_NET_CFG_MTU) | \
> +				 (1 << VDPA_ATTR_DEV_NET_CFG_MAX_VQP))
>   
>   static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
>   {
> @@ -607,6 +608,16 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i
>   			nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]);
>   		config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MTU);
>   	}
> +	if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]) {
> +		config.net.max_vq_pairs =
> +			nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]);
> +		if (!config.net.max_vq_pairs) {
> +			NL_SET_ERR_MSG_MOD(info->extack,
> +					   "At least one pair of VQs is required");
> +			return -EINVAL;
> +		}
> +		config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
> +	}
>   
>   	/* Skip checking capability if user didn't prefer to configure any
>   	 * device networking attributes. It is likely that user might have used
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index d9d499465e2e..c37a63ba620a 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -285,7 +285,7 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
>   	if (copy_from_user(&features, featurep, sizeof(features)))
>   		return -EFAULT;
>   
> -	if (vdpa_set_features(vdpa, features))
> +	if (vdpa_set_features(vdpa, features, false))
>   		return -EINVAL;
>   
>   	return 0;
> diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
> index 76504559bc25..7767a7f0119b 100644
> --- a/drivers/virtio/virtio_vdpa.c
> +++ b/drivers/virtio/virtio_vdpa.c
> @@ -317,7 +317,7 @@ static int virtio_vdpa_finalize_features(struct virtio_device *vdev)
>   	/* Give virtio_ring a chance to accept features. */
>   	vring_transport_features(vdev);
>   
> -	return vdpa_set_features(vdpa, vdev->features);
> +	return vdpa_set_features(vdpa, vdev->features, false);
>   }
>   
>   static const char *virtio_vdpa_bus_name(struct virtio_device *vdev)
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index ae047fae2603..6d4d7e4fe208 100644
> --- a/include/linux/vdpa.h
> +++ b/include/linux/vdpa.h
> @@ -101,6 +101,7 @@ struct vdpa_dev_set_config {
>   	struct {
>   		u8 mac[ETH_ALEN];
>   		u16 mtu;
> +		u16 max_vq_pairs;
>   	} net;
>   	u64 mask;
>   };
> @@ -391,17 +392,29 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
>   static inline int vdpa_reset(struct vdpa_device *vdev)
>   {
>   	const struct vdpa_config_ops *ops = vdev->config;
> +	int ret;
>   
> +	mutex_lock(&vdev->cf_mutex);
>   	vdev->features_valid = false;
> -	return ops->reset(vdev);
> +	ret = ops->reset(vdev);
> +	mutex_unlock(&vdev->cf_mutex);
> +	return ret;
>   }
Can we move the vdpa_reset() code change here to patch #3, i.e. to be in 
parallel with the set_status() changes?

-Siwei

>   
> -static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
> +static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features, bool locked)
>   {
>   	const struct vdpa_config_ops *ops = vdev->config;
> +	int ret;
> +
> +	if (!locked)
> +		mutex_lock(&vdev->cf_mutex);
>   
>   	vdev->features_valid = true;
> -	return ops->set_driver_features(vdev, features);
> +	ret = ops->set_driver_features(vdev, features);
> +	if (!locked)
> +		mutex_unlock(&vdev->cf_mutex);
> +
> +	return ret;
>   }
>   
>   void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,


* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
From: Si-Wei Liu @ 2022-01-07  1:27 UTC
  To: Eli Cohen, mst, jasowang, virtualization; +Cc: lvivier, eperezma



On 1/5/2022 3:46 AM, Eli Cohen wrote:
> Check whether the max number of data virtqueue pairs was provided when
> adding a new device and verify the new value does not exceed device
> capabilities.
>
> In addition, change the arrays holding virtqueue and callback contexts
> to be dynamically allocated.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
> v6 -> v7:
> 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
>
>   drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++---------
>   1 file changed, 37 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 4a2149f70f1e..d4720444bf78 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
>   	struct mlx5_vq_restore_info ri;
>   };
>   
> -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
> - * provides for driver space allocation
> - */
> -#define MLX5_MAX_SUPPORTED_VQS 16
> -
>   static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
>   {
>   	if (unlikely(idx > mvdev->max_idx))
> @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
>   	struct mlx5_vdpa_dev mvdev;
>   	struct mlx5_vdpa_net_resources res;
>   	struct virtio_net_config config;
> -	struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
> -	struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
> +	struct mlx5_vdpa_virtqueue *vqs;
> +	struct vdpa_callback *event_cbs;
>   
>   	/* Serialize vq resources creation and destruction. This is required
>   	 * since memory map might change and we need to destroy and create
> @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net *ndev)
>   {
>   	int i;
>   
> -	for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
> +	for (i = 0; i < ndev->mvdev.max_vqs; i++)
>   		suspend_vq(ndev, &ndev->vqs[i]);
>   }
>   
> @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>   	void *in;
>   	int i, j;
>   	int err;
> +	int num;
>   
> -	max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
> +	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
> +		num = 1;
> +	else
> +		num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
> +
> +	max_rqt = min_t(int, roundup_pow_of_two(num),
>   			1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
>   	if (max_rqt < 1)
>   		return -EOPNOTSUPP;
> @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>   	MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
>   	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
>   	for (i = 0, j = 0; i < max_rqt; i++, j += 2)
> -		list[i] = cpu_to_be32(ndev->vqs[j % ndev->mvdev.max_vqs].virtq_id);
> +		list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
Good catch. LGTM.
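For example, with num = 3 configured queue pairs, max_rqt rounds up to 4 
and the loop picks vqs[0], vqs[2], vqs[4] and then wraps back to vqs[0], 
i.e. it stays within the 2 * num data queues actually in use instead of 
wrapping at max_vqs.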

Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com>

>   
>   	MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
>   	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
> @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>   	clear_vqs_ready(ndev);
>   	mlx5_vdpa_destroy_mr(&ndev->mvdev);
>   	ndev->mvdev.status = 0;
> -	memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
> +	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
>   	ndev->mvdev.actual_features = 0;
>   	++mvdev->generation;
>   	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
> @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
>   	}
>   	mlx5_vdpa_free_resources(&ndev->mvdev);
>   	mutex_destroy(&ndev->reslock);
> +	kfree(ndev->event_cbs);
> +	kfree(ndev->vqs);
>   }
>   
>   static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
> @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>   		return -EOPNOTSUPP;
>   	}
>   
> -	/* we save one virtqueue for control virtqueue should we require it */
>   	max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
> -	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
> +	if (max_vqs < 2) {
> +		dev_warn(mdev->device,
> +			 "%d virtqueues are supported. At least 2 are required\n",
> +			 max_vqs);
> +		return -EAGAIN;
> +	}
> +
> +	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
> +		if (add_config->net.max_vq_pairs > max_vqs / 2)
> +			return -EINVAL;
> +		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
> +	} else {
> +		max_vqs = 2;
> +	}
>   
>   	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
>   				 name, false);
>   	if (IS_ERR(ndev))
>   		return PTR_ERR(ndev);
>   
> +	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
> +	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
> +	if (!ndev->vqs || !ndev->event_cbs) {
> +		err = -ENOMEM;
> +		goto err_alloc;
> +	}
>   	ndev->mvdev.max_vqs = max_vqs;
>   	mvdev = &ndev->mvdev;
>   	mvdev->mdev = mdev;
> @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>   		mlx5_mpfs_del_mac(pfmdev, config->mac);
>   err_mtu:
>   	mutex_destroy(&ndev->reslock);
> +err_alloc:
>   	put_device(&mvdev->vdev.dev);
>   	return err;
>   }
> @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device *adev,
>   	mgtdev->mgtdev.ops = &mdev_ops;
>   	mgtdev->mgtdev.device = mdev->device;
>   	mgtdev->mgtdev.id_table = id_table;
> -	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
> +	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
> +					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
>   	mgtdev->madev = madev;
>   
>   	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);


* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
From: Si-Wei Liu @ 2022-01-07  1:50 UTC
  To: Eli Cohen, mst, jasowang, virtualization; +Cc: lvivier, eperezma



On 1/6/2022 5:27 PM, Si-Wei Liu wrote:
>
>
> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>> Check whether the max number of data virtqueue pairs was provided when
>> adding a new device and verify the new value does not exceed device
>> capabilities.
>>
>> In addition, change the arrays holding virtqueue and callback contexts
>> to be dynamically allocated.
>>
>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>> ---
>> v6 -> v7:
>> 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
>>
>>   drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++---------
>>   1 file changed, 37 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
>> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> index 4a2149f70f1e..d4720444bf78 100644
>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
>>       struct mlx5_vq_restore_info ri;
>>   };
>>   -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
>> - * provides for driver space allocation
>> - */
>> -#define MLX5_MAX_SUPPORTED_VQS 16
>> -
>>   static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
>>   {
>>       if (unlikely(idx > mvdev->max_idx))
>> @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
>>       struct mlx5_vdpa_dev mvdev;
>>       struct mlx5_vdpa_net_resources res;
>>       struct virtio_net_config config;
>> -    struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
>> -    struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
>> +    struct mlx5_vdpa_virtqueue *vqs;
>> +    struct vdpa_callback *event_cbs;
>>         /* Serialize vq resources creation and destruction. This is 
>> required
>>        * since memory map might change and we need to destroy and create
>> @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net 
>> *ndev)
>>   {
>>       int i;
>>   -    for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
>> +    for (i = 0; i < ndev->mvdev.max_vqs; i++)
>>           suspend_vq(ndev, &ndev->vqs[i]);
>>   }
>>   @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net 
>> *ndev)
>>       void *in;
>>       int i, j;
>>       int err;
>> +    int num;
>>   -    max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
>> +    if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
>> +        num = 1;
>> +    else
>> +        num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
>> +
>> +    max_rqt = min_t(int, roundup_pow_of_two(num),
>>               1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
>>       if (max_rqt < 1)
>>           return -EOPNOTSUPP;
>> @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>>       MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
>>       list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
>>       for (i = 0, j = 0; i < max_rqt; i++, j += 2)
>> -        list[i] = cpu_to_be32(ndev->vqs[j % 
>> ndev->mvdev.max_vqs].virtq_id);
>> +        list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
> Good catch. LGTM.
>
> Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com>
>
Apologies for replying to myself. It looks to me that we need to set 
cur_num_vqs to the negotiated number of queues; otherwise any site 
referencing cur_num_vqs won't work properly. Further, we need to 
validate that VIRTIO_NET_F_MQ is present in handle_ctrl_mq() before 
changing the number of queue pairs.
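Roughly what I have in mind (sketch only, untested; "newqps" stands for 
however the requested pair count is read out of the control command):

    /* in handle_ctrl_mq(), before changing the number of queue pairs */
    if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
            return VIRTIO_NET_ERR;

    /* and keep cur_num_vqs in sync whenever the count changes */
    ndev->cur_num_vqs = 2 * newqps;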

So just disregard my previous R-b for this patch.

Thanks,
-Siwei


>
>>         MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
>>       err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, 
>> &ndev->res.rqtn);
>> @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device 
>> *vdev)
>>       clear_vqs_ready(ndev);
>>       mlx5_vdpa_destroy_mr(&ndev->mvdev);
>>       ndev->mvdev.status = 0;
>> -    memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
>> +    memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * 
>> (mvdev->max_vqs + 1));
>>       ndev->mvdev.actual_features = 0;
>>       ++mvdev->generation;
>>       if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
>> @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device 
>> *vdev)
>>       }
>>       mlx5_vdpa_free_resources(&ndev->mvdev);
>>       mutex_destroy(&ndev->reslock);
>> +    kfree(ndev->event_cbs);
>> +    kfree(ndev->vqs);
>>   }
>>     static struct vdpa_notification_area 
>> mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
>> @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct 
>> vdpa_mgmt_dev *v_mdev, const char *name,
>>           return -EOPNOTSUPP;
>>       }
>>   -    /* we save one virtqueue for control virtqueue should we 
>> require it */
>>       max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, 
>> max_num_virtio_queues);
>> -    max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
>> +    if (max_vqs < 2) {
>> +        dev_warn(mdev->device,
>> +             "%d virtqueues are supported. At least 2 are required\n",
>> +             max_vqs);
>> +        return -EAGAIN;
>> +    }
>> +
>> +    if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
>> +        if (add_config->net.max_vq_pairs > max_vqs / 2)
>> +            return -EINVAL;
>> +        max_vqs = min_t(u32, max_vqs, 2 * 
>> add_config->net.max_vq_pairs);
>> +    } else {
>> +        max_vqs = 2;
>> +    }
>>         ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, 
>> mdev->device, &mlx5_vdpa_ops,
>>                    name, false);
>>       if (IS_ERR(ndev))
>>           return PTR_ERR(ndev);
>>   +    ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
>> +    ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), 
>> GFP_KERNEL);
>> +    if (!ndev->vqs || !ndev->event_cbs) {
>> +        err = -ENOMEM;
>> +        goto err_alloc;
>> +    }
>>       ndev->mvdev.max_vqs = max_vqs;
>>       mvdev = &ndev->mvdev;
>>       mvdev->mdev = mdev;
>> @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct 
>> vdpa_mgmt_dev *v_mdev, const char *name,
>>           mlx5_mpfs_del_mac(pfmdev, config->mac);
>>   err_mtu:
>>       mutex_destroy(&ndev->reslock);
>> +err_alloc:
>>       put_device(&mvdev->vdev.dev);
>>       return err;
>>   }
>> @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device 
>> *adev,
>>       mgtdev->mgtdev.ops = &mdev_ops;
>>       mgtdev->mgtdev.device = mdev->device;
>>       mgtdev->mgtdev.id_table = id_table;
>> -    mgtdev->mgtdev.config_attr_mask = 
>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
>> +    mgtdev->mgtdev.config_attr_mask = 
>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
>> +                      BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
>>       mgtdev->madev = madev;
>>         err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
>


* Re: [PATCH v7 10/14] vdpa: Support reporting max device capabilities
From: Si-Wei Liu @ 2022-01-07  2:12 UTC
  To: Eli Cohen, mst, jasowang, virtualization; +Cc: lvivier, eperezma



On 1/5/2022 3:46 AM, Eli Cohen wrote:
> Add max_supported_vqs and supported_features fields to struct
> vdpa_mgmt_dev. Upstream drivers need to fill these values according to
> the device capabilities.
>
> These values are reported back in a netlink message when showing management
> devices.
>
> Examples:
>
> $ auxiliary/mlx5_core.sf.1:
Missing the exact 'vdpa mgmtdev show ...' command, otherwise:

Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com>
>    supported_classes net
>    max_supported_vqs 257
>    dev_features CSUM GUEST_CSUM MTU HOST_TSO4 HOST_TSO6 STATUS CTRL_VQ MQ \
>                 CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM
>
> $ vdpa -j mgmtdev show
> {"mgmtdev":{"auxiliary/mlx5_core.sf.1":{"supported_classes":["net"], \
>    "max_supported_vqs":257,"dev_features":["CSUM","GUEST_CSUM","MTU", \
>    "HOST_TSO4","HOST_TSO6","STATUS","CTRL_VQ","MQ","CTRL_MAC_ADDR", \
>    "VERSION_1","ACCESS_PLATFORM"]}}}
>
> $ vdpa -jp mgmtdev show
> {
>      "mgmtdev": {
>          "auxiliary/mlx5_core.sf.1": {
>              "supported_classes": [ "net" ],
>              "max_supported_vqs": 257,
>              "dev_features": ["CSUM","GUEST_CSUM","MTU","HOST_TSO4", \
>                               "HOST_TSO6","STATUS","CTRL_VQ","MQ", \
>                               "CTRL_MAC_ADDR","VERSION_1","ACCESS_PLATFORM"]
>          }
>      }
> }
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
> v6 -> v7:
> 1. Fix comment
> 2. Fix error flow
> 3. Add device features
>
>   drivers/vdpa/vdpa.c       | 10 ++++++++++
>   include/linux/vdpa.h      |  2 ++
>   include/uapi/linux/vdpa.h |  2 ++
>   3 files changed, 14 insertions(+)
>
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> index 60cf821175fa..34fa251db8cc 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -514,6 +514,16 @@ static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *m
>   		err = -EMSGSIZE;
>   		goto msg_err;
>   	}
> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,
> +			mdev->max_supported_vqs)) {
> +		err = -EMSGSIZE;
> +		goto msg_err;
> +	}
> +	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_SUPPORTED_FEATURES,
> +			      mdev->supported_features, VDPA_ATTR_PAD)) {
> +		err = -EMSGSIZE;
> +		goto msg_err;
> +	}
>   
>   	genlmsg_end(msg, hdr);
>   	return 0;
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index 6d4d7e4fe208..a6047fd6cf12 100644
> --- a/include/linux/vdpa.h
> +++ b/include/linux/vdpa.h
> @@ -460,6 +460,8 @@ struct vdpa_mgmt_dev {
>   	const struct virtio_device_id *id_table;
>   	u64 config_attr_mask;
>   	struct list_head list;
> +	u64 supported_features;
> +	u32 max_supported_vqs;
>   };
>   
>   int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
> index db3738ef3beb..1061d8d2d09d 100644
> --- a/include/uapi/linux/vdpa.h
> +++ b/include/uapi/linux/vdpa.h
> @@ -44,6 +44,8 @@ enum vdpa_attr {
>   	VDPA_ATTR_DEV_NET_CFG_MTU,		/* u16 */
>   
>   	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
> +	VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,		/* u32 */
> +	VDPA_ATTR_DEV_SUPPORTED_FEATURES,	/* u64 */
>   	/* new attributes must be added above here */
>   	VDPA_ATTR_MAX,
>   };


* Re: [PATCH v7 11/14] vdpa/mlx5: Report max device capabilities
From: Si-Wei Liu @ 2022-01-07  2:12 UTC
  To: Eli Cohen, mst, jasowang, virtualization; +Cc: lvivier, eperezma



On 1/5/2022 3:46 AM, Eli Cohen wrote:
> Configure max supported virtqueues and features on the management
> device.
> This info can be retrieved using:
>
> $ vdpa mgmtdev show
> auxiliary/mlx5_core.sf.1:
>    supported_classes net
>    max_supported_vqs 257
>    dev_features CSUM GUEST_CSUM MTU HOST_TSO4 HOST_TSO6 STATUS CTRL_VQ MQ \
>                 CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com>
> ---
> v6 -> v7:
> 1. Add supported features
>   drivers/vdpa/mlx5/net/mlx5_vnet.c | 35 ++++++++++++++++++++-----------
>   1 file changed, 23 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index ac4f6794c03c..a0f808ee24d6 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -1867,22 +1867,29 @@ static u64 mlx_to_vritio_features(u16 dev_features)
>   	return result;
>   }
>   
> +static u64 get_supported_features(struct mlx5_core_dev *mdev)
> +{
> +	u64 mlx_vdpa_features = 0;
> +	u16 dev_features;
> +
> +	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
> +	mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
> +	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
> +		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
> +	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
> +	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
> +	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
> +	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
> +	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
> +	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
> +
> +	return mlx_vdpa_features;
> +}
> +
>   static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
>   {
>   	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>   	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> -	u16 dev_features;
> -
> -	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
> -	ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
> -	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
> -		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
> -	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
> -	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
> -	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
> -	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
> -	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
> -	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MTU);
>   
>   	print_features(mvdev, ndev->mvdev.mlx_features, false);
>   	return ndev->mvdev.mlx_features;
> @@ -2570,6 +2577,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>   		err = -ENOMEM;
>   		goto err_alloc;
>   	}
> +	ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features;
>   	ndev->mvdev.max_vqs = max_vqs;
>   	mvdev = &ndev->mvdev;
>   	mvdev->mdev = mdev;
> @@ -2695,6 +2703,9 @@ static int mlx5v_probe(struct auxiliary_device *adev,
>   	mgtdev->mgtdev.id_table = id_table;
>   	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
>   					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
> +	mgtdev->mgtdev.max_supported_vqs =
> +		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
> +	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
>   	mgtdev->madev = madev;
>   
>   	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);


* Re: [PATCH v7 03/14] vdpa: Sync calls set/get config/status with cf_mutex
From: Jason Wang @ 2022-01-07  5:08 UTC
  To: Si-Wei Liu, Eli Cohen, mst, virtualization; +Cc: lvivier, eperezma


On 1/7/2022 8:33 AM, Si-Wei Liu wrote:
>
>
> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>> Add wrappers to get/set status and protect these operations with
>> cf_mutex to serialize these operations with respect to get/set config
>> operations.
>>
>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>> ---
>>   drivers/vdpa/vdpa.c          | 19 +++++++++++++++++++
>>   drivers/vhost/vdpa.c         |  7 +++----
>>   drivers/virtio/virtio_vdpa.c |  3 +--
>>   include/linux/vdpa.h         |  3 +++
>>   4 files changed, 26 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
>> index 42d71d60d5dc..5134c83c4a22 100644
>> --- a/drivers/vdpa/vdpa.c
>> +++ b/drivers/vdpa/vdpa.c
>> @@ -21,6 +21,25 @@ static LIST_HEAD(mdev_head);
>>   static DEFINE_MUTEX(vdpa_dev_mutex);
>>   static DEFINE_IDA(vdpa_index_ida);
>>   +u8 vdpa_get_status(struct vdpa_device *vdev)
>> +{
>> +    u8 status;
>> +
>> +    mutex_lock(&vdev->cf_mutex);
>> +    status = vdev->config->get_status(vdev);
>> +    mutex_unlock(&vdev->cf_mutex);
>> +    return status;
>> +}
>> +EXPORT_SYMBOL(vdpa_get_status);
>> +
>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status)
>> +{
>> +    mutex_lock(&vdev->cf_mutex);
>> +    vdev->config->set_status(vdev, status);
>> +    mutex_unlock(&vdev->cf_mutex);
>> +}
>> +EXPORT_SYMBOL(vdpa_set_status);
>> +
>>   static struct genl_family vdpa_nl_family;
>>     static int vdpa_dev_probe(struct device *d)
>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>> index ebaa373e9b82..d9d499465e2e 100644
>> --- a/drivers/vhost/vdpa.c
>> +++ b/drivers/vhost/vdpa.c
>> @@ -142,10 +142,9 @@ static long vhost_vdpa_get_device_id(struct 
>> vhost_vdpa *v, u8 __user *argp)
>>   static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user 
>> *statusp)
>>   {
>>       struct vdpa_device *vdpa = v->vdpa;
>> -    const struct vdpa_config_ops *ops = vdpa->config;
>>       u8 status;
>>   -    status = ops->get_status(vdpa);
>> +    status = vdpa_get_status(vdpa);
> Not sure why we need to take cf_mutex here. Appears to me only setters 
> (set_status and reset) need to take the lock in this function.


You may be right, but it doesn't do any harm, and it is guaranteed to be 
correct if we protect it with the mutex here.

Thanks


>
>>         if (copy_to_user(statusp, &status, sizeof(status)))
>>           return -EFAULT;
>> @@ -164,7 +163,7 @@ static long vhost_vdpa_set_status(struct 
>> vhost_vdpa *v, u8 __user *statusp)
>>       if (copy_from_user(&status, statusp, sizeof(status)))
>>           return -EFAULT;
>>   -    status_old = ops->get_status(vdpa);
>> +    status_old = vdpa_get_status(vdpa);
> Ditto.
>
>>         /*
>>        * Userspace shouldn't remove status bits unless reset the
>> @@ -182,7 +181,7 @@ static long vhost_vdpa_set_status(struct 
>> vhost_vdpa *v, u8 __user *statusp)
>>           if (ret)
>>               return ret;
>>       } else
>> -        ops->set_status(vdpa, status);
>> +        vdpa_set_status(vdpa, status);
> The reset() call in the if branch above needs to take the cf_mutex, 
> the same way set_status() does. The reset() is effectively 
> set_status(vdpa, 0).
>
> Thanks,
> -Siwei
>
>>         if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & 
>> VIRTIO_CONFIG_S_DRIVER_OK))
>>           for (i = 0; i < nvqs; i++)
>> diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
>> index a84b04ba3195..76504559bc25 100644
>> --- a/drivers/virtio/virtio_vdpa.c
>> +++ b/drivers/virtio/virtio_vdpa.c
>> @@ -91,9 +91,8 @@ static u8 virtio_vdpa_get_status(struct 
>> virtio_device *vdev)
>>   static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 
>> status)
>>   {
>>       struct vdpa_device *vdpa = vd_get_vdpa(vdev);
>> -    const struct vdpa_config_ops *ops = vdpa->config;
>>   -    return ops->set_status(vdpa, status);
>> +    return vdpa_set_status(vdpa, status);
>>   }
>>     static void virtio_vdpa_reset(struct virtio_device *vdev)
>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>> index 9cc4291a79b3..ae047fae2603 100644
>> --- a/include/linux/vdpa.h
>> +++ b/include/linux/vdpa.h
>> @@ -408,6 +408,9 @@ void vdpa_get_config(struct vdpa_device *vdev, 
>> unsigned int offset,
>>                void *buf, unsigned int len);
>>   void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
>>                const void *buf, unsigned int length);
>> +u8 vdpa_get_status(struct vdpa_device *vdev);
>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status);
>> +
>>   /**
>>    * struct vdpa_mgmtdev_ops - vdpa device ops
>>    * @dev_add: Add a vdpa device using alloc and register
>


* Re: [PATCH v7 04/14] vdpa: Read device configuration only if FEATURES_OK
From: Jason Wang @ 2022-01-07  5:14 UTC
  To: Eli Cohen, mst, virtualization; +Cc: lvivier, eperezma, si-wei.liu


On 1/5/2022 7:46 PM, Eli Cohen wrote:
> Avoid reading device configuration during feature negotiation. Read
> device status and verify that VIRTIO_CONFIG_S_FEATURES_OK is set.
>
> Protect the entire operation, including the configuration read, with cf_mutex
> to ensure integrity of the results.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>


Acked-by: Jason Wang <jasowang@redhat.com>


> ---
> v5 -> v6:
> 1. Use cf_mutex to serialize the entire operations
> ---
>   drivers/vdpa/vdpa.c | 45 +++++++++++++++++++++++++++++++++------------
>   1 file changed, 33 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> index 5134c83c4a22..4494325cae91 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -393,6 +393,21 @@ void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev)
>   }
>   EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister);
>   
> +static void vdpa_get_config_unlocked(struct vdpa_device *vdev,
> +				     unsigned int offset,
> +				     void *buf, unsigned int len)
> +{
> +	const struct vdpa_config_ops *ops = vdev->config;
> +
> +	/*
> +	 * Config accesses aren't supposed to trigger before features are set.
> +	 * If it does happen we assume a legacy guest.
> +	 */
> +	if (!vdev->features_valid)
> +		vdpa_set_features(vdev, 0);
> +	ops->get_config(vdev, offset, buf, len);
> +}
> +
>   /**
>    * vdpa_get_config - Get one or more device configuration fields.
>    * @vdev: vdpa device to operate on
> @@ -403,16 +418,8 @@ EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister);
>   void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
>   		     void *buf, unsigned int len)
>   {
> -	const struct vdpa_config_ops *ops = vdev->config;
> -
>   	mutex_lock(&vdev->cf_mutex);
> -	/*
> -	 * Config accesses aren't supposed to trigger before features are set.
> -	 * If it does happen we assume a legacy guest.
> -	 */
> -	if (!vdev->features_valid)
> -		vdpa_set_features(vdev, 0);
> -	ops->get_config(vdev, offset, buf, len);
> +	vdpa_get_config_unlocked(vdev, offset, buf, len);
>   	mutex_unlock(&vdev->cf_mutex);
>   }
>   EXPORT_SYMBOL_GPL(vdpa_get_config);
> @@ -813,7 +820,7 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms
>   	u64 features;
>   	u16 val_u16;
>   
> -	vdpa_get_config(vdev, 0, &config, sizeof(config));
> +	vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config));
>   
>   	if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac),
>   		    config.mac))
> @@ -838,12 +845,23 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
>   {
>   	u32 device_id;
>   	void *hdr;
> +	u8 status;
>   	int err;
>   
> +	mutex_lock(&vdev->cf_mutex);
> +	status = vdev->config->get_status(vdev);
> +	if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
> +		NL_SET_ERR_MSG_MOD(extack, "Features negotiation not completed");
> +		err = -EAGAIN;
> +		goto out;
> +	}
> +
>   	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
>   			  VDPA_CMD_DEV_CONFIG_GET);
> -	if (!hdr)
> -		return -EMSGSIZE;
> +	if (!hdr) {
> +		err = -EMSGSIZE;
> +		goto out;
> +	}
>   
>   	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
>   		err = -EMSGSIZE;
> @@ -867,11 +885,14 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
>   	if (err)
>   		goto msg_err;
>   
> +	mutex_unlock(&vdev->cf_mutex);
>   	genlmsg_end(msg, hdr);
>   	return 0;
>   
>   msg_err:
>   	genlmsg_cancel(msg, hdr);
> +out:
> +	mutex_unlock(&vdev->cf_mutex);
>   	return err;
>   }
>   


* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
From: Jason Wang @ 2022-01-07  5:43 UTC
  To: Si-Wei Liu, Eli Cohen, mst, virtualization; +Cc: lvivier, eperezma


On 1/7/2022 9:50 AM, Si-Wei Liu wrote:
>
>
> On 1/6/2022 5:27 PM, Si-Wei Liu wrote:
>>
>>
>> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>>> Check whether the max number of data virtqueue pairs was provided 
>>> when
>>> adding a new device and verify the new value does not exceed device
>>> capabilities.
>>>
>>> In addition, change the arrays holding virtqueue and callback contexts
>>> to be dynamically allocated.
>>>
>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>> ---
>>> v6 -> v7:
>>> 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
>>>
>>>   drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 
>>> ++++++++++++++++++++++---------
>>>   1 file changed, 37 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
>>> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> index 4a2149f70f1e..d4720444bf78 100644
>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
>>>       struct mlx5_vq_restore_info ri;
>>>   };
>>>   -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
>>> - * provides for driver space allocation
>>> - */
>>> -#define MLX5_MAX_SUPPORTED_VQS 16
>>> -
>>>   static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
>>>   {
>>>       if (unlikely(idx > mvdev->max_idx))
>>> @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
>>>       struct mlx5_vdpa_dev mvdev;
>>>       struct mlx5_vdpa_net_resources res;
>>>       struct virtio_net_config config;
>>> -    struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
>>> -    struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
>>> +    struct mlx5_vdpa_virtqueue *vqs;
>>> +    struct vdpa_callback *event_cbs;
>>>         /* Serialize vq resources creation and destruction. This is 
>>> required
>>>        * since memory map might change and we need to destroy and 
>>> create
>>> @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net 
>>> *ndev)
>>>   {
>>>       int i;
>>>   -    for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
>>> +    for (i = 0; i < ndev->mvdev.max_vqs; i++)
>>>           suspend_vq(ndev, &ndev->vqs[i]);
>>>   }
>>>   @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net 
>>> *ndev)
>>>       void *in;
>>>       int i, j;
>>>       int err;
>>> +    int num;
>>>   -    max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
>>> +    if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
>>> +        num = 1;
>>> +    else
>>> +        num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
>>> +
>>> +    max_rqt = min_t(int, roundup_pow_of_two(num),
>>>               1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
>>>       if (max_rqt < 1)
>>>           return -EOPNOTSUPP;
>>> @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>>>       MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
>>>       list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
>>>       for (i = 0, j = 0; i < max_rqt; i++, j += 2)
>>> -        list[i] = cpu_to_be32(ndev->vqs[j % 
>>> ndev->mvdev.max_vqs].virtq_id);
>>> +        list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
>> Good catch. LGTM.
>>
>> Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com>
>>
> Apologies for replying to myself. It looks to me that we need to set 
> cur_num_vqs to the negotiated number of queues; otherwise any site 
> referencing cur_num_vqs won't work properly. Further, we need to 
> validate that VIRTIO_NET_F_MQ is present in handle_ctrl_mq() before 
> changing the number of queue pairs.


Such validation is not mandated in the spec. And if we want to do that, 
it needs to be done in a separate patch.

Thanks


>
> So just disregard my previous R-b for this patch.
>
> Thanks,
> -Siwei
>
>
>>
>>>         MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
>>>       err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, 
>>> &ndev->res.rqtn);
>>> @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device 
>>> *vdev)
>>>       clear_vqs_ready(ndev);
>>>       mlx5_vdpa_destroy_mr(&ndev->mvdev);
>>>       ndev->mvdev.status = 0;
>>> -    memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
>>> +    memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * 
>>> (mvdev->max_vqs + 1));
>>>       ndev->mvdev.actual_features = 0;
>>>       ++mvdev->generation;
>>>       if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
>>> @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device 
>>> *vdev)
>>>       }
>>>       mlx5_vdpa_free_resources(&ndev->mvdev);
>>>       mutex_destroy(&ndev->reslock);
>>> +    kfree(ndev->event_cbs);
>>> +    kfree(ndev->vqs);
>>>   }
>>>     static struct vdpa_notification_area 
>>> mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
>>> @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct 
>>> vdpa_mgmt_dev *v_mdev, const char *name,
>>>           return -EOPNOTSUPP;
>>>       }
>>>   -    /* we save one virtqueue for control virtqueue should we 
>>> require it */
>>>       max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, 
>>> max_num_virtio_queues);
>>> -    max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
>>> +    if (max_vqs < 2) {
>>> +        dev_warn(mdev->device,
>>> +             "%d virtqueues are supported. At least 2 are required\n",
>>> +             max_vqs);
>>> +        return -EAGAIN;
>>> +    }
>>> +
>>> +    if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
>>> +        if (add_config->net.max_vq_pairs > max_vqs / 2)
>>> +            return -EINVAL;
>>> +        max_vqs = min_t(u32, max_vqs, 2 * 
>>> add_config->net.max_vq_pairs);
>>> +    } else {
>>> +        max_vqs = 2;
>>> +    }
>>>         ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, 
>>> mdev->device, &mlx5_vdpa_ops,
>>>                    name, false);
>>>       if (IS_ERR(ndev))
>>>           return PTR_ERR(ndev);
>>>   +    ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
>>> +    ndev->event_cbs = kcalloc(max_vqs + 1, 
>>> sizeof(*ndev->event_cbs), GFP_KERNEL);
>>> +    if (!ndev->vqs || !ndev->event_cbs) {
>>> +        err = -ENOMEM;
>>> +        goto err_alloc;
>>> +    }
>>>       ndev->mvdev.max_vqs = max_vqs;
>>>       mvdev = &ndev->mvdev;
>>>       mvdev->mdev = mdev;
>>> @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct 
>>> vdpa_mgmt_dev *v_mdev, const char *name,
>>>           mlx5_mpfs_del_mac(pfmdev, config->mac);
>>>   err_mtu:
>>>       mutex_destroy(&ndev->reslock);
>>> +err_alloc:
>>>       put_device(&mvdev->vdev.dev);
>>>       return err;
>>>   }
>>> @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device 
>>> *adev,
>>>       mgtdev->mgtdev.ops = &mdev_ops;
>>>       mgtdev->mgtdev.device = mdev->device;
>>>       mgtdev->mgtdev.id_table = id_table;
>>> -    mgtdev->mgtdev.config_attr_mask = 
>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
>>> +    mgtdev->mgtdev.config_attr_mask = 
>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
>>> +                      BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
>>>       mgtdev->madev = madev;
>>>         err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
>>
>


* Re: [PATCH v7 08/14] vdpa: Add support for returning device configuration information
From: Jason Wang @ 2022-01-07  5:46 UTC
  To: Eli Cohen, mst, virtualization; +Cc: lvivier, eperezma, si-wei.liu


On 1/5/2022 7:46 PM, Eli Cohen wrote:
> Add a netlink attribute to store the negotiated features. This can be used
> by userspace to get the current state of the vdpa instance.
>
> Examples:
>
> $ vdpa dev config show vdpa-a
> vdpa-a: mac 00:00:00:00:88:88 link up link_announce false max_vq_pairs 16 mtu 1500
>    negotiated_features CSUM GUEST_CSUM MTU MAC HOST_TSO4 HOST_TSO6 STATUS \
>    CTRL_VQ MQ CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM
>
> $ vdpa -j dev config show vdpa-a
> {"config":{"vdpa-a":{"mac":"00:00:00:00:88:88","link ":"up","link_announce":false, \
>   "max_vq_pairs":16,"mtu":1500,"negotiated_features":["CSUM","GUEST_CSUM","MTU","MAC", \
>   "HOST_TSO4","HOST_TSO6","STATUS","CTRL_VQ","MQ","CTRL_MAC_ADDR","VERSION_1", \
>   "ACCESS_PLATFORM"]}}}
>
> $ vdpa -jp dev config show vdpa-a
> {
>      "config": {
>          "vdpa-a": {
>              "mac": "00:00:00:00:88:88",
>              "link ": "up",
>              "link_announce ": false,
>              "max_vq_pairs": 16,
>              "mtu": 1500,
>              "negotiated_features": [
> "CSUM","GUEST_CSUM","MTU","MAC","HOST_TSO4","HOST_TSO6","STATUS","CTRL_VQ","MQ", \
> "CTRL_MAC_ADDR","VERSION_1","ACCESS_PLATFORM"
> ]
>          }
>      }
> }
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>


Acked-by: Jason Wang <jasowang@redhat.com>


> ---
>   drivers/vdpa/vdpa.c       | 3 +++
>   include/uapi/linux/vdpa.h | 4 ++++
>   2 files changed, 7 insertions(+)
>
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> index 96d31b80fdce..60cf821175fa 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -846,6 +846,9 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms
>   		return -EMSGSIZE;
>   
>   	features = vdev->config->get_driver_features(vdev);
> +	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features,
> +			      VDPA_ATTR_PAD))
> +		return -EMSGSIZE;
>   
>   	return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config);
>   }
> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
> index a252f06f9dfd..db3738ef3beb 100644
> --- a/include/uapi/linux/vdpa.h
> +++ b/include/uapi/linux/vdpa.h
> @@ -23,6 +23,9 @@ enum vdpa_command {
>   enum vdpa_attr {
>   	VDPA_ATTR_UNSPEC,
>   
> +	/* Pad attribute for 64b alignment */
> +	VDPA_ATTR_PAD = VDPA_ATTR_UNSPEC,
> +
>   	/* bus name (optional) + dev name together make the parent device handle */
>   	VDPA_ATTR_MGMTDEV_BUS_NAME,		/* string */
>   	VDPA_ATTR_MGMTDEV_DEV_NAME,		/* string */
> @@ -40,6 +43,7 @@ enum vdpa_attr {
>   	VDPA_ATTR_DEV_NET_CFG_MAX_VQP,		/* u16 */
>   	VDPA_ATTR_DEV_NET_CFG_MTU,		/* u16 */
>   
> +	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
>   	/* new attributes must be added above here */
>   	VDPA_ATTR_MAX,
>   };


* Re: [PATCH v7 11/14] vdpa/mlx5: Report max device capabilities
From: Jason Wang @ 2022-01-07  5:49 UTC
  To: Si-Wei Liu, Eli Cohen, mst, virtualization; +Cc: lvivier, eperezma


On 1/7/2022 10:12 AM, Si-Wei Liu wrote:
>
>
> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>> Configure max supported virtqueues and features on the management
>> device.
>> This info can be retrieved using:
>>
>> $ vdpa mgmtdev show
>> auxiliary/mlx5_core.sf.1:
>>    supported_classes net
>>    max_supported_vqs 257
>>    dev_features CSUM GUEST_CSUM MTU HOST_TSO4 HOST_TSO6 STATUS 
>> CTRL_VQ MQ \
>>                 CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM
>>
>> Signed-off-by: Eli Cohen <elic@nvidia.com>
> Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com>


Acked-by: Jason Wang <jasowang@redhat.com>


>> ---
>> v6 -> v7:
>> 1. Add supported features
>>   drivers/vdpa/mlx5/net/mlx5_vnet.c | 35 ++++++++++++++++++++-----------
>>   1 file changed, 23 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
>> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> index ac4f6794c03c..a0f808ee24d6 100644
>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> @@ -1867,22 +1867,29 @@ static u64 mlx_to_vritio_features(u16 
>> dev_features)
>>       return result;
>>   }
>>   +static u64 get_supported_features(struct mlx5_core_dev *mdev)
>> +{
>> +    u64 mlx_vdpa_features = 0;
>> +    u16 dev_features;
>> +
>> +    dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, 
>> device_features_bits_mask);
>> +    mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
>> +    if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
>> +        mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
>> +    mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
>> +    mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
>> +    mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
>> +    mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
>> +    mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
>> +    mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
>> +
>> +    return mlx_vdpa_features;
>> +}
>> +
>>   static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
>>   {
>>       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>> -    u16 dev_features;
>> -
>> -    dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, 
>> device_features_bits_mask);
>> -    ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
>> -    if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
>> -        ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
>> -    ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
>> -    ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
>> -    ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
>> -    ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
>> -    ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
>> -    ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MTU);
>>         print_features(mvdev, ndev->mvdev.mlx_features, false);
>>       return ndev->mvdev.mlx_features;
>> @@ -2570,6 +2577,7 @@ static int mlx5_vdpa_dev_add(struct 
>> vdpa_mgmt_dev *v_mdev, const char *name,
>>           err = -ENOMEM;
>>           goto err_alloc;
>>       }
>> +    ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features;
>>       ndev->mvdev.max_vqs = max_vqs;
>>       mvdev = &ndev->mvdev;
>>       mvdev->mdev = mdev;
>> @@ -2695,6 +2703,9 @@ static int mlx5v_probe(struct auxiliary_device 
>> *adev,
>>       mgtdev->mgtdev.id_table = id_table;
>>       mgtdev->mgtdev.config_attr_mask = 
>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
>>                         BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
>> +    mgtdev->mgtdev.max_supported_vqs =
>> +        MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
>> +    mgtdev->mgtdev.supported_features = get_supported_features(mdev);
>>       mgtdev->madev = madev;
>>         err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
>


* Re: [PATCH v7 12/14] vdpa/vdpa_sim: Configure max supported virtqueues
       [not found] ` <20220105114646.577224-13-elic@nvidia.com>
@ 2022-01-07  5:50   ` Jason Wang
  0 siblings, 0 replies; 40+ messages in thread
From: Jason Wang @ 2022-01-07  5:50 UTC (permalink / raw)
  To: Eli Cohen, mst, virtualization; +Cc: lvivier, eperezma, si-wei.liu


On 2022/1/5 7:46 PM, Eli Cohen wrote:
> Configure max supported virtqueues on the management device.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>


Acked-by: Jason Wang <jasowang@redhat.com>


> ---
>   drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 1 +
>   1 file changed, 1 insertion(+)
>
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> index 76dd24abc791..46aabc73263a 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> @@ -191,6 +191,7 @@ static struct vdpa_mgmt_dev mgmt_dev = {
>   	.ops = &vdpasim_net_mgmtdev_ops,
>   	.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR |
>   			     1 << VDPA_ATTR_DEV_NET_CFG_MTU),
> +	.max_supported_vqs = VDPASIM_NET_VQ_NUM,
>   };
>   
>   static int __init vdpasim_net_init(void)


* Re: [PATCH v7 13/14] vdpa: Use BIT_ULL for bit operations
       [not found] ` <20220105114646.577224-14-elic@nvidia.com>
@ 2022-01-07  5:51   ` Jason Wang
  0 siblings, 0 replies; 40+ messages in thread
From: Jason Wang @ 2022-01-07  5:51 UTC (permalink / raw)
  To: Eli Cohen, mst, virtualization; +Cc: lvivier, eperezma, si-wei.liu


On 2022/1/5 7:46 PM, Eli Cohen wrote:
> All masks in this file are 64 bits. Change BIT to BIT_ULL.
>
> Other occurrences use (1 << val) which yields a 32-bit value. Change them
> to use BIT_ULL too.
>
> Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com>
> Signed-off-by: Eli Cohen <elic@nvidia.com>


Acked-by: Jason Wang <jasowang@redhat.com>


> ---
>   drivers/vdpa/vdpa.c | 12 ++++++------
>   1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> index 34fa251db8cc..4380367d00b5 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -590,9 +590,9 @@ vdpa_nl_cmd_mgmtdev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
>   	return msg->len;
>   }
>   
> -#define VDPA_DEV_NET_ATTRS_MASK ((1 << VDPA_ATTR_DEV_NET_CFG_MACADDR) | \
> -				 (1 << VDPA_ATTR_DEV_NET_CFG_MTU) | \
> -				 (1 << VDPA_ATTR_DEV_NET_CFG_MAX_VQP))
> +#define VDPA_DEV_NET_ATTRS_MASK (BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | \
> +				 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)     | \
> +				 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP))
>   
>   static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
>   {
> @@ -611,12 +611,12 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i
>   	if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) {
>   		macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]);
>   		memcpy(config.net.mac, macaddr, sizeof(config.net.mac));
> -		config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR);
> +		config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
>   	}
>   	if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) {
>   		config.net.mtu =
>   			nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]);
> -		config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MTU);
> +		config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU);
>   	}
>   	if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]) {
>   		config.net.max_vq_pairs =
> @@ -828,7 +828,7 @@ static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev,
>   {
>   	u16 val_u16;
>   
> -	if ((features & (1ULL << VIRTIO_NET_F_MQ)) == 0)
> +	if ((features & BIT_ULL(VIRTIO_NET_F_MQ)) == 0)
>   		return 0;
>   
>   	val_u16 = le16_to_cpu(config->max_virtqueue_pairs);


* Re: [PATCH v7 14/14] vdpa/vdpa_sim_net: Report max device capabilities
       [not found] ` <20220105114646.577224-15-elic@nvidia.com>
@ 2022-01-07  5:51   ` Jason Wang
  0 siblings, 0 replies; 40+ messages in thread
From: Jason Wang @ 2022-01-07  5:51 UTC (permalink / raw)
  To: Eli Cohen, mst, virtualization; +Cc: lvivier, eperezma, si-wei.liu


On 2022/1/5 7:46 PM, Eli Cohen wrote:
> Configure max supported virtqueues and features on the management device.
> This info can be retrieved using:
>
> $ vdpa mgmtdev show
> vdpasim_net:
>    supported_classes net
>    max_supported_vqs 2
>    dev_features MAC ANY_LAYOUT VERSION_1 ACCESS_PLATFORM
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>


Acked-by: Jason Wang <jasowang@redhat.com>


> ---
>   drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 1 +
>   1 file changed, 1 insertion(+)
>
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> index 46aabc73263a..d5324f6fd8c7 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> @@ -192,6 +192,7 @@ static struct vdpa_mgmt_dev mgmt_dev = {
>   	.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR |
>   			     1 << VDPA_ATTR_DEV_NET_CFG_MTU),
>   	.max_supported_vqs = VDPASIM_NET_VQ_NUM,
> +	.supported_features = VDPASIM_NET_FEATURES,
>   };
>   
>   static int __init vdpasim_net_init(void)


* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
  2022-01-07  1:27   ` [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue Si-Wei Liu
  2022-01-07  1:50     ` Si-Wei Liu
@ 2022-01-07 18:01     ` Nathan Chancellor
  2022-01-08  1:43         ` Si-Wei Liu
  1 sibling, 1 reply; 40+ messages in thread
From: Nathan Chancellor @ 2022-01-07 18:01 UTC (permalink / raw)
  To: Eli Cohen
  Cc: mst, jasowang, virtualization, lvivier, eperezma, llvm, Si-Wei Liu

Apologies if this reply is somewhat mangled. This patch did not appear
to make it to the mailing list, so I had to base this reply on another
one that did.

> On 1/5/2022 3:46 AM, Eli Cohen wrote:
> Check whether the max number of data virtqueue pairs was provided when
> adding a new device and verify the new value does not exceed device
> capabilities.
> 
> In addition, change the arrays holding virtqueue and callback contexts
> to be dynamically allocated.
> 
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
> v6 -> v7:
> 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
> 
>   drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++---------
>   1 file changed, 37 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 4a2149f70f1e..d4720444bf78 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
>   	struct mlx5_vq_restore_info ri;
>   };
> -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
> - * provides for driver space allocation
> - */
> -#define MLX5_MAX_SUPPORTED_VQS 16
> -
>   static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
>   {
>   	if (unlikely(idx > mvdev->max_idx))
> @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
>   	struct mlx5_vdpa_dev mvdev;
>   	struct mlx5_vdpa_net_resources res;
>   	struct virtio_net_config config;
> -	struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
> -	struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
> +	struct mlx5_vdpa_virtqueue *vqs;
> +	struct vdpa_callback *event_cbs;
>   	/* Serialize vq resources creation and destruction. This is required
>   	 * since memory map might change and we need to destroy and create
> @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net *ndev)
>   {
>   	int i;
> -	for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
> +	for (i = 0; i < ndev->mvdev.max_vqs; i++)
>   		suspend_vq(ndev, &ndev->vqs[i]);
>   }
> @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>   	void *in;
>   	int i, j;
>   	int err;
> +	int num;
> -	max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
> +	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
> +		num = 1;
> +	else
> +		num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
> +
> +	max_rqt = min_t(int, roundup_pow_of_two(num),
>   			1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
>   	if (max_rqt < 1)
>   		return -EOPNOTSUPP;
> @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>   	MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
>   	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
>   	for (i = 0, j = 0; i < max_rqt; i++, j += 2)
> -		list[i] = cpu_to_be32(ndev->vqs[j % ndev->mvdev.max_vqs].virtq_id);
> +		list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
>   	MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
>   	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
> @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>   	clear_vqs_ready(ndev);
>   	mlx5_vdpa_destroy_mr(&ndev->mvdev);
>   	ndev->mvdev.status = 0;
> -	memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
> +	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
>   	ndev->mvdev.actual_features = 0;
>   	++mvdev->generation;
>   	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
> @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
>   	}
>   	mlx5_vdpa_free_resources(&ndev->mvdev);
>   	mutex_destroy(&ndev->reslock);
> +	kfree(ndev->event_cbs);
> +	kfree(ndev->vqs);
>   }
>   static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
> @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>   		return -EOPNOTSUPP;
>   	}
> -	/* we save one virtqueue for control virtqueue should we require it */
>   	max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
> -	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
> +	if (max_vqs < 2) {
> +		dev_warn(mdev->device,
> +			 "%d virtqueues are supported. At least 2 are required\n",
> +			 max_vqs);
> +		return -EAGAIN;
> +	}
> +
> +	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
> +		if (add_config->net.max_vq_pairs > max_vqs / 2)
> +			return -EINVAL;
> +		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
> +	} else {
> +		max_vqs = 2;
> +	}
>   	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
>   				 name, false);
>   	if (IS_ERR(ndev))
>   		return PTR_ERR(ndev);
> +	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
> +	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
> +	if (!ndev->vqs || !ndev->event_cbs) {
> +		err = -ENOMEM;
> +		goto err_alloc;
> +	}

Clang warns:

drivers/vdpa/mlx5/net/mlx5_vnet.c:2574:6: error: variable 'mvdev' is used uninitialized whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized]
        if (!ndev->vqs || !ndev->event_cbs) {
            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/vdpa/mlx5/net/mlx5_vnet.c:2660:14: note: uninitialized use occurs here
        put_device(&mvdev->vdev.dev);
                    ^~~~~
drivers/vdpa/mlx5/net/mlx5_vnet.c:2574:2: note: remove the 'if' if its condition is always false
        if (!ndev->vqs || !ndev->event_cbs) {
        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/vdpa/mlx5/net/mlx5_vnet.c:2574:6: error: variable 'mvdev' is used uninitialized whenever '||' condition is true [-Werror,-Wsometimes-uninitialized]
        if (!ndev->vqs || !ndev->event_cbs) {
            ^~~~~~~~~~
drivers/vdpa/mlx5/net/mlx5_vnet.c:2660:14: note: uninitialized use occurs here
        put_device(&mvdev->vdev.dev);
                    ^~~~~
drivers/vdpa/mlx5/net/mlx5_vnet.c:2574:6: note: remove the '||' if its condition is always false
        if (!ndev->vqs || !ndev->event_cbs) {
            ^~~~~~~~~~~~~
drivers/vdpa/mlx5/net/mlx5_vnet.c:2534:29: note: initialize the variable 'mvdev' to silence this warning
        struct mlx5_vdpa_dev *mvdev;
                                   ^
                                    = NULL
2 errors generated.

I was going to send a patch just moving "err_alloc" right above "return err;"
but I don't think that is a proper fix. I think this patch is going to
result in memory leaks on the err_mpfs and err_mtu paths, as ndev->vqs
and ndev->event_cbs will have been allocated but they are only cleaned
up in mlx5_vdpa_free(). Additionally, I don't think the
results of these allocations should be checked together, because one
could succeed and the other could fail, meaning one needs to be cleaned
up while the other doesn't.
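
Just to illustrate the uninitialized-use part (an untested sketch, not a
proper fix for the leak concerns above): moving the mvdev assignment up,
before the allocations, would at least make the err_alloc path valid:

	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device,
				 &mlx5_vdpa_ops, name, false);
	if (IS_ERR(ndev))
		return PTR_ERR(ndev);

	mvdev = &ndev->mvdev;	/* assign before any goto that uses it */
	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
	if (!ndev->vqs || !ndev->event_cbs) {
		err = -ENOMEM;
		goto err_alloc;	/* put_device(&mvdev->vdev.dev) now sees a valid pointer */
	}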

Cheers,
Nathan

>   	ndev->mvdev.max_vqs = max_vqs;
>   	mvdev = &ndev->mvdev;
>   	mvdev->mdev = mdev;
> @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>   		mlx5_mpfs_del_mac(pfmdev, config->mac);
>   err_mtu:
>   	mutex_destroy(&ndev->reslock);
> +err_alloc:
>   	put_device(&mvdev->vdev.dev);
>   	return err;
>   }
> @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device *adev,
>   	mgtdev->mgtdev.ops = &mdev_ops;
>   	mgtdev->mgtdev.device = mdev->device;
>   	mgtdev->mgtdev.id_table = id_table;
> -	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
> +	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
> +					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
>   	mgtdev->madev = madev;
>   	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);


* Re: [PATCH v7 03/14] vdpa: Sync calls set/get config/status with cf_mutex
  2022-01-07  5:08     ` Jason Wang
@ 2022-01-08  1:23       ` Si-Wei Liu
  2022-01-10  6:05         ` Jason Wang
  0 siblings, 1 reply; 40+ messages in thread
From: Si-Wei Liu @ 2022-01-08  1:23 UTC (permalink / raw)
  To: Jason Wang, Eli Cohen, mst, virtualization; +Cc: lvivier, eperezma



On 1/6/2022 9:08 PM, Jason Wang wrote:
>
> On 2022/1/7 8:33 AM, Si-Wei Liu wrote:
>>
>>
>> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>>> Add wrappers to get/set status and protect these operations with
>>> cf_mutex to serialize these operations with respect to get/set config
>>> operations.
>>>
>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>> ---
>>>   drivers/vdpa/vdpa.c          | 19 +++++++++++++++++++
>>>   drivers/vhost/vdpa.c         |  7 +++----
>>>   drivers/virtio/virtio_vdpa.c |  3 +--
>>>   include/linux/vdpa.h         |  3 +++
>>>   4 files changed, 26 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
>>> index 42d71d60d5dc..5134c83c4a22 100644
>>> --- a/drivers/vdpa/vdpa.c
>>> +++ b/drivers/vdpa/vdpa.c
>>> @@ -21,6 +21,25 @@ static LIST_HEAD(mdev_head);
>>>   static DEFINE_MUTEX(vdpa_dev_mutex);
>>>   static DEFINE_IDA(vdpa_index_ida);
>>>   +u8 vdpa_get_status(struct vdpa_device *vdev)
>>> +{
>>> +    u8 status;
>>> +
>>> +    mutex_lock(&vdev->cf_mutex);
>>> +    status = vdev->config->get_status(vdev);
>>> +    mutex_unlock(&vdev->cf_mutex);
>>> +    return status;
>>> +}
>>> +EXPORT_SYMBOL(vdpa_get_status);
>>> +
>>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status)
>>> +{
>>> +    mutex_lock(&vdev->cf_mutex);
>>> +    vdev->config->set_status(vdev, status);
>>> +    mutex_unlock(&vdev->cf_mutex);
>>> +}
>>> +EXPORT_SYMBOL(vdpa_set_status);
>>> +
>>>   static struct genl_family vdpa_nl_family;
>>>     static int vdpa_dev_probe(struct device *d)
>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>> index ebaa373e9b82..d9d499465e2e 100644
>>> --- a/drivers/vhost/vdpa.c
>>> +++ b/drivers/vhost/vdpa.c
>>> @@ -142,10 +142,9 @@ static long vhost_vdpa_get_device_id(struct 
>>> vhost_vdpa *v, u8 __user *argp)
>>>   static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user 
>>> *statusp)
>>>   {
>>>       struct vdpa_device *vdpa = v->vdpa;
>>> -    const struct vdpa_config_ops *ops = vdpa->config;
>>>       u8 status;
>>>   -    status = ops->get_status(vdpa);
>>> +    status = vdpa_get_status(vdpa);
>> Not sure why we need to take cf_mutex here. Appears to me only 
>> setters (set_status and reset) need to take the lock in this function.
>
>
> You may be right but it doesn't harm and it is guaranteed to be 
> correct if we protect it with mutex here.
Is it more for future-proofing? OK, but IMHO it might be better to add a
comment here, otherwise it's quite confusing why the lock needs to be
held. vhost_vdpa already does its own serialization in
vhost_vdpa_unlocked_ioctl() through the vhost_dev mutex.
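
E.g. something along these lines above the call would do (the wording is
just a sketch):

	/*
	 * cf_mutex is not strictly needed for this read; it is taken to
	 * serialize against config/status accesses coming through the
	 * vdpa netlink interface. vhost_vdpa ioctls are already
	 * serialized with each other by the vhost_dev mutex.
	 */
	status = vdpa_get_status(vdpa);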

-Siwei

>
> Thanks
>
>
>>
>>>         if (copy_to_user(statusp, &status, sizeof(status)))
>>>           return -EFAULT;
>>> @@ -164,7 +163,7 @@ static long vhost_vdpa_set_status(struct 
>>> vhost_vdpa *v, u8 __user *statusp)
>>>       if (copy_from_user(&status, statusp, sizeof(status)))
>>>           return -EFAULT;
>>>   -    status_old = ops->get_status(vdpa);
>>> +    status_old = vdpa_get_status(vdpa);
>> Ditto.
>>
>>>         /*
>>>        * Userspace shouldn't remove status bits unless reset the
>>> @@ -182,7 +181,7 @@ static long vhost_vdpa_set_status(struct 
>>> vhost_vdpa *v, u8 __user *statusp)
>>>           if (ret)
>>>               return ret;
>>>       } else
>>> -        ops->set_status(vdpa, status);
>>> +        vdpa_set_status(vdpa, status);
>> The reset() call in the if branch above needs to take the cf_mutex, 
>> the same way as that for set_status(). The reset() is effectively 
>> set_status(vdpa, 0).
>>
>> Thanks,
>> -Siwei
>>
>>>         if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & 
>>> VIRTIO_CONFIG_S_DRIVER_OK))
>>>           for (i = 0; i < nvqs; i++)
>>> diff --git a/drivers/virtio/virtio_vdpa.c 
>>> b/drivers/virtio/virtio_vdpa.c
>>> index a84b04ba3195..76504559bc25 100644
>>> --- a/drivers/virtio/virtio_vdpa.c
>>> +++ b/drivers/virtio/virtio_vdpa.c
>>> @@ -91,9 +91,8 @@ static u8 virtio_vdpa_get_status(struct 
>>> virtio_device *vdev)
>>>   static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 
>>> status)
>>>   {
>>>       struct vdpa_device *vdpa = vd_get_vdpa(vdev);
>>> -    const struct vdpa_config_ops *ops = vdpa->config;
>>>   -    return ops->set_status(vdpa, status);
>>> +    return vdpa_set_status(vdpa, status);
>>>   }
>>>     static void virtio_vdpa_reset(struct virtio_device *vdev)
>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>>> index 9cc4291a79b3..ae047fae2603 100644
>>> --- a/include/linux/vdpa.h
>>> +++ b/include/linux/vdpa.h
>>> @@ -408,6 +408,9 @@ void vdpa_get_config(struct vdpa_device *vdev, 
>>> unsigned int offset,
>>>                void *buf, unsigned int len);
>>>   void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
>>>                const void *buf, unsigned int length);
>>> +u8 vdpa_get_status(struct vdpa_device *vdev);
>>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status);
>>> +
>>>   /**
>>>    * struct vdpa_mgmtdev_ops - vdpa device ops
>>>    * @dev_add: Add a vdpa device using alloc and register
>>
>


* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
  2022-01-07  5:43       ` Jason Wang
@ 2022-01-08  1:38         ` Si-Wei Liu
  0 siblings, 0 replies; 40+ messages in thread
From: Si-Wei Liu @ 2022-01-08  1:38 UTC (permalink / raw)
  To: Jason Wang, Eli Cohen, mst, virtualization; +Cc: lvivier, eperezma



On 1/6/2022 9:43 PM, Jason Wang wrote:
>
> On 2022/1/7 9:50 AM, Si-Wei Liu wrote:
>>
>>
>> On 1/6/2022 5:27 PM, Si-Wei Liu wrote:
>>>
>>>
>>> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>>>> Check whether the max number of data virtqueue pairs was provided 
>>>> when
>>>> adding a new device and verify the new value does not exceed device
>>>> capabilities.
>>>>
>>>> In addition, change the arrays holding virtqueue and callback contexts
>>>> to be dynamically allocated.
>>>>
>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>> ---
>>>> v6 -> v7:
>>>> 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
>>>>
>>>>   drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 
>>>> ++++++++++++++++++++++---------
>>>>   1 file changed, 37 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
>>>> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>> index 4a2149f70f1e..d4720444bf78 100644
>>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>> @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
>>>>       struct mlx5_vq_restore_info ri;
>>>>   };
>>>>   -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
>>>> - * provides for driver space allocation
>>>> - */
>>>> -#define MLX5_MAX_SUPPORTED_VQS 16
>>>> -
>>>>   static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
>>>>   {
>>>>       if (unlikely(idx > mvdev->max_idx))
>>>> @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
>>>>       struct mlx5_vdpa_dev mvdev;
>>>>       struct mlx5_vdpa_net_resources res;
>>>>       struct virtio_net_config config;
>>>> -    struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
>>>> -    struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
>>>> +    struct mlx5_vdpa_virtqueue *vqs;
>>>> +    struct vdpa_callback *event_cbs;
>>>>         /* Serialize vq resources creation and destruction. This is 
>>>> required
>>>>        * since memory map might change and we need to destroy and 
>>>> create
>>>> @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net 
>>>> *ndev)
>>>>   {
>>>>       int i;
>>>>   -    for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
>>>> +    for (i = 0; i < ndev->mvdev.max_vqs; i++)
>>>>           suspend_vq(ndev, &ndev->vqs[i]);
>>>>   }
>>>>   @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net 
>>>> *ndev)
>>>>       void *in;
>>>>       int i, j;
>>>>       int err;
>>>> +    int num;
>>>>   -    max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
>>>> +    if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
>>>> +        num = 1;
>>>> +    else
>>>> +        num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
>>>> +
>>>> +    max_rqt = min_t(int, roundup_pow_of_two(num),
>>>>               1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
>>>>       if (max_rqt < 1)
>>>>           return -EOPNOTSUPP;
>>>> @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net 
>>>> *ndev)
>>>>       MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
>>>>       list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
>>>>       for (i = 0, j = 0; i < max_rqt; i++, j += 2)
>>>> -        list[i] = cpu_to_be32(ndev->vqs[j % 
>>>> ndev->mvdev.max_vqs].virtq_id);
>>>> +        list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
>>> Good catch. LGTM.
>>>
>>> Reviewed-by: Si-Wei Liu<si-wei.liu@oracle.com>
>>>
>> Apologies to reply to myself. It looks to me we need to set 
>> cur_num_vqs to the negotiated num of queues. Otherwise any site 
>> referencing cur_num_vqs won't work properly. Further, we need to 
>> validate VIRTIO_NET_F_MQ is present in handle_ctrl_mq() before 
>> changing the number of queue pairs.
>
>
> Such validation is not mandated in the spec. And if we want to do 
> that, it needs to be done in a separate patch.
Agreed. The userspace (qemu) has similar validation for software 
virtio-net although the spec doesn't mandate it.
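
For the record, I'd expect the follow-up check in handle_ctrl_mq() to
look roughly like this (a sketch only, assuming the current shape of
that function, where status starts out as VIRTIO_NET_ERR):

	switch (cmd) {
	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
		/* reject the command unless VIRTIO_NET_F_MQ was negotiated */
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
			break;
		...
	}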

-Siwei

>
> Thanks
>
>
>>
>> So just disregard my previous R-b for this patch.
>>
>> Thanks,
>> -Siwei
>>
>>
>>>
>>>>         MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
>>>>       err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, 
>>>> &ndev->res.rqtn);
>>>> @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device 
>>>> *vdev)
>>>>       clear_vqs_ready(ndev);
>>>>       mlx5_vdpa_destroy_mr(&ndev->mvdev);
>>>>       ndev->mvdev.status = 0;
>>>> -    memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
>>>> +    memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * 
>>>> (mvdev->max_vqs + 1));
>>>>       ndev->mvdev.actual_features = 0;
>>>>       ++mvdev->generation;
>>>>       if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
>>>> @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device 
>>>> *vdev)
>>>>       }
>>>>       mlx5_vdpa_free_resources(&ndev->mvdev);
>>>>       mutex_destroy(&ndev->reslock);
>>>> +    kfree(ndev->event_cbs);
>>>> +    kfree(ndev->vqs);
>>>>   }
>>>>     static struct vdpa_notification_area 
>>>> mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
>>>> @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct 
>>>> vdpa_mgmt_dev *v_mdev, const char *name,
>>>>           return -EOPNOTSUPP;
>>>>       }
>>>>   -    /* we save one virtqueue for control virtqueue should we 
>>>> require it */
>>>>       max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, 
>>>> max_num_virtio_queues);
>>>> -    max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
>>>> +    if (max_vqs < 2) {
>>>> +        dev_warn(mdev->device,
>>>> +             "%d virtqueues are supported. At least 2 are 
>>>> required\n",
>>>> +             max_vqs);
>>>> +        return -EAGAIN;
>>>> +    }
>>>> +
>>>> +    if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
>>>> +        if (add_config->net.max_vq_pairs > max_vqs / 2)
>>>> +            return -EINVAL;
>>>> +        max_vqs = min_t(u32, max_vqs, 2 * 
>>>> add_config->net.max_vq_pairs);
>>>> +    } else {
>>>> +        max_vqs = 2;
>>>> +    }
>>>>         ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, 
>>>> mdev->device, &mlx5_vdpa_ops,
>>>>                    name, false);
>>>>       if (IS_ERR(ndev))
>>>>           return PTR_ERR(ndev);
>>>>   +    ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
>>>> +    ndev->event_cbs = kcalloc(max_vqs + 1, 
>>>> sizeof(*ndev->event_cbs), GFP_KERNEL);
>>>> +    if (!ndev->vqs || !ndev->event_cbs) {
>>>> +        err = -ENOMEM;
>>>> +        goto err_alloc;
>>>> +    }
>>>>       ndev->mvdev.max_vqs = max_vqs;
>>>>       mvdev = &ndev->mvdev;
>>>>       mvdev->mdev = mdev;
>>>> @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct 
>>>> vdpa_mgmt_dev *v_mdev, const char *name,
>>>>           mlx5_mpfs_del_mac(pfmdev, config->mac);
>>>>   err_mtu:
>>>>       mutex_destroy(&ndev->reslock);
>>>> +err_alloc:
>>>>       put_device(&mvdev->vdev.dev);
>>>>       return err;
>>>>   }
>>>> @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct 
>>>> auxiliary_device *adev,
>>>>       mgtdev->mgtdev.ops = &mdev_ops;
>>>>       mgtdev->mgtdev.device = mdev->device;
>>>>       mgtdev->mgtdev.id_table = id_table;
>>>> -    mgtdev->mgtdev.config_attr_mask = 
>>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
>>>> +    mgtdev->mgtdev.config_attr_mask = 
>>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
>>>> + BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
>>>>       mgtdev->madev = madev;
>>>>         err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
>>>
>>
>


* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
  2022-01-07 18:01     ` Nathan Chancellor
@ 2022-01-08  1:43         ` Si-Wei Liu
  0 siblings, 0 replies; 40+ messages in thread
From: Si-Wei Liu @ 2022-01-08  1:43 UTC (permalink / raw)
  To: Nathan Chancellor, Eli Cohen; +Cc: lvivier, mst, llvm, virtualization, eperezma

It's unfortunate. Don't know why this series got pulled into linux-next 
prematurely. The code review is still ongoing and there were 
outstanding comments that hadn't been addressed yet.

On 1/7/2022 10:01 AM, Nathan Chancellor wrote:
> Apologies if this reply is somewhat mangled. This patch did not appear
> to make it to the mailing list, so I had to base this reply on another
> one that did.
>
>> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>> Check whether the max number of data virtqueue pairs was provided when
>> adding a new device and verify the new value does not exceed device
>> capabilities.
>>
>> In addition, change the arrays holding virtqueue and callback contexts
>> to be dynamically allocated.
>>
>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>> ---
>> v6 -> v7:
>> 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
>>
>>    drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++---------
>>    1 file changed, 37 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> index 4a2149f70f1e..d4720444bf78 100644
>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
>>    	struct mlx5_vq_restore_info ri;
>>    };
>> -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
>> - * provides for driver space allocation
>> - */
>> -#define MLX5_MAX_SUPPORTED_VQS 16
>> -
>>    static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
>>    {
>>    	if (unlikely(idx > mvdev->max_idx))
>> @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
>>    	struct mlx5_vdpa_dev mvdev;
>>    	struct mlx5_vdpa_net_resources res;
>>    	struct virtio_net_config config;
>> -	struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
>> -	struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
>> +	struct mlx5_vdpa_virtqueue *vqs;
>> +	struct vdpa_callback *event_cbs;
>>    	/* Serialize vq resources creation and destruction. This is required
>>    	 * since memory map might change and we need to destroy and create
>> @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net *ndev)
>>    {
>>    	int i;
>> -	for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
>> +	for (i = 0; i < ndev->mvdev.max_vqs; i++)
>>    		suspend_vq(ndev, &ndev->vqs[i]);
>>    }
>> @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>>    	void *in;
>>    	int i, j;
>>    	int err;
>> +	int num;
>> -	max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
>> +	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
>> +		num = 1;
>> +	else
>> +		num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
>> +
>> +	max_rqt = min_t(int, roundup_pow_of_two(num),
>>    			1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
>>    	if (max_rqt < 1)
>>    		return -EOPNOTSUPP;
>> @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>>    	MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
>>    	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
>>    	for (i = 0, j = 0; i < max_rqt; i++, j += 2)
>> -		list[i] = cpu_to_be32(ndev->vqs[j % ndev->mvdev.max_vqs].virtq_id);
>> +		list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
>>    	MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
>>    	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
>> @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>>    	clear_vqs_ready(ndev);
>>    	mlx5_vdpa_destroy_mr(&ndev->mvdev);
>>    	ndev->mvdev.status = 0;
>> -	memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
>> +	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
>>    	ndev->mvdev.actual_features = 0;
>>    	++mvdev->generation;
>>    	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
>> @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
>>    	}
>>    	mlx5_vdpa_free_resources(&ndev->mvdev);
>>    	mutex_destroy(&ndev->reslock);
>> +	kfree(ndev->event_cbs);
>> +	kfree(ndev->vqs);
>>    }
>>    static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
>> @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>>    		return -EOPNOTSUPP;
>>    	}
>> -	/* we save one virtqueue for control virtqueue should we require it */
>>    	max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
>> -	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
>> +	if (max_vqs < 2) {
>> +		dev_warn(mdev->device,
>> +			 "%d virtqueues are supported. At least 2 are required\n",
>> +			 max_vqs);
>> +		return -EAGAIN;
>> +	}
>> +
>> +	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
>> +		if (add_config->net.max_vq_pairs > max_vqs / 2)
>> +			return -EINVAL;
>> +		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
>> +	} else {
>> +		max_vqs = 2;
>> +	}
>>    	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
>>    				 name, false);
>>    	if (IS_ERR(ndev))
>>    		return PTR_ERR(ndev);
>> +	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
>> +	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
>> +	if (!ndev->vqs || !ndev->event_cbs) {
>> +		err = -ENOMEM;
>> +		goto err_alloc;
>> +	}
> Clang warns:
>
> drivers/vdpa/mlx5/net/mlx5_vnet.c:2574:6: error: variable 'mvdev' is used uninitialized whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized]
>          if (!ndev->vqs || !ndev->event_cbs) {
>              ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> drivers/vdpa/mlx5/net/mlx5_vnet.c:2660:14: note: uninitialized use occurs here
>          put_device(&mvdev->vdev.dev);
>                      ^~~~~
> drivers/vdpa/mlx5/net/mlx5_vnet.c:2574:2: note: remove the 'if' if its condition is always false
>          if (!ndev->vqs || !ndev->event_cbs) {
>          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> drivers/vdpa/mlx5/net/mlx5_vnet.c:2574:6: error: variable 'mvdev' is used uninitialized whenever '||' condition is true [-Werror,-Wsometimes-uninitialized]
>          if (!ndev->vqs || !ndev->event_cbs) {
>              ^~~~~~~~~~
> drivers/vdpa/mlx5/net/mlx5_vnet.c:2660:14: note: uninitialized use occurs here
>          put_device(&mvdev->vdev.dev);
>                      ^~~~~
> drivers/vdpa/mlx5/net/mlx5_vnet.c:2574:6: note: remove the '||' if its condition is always false
>          if (!ndev->vqs || !ndev->event_cbs) {
>              ^~~~~~~~~~~~~
> drivers/vdpa/mlx5/net/mlx5_vnet.c:2534:29: note: initialize the variable 'mvdev' to silence this warning
>          struct mlx5_vdpa_dev *mvdev;
>                                     ^
>                                      = NULL
> 2 errors generated.
>
> I was going to send a patch just moving "err_alloc" right above "return err;"
> but I don't think that is a proper fix. I think this patch is going to
> result in memory leaks on the err_mpfs and err_mtu paths, as ndev->vqs
> and ndev->event_cbs will have been allocated but they are only cleaned
> up in mlx5_vdpa_free(). Additionally, I don't think the
> results of these allocations should be checked together, because one
> could succeed and the other could fail, meaning one needs to be cleaned
> up while the other doesn't.
>
> Cheers,
> Nathan
>
>>    	ndev->mvdev.max_vqs = max_vqs;
>>    	mvdev = &ndev->mvdev;
>>    	mvdev->mdev = mdev;
>> @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>>    		mlx5_mpfs_del_mac(pfmdev, config->mac);
>>    err_mtu:
>>    	mutex_destroy(&ndev->reslock);
>> +err_alloc:
>>    	put_device(&mvdev->vdev.dev);
>>    	return err;
>>    }
>> @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device *adev,
>>    	mgtdev->mgtdev.ops = &mdev_ops;
>>    	mgtdev->mgtdev.device = mdev->device;
>>    	mgtdev->mgtdev.id_table = id_table;
>> -	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
>> +	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
>> +					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
>>    	mgtdev->madev = madev;
>>    	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);

* Re: [PATCH v7 03/14] vdpa: Sync calls set/get config/status with cf_mutex
  2022-01-08  1:23       ` Si-Wei Liu
@ 2022-01-10  6:05         ` Jason Wang
  2022-01-11  1:30           ` Si-Wei Liu
  0 siblings, 1 reply; 40+ messages in thread
From: Jason Wang @ 2022-01-10  6:05 UTC (permalink / raw)
  To: Si-Wei Liu, Eli Cohen, mst, virtualization; +Cc: lvivier, eperezma


On 2022/1/8 9:23 AM, Si-Wei Liu wrote:
>
>
> On 1/6/2022 9:08 PM, Jason Wang wrote:
>>
>> On 2022/1/7 8:33 AM, Si-Wei Liu wrote:
>>>
>>>
>>> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>>>> Add wrappers to get/set status and protect these operations with
>>>> cf_mutex to serialize these operations with respect to get/set config
>>>> operations.
>>>>
>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>> ---
>>>>   drivers/vdpa/vdpa.c          | 19 +++++++++++++++++++
>>>>   drivers/vhost/vdpa.c         |  7 +++----
>>>>   drivers/virtio/virtio_vdpa.c |  3 +--
>>>>   include/linux/vdpa.h         |  3 +++
>>>>   4 files changed, 26 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
>>>> index 42d71d60d5dc..5134c83c4a22 100644
>>>> --- a/drivers/vdpa/vdpa.c
>>>> +++ b/drivers/vdpa/vdpa.c
>>>> @@ -21,6 +21,25 @@ static LIST_HEAD(mdev_head);
>>>>   static DEFINE_MUTEX(vdpa_dev_mutex);
>>>>   static DEFINE_IDA(vdpa_index_ida);
>>>>   +u8 vdpa_get_status(struct vdpa_device *vdev)
>>>> +{
>>>> +    u8 status;
>>>> +
>>>> +    mutex_lock(&vdev->cf_mutex);
>>>> +    status = vdev->config->get_status(vdev);
>>>> +    mutex_unlock(&vdev->cf_mutex);
>>>> +    return status;
>>>> +}
>>>> +EXPORT_SYMBOL(vdpa_get_status);
>>>> +
>>>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status)
>>>> +{
>>>> +    mutex_lock(&vdev->cf_mutex);
>>>> +    vdev->config->set_status(vdev, status);
>>>> +    mutex_unlock(&vdev->cf_mutex);
>>>> +}
>>>> +EXPORT_SYMBOL(vdpa_set_status);
>>>> +
>>>>   static struct genl_family vdpa_nl_family;
>>>>     static int vdpa_dev_probe(struct device *d)
>>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>>> index ebaa373e9b82..d9d499465e2e 100644
>>>> --- a/drivers/vhost/vdpa.c
>>>> +++ b/drivers/vhost/vdpa.c
>>>> @@ -142,10 +142,9 @@ static long vhost_vdpa_get_device_id(struct 
>>>> vhost_vdpa *v, u8 __user *argp)
>>>>   static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user 
>>>> *statusp)
>>>>   {
>>>>       struct vdpa_device *vdpa = v->vdpa;
>>>> -    const struct vdpa_config_ops *ops = vdpa->config;
>>>>       u8 status;
>>>>   -    status = ops->get_status(vdpa);
>>>> +    status = vdpa_get_status(vdpa);
>>> Not sure why we need to take cf_mutex here. Appears to me only 
>>> setters (set_status and reset) need to take the lock in this function.
>>
>>
>> You may be right but it doesn't harm and it is guaranteed to be 
>> correct if we protect it with mutex here.
> Is it more for future-proofing?


I think so.


> OK, but IMHO it might be better to add a comment here, otherwise
> it's quite confusing why the lock needs to be held. vhost_vdpa
> already does its own serialization in vhost_vdpa_unlocked_ioctl()
> through the vhost_dev mutex.


Right, but they are taken at different levels: one is for the vhost_dev,
the other is for the vdpa_dev.
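
Roughly, the nesting looks like this (a sketch of the call chain, not
actual code):

	vhost_vdpa_unlocked_ioctl()
	    mutex_lock(&v->vdev.mutex)          /* vhost_dev level: serializes
	                                         * ioctls on this vhost device */
	    vhost_vdpa_set_status()
	        vdpa_set_status()
	            mutex_lock(&vdev->cf_mutex) /* vdpa_dev level: also taken by
	                                         * the netlink config/status path */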

Thanks


>
> -Siwei
>
>>
>> Thanks
>>
>>
>>>
>>>>         if (copy_to_user(statusp, &status, sizeof(status)))
>>>>           return -EFAULT;
>>>> @@ -164,7 +163,7 @@ static long vhost_vdpa_set_status(struct 
>>>> vhost_vdpa *v, u8 __user *statusp)
>>>>       if (copy_from_user(&status, statusp, sizeof(status)))
>>>>           return -EFAULT;
>>>>   -    status_old = ops->get_status(vdpa);
>>>> +    status_old = vdpa_get_status(vdpa);
>>> Ditto.
>>>
>>>>         /*
>>>>        * Userspace shouldn't remove status bits unless reset the
>>>> @@ -182,7 +181,7 @@ static long vhost_vdpa_set_status(struct 
>>>> vhost_vdpa *v, u8 __user *statusp)
>>>>           if (ret)
>>>>               return ret;
>>>>       } else
>>>> -        ops->set_status(vdpa, status);
>>>> +        vdpa_set_status(vdpa, status);
>>> The reset() call in the if branch above needs to take the cf_mutex, 
>>> the same way as that for set_status(). The reset() is effectively 
>>> set_status(vdpa, 0).
>>>
>>> Thanks,
>>> -Siwei
>>>
>>>>         if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & 
>>>> VIRTIO_CONFIG_S_DRIVER_OK))
>>>>           for (i = 0; i < nvqs; i++)
>>>> diff --git a/drivers/virtio/virtio_vdpa.c 
>>>> b/drivers/virtio/virtio_vdpa.c
>>>> index a84b04ba3195..76504559bc25 100644
>>>> --- a/drivers/virtio/virtio_vdpa.c
>>>> +++ b/drivers/virtio/virtio_vdpa.c
>>>> @@ -91,9 +91,8 @@ static u8 virtio_vdpa_get_status(struct 
>>>> virtio_device *vdev)
>>>>   static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 
>>>> status)
>>>>   {
>>>>       struct vdpa_device *vdpa = vd_get_vdpa(vdev);
>>>> -    const struct vdpa_config_ops *ops = vdpa->config;
>>>>   -    return ops->set_status(vdpa, status);
>>>> +    return vdpa_set_status(vdpa, status);
>>>>   }
>>>>     static void virtio_vdpa_reset(struct virtio_device *vdev)
>>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>>>> index 9cc4291a79b3..ae047fae2603 100644
>>>> --- a/include/linux/vdpa.h
>>>> +++ b/include/linux/vdpa.h
>>>> @@ -408,6 +408,9 @@ void vdpa_get_config(struct vdpa_device *vdev, 
>>>> unsigned int offset,
>>>>                void *buf, unsigned int len);
>>>>   void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
>>>>                const void *buf, unsigned int length);
>>>> +u8 vdpa_get_status(struct vdpa_device *vdev);
>>>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status);
>>>> +
>>>>   /**
>>>>    * struct vdpa_mgmtdev_ops - vdpa device ops
>>>>    * @dev_add: Add a vdpa device using alloc and register
>>>
>>
>


* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
  2022-01-08  1:43         ` Si-Wei Liu
@ 2022-01-10  6:53           ` Michael S. Tsirkin
  -1 siblings, 0 replies; 40+ messages in thread
From: Michael S. Tsirkin @ 2022-01-10  6:53 UTC (permalink / raw)
  To: Si-Wei Liu
  Cc: Nathan Chancellor, Eli Cohen, jasowang, virtualization, lvivier,
	eperezma, llvm

On Fri, Jan 07, 2022 at 05:43:15PM -0800, Si-Wei Liu wrote:
> It's unfortunate. Don't know why this series got pulled into linux-next
> prematurely. The code review is still ongoing and there were outstanding
> comments that hadn't been addressed yet.

Most patches got acked, and the merge window is closing.
The only couple of issues seem to be with this specific patch, and I
think I fixed them up.
Still - I can hold them if necessary. What do others think?

-- 
MST



* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
  2022-01-10  6:53           ` Michael S. Tsirkin
  (?)
@ 2022-01-10  6:58           ` Eli Cohen
  -1 siblings, 0 replies; 40+ messages in thread
From: Eli Cohen @ 2022-01-10  6:58 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Si-Wei Liu, Nathan Chancellor, jasowang, virtualization, lvivier,
	eperezma, llvm

On Mon, Jan 10, 2022 at 01:53:04AM -0500, Michael S. Tsirkin wrote:
> On Fri, Jan 07, 2022 at 05:43:15PM -0800, Si-Wei Liu wrote:
> > It's unfortunate. Don't know why this series got pulled into linux-next
> > prematurely. The code review is still ongoing and there were outstanding
> > comments that hadn't been addressed yet.
> 
> Most patches got acked, and the merge window is closing.
> The only couple of issues seem to be with this specific patch, and I
> think I fixed them up.
> Still - I can hold them if necessary. What do others think?

Well, you could argue that I am not objective, but I think we should
keep them since they address all interface-related concerns raised and
they work.

> 
> -- 
> MST
> 


* Re: [PATCH v7 00/14] Allow for configuring max number of virtqueue pairs
       [not found] <20220105114646.577224-1-elic@nvidia.com>
                   ` (9 preceding siblings ...)
       [not found] ` <20220105114646.577224-15-elic@nvidia.com>
@ 2022-01-10  7:04 ` Michael S. Tsirkin
  2022-01-10  7:09   ` Jason Wang
                     ` (2 more replies)
  10 siblings, 3 replies; 40+ messages in thread
From: Michael S. Tsirkin @ 2022-01-10  7:04 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, virtualization, eperezma, si-wei.liu

On Wed, Jan 05, 2022 at 01:46:32PM +0200, Eli Cohen wrote:
> Allow the user to configure the max number of virtqueue pairs for a vdpa
> instance. The user can then control the actual number of virtqueue pairs
> using ethtool.

So I put a version of this in linux-next, but I had to squash in
some bugfixes, and resolve some conflicts. Eli, please take a look
and let me know whether it looks sane. If not pls post a new
version.
Jason, what is your take on merging this now? Si-wei here seems to want
to defer, but OTOH it's up to v7 already, most patches are acked and
most comments look like minor improvement suggestions to me.

> Example, set number of VQPs to 2:
> $ ethtool -L ens1 combined 2
> 
> A user can check the max supported virtqueues for a management device by
> running:
> 
> $ vdpa mgmtdev show
>     auxiliary/mlx5_core.sf.1:
>       supported_classes net
>       max_supported_vqs 257
>       dev_features CSUM GUEST_CSUM MTU HOST_TSO4 HOST_TSO6 STATUS CTRL_VQ MQ \
>                    CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM
> 
> and refer to this value when adding a device.
> 
> To create a device with a max of 5 VQPs:
> vdpa dev add name vdpa-a mgmtdev auxiliary/mlx5_core.sf.1 max_vqp 5
> 
> Please note that for patches that were changed I removed "Reviewed-by"
> and "Acked-by".
> 
> v6 -> v7:
> 1. Make use of cf_mutex for serializing netlink set/get with other
> calls.
> 2. Some fixes (See in each patch)
> 3. Add patch for vdpa_sim to report supported features
> 4. "Reviewed-by" and "Acked-by" removed from patch 0007 since it had
> slightly changed.
> 
> Eli Cohen (14):
>   vdpa: Provide interface to read driver features
>   vdpa/mlx5: Distribute RX virtqueues in RQT object
>   vdpa: Sync calls set/get config/status with cf_mutex
>   vdpa: Read device configuration only if FEATURES_OK
>   vdpa: Allow to configure max data virtqueues
>   vdpa/mlx5: Fix config_attr_mask assignment
>   vdpa/mlx5: Support configuring max data virtqueue
>   vdpa: Add support for returning device configuration information
>   vdpa/mlx5: Restore cur_num_vqs in case of failure in change_num_qps()
>   vdpa: Support reporting max device capabilities
>   vdpa/mlx5: Report max device capabilities
>   vdpa/vdpa_sim: Configure max supported virtqueues
>   vdpa: Use BIT_ULL for bit operations
>   vdpa/vdpa_sim_net: Report max device capabilities
> 
>  drivers/vdpa/alibaba/eni_vdpa.c      |  16 +++-
>  drivers/vdpa/ifcvf/ifcvf_main.c      |  16 +++-
>  drivers/vdpa/mlx5/net/mlx5_vnet.c    | 134 ++++++++++++++++-----------
>  drivers/vdpa/vdpa.c                  | 100 ++++++++++++++++----
>  drivers/vdpa/vdpa_sim/vdpa_sim.c     |  21 +++--
>  drivers/vdpa/vdpa_sim/vdpa_sim_net.c |   2 +
>  drivers/vdpa/vdpa_user/vduse_dev.c   |  16 +++-
>  drivers/vdpa/virtio_pci/vp_vdpa.c    |  16 +++-
>  drivers/vhost/vdpa.c                 |  11 +--
>  drivers/virtio/virtio_vdpa.c         |   7 +-
>  include/linux/vdpa.h                 |  36 +++++--
>  include/uapi/linux/vdpa.h            |   6 ++
>  12 files changed, 271 insertions(+), 110 deletions(-)
> 
> -- 
> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v7 00/14] Allow for configuring max number of virtqueue pairs
  2022-01-10  7:04 ` [PATCH v7 00/14] Allow for configuring max number of virtqueue pairs Michael S. Tsirkin
@ 2022-01-10  7:09   ` Jason Wang
  2022-01-11  1:59   ` Si-Wei Liu
       [not found]   ` <20220110074958.GA105688@mtl-vdi-166.wap.labs.mlnx>
  2 siblings, 0 replies; 40+ messages in thread
From: Jason Wang @ 2022-01-10  7:09 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Laurent Vivier, virtualization, eperezma, Si-Wei Liu, Eli Cohen

On Mon, Jan 10, 2022 at 3:04 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Jan 05, 2022 at 01:46:32PM +0200, Eli Cohen wrote:
> > Allow the user to configure the max number of virtqueue pairs for a vdpa
> > instance. The user can then control the actual number of virtqueue pairs
> > using ethtool.
>
> So I put a version of this in linux-next, but I had to squash in
> some bugfixes, and resolve some conflicts. Eli, please take a look
> and let me know whether it looks sane. If not pls post a new
> version.
> Jason, what is your take on merging this now? Si-wei here seems to want
> to defer, but OTOH it's up to v7 already, most patches are acked and
> most comments look like minor improvement suggestions to me.

I think we can merge them and send patches on top to fix issues if needed.

Thanks

>
> > Example, set number of VQPs to 2:
> > $ ethtool -L ens1 combined 2
> >
> > A user can check the max supported virtqueues for a management device by
> > running:
> >
> > $ vdpa mgmtdev show
> >     auxiliary/mlx5_core.sf.1:
> >       supported_classes net
> >       max_supported_vqs 257
> >       dev_features CSUM GUEST_CSUM MTU HOST_TSO4 HOST_TSO6 STATUS CTRL_VQ MQ \
> >                    CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM
> >
> > and refer to this value when adding a device.
> >
> > To create a device with a max of 5 VQPs:
> > vdpa dev add name vdpa-a mgmtdev auxiliary/mlx5_core.sf.1 max_vqp 5
> >
> > Please note that for patches that were changed I removed "Reviewed-by"
> > and "Acked-by".
> >
> > v6 -> v7:
> > 1. Make use of cf_mutex for serializing netlink set/get with other
> > calls.
> > 2. Some fixes (See in each patch)
> > 3. Add patch for vdpa_sim to report supported features
> > 4. "Reviewed-by" and "Acked-by" removed from patch 0007 since it had
> > slightly changed.
> >
> > Eli Cohen (14):
> >   vdpa: Provide interface to read driver features
> >   vdpa/mlx5: Distribute RX virtqueues in RQT object
> >   vdpa: Sync calls set/get config/status with cf_mutex
> >   vdpa: Read device configuration only if FEATURES_OK
> >   vdpa: Allow to configure max data virtqueues
> >   vdpa/mlx5: Fix config_attr_mask assignment
> >   vdpa/mlx5: Support configuring max data virtqueue
> >   vdpa: Add support for returning device configuration information
> >   vdpa/mlx5: Restore cur_num_vqs in case of failure in change_num_qps()
> >   vdpa: Support reporting max device capabilities
> >   vdpa/mlx5: Report max device capabilities
> >   vdpa/vdpa_sim: Configure max supported virtqueues
> >   vdpa: Use BIT_ULL for bit operations
> >   vdpa/vdpa_sim_net: Report max device capabilities
> >
> >  drivers/vdpa/alibaba/eni_vdpa.c      |  16 +++-
> >  drivers/vdpa/ifcvf/ifcvf_main.c      |  16 +++-
> >  drivers/vdpa/mlx5/net/mlx5_vnet.c    | 134 ++++++++++++++++-----------
> >  drivers/vdpa/vdpa.c                  | 100 ++++++++++++++++----
> >  drivers/vdpa/vdpa_sim/vdpa_sim.c     |  21 +++--
> >  drivers/vdpa/vdpa_sim/vdpa_sim_net.c |   2 +
> >  drivers/vdpa/vdpa_user/vduse_dev.c   |  16 +++-
> >  drivers/vdpa/virtio_pci/vp_vdpa.c    |  16 +++-
> >  drivers/vhost/vdpa.c                 |  11 +--
> >  drivers/virtio/virtio_vdpa.c         |   7 +-
> >  include/linux/vdpa.h                 |  36 +++++--
> >  include/uapi/linux/vdpa.h            |   6 ++
> >  12 files changed, 271 insertions(+), 110 deletions(-)
> >
> > --
> > 2.34.1
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
       [not found]       ` <20220109141023.GB70879@mtl-vdi-166.wap.labs.mlnx>
@ 2022-01-11  1:00         ` Si-Wei Liu
       [not found]           ` <20220111073416.GB149570@mtl-vdi-166.wap.labs.mlnx>
  0 siblings, 1 reply; 40+ messages in thread
From: Si-Wei Liu @ 2022-01-11  1:00 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma



On 1/9/2022 6:10 AM, Eli Cohen wrote:
> On Thu, Jan 06, 2022 at 05:50:24PM -0800, Si-Wei Liu wrote:
>>
>> On 1/6/2022 5:27 PM, Si-Wei Liu wrote:
>>>
>>> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>>>> Check whether the max number of data virtqueue pairs was provided when
>>>> adding a new device and verify the new value does not exceed device
>>>> capabilities.
>>>>
>>>> In addition, change the arrays holding virtqueue and callback contexts
>>>> to be dynamically allocated.
>>>>
>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>> ---
>>>> v6 -> v7:
>>>> 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
>>>>
>>>>    drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++---------
>>>>    1 file changed, 37 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>> index 4a2149f70f1e..d4720444bf78 100644
>>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>> @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
>>>>        struct mlx5_vq_restore_info ri;
>>>>    };
>>>>    -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
>>>> - * provides for driver space allocation
>>>> - */
>>>> -#define MLX5_MAX_SUPPORTED_VQS 16
>>>> -
>>>>    static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
>>>>    {
>>>>        if (unlikely(idx > mvdev->max_idx))
>>>> @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
>>>>        struct mlx5_vdpa_dev mvdev;
>>>>        struct mlx5_vdpa_net_resources res;
>>>>        struct virtio_net_config config;
>>>> -    struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
>>>> -    struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
>>>> +    struct mlx5_vdpa_virtqueue *vqs;
>>>> +    struct vdpa_callback *event_cbs;
>>>>          /* Serialize vq resources creation and destruction. This is
>>>> required
>>>>         * since memory map might change and we need to destroy and create
>>>> @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net
>>>> *ndev)
>>>>    {
>>>>        int i;
>>>>    -    for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
>>>> +    for (i = 0; i < ndev->mvdev.max_vqs; i++)
>>>>            suspend_vq(ndev, &ndev->vqs[i]);
>>>>    }
>>>>    @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net
>>>> *ndev)
>>>>        void *in;
>>>>        int i, j;
>>>>        int err;
>>>> +    int num;
>>>>    -    max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
>>>> +    if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
>>>> +        num = 1;
>>>> +    else
>>>> +        num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
>>>> +
>>>> +    max_rqt = min_t(int, roundup_pow_of_two(num),
>>>>                1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
>>>>        if (max_rqt < 1)
>>>>            return -EOPNOTSUPP;
>>>> @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>>>>        MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
>>>>        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
>>>>        for (i = 0, j = 0; i < max_rqt; i++, j += 2)
>>>> -        list[i] = cpu_to_be32(ndev->vqs[j %
>>>> ndev->mvdev.max_vqs].virtq_id);
>>>> +        list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
>>> Good catch. LGTM.
>>>
>>> Reviewed-by: Si-Wei Liu<si-wei.liu@oracle.com>
>>>
>> Apologies for replying to myself. It looks to me we need to set
>> cur_num_vqs to the negotiated number of queues. Otherwise any site
>> referencing cur_num_vqs won't work properly.
> You can't really use cur_num_vqs. Consider this scenario:
> create vdpa device with max VQs = 16 (RQT size created with 8 entries)
> boot VM
> ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> reboot VM.
Once the VM is rebooted, cur_num_vqs has to be reset to 0 in the reset() op.
> create RQT with 1 entry only.
Here cur_num_vqs will be loaded with the newly negotiated value 
(max_rqt) again.

> ethtool modify num QPs to 4. modify RQT will fail since it was created
> with max QPs equals 1.
It won't fail, as cur_num_vqs will be consistent with the number of 
queues newly negotiated (i.e. the max_rqt in create_rqt) at modify time.

>
> I think it is ok to create the RQT always with the value configured when
> the device was created.
The problem of not setting cur_num_vqs in create_rqt (and resetting it 
in mlx5_vdpa_reset) is that, once the VM is rebooted or the device is 
reset, the value no longer matches the actual number of RQT entries and 
hence would break various assumptions in change_num_qps() or 
modify_rqt(). For instance:

create vdpa device with max data VQs = 16
boot VM
features_ok set with MQ negotiated
RQT created with 8 entries in create_rqt
ethtool modify num QPs to 2. cur_num_vqs now equals 2.
reboot VM
features_ok set with MQ negotiated
RQT created with 8 entries in create_rqt
ethtool modify num QPs to 6.
cur_num_vqs was 2 while the actual RQT size is 8 (= 16 / 2)
change_num_qps would fail as the wrong increment branch (rather than the decrement) was taken

and this case too, which shows why handle_ctrl_mq() needs to validate 
that VIRTIO_NET_F_MQ was negotiated:

create vdpa device with max data VQs = 16 (RQT size created with 8 entries)
boot VM
features_ok set with MQ negotiated
RQT created with 8 entries in create_rqt
ethtool modify num QPs to 2. cur_num_vqs now equals 2.
reboot VM
features_ok set with no MQ negotiated
RQT created with 8 entries in create_rqt
ethtool modify num QPs to 6 (suppose the guest runs a custom kernel that doesn't check how many channels it may configure)
change_num_qps can succeed; there is no host-side check prohibiting a single-queue guest from switching to multi-queue
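
In code, the two fixes argued for above would look roughly like this. 
This is only a sketch against the names in the quoted patch (the helper 
names and exact signatures here are made up, not Eli's actual code):

/* in create_rqt(), after programming the RQT with 2 * num entries */
static void sync_cur_num_vqs(struct mlx5_vdpa_net *ndev, int num)
{
        /* keep cur_num_vqs consistent with the RQT just created so that
         * change_num_qps()/modify_rqt() take the right branch after a
         * device reset or VM reboot
         */
        ndev->cur_num_vqs = 2 * num;
}

/* in handle_ctrl_mq(), before acting on VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET */
static bool mq_negotiated(struct mlx5_vdpa_dev *mvdev)
{
        /* a guest that did not negotiate VIRTIO_NET_F_MQ must not be
         * able to switch the device to multi-queue
         */
        return !!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ));
}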

Thanks,
-Siwei
>
>> Further, we need to validate VIRTIO_NET_F_MQ is present
>> in handle_ctrl_mq() before changing the number of queue pairs.
>>
>> So just disregard my previous R-b for this patch.
>>
>> Thanks,
>> -Siwei
>>
>>
>>>>          MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
>>>>        err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen,
>>>> &ndev->res.rqtn);
>>>> @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device
>>>> *vdev)
>>>>        clear_vqs_ready(ndev);
>>>>        mlx5_vdpa_destroy_mr(&ndev->mvdev);
>>>>        ndev->mvdev.status = 0;
>>>> -    memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
>>>> +    memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) *
>>>> (mvdev->max_vqs + 1));
>>>>        ndev->mvdev.actual_features = 0;
>>>>        ++mvdev->generation;
>>>>        if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
>>>> @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device
>>>> *vdev)
>>>>        }
>>>>        mlx5_vdpa_free_resources(&ndev->mvdev);
>>>>        mutex_destroy(&ndev->reslock);
>>>> +    kfree(ndev->event_cbs);
>>>> +    kfree(ndev->vqs);
>>>>    }
>>>>      static struct vdpa_notification_area
>>>> mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
>>>> @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct
>>>> vdpa_mgmt_dev *v_mdev, const char *name,
>>>>            return -EOPNOTSUPP;
>>>>        }
>>>>    -    /* we save one virtqueue for control virtqueue should we
>>>> require it */
>>>>        max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
>>>> max_num_virtio_queues);
>>>> -    max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
>>>> +    if (max_vqs < 2) {
>>>> +        dev_warn(mdev->device,
>>>> +             "%d virtqueues are supported. At least 2 are required\n",
>>>> +             max_vqs);
>>>> +        return -EAGAIN;
>>>> +    }
>>>> +
>>>> +    if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
>>>> +        if (add_config->net.max_vq_pairs > max_vqs / 2)
>>>> +            return -EINVAL;
>>>> +        max_vqs = min_t(u32, max_vqs, 2 *
>>>> add_config->net.max_vq_pairs);
>>>> +    } else {
>>>> +        max_vqs = 2;
>>>> +    }
>>>>          ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev,
>>>> mdev->device, &mlx5_vdpa_ops,
>>>>                     name, false);
>>>>        if (IS_ERR(ndev))
>>>>            return PTR_ERR(ndev);
>>>>    +    ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
>>>> +    ndev->event_cbs = kcalloc(max_vqs + 1,
>>>> sizeof(*ndev->event_cbs), GFP_KERNEL);
>>>> +    if (!ndev->vqs || !ndev->event_cbs) {
>>>> +        err = -ENOMEM;
>>>> +        goto err_alloc;
>>>> +    }
>>>>        ndev->mvdev.max_vqs = max_vqs;
>>>>        mvdev = &ndev->mvdev;
>>>>        mvdev->mdev = mdev;
>>>> @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct
>>>> vdpa_mgmt_dev *v_mdev, const char *name,
>>>>            mlx5_mpfs_del_mac(pfmdev, config->mac);
>>>>    err_mtu:
>>>>        mutex_destroy(&ndev->reslock);
>>>> +err_alloc:
>>>>        put_device(&mvdev->vdev.dev);
>>>>        return err;
>>>>    }
>>>> @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device
>>>> *adev,
>>>>        mgtdev->mgtdev.ops = &mdev_ops;
>>>>        mgtdev->mgtdev.device = mdev->device;
>>>>        mgtdev->mgtdev.id_table = id_table;
>>>> -    mgtdev->mgtdev.config_attr_mask =
>>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
>>>> +    mgtdev->mgtdev.config_attr_mask =
>>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
>>>> +                      BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
>>>>        mgtdev->madev = madev;
>>>>          err = vdpa_mgmtdev_register(&mgtdev->mgtdev);


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v7 03/14] vdpa: Sync calls set/get config/status with cf_mutex
       [not found]     ` <20220109140956.GA70879@mtl-vdi-166.wap.labs.mlnx>
@ 2022-01-11  1:14       ` Si-Wei Liu
  0 siblings, 0 replies; 40+ messages in thread
From: Si-Wei Liu @ 2022-01-11  1:14 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma



On 1/9/2022 6:09 AM, Eli Cohen wrote:
> On Thu, Jan 06, 2022 at 04:33:49PM -0800, Si-Wei Liu wrote:
>>
>> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>>> Add wrappers to get/set status and protect these operations with
>>> cf_mutex to serialize these operations with respect to get/set config
>>> operations.
>>>
>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>> ---
>>>    drivers/vdpa/vdpa.c          | 19 +++++++++++++++++++
>>>    drivers/vhost/vdpa.c         |  7 +++----
>>>    drivers/virtio/virtio_vdpa.c |  3 +--
>>>    include/linux/vdpa.h         |  3 +++
>>>    4 files changed, 26 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
>>> index 42d71d60d5dc..5134c83c4a22 100644
>>> --- a/drivers/vdpa/vdpa.c
>>> +++ b/drivers/vdpa/vdpa.c
>>> @@ -21,6 +21,25 @@ static LIST_HEAD(mdev_head);
>>>    static DEFINE_MUTEX(vdpa_dev_mutex);
>>>    static DEFINE_IDA(vdpa_index_ida);
>>> +u8 vdpa_get_status(struct vdpa_device *vdev)
>>> +{
>>> +	u8 status;
>>> +
>>> +	mutex_lock(&vdev->cf_mutex);
>>> +	status = vdev->config->get_status(vdev);
>>> +	mutex_unlock(&vdev->cf_mutex);
>>> +	return status;
>>> +}
>>> +EXPORT_SYMBOL(vdpa_get_status);
>>> +
>>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status)
>>> +{
>>> +	mutex_lock(&vdev->cf_mutex);
>>> +	vdev->config->set_status(vdev, status);
>>> +	mutex_unlock(&vdev->cf_mutex);
>>> +}
>>> +EXPORT_SYMBOL(vdpa_set_status);
>>> +
>>>    static struct genl_family vdpa_nl_family;
>>>    static int vdpa_dev_probe(struct device *d)
>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>> index ebaa373e9b82..d9d499465e2e 100644
>>> --- a/drivers/vhost/vdpa.c
>>> +++ b/drivers/vhost/vdpa.c
>>> @@ -142,10 +142,9 @@ static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
>>>    static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
>>>    {
>>>    	struct vdpa_device *vdpa = v->vdpa;
>>> -	const struct vdpa_config_ops *ops = vdpa->config;
>>>    	u8 status;
>>> -	status = ops->get_status(vdpa);
>>> +	status = vdpa_get_status(vdpa);
>> Not sure why we need to take cf_mutex here. Appears to me only setters
>> (set_status and reset) need to take the lock in this function.
>>
> What if the implementation of get_status() in the upstream driver
> requires some setup? Taking cf_mutex seems like a good thing to do.
This isn't like vdpa_get_config(), which doesn't need that kind of 
setup at all currently (though whether the legacy detection in 
vdpa_get_config is useful or not remains a pending question). Even if 
there's a need in the future, we can add it once we get there, right?

It looks to me quite unlikely that the cf_mutex will be needed even in 
the future, since the command line tool would never want to intervene 
in the internal virtio status via set_status() from the netlink thread 
while the guest driver is running and in control. I doubt this serves 
any real purpose for the foreseeable future; it just adds unwarranted 
overhead.
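
Concretely, the split I'm suggesting is simply to leave the getter as 
it was before this patch (sketch, untested):

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
        struct vdpa_device *vdpa = v->vdpa;
        const struct vdpa_config_ops *ops = vdpa->config;
        u8 status;

        /* pure one-byte read: call the op directly, no cf_mutex */
        status = ops->get_status(vdpa);

        if (copy_to_user(statusp, &status, sizeof(status)))
                return -EFAULT;

        return 0;
}

while set_status() and reset() keep going through the locked wrappers.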

Regards,
-Siwei



>
>>>    	if (copy_to_user(statusp, &status, sizeof(status)))
>>>    		return -EFAULT;
>>> @@ -164,7 +163,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
>>>    	if (copy_from_user(&status, statusp, sizeof(status)))
>>>    		return -EFAULT;
>>> -	status_old = ops->get_status(vdpa);
>>> +	status_old = vdpa_get_status(vdpa);
>> Ditto.
>>
>>>    	/*
>>>    	 * Userspace shouldn't remove status bits unless reset the
>>> @@ -182,7 +181,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
>>>    		if (ret)
>>>    			return ret;
>>>    	} else
>>> -		ops->set_status(vdpa, status);
>>> +		vdpa_set_status(vdpa, status);
>> The reset() call in the if branch above needs to take the cf_mutex, the same
>> way as that for set_status(). The reset() is effectively set_status(vdpa,
>> 0).
>>
> Right, I missed that. Will send a fix.
>
>> Thanks,
>> -Siwei
>>
>>>    	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
>>>    		for (i = 0; i < nvqs; i++)
>>> diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
>>> index a84b04ba3195..76504559bc25 100644
>>> --- a/drivers/virtio/virtio_vdpa.c
>>> +++ b/drivers/virtio/virtio_vdpa.c
>>> @@ -91,9 +91,8 @@ static u8 virtio_vdpa_get_status(struct virtio_device *vdev)
>>>    static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 status)
>>>    {
>>>    	struct vdpa_device *vdpa = vd_get_vdpa(vdev);
>>> -	const struct vdpa_config_ops *ops = vdpa->config;
>>> -	return ops->set_status(vdpa, status);
>>> +	return vdpa_set_status(vdpa, status);
>>>    }
>>>    static void virtio_vdpa_reset(struct virtio_device *vdev)
>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>>> index 9cc4291a79b3..ae047fae2603 100644
>>> --- a/include/linux/vdpa.h
>>> +++ b/include/linux/vdpa.h
>>> @@ -408,6 +408,9 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
>>>    		     void *buf, unsigned int len);
>>>    void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
>>>    		     const void *buf, unsigned int length);
>>> +u8 vdpa_get_status(struct vdpa_device *vdev);
>>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status);
>>> +
>>>    /**
>>>     * struct vdpa_mgmtdev_ops - vdpa device ops
>>>     * @dev_add: Add a vdpa device using alloc and register


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v7 03/14] vdpa: Sync calls set/get config/status with cf_mutex
  2022-01-10  6:05         ` Jason Wang
@ 2022-01-11  1:30           ` Si-Wei Liu
  2022-01-11  4:46             ` Jason Wang
  0 siblings, 1 reply; 40+ messages in thread
From: Si-Wei Liu @ 2022-01-11  1:30 UTC (permalink / raw)
  To: Jason Wang, Eli Cohen, mst, virtualization; +Cc: lvivier, eperezma



On 1/9/2022 10:05 PM, Jason Wang wrote:
>
> On 2022/1/8 9:23 AM, Si-Wei Liu wrote:
>>
>>
>> On 1/6/2022 9:08 PM, Jason Wang wrote:
>>>
>>> 在 2022/1/7 上午8:33, Si-Wei Liu 写道:
>>>>
>>>>
>>>> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>>>>> Add wrappers to get/set status and protect these operations with
>>>>> cf_mutex to serialize these operations with respect to get/set config
>>>>> operations.
>>>>>
>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>> ---
>>>>>   drivers/vdpa/vdpa.c          | 19 +++++++++++++++++++
>>>>>   drivers/vhost/vdpa.c         |  7 +++----
>>>>>   drivers/virtio/virtio_vdpa.c |  3 +--
>>>>>   include/linux/vdpa.h         |  3 +++
>>>>>   4 files changed, 26 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
>>>>> index 42d71d60d5dc..5134c83c4a22 100644
>>>>> --- a/drivers/vdpa/vdpa.c
>>>>> +++ b/drivers/vdpa/vdpa.c
>>>>> @@ -21,6 +21,25 @@ static LIST_HEAD(mdev_head);
>>>>>   static DEFINE_MUTEX(vdpa_dev_mutex);
>>>>>   static DEFINE_IDA(vdpa_index_ida);
>>>>>   +u8 vdpa_get_status(struct vdpa_device *vdev)
>>>>> +{
>>>>> +    u8 status;
>>>>> +
>>>>> +    mutex_lock(&vdev->cf_mutex);
>>>>> +    status = vdev->config->get_status(vdev);
>>>>> +    mutex_unlock(&vdev->cf_mutex);
>>>>> +    return status;
>>>>> +}
>>>>> +EXPORT_SYMBOL(vdpa_get_status);
>>>>> +
>>>>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status)
>>>>> +{
>>>>> +    mutex_lock(&vdev->cf_mutex);
>>>>> +    vdev->config->set_status(vdev, status);
>>>>> +    mutex_unlock(&vdev->cf_mutex);
>>>>> +}
>>>>> +EXPORT_SYMBOL(vdpa_set_status);
>>>>> +
>>>>>   static struct genl_family vdpa_nl_family;
>>>>>     static int vdpa_dev_probe(struct device *d)
>>>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>>>> index ebaa373e9b82..d9d499465e2e 100644
>>>>> --- a/drivers/vhost/vdpa.c
>>>>> +++ b/drivers/vhost/vdpa.c
>>>>> @@ -142,10 +142,9 @@ static long vhost_vdpa_get_device_id(struct 
>>>>> vhost_vdpa *v, u8 __user *argp)
>>>>>   static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 
>>>>> __user *statusp)
>>>>>   {
>>>>>       struct vdpa_device *vdpa = v->vdpa;
>>>>> -    const struct vdpa_config_ops *ops = vdpa->config;
>>>>>       u8 status;
>>>>>   -    status = ops->get_status(vdpa);
>>>>> +    status = vdpa_get_status(vdpa);
>>>> Not sure why we need to take cf_mutex here. Appears to me only 
>>>> setters (set_status and reset) need to take the lock in this function.
>>>
>>>
>>> You may be right but it doesn't harm and it is guaranteed to be 
>>> correct if we protect it with mutex here.
>> Is it more for future proof?
>
>
> I think so.

I guess in this situation it would be better to defer to the future 
patch to add such locking or a wrapper, although right now there are 
just two additional calls taking the lock needlessly when 
vhost_vdpa_get_status is called. Maybe it's just me, but I'm worried 
some future patch may call the locked API wrapper thousands of times 
unintentionally, and then the overhead becomes quite obvious.

>
>
>> Ok, but IMHO it might be better to get some comment here, otherwise 
>> it's quite confusing why the lock needs to be held. vhost_vdpa had 
>> done its own serialization in vhost_vdpa_unlocked_ioctl() through 
>> vhost_dev`mutex.
>
>
> Right, but they are done at different levels: one is for vhost_dev,
> another is for vdpa_dev.
The cf_mutex is introduced to serialize the command line configure 
thread (via netlink) and the upstream driver calls from either the 
vhost_dev or virtio_dev. I don't see a case, even in future, where the 
netlink thread needs to update the virtio status on the fly. Can you 
enlighten me why that is at all possible?

Thanks,
-Siwei
>
> Thanks
>
>
>>
>> -Siwei
>>
>>>
>>> Thanks
>>>
>>>
>>>>
>>>>>         if (copy_to_user(statusp, &status, sizeof(status)))
>>>>>           return -EFAULT;
>>>>> @@ -164,7 +163,7 @@ static long vhost_vdpa_set_status(struct 
>>>>> vhost_vdpa *v, u8 __user *statusp)
>>>>>       if (copy_from_user(&status, statusp, sizeof(status)))
>>>>>           return -EFAULT;
>>>>>   -    status_old = ops->get_status(vdpa);
>>>>> +    status_old = vdpa_get_status(vdpa);
>>>> Ditto.
>>>>
>>>>>         /*
>>>>>        * Userspace shouldn't remove status bits unless reset the
>>>>> @@ -182,7 +181,7 @@ static long vhost_vdpa_set_status(struct 
>>>>> vhost_vdpa *v, u8 __user *statusp)
>>>>>           if (ret)
>>>>>               return ret;
>>>>>       } else
>>>>> -        ops->set_status(vdpa, status);
>>>>> +        vdpa_set_status(vdpa, status);
>>>> The reset() call in the if branch above needs to take the cf_mutex, 
>>>> the same way as that for set_status(). The reset() is effectively 
>>>> set_status(vdpa, 0).
>>>>
>>>> Thanks,
>>>> -Siwei
>>>>
>>>>>         if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & 
>>>>> VIRTIO_CONFIG_S_DRIVER_OK))
>>>>>           for (i = 0; i < nvqs; i++)
>>>>> diff --git a/drivers/virtio/virtio_vdpa.c 
>>>>> b/drivers/virtio/virtio_vdpa.c
>>>>> index a84b04ba3195..76504559bc25 100644
>>>>> --- a/drivers/virtio/virtio_vdpa.c
>>>>> +++ b/drivers/virtio/virtio_vdpa.c
>>>>> @@ -91,9 +91,8 @@ static u8 virtio_vdpa_get_status(struct 
>>>>> virtio_device *vdev)
>>>>>   static void virtio_vdpa_set_status(struct virtio_device *vdev, 
>>>>> u8 status)
>>>>>   {
>>>>>       struct vdpa_device *vdpa = vd_get_vdpa(vdev);
>>>>> -    const struct vdpa_config_ops *ops = vdpa->config;
>>>>>   -    return ops->set_status(vdpa, status);
>>>>> +    return vdpa_set_status(vdpa, status);
>>>>>   }
>>>>>     static void virtio_vdpa_reset(struct virtio_device *vdev)
>>>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>>>>> index 9cc4291a79b3..ae047fae2603 100644
>>>>> --- a/include/linux/vdpa.h
>>>>> +++ b/include/linux/vdpa.h
>>>>> @@ -408,6 +408,9 @@ void vdpa_get_config(struct vdpa_device *vdev, 
>>>>> unsigned int offset,
>>>>>                void *buf, unsigned int len);
>>>>>   void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
>>>>>                const void *buf, unsigned int length);
>>>>> +u8 vdpa_get_status(struct vdpa_device *vdev);
>>>>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status);
>>>>> +
>>>>>   /**
>>>>>    * struct vdpa_mgmtdev_ops - vdpa device ops
>>>>>    * @dev_add: Add a vdpa device using alloc and register
>>>>
>>>
>>
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v7 00/14] Allow for configuring max number of virtqueue pairs
  2022-01-10  7:04 ` [PATCH v7 00/14] Allow for configuring max number of virtqueue pairs Michael S. Tsirkin
  2022-01-10  7:09   ` Jason Wang
@ 2022-01-11  1:59   ` Si-Wei Liu
       [not found]   ` <20220110074958.GA105688@mtl-vdi-166.wap.labs.mlnx>
  2 siblings, 0 replies; 40+ messages in thread
From: Si-Wei Liu @ 2022-01-11  1:59 UTC (permalink / raw)
  To: Michael S. Tsirkin, Eli Cohen; +Cc: lvivier, virtualization, eperezma



On 1/9/2022 11:04 PM, Michael S. Tsirkin wrote:
> On Wed, Jan 05, 2022 at 01:46:32PM +0200, Eli Cohen wrote:
>> Allow the user to configure the max number of virtqueue pairs for a vdpa
>> instance. The user can then control the actual number of virtqueue pairs
>> using ethtool.
> So I put a version of this in linux-next, but I had to squash in
> some bugfixes, and resolve some conflicts. Eli, please take a look
> and let me know whether it looks sane. If not pls post a new
> version.
> Jason, what is your take on merging this now? Si-wei here seems to want
> to defer, but OTOH it's up to v7 already, most patches are acked and
> most comments look like minor improvement suggestions to me.
Sure, no worries. I thought maybe it's just me who cares about the 
completeness and correctness of this series. When I reviewed v6 I also 
thought v7 might be the last shot, but it turned out there were 
outstanding comments that were missed and other issues due to a 
last-minute fix. It's definitely your call to pull it or not. I'm 
satisfied with all aspects of the uAPI change in this series in terms 
of the expectations set.

If you can help fix up the needless change to the vdpa_set_features() 
API in "[PATCH v7 05/14] vdpa: Allow to configure max data virtqueues" 
I would much appreciate it. Or Eli can post a fix for that together 
with the other issues on top, given it was very easy to lose track of 
these similar comments during the previous rounds of review.

Thanks,
-Siwei

>
>> Example, set number of VQPs to 2:
>> $ ethtool -L ens1 combined 2
>>
>> A user can check the max supported virtqueues for a management device by
>> running:
>>
>> $ vdpa mgmtdev show
>>      auxiliary/mlx5_core.sf.1:
>>        supported_classes net
>>        max_supported_vqs 257
>>        dev_features CSUM GUEST_CSUM MTU HOST_TSO4 HOST_TSO6 STATUS CTRL_VQ MQ \
>>                     CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM
>>
>> and refer to this value when adding a device.
>>
>> To create a device with a max of 5 VQPs:
>> vdpa dev add name vdpa-a mgmtdev auxiliary/mlx5_core.sf.1 max_vqp 5
>>
>> Please note that for patches that were changed I removed "Reviewed-by"
>> and "Acked-by".
>>
>> v6 -> v7:
>> 1. Make use of cf_mutex for serializing netlink set/get with other
>> calls.
>> 2. Some fixes (See in each patch)
>> 3. Add patch for vdpa_sim to report supported features
>> 4. "Reviewed-by" and "Acked-by" removed from patch 0007 since it had
>> slightly changed.
>>
>> Eli Cohen (14):
>>    vdpa: Provide interface to read driver features
>>    vdpa/mlx5: Distribute RX virtqueues in RQT object
>>    vdpa: Sync calls set/get config/status with cf_mutex
>>    vdpa: Read device configuration only if FEATURES_OK
>>    vdpa: Allow to configure max data virtqueues
>>    vdpa/mlx5: Fix config_attr_mask assignment
>>    vdpa/mlx5: Support configuring max data virtqueue
>>    vdpa: Add support for returning device configuration information
>>    vdpa/mlx5: Restore cur_num_vqs in case of failure in change_num_qps()
>>    vdpa: Support reporting max device capabilities
>>    vdpa/mlx5: Report max device capabilities
>>    vdpa/vdpa_sim: Configure max supported virtqueues
>>    vdpa: Use BIT_ULL for bit operations
>>    vdpa/vdpa_sim_net: Report max device capabilities
>>
>>   drivers/vdpa/alibaba/eni_vdpa.c      |  16 +++-
>>   drivers/vdpa/ifcvf/ifcvf_main.c      |  16 +++-
>>   drivers/vdpa/mlx5/net/mlx5_vnet.c    | 134 ++++++++++++++++-----------
>>   drivers/vdpa/vdpa.c                  | 100 ++++++++++++++++----
>>   drivers/vdpa/vdpa_sim/vdpa_sim.c     |  21 +++--
>>   drivers/vdpa/vdpa_sim/vdpa_sim_net.c |   2 +
>>   drivers/vdpa/vdpa_user/vduse_dev.c   |  16 +++-
>>   drivers/vdpa/virtio_pci/vp_vdpa.c    |  16 +++-
>>   drivers/vhost/vdpa.c                 |  11 +--
>>   drivers/virtio/virtio_vdpa.c         |   7 +-
>>   include/linux/vdpa.h                 |  36 +++++--
>>   include/uapi/linux/vdpa.h            |   6 ++
>>   12 files changed, 271 insertions(+), 110 deletions(-)
>>
>> -- 
>> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v7 00/14] Allow for configuring max number of virtqueue pairs
       [not found]   ` <20220110074958.GA105688@mtl-vdi-166.wap.labs.mlnx>
@ 2022-01-11  2:02     ` Si-Wei Liu
  0 siblings, 0 replies; 40+ messages in thread
From: Si-Wei Liu @ 2022-01-11  2:02 UTC (permalink / raw)
  To: Eli Cohen, Michael S. Tsirkin; +Cc: lvivier, virtualization, eperezma



On 1/9/2022 11:49 PM, Eli Cohen wrote:
> On Mon, Jan 10, 2022 at 02:04:32AM -0500, Michael S. Tsirkin wrote:
>> On Wed, Jan 05, 2022 at 01:46:32PM +0200, Eli Cohen wrote:
>>> Allow the user to configure the max number of virtqueue pairs for a vdpa
>>> instance. The user can then control the actual number of virtqueue pairs
>>> using ethtool.
>> So I put a version of this in linux-next, but I had to squash in
>> some bugfixes, and resolve some conflicts. Eli, please take a look
>> and let me know whether it looks sane. If not pls post a new
>> version.
> I see you squashed two fixes.
> There was one more comment from Si-Wei which I addressed by sending a
> distinct patch titled "vdpa: Protect vdpa reset with cf_mutex".
Sorry, but I did not see this new patch that addressed reset. Please 
copy me when you get it (re-)posted.
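
Going by the title, I assume it adds a locked reset wrapper symmetric 
to vdpa_set_status() from patch 03/14 -- something like the sketch 
below (my guess at the shape of it, not the actual patch):

int vdpa_reset(struct vdpa_device *vdev)
{
        const struct vdpa_config_ops *ops = vdev->config;
        int ret;

        /* reset is effectively set_status(vdev, 0), so it must take
         * the same cf_mutex as the other config/status accessors
         */
        mutex_lock(&vdev->cf_mutex);
        ret = ops->reset(vdev);
        mutex_unlock(&vdev->cf_mutex);
        return ret;
}
EXPORT_SYMBOL(vdpa_reset);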

-Siwei


>
> It has not been reviewed yet, but maybe you can squash it into patch
> 0005 after it has been reviewed.
>
>> Jason, what is your take on merging this now? Si-wei here seems to want
>> to defer, but OTOH it's up to v7 already, most patches are acked and
>> most comments look like minor improvement suggestions to me.
>>
>>> Example, set number of VQPs to 2:
>>> $ ethtool -L ens1 combined 2
>>>
>>> A user can check the max supported virtqueues for a management device by
>>> running:
>>>
>>> $ vdpa mgmtdev show
>>>      auxiliary/mlx5_core.sf.1:
>>>        supported_classes net
>>>        max_supported_vqs 257
>>>        dev_features CSUM GUEST_CSUM MTU HOST_TSO4 HOST_TSO6 STATUS CTRL_VQ MQ \
>>>                     CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM
>>>
>>> and refer to this value when adding a device.
>>>
>>> To create a device with a max of 5 VQPs:
>>> vdpa dev add name vdpa-a mgmtdev auxiliary/mlx5_core.sf.1 max_vqp 5
>>>
>>> Please note that for patches that were changed I removed "Reviewed-by"
>>> and "Acked-by".
>>>
>>> v6 -> v7:
>>> 1. Make use of cf_mutex for serializing netlink set/get with other
>>> calls.
>>> 2. Some fixes (See in each patch)
>>> 3. Add patch for vdpa_sim to report supported features
>>> 4. "Reviewed-by" and "Acked-by" removed from patch 0007 since it had
>>> slightly changed.
>>>
>>> Eli Cohen (14):
>>>    vdpa: Provide interface to read driver features
>>>    vdpa/mlx5: Distribute RX virtqueues in RQT object
>>>    vdpa: Sync calls set/get config/status with cf_mutex
>>>    vdpa: Read device configuration only if FEATURES_OK
>>>    vdpa: Allow to configure max data virtqueues
>>>    vdpa/mlx5: Fix config_attr_mask assignment
>>>    vdpa/mlx5: Support configuring max data virtqueue
>>>    vdpa: Add support for returning device configuration information
>>>    vdpa/mlx5: Restore cur_num_vqs in case of failure in change_num_qps()
>>>    vdpa: Support reporting max device capabilities
>>>    vdpa/mlx5: Report max device capabilities
>>>    vdpa/vdpa_sim: Configure max supported virtqueues
>>>    vdpa: Use BIT_ULL for bit operations
>>>    vdpa/vdpa_sim_net: Report max device capabilities
>>>
>>>   drivers/vdpa/alibaba/eni_vdpa.c      |  16 +++-
>>>   drivers/vdpa/ifcvf/ifcvf_main.c      |  16 +++-
>>>   drivers/vdpa/mlx5/net/mlx5_vnet.c    | 134 ++++++++++++++++-----------
>>>   drivers/vdpa/vdpa.c                  | 100 ++++++++++++++++----
>>>   drivers/vdpa/vdpa_sim/vdpa_sim.c     |  21 +++--
>>>   drivers/vdpa/vdpa_sim/vdpa_sim_net.c |   2 +
>>>   drivers/vdpa/vdpa_user/vduse_dev.c   |  16 +++-
>>>   drivers/vdpa/virtio_pci/vp_vdpa.c    |  16 +++-
>>>   drivers/vhost/vdpa.c                 |  11 +--
>>>   drivers/virtio/virtio_vdpa.c         |   7 +-
>>>   include/linux/vdpa.h                 |  36 +++++--
>>>   include/uapi/linux/vdpa.h            |   6 ++
>>>   12 files changed, 271 insertions(+), 110 deletions(-)
>>>
>>> -- 
>>> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v7 03/14] vdpa: Sync calls set/get config/status with cf_mutex
  2022-01-11  1:30           ` Si-Wei Liu
@ 2022-01-11  4:46             ` Jason Wang
  2022-01-11  6:26               ` Parav Pandit via Virtualization
  2022-01-11  9:23               ` Si-Wei Liu
  0 siblings, 2 replies; 40+ messages in thread
From: Jason Wang @ 2022-01-11  4:46 UTC (permalink / raw)
  To: Si-Wei Liu; +Cc: Laurent Vivier, mst, virtualization, eperezma, Eli Cohen

On Tue, Jan 11, 2022 at 9:30 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 1/9/2022 10:05 PM, Jason Wang wrote:
> >
> > On 2022/1/8 9:23 AM, Si-Wei Liu wrote:
> >>
> >>
> >> On 1/6/2022 9:08 PM, Jason Wang wrote:
> >>>
> >>> On 2022/1/7 8:33 AM, Si-Wei Liu wrote:
> >>>>
> >>>>
> >>>> On 1/5/2022 3:46 AM, Eli Cohen wrote:
> >>>>> Add wrappers to get/set status and protect these operations with
> >>>>> cf_mutex to serialize these operations with respect to get/set config
> >>>>> operations.
> >>>>>
> >>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
> >>>>> ---
> >>>>>   drivers/vdpa/vdpa.c          | 19 +++++++++++++++++++
> >>>>>   drivers/vhost/vdpa.c         |  7 +++----
> >>>>>   drivers/virtio/virtio_vdpa.c |  3 +--
> >>>>>   include/linux/vdpa.h         |  3 +++
> >>>>>   4 files changed, 26 insertions(+), 6 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> >>>>> index 42d71d60d5dc..5134c83c4a22 100644
> >>>>> --- a/drivers/vdpa/vdpa.c
> >>>>> +++ b/drivers/vdpa/vdpa.c
> >>>>> @@ -21,6 +21,25 @@ static LIST_HEAD(mdev_head);
> >>>>>   static DEFINE_MUTEX(vdpa_dev_mutex);
> >>>>>   static DEFINE_IDA(vdpa_index_ida);
> >>>>>   +u8 vdpa_get_status(struct vdpa_device *vdev)
> >>>>> +{
> >>>>> +    u8 status;
> >>>>> +
> >>>>> +    mutex_lock(&vdev->cf_mutex);
> >>>>> +    status = vdev->config->get_status(vdev);
> >>>>> +    mutex_unlock(&vdev->cf_mutex);
> >>>>> +    return status;
> >>>>> +}
> >>>>> +EXPORT_SYMBOL(vdpa_get_status);
> >>>>> +
> >>>>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status)
> >>>>> +{
> >>>>> +    mutex_lock(&vdev->cf_mutex);
> >>>>> +    vdev->config->set_status(vdev, status);
> >>>>> +    mutex_unlock(&vdev->cf_mutex);
> >>>>> +}
> >>>>> +EXPORT_SYMBOL(vdpa_set_status);
> >>>>> +
> >>>>>   static struct genl_family vdpa_nl_family;
> >>>>>     static int vdpa_dev_probe(struct device *d)
> >>>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> >>>>> index ebaa373e9b82..d9d499465e2e 100644
> >>>>> --- a/drivers/vhost/vdpa.c
> >>>>> +++ b/drivers/vhost/vdpa.c
> >>>>> @@ -142,10 +142,9 @@ static long vhost_vdpa_get_device_id(struct
> >>>>> vhost_vdpa *v, u8 __user *argp)
> >>>>>   static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8
> >>>>> __user *statusp)
> >>>>>   {
> >>>>>       struct vdpa_device *vdpa = v->vdpa;
> >>>>> -    const struct vdpa_config_ops *ops = vdpa->config;
> >>>>>       u8 status;
> >>>>>   -    status = ops->get_status(vdpa);
> >>>>> +    status = vdpa_get_status(vdpa);
> >>>> Not sure why we need to take cf_mutex here. Appears to me only
> >>>> setters (set_status and reset) need to take the lock in this function.
> >>>
> >>>
> >>> You may be right but it doesn't harm and it is guaranteed to be
> >>> correct if we protect it with mutex here.
> >> Is it more for future proof?
> >
> >
> > I think so.
>
> I guess in this situation it would be better to defer to the future
> patch to add such locking or a wrapper, although right now there are
> just two additional calls taking the lock needlessly when
> vhost_vdpa_get_status is called. Maybe it's just me, but I'm worried
> some future patch may call the locked API wrapper thousands of times
> unintentionally, and then the overhead becomes quite obvious.

I'm fine to remove this if others agree on this.

>
> >
> >
> >> Ok, but IMHO it might be better to get some comment here, otherwise
> >> it's quite confusing why the lock needs to be held. vhost_vdpa had
> >> done its own serialization in vhost_vdpa_unlocked_ioctl() through
> >> vhost_dev`mutex.
> >
> >
> > > Right, but they are done at different levels: one is for vhost_dev,
> > > another is for vdpa_dev.
> The cf_mutex is introduced to serialize the command line configure
> thread (via netlink) and the upstream driver calls from either the
> vhost_dev or virtio_dev. I don't see a case, even in future, where the
> netlink thread needs to update the virtio status on the fly. Can you
> enlighten me why that is at all possible?

After some thought I don't see a case. I can think of NEEDS_RESET but
it should come with a config interrupt so we're probably fine without
the mutex here.

Thanks

>
> Thanks,
> -Siwei
> >
> > Thanks
> >
> >
> >>
> >> -Siwei
> >>
> >>>
> >>> Thanks
> >>>
> >>>
> >>>>
> >>>>>         if (copy_to_user(statusp, &status, sizeof(status)))
> >>>>>           return -EFAULT;
> >>>>> @@ -164,7 +163,7 @@ static long vhost_vdpa_set_status(struct
> >>>>> vhost_vdpa *v, u8 __user *statusp)
> >>>>>       if (copy_from_user(&status, statusp, sizeof(status)))
> >>>>>           return -EFAULT;
> >>>>>   -    status_old = ops->get_status(vdpa);
> >>>>> +    status_old = vdpa_get_status(vdpa);
> >>>> Ditto.
> >>>>
> >>>>>         /*
> >>>>>        * Userspace shouldn't remove status bits unless reset the
> >>>>> @@ -182,7 +181,7 @@ static long vhost_vdpa_set_status(struct
> >>>>> vhost_vdpa *v, u8 __user *statusp)
> >>>>>           if (ret)
> >>>>>               return ret;
> >>>>>       } else
> >>>>> -        ops->set_status(vdpa, status);
> >>>>> +        vdpa_set_status(vdpa, status);
> >>>> The reset() call in the if branch above needs to take the cf_mutex,
> >>>> the same way as that for set_status(). The reset() is effectively
> >>>> set_status(vdpa, 0).
> >>>>
> >>>> Thanks,
> >>>> -Siwei
> >>>>
> >>>>>         if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old &
> >>>>> VIRTIO_CONFIG_S_DRIVER_OK))
> >>>>>           for (i = 0; i < nvqs; i++)
> >>>>> diff --git a/drivers/virtio/virtio_vdpa.c
> >>>>> b/drivers/virtio/virtio_vdpa.c
> >>>>> index a84b04ba3195..76504559bc25 100644
> >>>>> --- a/drivers/virtio/virtio_vdpa.c
> >>>>> +++ b/drivers/virtio/virtio_vdpa.c
> >>>>> @@ -91,9 +91,8 @@ static u8 virtio_vdpa_get_status(struct
> >>>>> virtio_device *vdev)
> >>>>>   static void virtio_vdpa_set_status(struct virtio_device *vdev,
> >>>>> u8 status)
> >>>>>   {
> >>>>>       struct vdpa_device *vdpa = vd_get_vdpa(vdev);
> >>>>> -    const struct vdpa_config_ops *ops = vdpa->config;
> >>>>>   -    return ops->set_status(vdpa, status);
> >>>>> +    return vdpa_set_status(vdpa, status);
> >>>>>   }
> >>>>>     static void virtio_vdpa_reset(struct virtio_device *vdev)
> >>>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> >>>>> index 9cc4291a79b3..ae047fae2603 100644
> >>>>> --- a/include/linux/vdpa.h
> >>>>> +++ b/include/linux/vdpa.h
> >>>>> @@ -408,6 +408,9 @@ void vdpa_get_config(struct vdpa_device *vdev,
> >>>>> unsigned int offset,
> >>>>>                void *buf, unsigned int len);
> >>>>>   void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
> >>>>>                const void *buf, unsigned int length);
> >>>>> +u8 vdpa_get_status(struct vdpa_device *vdev);
> >>>>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status);
> >>>>> +
> >>>>>   /**
> >>>>>    * struct vdpa_mgmtdev_ops - vdpa device ops
> >>>>>    * @dev_add: Add a vdpa device using alloc and register
> >>>>
> >>>
> >>
> >
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH v7 03/14] vdpa: Sync calls set/get config/status with cf_mutex
  2022-01-11  4:46             ` Jason Wang
@ 2022-01-11  6:26               ` Parav Pandit via Virtualization
  2022-01-11  9:15                 ` Si-Wei Liu
  2022-01-11  9:23               ` Si-Wei Liu
  1 sibling, 1 reply; 40+ messages in thread
From: Parav Pandit via Virtualization @ 2022-01-11  6:26 UTC (permalink / raw)
  To: Jason Wang, Si-Wei Liu
  Cc: Laurent Vivier, mst, virtualization, eperezma, Eli Cohen



> From: Jason Wang <jasowang@redhat.com>
> Sent: Tuesday, January 11, 2022 10:17 AM
> >
> > I guess in this situation it would be better to defer to the future
> > patch to add such locking or a wrapper, although right now there are
> > just two additional calls taking the lock needlessly when
> > vhost_vdpa_get_status is called. Maybe it's just me, but I'm worried
> > some future patch may call the locked API wrapper thousands of times
> > unintentionally, and then the overhead becomes quite obvious.
> 
> I'm fine to remove this if others agree on this.
> 
> >
> > >
> > >
> > >> Ok, but IMHO it might be better to get some comment here, otherwise
> > >> it's quite confusing why the lock needs to be held. vhost_vdpa had
> > >> done its own serialization in vhost_vdpa_unlocked_ioctl() through
> > >> vhost_dev`mutex.
> > >
> > >
> > > Right, but they are done for different levels, one is for vhost_dev
> > > antoher is for vdpa_dev.
> > The cf_mutex is introduced to serialize the command line configure
> > thread (via netlink) and the upstream driver calls from either the
> > vhost_dev or virtio_dev. I don't see a case, even in future, where the
> > netlink thread needs to update the virtio status on the fly. Can you
> > enlighten me why that is at all possible?
> 
Sorry for my late response.

Netlink reads the whole config space while it is not being modified by the vhost/virtio driver side.
A better option is to convert cf_mutex to an rw_sem; that clarifies the code and still ensures that netlink doesn't read bytes while they are being modified.
Given that config bytes can be updated at any time and not on field boundaries, the read is anyway not atomic.
The mutex was added when we wanted to modify the fields post creation, but we eventually dropped that idea.

So yes, cf_mutex removal is fine too.
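
For illustration, the rw_sem variant would look roughly like this 
(sketch only, untested; cf_lock is a made-up name):

/* in struct vdpa_device, cf_mutex becomes: */
struct rw_semaphore cf_lock;

u8 vdpa_get_status(struct vdpa_device *vdev)
{
        u8 status;

        down_read(&vdev->cf_lock);   /* shared: readers can run concurrently */
        status = vdev->config->get_status(vdev);
        up_read(&vdev->cf_lock);
        return status;
}

void vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
        down_write(&vdev->cf_lock);  /* exclusive: no reader sees a partial update */
        vdev->config->set_status(vdev, status);
        up_write(&vdev->cf_lock);
}

The netlink read side would take down_read()/up_read() the same way.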

> After some thought I don't see a case. I can think of NEEDS_RESET but it should
> come with a config interrupt so we're probably fine without the mutex here.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
       [not found]           ` <20220111073416.GB149570@mtl-vdi-166.wap.labs.mlnx>
@ 2022-01-11  8:28             ` Jason Wang
  2022-01-11 12:05               ` Michael S. Tsirkin
  2022-01-11  8:52             ` Si-Wei Liu
  1 sibling, 1 reply; 40+ messages in thread
From: Jason Wang @ 2022-01-11  8:28 UTC (permalink / raw)
  To: Eli Cohen; +Cc: Laurent Vivier, mst, virtualization, eperezma, Si-Wei Liu

On Tue, Jan 11, 2022 at 3:34 PM Eli Cohen <elic@nvidia.com> wrote:
>
> On Mon, Jan 10, 2022 at 05:00:34PM -0800, Si-Wei Liu wrote:
> >
> >
> > On 1/9/2022 6:10 AM, Eli Cohen wrote:
> > > On Thu, Jan 06, 2022 at 05:50:24PM -0800, Si-Wei Liu wrote:
> > > >
> > > > On 1/6/2022 5:27 PM, Si-Wei Liu wrote:
> > > > >
> > > > > On 1/5/2022 3:46 AM, Eli Cohen wrote:
> > > > > > Check whether the max number of data virtqueue pairs was provided when
> > > > > > adding a new device and verify the new value does not exceed device
> > > > > > capabilities.
> > > > > >
> > > > > > In addition, change the arrays holding virtqueue and callback contexts
> > > > > > to be dynamically allocated.
> > > > > >
> > > > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > > > ---
> > > > > > v6 -> v7:
> > > > > > 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
> > > > > >
> > > > > >    drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++---------
> > > > > >    1 file changed, 37 insertions(+), 14 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > index 4a2149f70f1e..d4720444bf78 100644
> > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
> > > > > >        struct mlx5_vq_restore_info ri;
> > > > > >    };
> > > > > >    -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
> > > > > > - * provides for driver space allocation
> > > > > > - */
> > > > > > -#define MLX5_MAX_SUPPORTED_VQS 16
> > > > > > -
> > > > > >    static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
> > > > > >    {
> > > > > >        if (unlikely(idx > mvdev->max_idx))
> > > > > > @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
> > > > > >        struct mlx5_vdpa_dev mvdev;
> > > > > >        struct mlx5_vdpa_net_resources res;
> > > > > >        struct virtio_net_config config;
> > > > > > -    struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
> > > > > > -    struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
> > > > > > +    struct mlx5_vdpa_virtqueue *vqs;
> > > > > > +    struct vdpa_callback *event_cbs;
> > > > > >          /* Serialize vq resources creation and destruction. This is
> > > > > > required
> > > > > >         * since memory map might change and we need to destroy and create
> > > > > > @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net
> > > > > > *ndev)
> > > > > >    {
> > > > > >        int i;
> > > > > >    -    for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
> > > > > > +    for (i = 0; i < ndev->mvdev.max_vqs; i++)
> > > > > >            suspend_vq(ndev, &ndev->vqs[i]);
> > > > > >    }
> > > > > >    @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net
> > > > > > *ndev)
> > > > > >        void *in;
> > > > > >        int i, j;
> > > > > >        int err;
> > > > > > +    int num;
> > > > > >    -    max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
> > > > > > +    if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
> > > > > > +        num = 1;
> > > > > > +    else
> > > > > > +        num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
> > > > > > +
> > > > > > +    max_rqt = min_t(int, roundup_pow_of_two(num),
> > > > > >                1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
> > > > > >        if (max_rqt < 1)
> > > > > >            return -EOPNOTSUPP;
> > > > > > @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
> > > > > >        MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
> > > > > >        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
> > > > > >        for (i = 0, j = 0; i < max_rqt; i++, j += 2)
> > > > > > -        list[i] = cpu_to_be32(ndev->vqs[j %
> > > > > > ndev->mvdev.max_vqs].virtq_id);
> > > > > > +        list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
> > > > > Good catch. LGTM.
> > > > >
> > > > > Reviewed-by: Si-Wei Liu<si-wei.liu@oracle.com>
> > > > >
> > > > Apologies for replying to myself. It looks to me we need to set
> > > > cur_num_vqs to the negotiated number of queues. Otherwise any site
> > > > referencing cur_num_vqs won't work properly.
> > > You can't really use cur_num_vqs. Consider this scenario:
> > > create vdpa device with max VQs = 16 (RQT size created with 8 entries)
> > > boot VM
> > > ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> > > reboot VM.
> > Once the VM is rebooted, cur_num_vqs has to be reset to 0 in the reset() op.
>
> Why reset to 0? The correct value to set here is
> config->max_virtqueue_pairs.

Multiqueue is disabled by default so I think it should be 1 here. This
is the behavior of Qemu.

It looks to me we need to clarify this in the spec.
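
I.e. something along these lines in the reset path (a sketch; which
default is correct is exactly the point that needs clarifying):

        /* after reset, MQ is off until the driver negotiates
         * VIRTIO_NET_F_MQ and sends VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
         */
        ndev->cur_num_vqs = 2;  /* one RX/TX pair */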

Thanks

> This is what happens when you add the
> device. Setting cur_num_vqs to config->max_virtqueue_pairs will address
> your concerns with respect to modify_rqt. Once you reset cur_num_vqs to
> config->max_virtqueue_pairs your concerns with respect to changing
> number of QPs should be addressed. We could even leave create_rqt
> untouched or modify the code to use cur_num_vqs. It should work the
> same.
>
> > > create RQT with 1 entry only.
> > Here cur_num_vqs will be loaded with the newly negotiated value (max_rqt)
> > again.
> >
> > > ethtool modify num QPs to 4. modify RQT will fail since it was created
> > > with max QPs equals 1.
> > It won't fail, as cur_num_vqs will be consistent with the number of
> > queues newly negotiated (i.e. the max_rqt in create_rqt) at modify time.
> >
> > >
> > > I think it is ok to create the RQT always with the value configured when
> > > the device was created.
> > The problem of not setting cur_num_vqs in create_rqt (and resetting it in
> > mlx5_vdpa_reset) is that, once the VM is rebooted or the device is reset,
> > the value no longer matches the actual number of RQT entries and hence
> > would break various assumptions in change_num_qps() or modify_rqt(). For
> > instance:
> >
> > create vdpa device with max data VQs = 16
> > boot VM
> > features_ok set with MQ negotiated
> > RQT created with 8 entries in create_rqt
> > ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> > reboot VM
> > features_ok set with MQ negotiated
> > RQT created with 8 entries in create_rqt
> > ethtool modify num QPs to 6.
> > cur_num_vqs was 2 while the actual RQT size is 8 (= 16 / 2)
> > change_num_qps would fail as the wrong increment branch (rather than the decrement) was taken
> >
> > and this case too, which shows why handle_ctrl_mq() needs to validate
> > that VIRTIO_NET_F_MQ was negotiated:
> >
> > create vdpa device with max data VQs = 16 (RQT size created with 8 entries)
> > boot VM
> > features_ok set with MQ negotiated
> > RQT created with 8 entries in create_rqt
> > ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> > reboot VM
> > features_ok set with no MQ negotiated
> > RQT created with 8 entries in create_rqt
> > ethtool modify num QPs to 6. suppose guest runs a custom kernel without checking the #channels to configure
> > change_num_qps can succeed and no host side check prohibiting a single queue guest to set multi-queue
> >
> > Thanks,
> > -Siwei
> > >
> > > > Further, we need to validate VIRTIO_NET_F_MQ is present
> > > > in handle_ctrl_mq() before changing the number of queue pairs.
> > > >
> > > > So just disregard my previous R-b for this patch.
> > > >
> > > > Thanks,
> > > > -Siwei
> > > >
> > > >
> > > > > >          MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
> > > > > >        err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen,
> > > > > > &ndev->res.rqtn);
> > > > > > @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device
> > > > > > *vdev)
> > > > > >        clear_vqs_ready(ndev);
> > > > > >        mlx5_vdpa_destroy_mr(&ndev->mvdev);
> > > > > >        ndev->mvdev.status = 0;
> > > > > > -    memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
> > > > > > +    memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) *
> > > > > > (mvdev->max_vqs + 1));
> > > > > >        ndev->mvdev.actual_features = 0;
> > > > > >        ++mvdev->generation;
> > > > > >        if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
> > > > > > @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device
> > > > > > *vdev)
> > > > > >        }
> > > > > >        mlx5_vdpa_free_resources(&ndev->mvdev);
> > > > > >        mutex_destroy(&ndev->reslock);
> > > > > > +    kfree(ndev->event_cbs);
> > > > > > +    kfree(ndev->vqs);
> > > > > >    }
> > > > > >      static struct vdpa_notification_area
> > > > > > mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
> > > > > > @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct
> > > > > > vdpa_mgmt_dev *v_mdev, const char *name,
> > > > > >            return -EOPNOTSUPP;
> > > > > >        }
> > > > > >    -    /* we save one virtqueue for control virtqueue should we
> > > > > > require it */
> > > > > >        max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
> > > > > > max_num_virtio_queues);
> > > > > > -    max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
> > > > > > +    if (max_vqs < 2) {
> > > > > > +        dev_warn(mdev->device,
> > > > > > +             "%d virtqueues are supported. At least 2 are required\n",
> > > > > > +             max_vqs);
> > > > > > +        return -EAGAIN;
> > > > > > +    }
> > > > > > +
> > > > > > +    if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
> > > > > > +        if (add_config->net.max_vq_pairs > max_vqs / 2)
> > > > > > +            return -EINVAL;
> > > > > > +        max_vqs = min_t(u32, max_vqs, 2 *
> > > > > > add_config->net.max_vq_pairs);
> > > > > > +    } else {
> > > > > > +        max_vqs = 2;
> > > > > > +    }
> > > > > >          ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev,
> > > > > > mdev->device, &mlx5_vdpa_ops,
> > > > > >                     name, false);
> > > > > >        if (IS_ERR(ndev))
> > > > > >            return PTR_ERR(ndev);
> > > > > >    +    ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
> > > > > > +    ndev->event_cbs = kcalloc(max_vqs + 1,
> > > > > > sizeof(*ndev->event_cbs), GFP_KERNEL);
> > > > > > +    if (!ndev->vqs || !ndev->event_cbs) {
> > > > > > +        err = -ENOMEM;
> > > > > > +        goto err_alloc;
> > > > > > +    }
> > > > > >        ndev->mvdev.max_vqs = max_vqs;
> > > > > >        mvdev = &ndev->mvdev;
> > > > > >        mvdev->mdev = mdev;
> > > > > > @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct
> > > > > > vdpa_mgmt_dev *v_mdev, const char *name,
> > > > > >            mlx5_mpfs_del_mac(pfmdev, config->mac);
> > > > > >    err_mtu:
> > > > > >        mutex_destroy(&ndev->reslock);
> > > > > > +err_alloc:
> > > > > >        put_device(&mvdev->vdev.dev);
> > > > > >        return err;
> > > > > >    }
> > > > > > @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device
> > > > > > *adev,
> > > > > >        mgtdev->mgtdev.ops = &mdev_ops;
> > > > > >        mgtdev->mgtdev.device = mdev->device;
> > > > > >        mgtdev->mgtdev.id_table = id_table;
> > > > > > -    mgtdev->mgtdev.config_attr_mask =
> > > > > > BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
> > > > > > +    mgtdev->mgtdev.config_attr_mask =
> > > > > > BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
> > > > > > +                      BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
> > > > > >        mgtdev->madev = madev;
> > > > > >          err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
> >
>


* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
       [not found]           ` <20220111073416.GB149570@mtl-vdi-166.wap.labs.mlnx>
  2022-01-11  8:28             ` Jason Wang
@ 2022-01-11  8:52             ` Si-Wei Liu
       [not found]               ` <20220111152154.GA165838@mtl-vdi-166.wap.labs.mlnx>
  1 sibling, 1 reply; 40+ messages in thread
From: Si-Wei Liu @ 2022-01-11  8:52 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma



On 1/10/2022 11:34 PM, Eli Cohen wrote:
> On Mon, Jan 10, 2022 at 05:00:34PM -0800, Si-Wei Liu wrote:
>>
>> On 1/9/2022 6:10 AM, Eli Cohen wrote:
>>> On Thu, Jan 06, 2022 at 05:50:24PM -0800, Si-Wei Liu wrote:
>>>> On 1/6/2022 5:27 PM, Si-Wei Liu wrote:
>>>>> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>>>>>> Check whether the max number of data virtqueue pairs was provided when
>>>>>> adding a new device and verify the new value does not exceed device
>>>>>> capabilities.
>>>>>>
>>>>>> In addition, change the arrays holding virtqueue and callback contexts
>>>>>> to be dynamically allocated.
>>>>>>
>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>>> ---
>>>>>> v6 -> v7:
>>>>>> 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
>>>>>>
>>>>>>     drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++---------
>>>>>>     1 file changed, 37 insertions(+), 14 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>>>> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>>>> index 4a2149f70f1e..d4720444bf78 100644
>>>>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>>>> @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
>>>>>>         struct mlx5_vq_restore_info ri;
>>>>>>     };
>>>>>>     -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
>>>>>> - * provides for driver space allocation
>>>>>> - */
>>>>>> -#define MLX5_MAX_SUPPORTED_VQS 16
>>>>>> -
>>>>>>     static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
>>>>>>     {
>>>>>>         if (unlikely(idx > mvdev->max_idx))
>>>>>> @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
>>>>>>         struct mlx5_vdpa_dev mvdev;
>>>>>>         struct mlx5_vdpa_net_resources res;
>>>>>>         struct virtio_net_config config;
>>>>>> -    struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
>>>>>> -    struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
>>>>>> +    struct mlx5_vdpa_virtqueue *vqs;
>>>>>> +    struct vdpa_callback *event_cbs;
>>>>>>           /* Serialize vq resources creation and destruction. This is
>>>>>> required
>>>>>>          * since memory map might change and we need to destroy and create
>>>>>> @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net
>>>>>> *ndev)
>>>>>>     {
>>>>>>         int i;
>>>>>>     -    for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
>>>>>> +    for (i = 0; i < ndev->mvdev.max_vqs; i++)
>>>>>>             suspend_vq(ndev, &ndev->vqs[i]);
>>>>>>     }
>>>>>>     @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net
>>>>>> *ndev)
>>>>>>         void *in;
>>>>>>         int i, j;
>>>>>>         int err;
>>>>>> +    int num;
>>>>>>     -    max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
>>>>>> +    if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
>>>>>> +        num = 1;
>>>>>> +    else
>>>>>> +        num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
>>>>>> +
>>>>>> +    max_rqt = min_t(int, roundup_pow_of_two(num),
>>>>>>                 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
>>>>>>         if (max_rqt < 1)
>>>>>>             return -EOPNOTSUPP;
>>>>>> @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>>>>>>         MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
>>>>>>         list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
>>>>>>         for (i = 0, j = 0; i < max_rqt; i++, j += 2)
>>>>>> -        list[i] = cpu_to_be32(ndev->vqs[j %
>>>>>> ndev->mvdev.max_vqs].virtq_id);
>>>>>> +        list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
>>>>> Good catch. LGTM.
>>>>>
>>>>> Reviewed-by: Si-Wei Liu<si-wei.liu@oracle.com>
>>>>>
>>>> Apologies to reply to myself. It looks to me we need to set cur_num_vqs to
>>>> the negotiated num of queues. Otherwise any site referencing cur_num_vqs
>>>> won't work properly.
>>> You can't really use cur_num_vqs. Consider this scenario:
>>> create vdpa device with max VQs = 16 (RQT size created with 8 entries)
>>> boot VM
>>> ethtool modify num QPs to 2. cur_num_vqs now equals 2.
>>> reboot VM.
>> Once VM is rebooted, the cur_num_vqs has to reset to 0 in the reset() op.
> Why reset to 0? The correct value to set here is
> config->max_virtqueue_pairs.
I am not sure I get you; perhaps post a patch to show what you want to 
do? If you intend to set it to 2 (non-MQ) or 2 * max_virtqueue_pairs (MQ) 
later in create_rqt(), maybe it doesn't matter much whether or not you 
reset. But in the interest of keeping things consistent, I'd prefer to 
reset it to a value that reflects the fact that this field has no valid 
value until features are negotiated. Whenever there's a need in the 
future, this field can then easily be exposed to userspace.

>   This is what happens when you add the
> device.
That is not relevant here. Essentially, the line of code that sets 
cur_num_vqs in vdpa_add can be removed: the value of cur_num_vqs isn't 
effective before the MQ feature is negotiated, i.e. you don't get a 
sensible cur_num_vqs value until the size of the RQT has been derived 
from the MQ feature and the table gets created.
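
Concretely, something like the below in create_rqt() is what I'm
suggesting (an untested sketch, reusing the num this patch already
computes from the negotiated features):

	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
		num = 1;
	else
		num = le16_to_cpu(ndev->config.max_virtqueue_pairs);

	/* Sketch: cur_num_vqs only becomes meaningful here, once the
	 * RQT size has been derived from the negotiated features and
	 * the table is about to be created.
	 */
	ndev->cur_num_vqs = 2 * num;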

>   Setting cur_num_vqs to config->max_virtqueue_pairs will address
> your concerns with respect to modify_rqt. Once you reset cur_num_vqs to
> config->max_virtqueue_pairs your concerns with respect to changing
> number of QPs should be addressed.
No, it won't. If you look at the caller of change_num_qps(), 
handle_ctrl_mq() contains the following code:

                 if (ndev->cur_num_vqs == 2 * newqps) {
                         status = VIRTIO_NET_OK;
                         break;
                 }

So if you set cur_num_vqs to max_virtqueue_pairs while MQ is not 
negotiated, you get a very weird success: a non-MQ driver is able to 
"change" the number of queues without anything actually changing. You 
may argue that we can fail the non-MQ case early, before reaching this 
code. But that is another patch, and it would make the code more 
obscure, not less. People won't intuitively realize that cur_num_vqs 
doesn't apply to non-MQ just by following the name.
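
The kind of host side guard I have in mind is roughly the below (an
untested sketch; the exact switch structure and error path in
handle_ctrl_mq() may differ):

	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
		/* Sketch: reject the command outright when the driver
		 * never negotiated VIRTIO_NET_F_MQ, so status stays
		 * VIRTIO_NET_ERR; the exact error path may differ.
		 */
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
			break;
		...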


Regards,
-Siwei


>   We could even leave create_rqt
> untouched or modify the code to use cur_num_vqs. It should work the
> same.
>
>>> create RQT with 1 entry only.
>> Here cur_num_vqs will be loaded with the newly negotiated value (max_rqt)
>> again.
>>
>>> ethtool modify num QPs to 4. modify RQT will fail since it was created
>>> with max QPs equals 1.
>> It won't fail as the cur_num_vqs will be consistent with the number of
>> queues newly negotiated (i.e the max_rqt in create_rqt) for modify.
>>
>>> I think it is ok to create the RQT always with the value configured when
>>> the device was created.
>> The problem of not setting cur_num_vqs in create_rqt (and resetting it in
>> mlx5_vdpa_reset) is that, once the VM is rebooted or the device is reset,
>> the value doesn't go with the actual number of rqt entries hence would break
>> various assumptions in change_num_qps() or modify_rqt(). For instances,
>>
>> create vdpa device with max data VQs = 16
>> boot VM
>> features_ok set with MQ negotiated
>> RQT created with 8 entries in create_rqt
>> ethtool modify num QPs to 2. cur_num_vqs now equals 2.
>> reboot VM
>> features_ok set with MQ negotiated
>> RQT created with 8 entries in create_rqt
>> ethtool modify num QPs to 6.
>> cur_num_vqs was 2 while the actual RQT size is 8 (= 16 / 2)
>> change_num_qps would fail as the wrong increment branch (rather than the decrement) was taken
>>
>> and this case too, which calls out the need to validate the presence of
>> VIRTIO_NET_F_MQ in handle_ctrl_mq()
>>
>> create vdpa device with max data VQs = 16 (RQT size created with 8 entries)
>> boot VM
>> features_ok set with MQ negotiated
>> RQT created with 8 entries in create_rqt
>> ethtool modify num QPs to 2. cur_num_vqs now equals 2.
>> reboot VM
>> features_ok set with no MQ negotiated
>> RQT created with 8 entries in create_rqt
>> ethtool modify num QPs to 6. suppose guest runs a custom kernel without checking the #channels to configure
>> change_num_qps can succeed and no host side check prohibiting a single queue guest to set multi-queue
>>
>> Thanks,
>> -Siwei
>>>> Further, we need to validate VIRTIO_NET_F_MQ is present
>>>> in handle_ctrl_mq() before changing the number of queue pairs.
>>>>
>>>> So just disregard my previous R-b for this patch.
>>>>
>>>> Thanks,
>>>> -Siwei
>>>>
>>>>
>>>>>>           MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
>>>>>>         err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen,
>>>>>> &ndev->res.rqtn);
>>>>>> @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device
>>>>>> *vdev)
>>>>>>         clear_vqs_ready(ndev);
>>>>>>         mlx5_vdpa_destroy_mr(&ndev->mvdev);
>>>>>>         ndev->mvdev.status = 0;
>>>>>> -    memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
>>>>>> +    memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) *
>>>>>> (mvdev->max_vqs + 1));
>>>>>>         ndev->mvdev.actual_features = 0;
>>>>>>         ++mvdev->generation;
>>>>>>         if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
>>>>>> @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device
>>>>>> *vdev)
>>>>>>         }
>>>>>>         mlx5_vdpa_free_resources(&ndev->mvdev);
>>>>>>         mutex_destroy(&ndev->reslock);
>>>>>> +    kfree(ndev->event_cbs);
>>>>>> +    kfree(ndev->vqs);
>>>>>>     }
>>>>>>       static struct vdpa_notification_area
>>>>>> mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
>>>>>> @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct
>>>>>> vdpa_mgmt_dev *v_mdev, const char *name,
>>>>>>             return -EOPNOTSUPP;
>>>>>>         }
>>>>>>     -    /* we save one virtqueue for control virtqueue should we
>>>>>> require it */
>>>>>>         max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
>>>>>> max_num_virtio_queues);
>>>>>> -    max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
>>>>>> +    if (max_vqs < 2) {
>>>>>> +        dev_warn(mdev->device,
>>>>>> +             "%d virtqueues are supported. At least 2 are required\n",
>>>>>> +             max_vqs);
>>>>>> +        return -EAGAIN;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
>>>>>> +        if (add_config->net.max_vq_pairs > max_vqs / 2)
>>>>>> +            return -EINVAL;
>>>>>> +        max_vqs = min_t(u32, max_vqs, 2 *
>>>>>> add_config->net.max_vq_pairs);
>>>>>> +    } else {
>>>>>> +        max_vqs = 2;
>>>>>> +    }
>>>>>>           ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev,
>>>>>> mdev->device, &mlx5_vdpa_ops,
>>>>>>                      name, false);
>>>>>>         if (IS_ERR(ndev))
>>>>>>             return PTR_ERR(ndev);
>>>>>>     +    ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
>>>>>> +    ndev->event_cbs = kcalloc(max_vqs + 1,
>>>>>> sizeof(*ndev->event_cbs), GFP_KERNEL);
>>>>>> +    if (!ndev->vqs || !ndev->event_cbs) {
>>>>>> +        err = -ENOMEM;
>>>>>> +        goto err_alloc;
>>>>>> +    }
>>>>>>         ndev->mvdev.max_vqs = max_vqs;
>>>>>>         mvdev = &ndev->mvdev;
>>>>>>         mvdev->mdev = mdev;
>>>>>> @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct
>>>>>> vdpa_mgmt_dev *v_mdev, const char *name,
>>>>>>             mlx5_mpfs_del_mac(pfmdev, config->mac);
>>>>>>     err_mtu:
>>>>>>         mutex_destroy(&ndev->reslock);
>>>>>> +err_alloc:
>>>>>>         put_device(&mvdev->vdev.dev);
>>>>>>         return err;
>>>>>>     }
>>>>>> @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device
>>>>>> *adev,
>>>>>>         mgtdev->mgtdev.ops = &mdev_ops;
>>>>>>         mgtdev->mgtdev.device = mdev->device;
>>>>>>         mgtdev->mgtdev.id_table = id_table;
>>>>>> -    mgtdev->mgtdev.config_attr_mask =
>>>>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
>>>>>> +    mgtdev->mgtdev.config_attr_mask =
>>>>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
>>>>>> +                      BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
>>>>>>         mgtdev->madev = madev;
>>>>>>           err = vdpa_mgmtdev_register(&mgtdev->mgtdev);


* Re: [PATCH v7 03/14] vdpa: Sync calls set/get config/status with cf_mutex
  2022-01-11  6:26               ` Parav Pandit via Virtualization
@ 2022-01-11  9:15                 ` Si-Wei Liu
  0 siblings, 0 replies; 40+ messages in thread
From: Si-Wei Liu @ 2022-01-11  9:15 UTC (permalink / raw)
  To: Parav Pandit, Jason Wang
  Cc: Laurent Vivier, mst, virtualization, eperezma, Eli Cohen



On 1/10/2022 10:26 PM, Parav Pandit wrote:
>
>> From: Jason Wang <jasowang@redhat.com>
>> Sent: Tuesday, January 11, 2022 10:17 AM
>>> I guess in this situation it would be better to defer to a future patch
>>> to add such locking or wrapper, although right now there are just two
>>> additional calls taking the lock needlessly when vhost_vdpa_get_status
>>> is called. Maybe it's just me but I'm worried some future patch may
>>> call the locked API wrapper thousands of times unintentionally, then the
>>> overhead becomes quite obvious.
>> I'm fine to remove this if others agree on this.
>>
>>>>
>>>>> Ok, but IMHO it might be better to get some comment here, otherwise
>>>>> it's quite confusing why the lock needs to be held. vhost_vdpa had
>>>>> done its own serialization in vhost_vdpa_unlocked_ioctl() through
>>>>> vhost_dev`mutex.
>>>>
>>>> Right, but they are done for different levels, one is for vhost_dev
>>>> another is for vdpa_dev.
>>> The cf_mutex is introduced to serialize the command line configure
>>> thread (via netlink) and the upstream driver calls from either the
>>> vhost_dev or virtio_dev. I don't see a case, even in future, where the
>>> netlink thread needs to update the virtio status on the fly. Can you
>>> enlighten me why that is at all possible?
> Sorry for my late response.
>
> Netlink reads the whole config space while it is not being modified by the vhost/virtio driver side.
> A better option is to convert cf_mutex to an rw_sem; that clarifies the code and still ensures that netlink doesn't read bytes while they are being modified.
I missed the right moment to ask why it wasn't an rw_sem in the first 
place, but I don't care much about the multi-reader case, so there's no 
hurry on my side to make the conversion.
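
For reference, the conversion Parav describes would presumably look
something like the below (an uncompiled sketch; cf_lock is a
hypothetical rw_semaphore replacing cf_mutex):

u8 vdpa_get_status(struct vdpa_device *vdev)
{
	u8 status;

	/* cf_lock here is a hypothetical rw_semaphore replacing
	 * cf_mutex; readers only exclude writers, so concurrent
	 * status/config reads no longer serialize against each other.
	 */
	down_read(&vdev->cf_lock);
	status = vdev->config->get_status(vdev);
	up_read(&vdev->cf_lock);
	return status;
}

void vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
	down_write(&vdev->cf_lock);
	vdev->config->set_status(vdev, status);
	up_write(&vdev->cf_lock);
}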

-Siwei
> Given that config bytes can be updated at any time and not on field boundaries, the read is not atomic anyway.
> It was added when we wanted to modify the fields post creation, but we eventually dropped that idea.
>
> So yes, cf_mutex removal is fine too.
>
>> After some thought I don't see a case. I can think of NEEDS_RESET but it should
>> come with a config interrupt so we're probably fine without the mutex here.


* Re: [PATCH v7 03/14] vdpa: Sync calls set/get config/status with cf_mutex
  2022-01-11  4:46             ` Jason Wang
  2022-01-11  6:26               ` Parav Pandit via Virtualization
@ 2022-01-11  9:23               ` Si-Wei Liu
  1 sibling, 0 replies; 40+ messages in thread
From: Si-Wei Liu @ 2022-01-11  9:23 UTC (permalink / raw)
  To: Jason Wang; +Cc: Laurent Vivier, mst, virtualization, eperezma, Eli Cohen



On 1/10/2022 8:46 PM, Jason Wang wrote:
> On Tue, Jan 11, 2022 at 9:30 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 1/9/2022 10:05 PM, Jason Wang wrote:
>>> 在 2022/1/8 上午9:23, Si-Wei Liu 写道:
>>>>
>>>> On 1/6/2022 9:08 PM, Jason Wang wrote:
>>>>> 在 2022/1/7 上午8:33, Si-Wei Liu 写道:
>>>>>>
>>>>>> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>>>>>>> Add wrappers to get/set status and protect these operations with
>>>>>>> cf_mutex to serialize these operations with respect to get/set config
>>>>>>> operations.
>>>>>>>
>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>>>> ---
>>>>>>>    drivers/vdpa/vdpa.c          | 19 +++++++++++++++++++
>>>>>>>    drivers/vhost/vdpa.c         |  7 +++----
>>>>>>>    drivers/virtio/virtio_vdpa.c |  3 +--
>>>>>>>    include/linux/vdpa.h         |  3 +++
>>>>>>>    4 files changed, 26 insertions(+), 6 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
>>>>>>> index 42d71d60d5dc..5134c83c4a22 100644
>>>>>>> --- a/drivers/vdpa/vdpa.c
>>>>>>> +++ b/drivers/vdpa/vdpa.c
>>>>>>> @@ -21,6 +21,25 @@ static LIST_HEAD(mdev_head);
>>>>>>>    static DEFINE_MUTEX(vdpa_dev_mutex);
>>>>>>>    static DEFINE_IDA(vdpa_index_ida);
>>>>>>>    +u8 vdpa_get_status(struct vdpa_device *vdev)
>>>>>>> +{
>>>>>>> +    u8 status;
>>>>>>> +
>>>>>>> +    mutex_lock(&vdev->cf_mutex);
>>>>>>> +    status = vdev->config->get_status(vdev);
>>>>>>> +    mutex_unlock(&vdev->cf_mutex);
>>>>>>> +    return status;
>>>>>>> +}
>>>>>>> +EXPORT_SYMBOL(vdpa_get_status);
>>>>>>> +
>>>>>>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status)
>>>>>>> +{
>>>>>>> +    mutex_lock(&vdev->cf_mutex);
>>>>>>> +    vdev->config->set_status(vdev, status);
>>>>>>> +    mutex_unlock(&vdev->cf_mutex);
>>>>>>> +}
>>>>>>> +EXPORT_SYMBOL(vdpa_set_status);
>>>>>>> +
>>>>>>>    static struct genl_family vdpa_nl_family;
>>>>>>>      static int vdpa_dev_probe(struct device *d)
>>>>>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>>>>>> index ebaa373e9b82..d9d499465e2e 100644
>>>>>>> --- a/drivers/vhost/vdpa.c
>>>>>>> +++ b/drivers/vhost/vdpa.c
>>>>>>> @@ -142,10 +142,9 @@ static long vhost_vdpa_get_device_id(struct
>>>>>>> vhost_vdpa *v, u8 __user *argp)
>>>>>>>    static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8
>>>>>>> __user *statusp)
>>>>>>>    {
>>>>>>>        struct vdpa_device *vdpa = v->vdpa;
>>>>>>> -    const struct vdpa_config_ops *ops = vdpa->config;
>>>>>>>        u8 status;
>>>>>>>    -    status = ops->get_status(vdpa);
>>>>>>> +    status = vdpa_get_status(vdpa);
>>>>>> Not sure why we need to take cf_mutex here. Appears to me only
>>>>>> setters (set_status and reset) need to take the lock in this function.
>>>>>
>>>>> You may be right but it doesn't harm and it is guaranteed to be
>>>>> correct if we protect it with mutex here.
>>>> Is it more for future proof?
>>>
>>> I think so.
>> I guess in this situation it would be better to defer to a future patch
>> to add such locking or wrapper, although right now there are just two
>> additional calls taking the lock needlessly when vhost_vdpa_get_status
>> is called. Maybe it's just me but I'm worried some future patch may
>> call the locked API wrapper thousands of times unintentionally, then the
>> overhead becomes quite obvious.
> I'm fine to remove this if others agree on this.
It looks like Parav acked this. Anyone else?
>
>>>
>>>> Ok, but IMHO it might be better to get some comment here, otherwise
>>>> it's quite confusing why the lock needs to be held. vhost_vdpa had
>>>> done its own serialization in vhost_vdpa_unlocked_ioctl() through
>>>> vhost_dev`mutex.
>>>
>>> Right, but they are done for different levels, one is for vhost_dev
>>> another is for vdpa_dev.
>> The cf_mutex is introduced to serialize the command line configure
>> thread (via netlink) and the upstream driver calls from either the
>> vhost_dev or virtio_dev. I don't see a case, even in future, where the
>> netlink thread needs to update the virtio status on the fly. Can you
>> enlighten me why that is at all possible?
> After some thought I don't see a case. I can think of NEEDS_RESET but
> it should come with a config interrupt so we're probably fine without
> the mutex here.
True, it's quite unlikely for netlink to alter the status directly 
while the driver is running. More likely, an out-of-band reset would 
have to reach the guest driver first for it to become aware, and the 
driver would then initiate the vhost call down to vhost-vdpa through 
the same path.
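
In other words, the getter could simply keep calling the op directly,
along these lines (a sketch based on the code before this patch):

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	/* No cf_mutex taken here: at worst a racing writer makes us
	 * return a status byte that is stale by the time userspace
	 * reads it.
	 */
	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}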

-Siwei
>
> Thanks
>
>> Thanks,
>> -Siwei
>>> Thanks
>>>
>>>
>>>> -Siwei
>>>>
>>>>> Thanks
>>>>>
>>>>>
>>>>>>>          if (copy_to_user(statusp, &status, sizeof(status)))
>>>>>>>            return -EFAULT;
>>>>>>> @@ -164,7 +163,7 @@ static long vhost_vdpa_set_status(struct
>>>>>>> vhost_vdpa *v, u8 __user *statusp)
>>>>>>>        if (copy_from_user(&status, statusp, sizeof(status)))
>>>>>>>            return -EFAULT;
>>>>>>>    -    status_old = ops->get_status(vdpa);
>>>>>>> +    status_old = vdpa_get_status(vdpa);
>>>>>> Ditto.
>>>>>>
>>>>>>>          /*
>>>>>>>         * Userspace shouldn't remove status bits unless reset the
>>>>>>> @@ -182,7 +181,7 @@ static long vhost_vdpa_set_status(struct
>>>>>>> vhost_vdpa *v, u8 __user *statusp)
>>>>>>>            if (ret)
>>>>>>>                return ret;
>>>>>>>        } else
>>>>>>> -        ops->set_status(vdpa, status);
>>>>>>> +        vdpa_set_status(vdpa, status);
>>>>>> The reset() call in the if branch above needs to take the cf_mutex,
>>>>>> the same way as that for set_status(). The reset() is effectively
>>>>>> set_status(vdpa, 0).
>>>>>>
>>>>>> Thanks,
>>>>>> -Siwei
>>>>>>
>>>>>>>          if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old &
>>>>>>> VIRTIO_CONFIG_S_DRIVER_OK))
>>>>>>>            for (i = 0; i < nvqs; i++)
>>>>>>> diff --git a/drivers/virtio/virtio_vdpa.c
>>>>>>> b/drivers/virtio/virtio_vdpa.c
>>>>>>> index a84b04ba3195..76504559bc25 100644
>>>>>>> --- a/drivers/virtio/virtio_vdpa.c
>>>>>>> +++ b/drivers/virtio/virtio_vdpa.c
>>>>>>> @@ -91,9 +91,8 @@ static u8 virtio_vdpa_get_status(struct
>>>>>>> virtio_device *vdev)
>>>>>>>    static void virtio_vdpa_set_status(struct virtio_device *vdev,
>>>>>>> u8 status)
>>>>>>>    {
>>>>>>>        struct vdpa_device *vdpa = vd_get_vdpa(vdev);
>>>>>>> -    const struct vdpa_config_ops *ops = vdpa->config;
>>>>>>>    -    return ops->set_status(vdpa, status);
>>>>>>> +    return vdpa_set_status(vdpa, status);
>>>>>>>    }
>>>>>>>      static void virtio_vdpa_reset(struct virtio_device *vdev)
>>>>>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>>>>>>> index 9cc4291a79b3..ae047fae2603 100644
>>>>>>> --- a/include/linux/vdpa.h
>>>>>>> +++ b/include/linux/vdpa.h
>>>>>>> @@ -408,6 +408,9 @@ void vdpa_get_config(struct vdpa_device *vdev,
>>>>>>> unsigned int offset,
>>>>>>>                 void *buf, unsigned int len);
>>>>>>>    void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
>>>>>>>                 const void *buf, unsigned int length);
>>>>>>> +u8 vdpa_get_status(struct vdpa_device *vdev);
>>>>>>> +void vdpa_set_status(struct vdpa_device *vdev, u8 status);
>>>>>>> +
>>>>>>>    /**
>>>>>>>     * struct vdpa_mgmtdev_ops - vdpa device ops
>>>>>>>     * @dev_add: Add a vdpa device using alloc and register


* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
  2022-01-11  8:28             ` Jason Wang
@ 2022-01-11 12:05               ` Michael S. Tsirkin
  2022-01-12  2:36                 ` Jason Wang
  0 siblings, 1 reply; 40+ messages in thread
From: Michael S. Tsirkin @ 2022-01-11 12:05 UTC (permalink / raw)
  To: Jason Wang
  Cc: Laurent Vivier, virtualization, eperezma, Si-Wei Liu, Eli Cohen

On Tue, Jan 11, 2022 at 04:28:58PM +0800, Jason Wang wrote:
> On Tue, Jan 11, 2022 at 3:34 PM Eli Cohen <elic@nvidia.com> wrote:
> >
> > On Mon, Jan 10, 2022 at 05:00:34PM -0800, Si-Wei Liu wrote:
> > >
> > >
> > > On 1/9/2022 6:10 AM, Eli Cohen wrote:
> > > > On Thu, Jan 06, 2022 at 05:50:24PM -0800, Si-Wei Liu wrote:
> > > > >
> > > > > On 1/6/2022 5:27 PM, Si-Wei Liu wrote:
> > > > > >
> > > > > > On 1/5/2022 3:46 AM, Eli Cohen wrote:
> > > > > > > Check whether the max number of data virtqueue pairs was provided when
> > > > > > > adding a new device and verify the new value does not exceed device
> > > > > > > capabilities.
> > > > > > >
> > > > > > > In addition, change the arrays holding virtqueue and callback contexts
> > > > > > > to be dynamically allocated.
> > > > > > >
> > > > > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > > > > ---
> > > > > > > v6 -> v7:
> > > > > > > 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
> > > > > > >
> > > > > > >    drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++---------
> > > > > > >    1 file changed, 37 insertions(+), 14 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > index 4a2149f70f1e..d4720444bf78 100644
> > > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
> > > > > > >        struct mlx5_vq_restore_info ri;
> > > > > > >    };
> > > > > > >    -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
> > > > > > > - * provides for driver space allocation
> > > > > > > - */
> > > > > > > -#define MLX5_MAX_SUPPORTED_VQS 16
> > > > > > > -
> > > > > > >    static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
> > > > > > >    {
> > > > > > >        if (unlikely(idx > mvdev->max_idx))
> > > > > > > @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
> > > > > > >        struct mlx5_vdpa_dev mvdev;
> > > > > > >        struct mlx5_vdpa_net_resources res;
> > > > > > >        struct virtio_net_config config;
> > > > > > > -    struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
> > > > > > > -    struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
> > > > > > > +    struct mlx5_vdpa_virtqueue *vqs;
> > > > > > > +    struct vdpa_callback *event_cbs;
> > > > > > >          /* Serialize vq resources creation and destruction. This is
> > > > > > > required
> > > > > > >         * since memory map might change and we need to destroy and create
> > > > > > > @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net
> > > > > > > *ndev)
> > > > > > >    {
> > > > > > >        int i;
> > > > > > >    -    for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
> > > > > > > +    for (i = 0; i < ndev->mvdev.max_vqs; i++)
> > > > > > >            suspend_vq(ndev, &ndev->vqs[i]);
> > > > > > >    }
> > > > > > >    @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net
> > > > > > > *ndev)
> > > > > > >        void *in;
> > > > > > >        int i, j;
> > > > > > >        int err;
> > > > > > > +    int num;
> > > > > > >    -    max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
> > > > > > > +    if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
> > > > > > > +        num = 1;
> > > > > > > +    else
> > > > > > > +        num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
> > > > > > > +
> > > > > > > +    max_rqt = min_t(int, roundup_pow_of_two(num),
> > > > > > >                1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
> > > > > > >        if (max_rqt < 1)
> > > > > > >            return -EOPNOTSUPP;
> > > > > > > @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
> > > > > > >        MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
> > > > > > >        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
> > > > > > >        for (i = 0, j = 0; i < max_rqt; i++, j += 2)
> > > > > > > -        list[i] = cpu_to_be32(ndev->vqs[j %
> > > > > > > ndev->mvdev.max_vqs].virtq_id);
> > > > > > > +        list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
> > > > > > Good catch. LGTM.
> > > > > >
> > > > > > Reviewed-by: Si-Wei Liu<si-wei.liu@oracle.com>
> > > > > >
> > > > > Apologies to reply to myself. It looks to me we need to set cur_num_vqs to
> > > > > the negotiated num of queues. Otherwise any site referencing cur_num_vqs
> > > > > won't work properly.
> > > > You can't really use cur_num_vqs. Consider this scenario:
> > > > create vdpa device with max VQs = 16 (RQT size created with 8 entries)
> > > > boot VM
> > > > ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> > > > reboot VM.
> > > Once VM is rebooted, the cur_num_vqs has to reset to 0 in the reset() op.
> >
> > Why reset to 0? The correct value to set here is
> > config->max_virtqueue_pairs.
> 
> Multiqueue is disabled by default so I think it should be 1 here. This
> is the behavior of Qemu.
> 
> It looks to me we need to clarify this in the spec.
> 
> Thanks

Not sure what you want to clarify.


> > This is what happens when you add the
> > device. Setting cur_num_vqs to config->max_virtqueue_pairs will address
> > your concerns with respect to modify_rqt. Once you reset cur_num_vqs to
> > config->max_virtqueue_pairs your concerns with respect to changing
> > number of QPs should be addressed. We could even leave create_rqt
> > untouched or modify the code to use cur_num_vqs. It should work the
> > same.
> >
> > > > create RQT with 1 entry only.
> > > Here cur_num_vqs will be loaded with the newly negotiated value (max_rqt)
> > > again.
> > >
> > > > ethtool modify num QPs to 4. modify RQT will fail since it was created
> > > > with max QPs equals 1.
> > > It won't fail as the cur_num_vqs will be consistent with the number of
> > > queues newly negotiated (i.e the max_rqt in create_rqt) for modify.
> > >
> > > >
> > > > I think it is ok to create the RQT always with the value configured when
> > > > the device was created.
> > > The problem of not setting cur_num_vqs in create_rqt (and resetting it in
> > > mlx5_vdpa_reset) is that, once the VM is rebooted or the device is reset,
> > > the value doesn't go with the actual number of rqt entries hence would break
> > > various assumptions in change_num_qps() or modify_rqt(). For instances,
> > >
> > > create vdpa device with max data VQs = 16
> > > boot VM
> > > features_ok set with MQ negotiated
> > > RQT created with 8 entries in create_rqt
> > > ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> > > reboot VM
> > > features_ok set with MQ negotiated
> > > RQT created with 8 entries in create_rqt
> > > ethtool modify num QPs to 6.
> > > cur_num_vqs was 2 while the actual RQT size is 8 (= 16 / 2)
> > > change_num_qps would fail as the wrong increment branch (rather than the decrement) was taken
> > >
> > > and this case too, which calls out the need to validate the presence of
> > > VIRTIO_NET_F_MQ in handle_ctrl_mq()
> > >
> > > create vdpa device with max data VQs = 16 (RQT size created with 8 entries)
> > > boot VM
> > > features_ok set with MQ negotiated
> > > RQT created with 8 entries in create_rqt
> > > ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> > > reboot VM
> > > features_ok set with no MQ negotiated
> > > RQT created with 8 entries in create_rqt
> > > ethtool modify num QPs to 6. suppose guest runs a custom kernel without checking the #channels to configure
> > > change_num_qps can succeed and no host side check prohibiting a single queue guest to set multi-queue
> > >
> > > Thanks,
> > > -Siwei
> > > >
> > > > > Further, we need to validate VIRTIO_NET_F_MQ is present
> > > > > in handle_ctrl_mq() before changing the number of queue pairs.
> > > > >
> > > > > So just disregard my previous R-b for this patch.
> > > > >
> > > > > Thanks,
> > > > > -Siwei
> > > > >
> > > > >
> > > > > > >          MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
> > > > > > >        err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen,
> > > > > > > &ndev->res.rqtn);
> > > > > > > @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device
> > > > > > > *vdev)
> > > > > > >        clear_vqs_ready(ndev);
> > > > > > >        mlx5_vdpa_destroy_mr(&ndev->mvdev);
> > > > > > >        ndev->mvdev.status = 0;
> > > > > > > -    memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
> > > > > > > +    memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) *
> > > > > > > (mvdev->max_vqs + 1));
> > > > > > >        ndev->mvdev.actual_features = 0;
> > > > > > >        ++mvdev->generation;
> > > > > > >        if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
> > > > > > > @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device
> > > > > > > *vdev)
> > > > > > >        }
> > > > > > >        mlx5_vdpa_free_resources(&ndev->mvdev);
> > > > > > >        mutex_destroy(&ndev->reslock);
> > > > > > > +    kfree(ndev->event_cbs);
> > > > > > > +    kfree(ndev->vqs);
> > > > > > >    }
> > > > > > >      static struct vdpa_notification_area
> > > > > > > mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
> > > > > > > @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct
> > > > > > > vdpa_mgmt_dev *v_mdev, const char *name,
> > > > > > >            return -EOPNOTSUPP;
> > > > > > >        }
> > > > > > >    -    /* we save one virtqueue for control virtqueue should we
> > > > > > > require it */
> > > > > > >        max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
> > > > > > > max_num_virtio_queues);
> > > > > > > -    max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
> > > > > > > +    if (max_vqs < 2) {
> > > > > > > +        dev_warn(mdev->device,
> > > > > > > +             "%d virtqueues are supported. At least 2 are required\n",
> > > > > > > +             max_vqs);
> > > > > > > +        return -EAGAIN;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
> > > > > > > +        if (add_config->net.max_vq_pairs > max_vqs / 2)
> > > > > > > +            return -EINVAL;
> > > > > > > +        max_vqs = min_t(u32, max_vqs, 2 *
> > > > > > > add_config->net.max_vq_pairs);
> > > > > > > +    } else {
> > > > > > > +        max_vqs = 2;
> > > > > > > +    }
> > > > > > >          ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev,
> > > > > > > mdev->device, &mlx5_vdpa_ops,
> > > > > > >                     name, false);
> > > > > > >        if (IS_ERR(ndev))
> > > > > > >            return PTR_ERR(ndev);
> > > > > > >    +    ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
> > > > > > > +    ndev->event_cbs = kcalloc(max_vqs + 1,
> > > > > > > sizeof(*ndev->event_cbs), GFP_KERNEL);
> > > > > > > +    if (!ndev->vqs || !ndev->event_cbs) {
> > > > > > > +        err = -ENOMEM;
> > > > > > > +        goto err_alloc;
> > > > > > > +    }
> > > > > > >        ndev->mvdev.max_vqs = max_vqs;
> > > > > > >        mvdev = &ndev->mvdev;
> > > > > > >        mvdev->mdev = mdev;
> > > > > > > @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct
> > > > > > > vdpa_mgmt_dev *v_mdev, const char *name,
> > > > > > >            mlx5_mpfs_del_mac(pfmdev, config->mac);
> > > > > > >    err_mtu:
> > > > > > >        mutex_destroy(&ndev->reslock);
> > > > > > > +err_alloc:
> > > > > > >        put_device(&mvdev->vdev.dev);
> > > > > > >        return err;
> > > > > > >    }
> > > > > > > @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device
> > > > > > > *adev,
> > > > > > >        mgtdev->mgtdev.ops = &mdev_ops;
> > > > > > >        mgtdev->mgtdev.device = mdev->device;
> > > > > > >        mgtdev->mgtdev.id_table = id_table;
> > > > > > > -    mgtdev->mgtdev.config_attr_mask =
> > > > > > > BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
> > > > > > > +    mgtdev->mgtdev.config_attr_mask =
> > > > > > > BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
> > > > > > > +                      BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
> > > > > > >        mgtdev->madev = madev;
> > > > > > >          err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
> > >
> >


* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
       [not found]               ` <20220111152154.GA165838@mtl-vdi-166.wap.labs.mlnx>
@ 2022-01-11 15:31                 ` Michael S. Tsirkin
  2022-01-11 22:21                 ` Si-Wei Liu
  1 sibling, 0 replies; 40+ messages in thread
From: Michael S. Tsirkin @ 2022-01-11 15:31 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, virtualization, eperezma, Si-Wei Liu

On Tue, Jan 11, 2022 at 05:21:54PM +0200, Eli Cohen wrote:
> On Tue, Jan 11, 2022 at 12:52:29AM -0800, Si-Wei Liu wrote:
> > 
> > 
> > On 1/10/2022 11:34 PM, Eli Cohen wrote:
> > > On Mon, Jan 10, 2022 at 05:00:34PM -0800, Si-Wei Liu wrote:
> > > > 
> > > > On 1/9/2022 6:10 AM, Eli Cohen wrote:
> > > > > On Thu, Jan 06, 2022 at 05:50:24PM -0800, Si-Wei Liu wrote:
> > > > > > On 1/6/2022 5:27 PM, Si-Wei Liu wrote:
> > > > > > > On 1/5/2022 3:46 AM, Eli Cohen wrote:
> > > > > > > > Check whether the max number of data virtqueue pairs was provided when
> > > > > > > > adding a new device and verify the new value does not exceed device
> > > > > > > > capabilities.
> > > > > > > > 
> > > > > > > > In addition, change the arrays holding virtqueue and callback contexts
> > > > > > > > to be dynamically allocated.
> > > > > > > > 
> > > > > > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > > > > > ---
> > > > > > > > v6 -> v7:
> > > > > > > > 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
> > > > > > > > 
> > > > > > > >     drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++---------
> > > > > > > >     1 file changed, 37 insertions(+), 14 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > index 4a2149f70f1e..d4720444bf78 100644
> > > > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
> > > > > > > >         struct mlx5_vq_restore_info ri;
> > > > > > > >     };
> > > > > > > >     -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
> > > > > > > > - * provides for driver space allocation
> > > > > > > > - */
> > > > > > > > -#define MLX5_MAX_SUPPORTED_VQS 16
> > > > > > > > -
> > > > > > > >     static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
> > > > > > > >     {
> > > > > > > >         if (unlikely(idx > mvdev->max_idx))
> > > > > > > > @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
> > > > > > > >         struct mlx5_vdpa_dev mvdev;
> > > > > > > >         struct mlx5_vdpa_net_resources res;
> > > > > > > >         struct virtio_net_config config;
> > > > > > > > -    struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
> > > > > > > > -    struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
> > > > > > > > +    struct mlx5_vdpa_virtqueue *vqs;
> > > > > > > > +    struct vdpa_callback *event_cbs;
> > > > > > > >           /* Serialize vq resources creation and destruction. This is
> > > > > > > > required
> > > > > > > >          * since memory map might change and we need to destroy and create
> > > > > > > > @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net
> > > > > > > > *ndev)
> > > > > > > >     {
> > > > > > > >         int i;
> > > > > > > >     -    for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
> > > > > > > > +    for (i = 0; i < ndev->mvdev.max_vqs; i++)
> > > > > > > >             suspend_vq(ndev, &ndev->vqs[i]);
> > > > > > > >     }
> > > > > > > >     @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net
> > > > > > > > *ndev)
> > > > > > > >         void *in;
> > > > > > > >         int i, j;
> > > > > > > >         int err;
> > > > > > > > +    int num;
> > > > > > > >     -    max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
> > > > > > > > +    if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
> > > > > > > > +        num = 1;
> > > > > > > > +    else
> > > > > > > > +        num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
> > > > > > > > +
> > > > > > > > +    max_rqt = min_t(int, roundup_pow_of_two(num),
> > > > > > > >                 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
> > > > > > > >         if (max_rqt < 1)
> > > > > > > >             return -EOPNOTSUPP;
> > > > > > > > @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
> > > > > > > >         MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
> > > > > > > >         list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
> > > > > > > >         for (i = 0, j = 0; i < max_rqt; i++, j += 2)
> > > > > > > > -        list[i] = cpu_to_be32(ndev->vqs[j %
> > > > > > > > ndev->mvdev.max_vqs].virtq_id);
> > > > > > > > +        list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
> > > > > > > Good catch. LGTM.
> > > > > > > 
> > > > > > > Reviewed-by: Si-Wei Liu<si-wei.liu@oracle.com>
> > > > > > > 
> > > > > > Apologies to reply to myself. It looks to me we need to set cur_num_vqs to
> > > > > > the negotiated num of queues. Otherwise any site referencing cur_num_vqs
> > > > > > won't work properly.
> > > > > You can't really use cur_num_vqs. Consider this scenario:
> > > > > create vdpa device with max VQs = 16 (RQT size created with 8 entries)
> > > > > boot VM
> > > > > ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> > > > > reboot VM.
> > > > Once VM is rebooted, the cur_num_vqs has to reset to 0 in the reset() op.
> > > Why reset to 0? The correct value to set here is
> > > config->max_virtqueue_pairs.
> > I am not sure I get you, perhaps post a patch to show what you want to do?
> > If you intend to set to 2 (non-MQ) or 2 * max_virtqueue_pairs (MQ) later in
> > create_rqt(), maybe it's not quite relevant whether or not to reset. But in
> > the habit of keeping things consistent I'd prefer reset to a value to
> > reflect the fact that this field won't have a valid value until features are
> > negotiated. Whenever there's a need in future, this field can be easily
> > exposed to userspace.
> > 
> > >   This is what happens when you add the
> > > device.
> > This is not relevant, and doesn't matter. Essentially that line of code to
> > set cur_num_vqs in vdpa_add can be removed. The value of cur_num_vqs won't
> > be effective before the MQ feature is negotiated, i.e. you don't get a
> > sensible cur_num_vqs value before the size of RQT is derived from the MQ
> > feature and the table gets created.
> > 
> > >   Setting cur_num_vqs to config->max_virtqueue_pairs will address
> > > your concerns with respect to modify_rqt. Once you reset cur_num_vqs to
> > > config->max_virtqueue_pairs your concerns with respect to changing
> > > number of QPs should be addressed.
> > No, it won't. If you look at the caller of change_num_qps(), in
> > handle_ctrl_mq() there's following code:
> > 
> >                 if (ndev->cur_num_vqs == 2 * newqps) {
> >                         status = VIRTIO_NET_OK;
> >                         break;
> >                 }
> > 
> > So if you set cur_num_vqs to max_virtqueue_pairs while MQ is not negotiated,
> > it will result in a very weird success for a non-MQ supporting driver to be
> > able to change the number of queues without changing anything effectively.
> > You may argue that we can fail the non-MQ case early before coming to this
> > code. But this is another patch, and would make code more obscure, not less.
> > Intuitively people won't realize your cur_num_vqs doesn't apply to non-MQ
> > just by follow the name.
> > 
> > 
> > Regards,
> > -Siwei
> > 
> 
> Si-Wei:
> So your point is that you want cur_num_vqs to be 0 before features are
> negotiated, 2 for non-MQ, and whatever was configured through ethtool
> otherwise.
> 
> I can send a patch to address this concern of yours.
> 
> Question is how we want to proceed here. I do want this series to get
> into 5.16. Should I send a new series or fixes on top of this? Also, it
> would be helpful if you could list all the changes you think should be
> made.
> 
> Michael: what do you think?

Fixes on top is best, including the Fixes tag.

The merge window is open, so what's there right now should be pretty
close to what goes into rc1. If possible, please send a patchset with
all the fixes so it's possible to evaluate whether we are there or it
has to be pushed out to 5.17 (I hope not). If there are cleanups we can
push out to rc2, please make that clear too.


> > 
> > >   We could even leave create_rqt
> > > untouched or modify the code to use cur_num_vqs. It should work the
> > > same.
> > > 
> > > > > create RQT with 1 entry only.
> > > > Here cur_num_vqs will be loaded with the newly negotiated value (max_rqt)
> > > > again.
> > > > 
> > > > > ethtool modify num QPs to 4. modify RQT will fail since it was created
> > > > > with max QPs equals 1.
> > > > It won't fail as the cur_num_vqs will be consistent with the number of
> > > > queues newly negotiated (i.e the max_rqt in create_rqt) for modify.
> > > > 
> > > > > I think it is ok to create the RQT always with the value configured when
> > > > > the device was created.
> > > > The problem of not setting cur_num_vqs in create_rqt (and resetting it in
> > > > mlx5_vdpa_reset) is that, once the VM is rebooted or the device is reset,
> > > > the value doesn't go with the actual number of rqt entries hence would break
> > > > various assumptions in change_num_qps() or modify_rqt(). For instances,
> > > > 
> > > > create vdpa device with max data VQs = 16
> > > > boot VM
> > > > features_ok set with MQ negotiated
> > > > RQT created with 8 entries in create_rqt
> > > > ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> > > > reboot VM
> > > > features_ok set with MQ negotiated
> > > > RQT created with 8 entries in create_rqt
> > > > ethtool modify num QPs to 6.
> > > > cur_num_vqs was 2 while the actual RQT size is 8 (= 16 / 2)
> > > > change_num_qps would fail as the wrong increment branch (rather than the decrement) was taken
> > > > 
> > > > and this case too, which calls out the need to validate the presence of
> > > > VIRTIO_NET_F_MQ in handle_ctrl_mq()
> > > > 
> > > > create vdpa device with max data VQs = 16 (RQT size created with 8 entries)
> > > > boot VM
> > > > features_ok set with MQ negotiated
> > > > RQT created with 8 entries in create_rqt
> > > > ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> > > > reboot VM
> > > > features_ok set with no MQ negotiated
> > > > RQT created with 8 entries in create_rqt
> > > > ethtool modify num QPs to 6. suppose guest runs a custom kernel without checking the #channels to configure
> > > > change_num_qps can succeed and no host side check prohibiting a single queue guest to set multi-queue
> > > > 
> > > > Thanks,
> > > > -Siwei
> > > > > > Further, we need to validate VIRTIO_NET_F_MQ is present
> > > > > > in handle_ctrl_mq() before changing the number of queue pairs.
> > > > > > 
> > > > > > So just disregard my previous R-b for this patch.
> > > > > > 
> > > > > > Thanks,
> > > > > > -Siwei
> > > > > > 
> > > > > > 
> > > > > > > >           MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
> > > > > > > >         err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen,
> > > > > > > > &ndev->res.rqtn);
> > > > > > > > @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device
> > > > > > > > *vdev)
> > > > > > > >         clear_vqs_ready(ndev);
> > > > > > > >         mlx5_vdpa_destroy_mr(&ndev->mvdev);
> > > > > > > >         ndev->mvdev.status = 0;
> > > > > > > > -    memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
> > > > > > > > +    memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) *
> > > > > > > > (mvdev->max_vqs + 1));
> > > > > > > >         ndev->mvdev.actual_features = 0;
> > > > > > > >         ++mvdev->generation;
> > > > > > > >         if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
> > > > > > > > @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device
> > > > > > > > *vdev)
> > > > > > > >         }
> > > > > > > >         mlx5_vdpa_free_resources(&ndev->mvdev);
> > > > > > > >         mutex_destroy(&ndev->reslock);
> > > > > > > > +    kfree(ndev->event_cbs);
> > > > > > > > +    kfree(ndev->vqs);
> > > > > > > >     }
> > > > > > > >       static struct vdpa_notification_area
> > > > > > > > mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
> > > > > > > > @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct
> > > > > > > > vdpa_mgmt_dev *v_mdev, const char *name,
> > > > > > > >             return -EOPNOTSUPP;
> > > > > > > >         }
> > > > > > > >     -    /* we save one virtqueue for control virtqueue should we
> > > > > > > > require it */
> > > > > > > >         max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
> > > > > > > > max_num_virtio_queues);
> > > > > > > > -    max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
> > > > > > > > +    if (max_vqs < 2) {
> > > > > > > > +        dev_warn(mdev->device,
> > > > > > > > +             "%d virtqueues are supported. At least 2 are required\n",
> > > > > > > > +             max_vqs);
> > > > > > > > +        return -EAGAIN;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
> > > > > > > > +        if (add_config->net.max_vq_pairs > max_vqs / 2)
> > > > > > > > +            return -EINVAL;
> > > > > > > > +        max_vqs = min_t(u32, max_vqs, 2 *
> > > > > > > > add_config->net.max_vq_pairs);
> > > > > > > > +    } else {
> > > > > > > > +        max_vqs = 2;
> > > > > > > > +    }
> > > > > > > >           ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev,
> > > > > > > > mdev->device, &mlx5_vdpa_ops,
> > > > > > > >                      name, false);
> > > > > > > >         if (IS_ERR(ndev))
> > > > > > > >             return PTR_ERR(ndev);
> > > > > > > >     +    ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
> > > > > > > > +    ndev->event_cbs = kcalloc(max_vqs + 1,
> > > > > > > > sizeof(*ndev->event_cbs), GFP_KERNEL);
> > > > > > > > +    if (!ndev->vqs || !ndev->event_cbs) {
> > > > > > > > +        err = -ENOMEM;
> > > > > > > > +        goto err_alloc;
> > > > > > > > +    }
> > > > > > > >         ndev->mvdev.max_vqs = max_vqs;
> > > > > > > >         mvdev = &ndev->mvdev;
> > > > > > > >         mvdev->mdev = mdev;
> > > > > > > > @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct
> > > > > > > > vdpa_mgmt_dev *v_mdev, const char *name,
> > > > > > > >             mlx5_mpfs_del_mac(pfmdev, config->mac);
> > > > > > > >     err_mtu:
> > > > > > > >         mutex_destroy(&ndev->reslock);
> > > > > > > > +err_alloc:
> > > > > > > >         put_device(&mvdev->vdev.dev);
> > > > > > > >         return err;
> > > > > > > >     }
> > > > > > > > @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device
> > > > > > > > *adev,
> > > > > > > >         mgtdev->mgtdev.ops = &mdev_ops;
> > > > > > > >         mgtdev->mgtdev.device = mdev->device;
> > > > > > > >         mgtdev->mgtdev.id_table = id_table;
> > > > > > > > -    mgtdev->mgtdev.config_attr_mask =
> > > > > > > > BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
> > > > > > > > +    mgtdev->mgtdev.config_attr_mask =
> > > > > > > > BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
> > > > > > > > +                      BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
> > > > > > > >         mgtdev->madev = madev;
> > > > > > > >           err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
> > 


* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
       [not found]               ` <20220111152154.GA165838@mtl-vdi-166.wap.labs.mlnx>
  2022-01-11 15:31                 ` Michael S. Tsirkin
@ 2022-01-11 22:21                 ` Si-Wei Liu
  1 sibling, 0 replies; 40+ messages in thread
From: Si-Wei Liu @ 2022-01-11 22:21 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma



On 1/11/2022 7:21 AM, Eli Cohen wrote:
> On Tue, Jan 11, 2022 at 12:52:29AM -0800, Si-Wei Liu wrote:
>>
>> On 1/10/2022 11:34 PM, Eli Cohen wrote:
>>> On Mon, Jan 10, 2022 at 05:00:34PM -0800, Si-Wei Liu wrote:
>>>> On 1/9/2022 6:10 AM, Eli Cohen wrote:
>>>>> On Thu, Jan 06, 2022 at 05:50:24PM -0800, Si-Wei Liu wrote:
>>>>>> On 1/6/2022 5:27 PM, Si-Wei Liu wrote:
>>>>>>> On 1/5/2022 3:46 AM, Eli Cohen wrote:
>>>>>>>> Check whether the max number of data virtqueue pairs was provided when
>>>>>>>> adding a new device and verify that the new value does not exceed device
>>>>>>>> capabilities.
>>>>>>>>
>>>>>>>> In addition, change the arrays holding virtqueue and callback contexts
>>>>>>>> to be dynamically allocated.
>>>>>>>>
>>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>>>>> ---
>>>>>>>> v6 -> v7:
>>>>>>>> 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
>>>>>>>>
>>>>>>>>      drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++---------
>>>>>>>>      1 file changed, 37 insertions(+), 14 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>>>>>> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>>>>>> index 4a2149f70f1e..d4720444bf78 100644
>>>>>>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>>>>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>>>>>> @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
>>>>>>>>          struct mlx5_vq_restore_info ri;
>>>>>>>>      };
>>>>>>>>      -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
>>>>>>>> - * provides for driver space allocation
>>>>>>>> - */
>>>>>>>> -#define MLX5_MAX_SUPPORTED_VQS 16
>>>>>>>> -
>>>>>>>>      static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
>>>>>>>>      {
>>>>>>>>          if (unlikely(idx > mvdev->max_idx))
>>>>>>>> @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
>>>>>>>>          struct mlx5_vdpa_dev mvdev;
>>>>>>>>          struct mlx5_vdpa_net_resources res;
>>>>>>>>          struct virtio_net_config config;
>>>>>>>> -    struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
>>>>>>>> -    struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
>>>>>>>> +    struct mlx5_vdpa_virtqueue *vqs;
>>>>>>>> +    struct vdpa_callback *event_cbs;
>>>>>>>>            /* Serialize vq resources creation and destruction. This is
>>>>>>>> required
>>>>>>>>           * since memory map might change and we need to destroy and create
>>>>>>>> @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net
>>>>>>>> *ndev)
>>>>>>>>      {
>>>>>>>>          int i;
>>>>>>>>      -    for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
>>>>>>>> +    for (i = 0; i < ndev->mvdev.max_vqs; i++)
>>>>>>>>              suspend_vq(ndev, &ndev->vqs[i]);
>>>>>>>>      }
>>>>>>>>      @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net
>>>>>>>> *ndev)
>>>>>>>>          void *in;
>>>>>>>>          int i, j;
>>>>>>>>          int err;
>>>>>>>> +    int num;
>>>>>>>>      -    max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
>>>>>>>> +    if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
>>>>>>>> +        num = 1;
>>>>>>>> +    else
>>>>>>>> +        num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
>>>>>>>> +
>>>>>>>> +    max_rqt = min_t(int, roundup_pow_of_two(num),
>>>>>>>>                  1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
>>>>>>>>          if (max_rqt < 1)
>>>>>>>>              return -EOPNOTSUPP;
>>>>>>>> @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
>>>>>>>>          MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
>>>>>>>>          list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
>>>>>>>>          for (i = 0, j = 0; i < max_rqt; i++, j += 2)
>>>>>>>> -        list[i] = cpu_to_be32(ndev->vqs[j %
>>>>>>>> ndev->mvdev.max_vqs].virtq_id);
>>>>>>>> +        list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
>>>>>>> Good catch. LGTM.
>>>>>>>
>>>>>>> Reviewed-by: Si-Wei Liu<si-wei.liu@oracle.com>
>>>>>>>
>>>>>> Apologies for replying to myself. It looks to me like we need to set
>>>>>> cur_num_vqs to the negotiated number of queues. Otherwise any site
>>>>>> referencing cur_num_vqs won't work properly.
>>>>> You can't really use cur_num_vqs. Consider this scenario:
>>>>> create vdpa device with max VQs = 16 (RQT size created with 8 entries)
>>>>> boot VM
>>>>> ethtool modify num QPs to 2. cur_num_vqs now equals 2.
>>>>> reboot VM.
>>>> Once VM is rebooted, the cur_num_vqs has to reset to 0 in the reset() op.
>>> Why reset to 0? The correct value to set here is
>>> config->max_virtqueue_pairs.
>> I am not sure I follow; perhaps post a patch showing what you want to do?
>> If you intend to set it to 2 (non-MQ) or 2 * max_virtqueue_pairs (MQ) later
>> in create_rqt(), maybe it's not quite relevant whether or not to reset. But
>> for consistency I'd prefer resetting it to a value that reflects the fact
>> that this field has no valid value until features are negotiated. Whenever
>> there's a need in the future, this field can easily be exposed to userspace.
>>
>>>    This is what happens when you add the
>>> device.
>> This is not relevant and doesn't matter. Essentially, that line of code
>> setting cur_num_vqs in vdpa_add can be removed. The value of cur_num_vqs
>> isn't effective before the MQ feature is negotiated, i.e. you don't get a
>> sensible cur_num_vqs value before the RQT size is derived from the MQ
>> feature and the table gets created.
>>
>>>    Setting cur_num_vqs to config->max_virtqueue_pairs will address
>>> your concerns with respect to modify_rqt. Once you reset cur_num_vqs to
>>> config->max_virtqueue_pairs, your concerns with respect to changing
>>> number of QPs should be addressed.
>> No, it won't. If you look at the caller of change_num_qps(), in
>> handle_ctrl_mq() there's the following code:
>>
>>                  if (ndev->cur_num_vqs == 2 * newqps) {
>>                          status = VIRTIO_NET_OK;
>>                          break;
>>                  }
>>
>> So if you set cur_num_vqs to max_virtqueue_pairs while MQ is not negotiated,
>> it results in a spurious success: a driver that doesn't support MQ appears
>> to change the number of queues while effectively changing nothing. You may
>> argue that we can fail the non-MQ case early before reaching this code, but
>> that is another patch, and it would make the code more obscure, not less.
>> Intuitively, people won't realize that your cur_num_vqs doesn't apply to
>> non-MQ just by following the name.
>>
>>
>> Regards,
>> -Siwei
>>
> Si-Wei:
> So your point is that you want cur_num_vqs to be 0 until features are
> negotiated, 2 for non-MQ, and otherwise whatever was configured through
> ethtool.
>
> I can send a patch to address this concern of yours.
Thanks, Eli!

>
> The question is how we want to proceed here. I do want this series to get
> into 5.16. Should I send a new series or fixes on top of this one?
Yep, fixes on top should be fine. I already gave R-b on your fixes. This 
should pretty much close the review loop. Thanks for your hard work on this!

>   Also, it
> would be helpful if you could list all the changes you think should be
> made.
There are a couple of cosmetic changes, but they don't matter much anyway. I
think your latest fixes address the major comments, and I can post patches
myself to address the rest.

Thanks!
-Siwei
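
To make the failure mode discussed above concrete, here is a minimal
userspace model of the early-exit check quoted from handle_ctrl_mq(), with
the VIRTIO_NET_F_MQ validation Si-Wei asks for added up front.
model_ctrl_mq() is a hypothetical stand-in, not the driver function:

#include <stdio.h>

#define VIRTIO_NET_OK	0
#define VIRTIO_NET_ERR	1

/* Illustrative model of the control-queue MQ handler discussed above. */
static int model_ctrl_mq(int mq_negotiated, unsigned int cur_num_vqs,
			 unsigned int newqps)
{
	if (!mq_negotiated)
		return VIRTIO_NET_ERR;	/* reject MQ commands w/o the feature */
	if (cur_num_vqs == 2 * newqps)
		return VIRTIO_NET_OK;	/* nothing to change */
	/* ... would adjust the number of queue pairs here ... */
	return VIRTIO_NET_OK;
}

int main(void)
{
	/* Without the feature check, a cur_num_vqs equal to 2 * newqps
	 * would report a spurious success for a non-MQ guest. */
	printf("%d\n", model_ctrl_mq(0, 8, 4));	/* prints VIRTIO_NET_ERR */
	return 0;
}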


>
> Michael: what do you think?
>
>>>    We could even leave create_rqt
>>> untouched or modify the code to use cur_num_vqs. It should work the
>>> same.
>>>
>>>>> create RQT with 1 entry only.
>>>> Here cur_num_vqs will be loaded with the newly negotiated value (max_rqt)
>>>> again.
>>>>
>>>>> ethtool modify num QPs to 4. modify RQT will fail since it was created
>>>>> with max QPs equal to 1.
>>>> It won't fail, as cur_num_vqs will be consistent with the number of
>>>> queues newly negotiated (i.e. the max_rqt in create_rqt) for modify.
>>>>
>>>>> I think it is ok to create the RQT always with the value configured when
>>>>> the device was created.
>>>> The problem of not setting cur_num_vqs in create_rqt (and resetting it in
>>>> mlx5_vdpa_reset) is that, once the VM is rebooted or the device is reset,
>>>> the value no longer matches the actual number of RQT entries and hence
>>>> breaks various assumptions in change_num_qps() or modify_rqt(). For instance:
>>>>
>>>> create vdpa device with max data VQs = 16
>>>> boot VM
>>>> features_ok set with MQ negotiated
>>>> RQT created with 8 entries in create_rqt
>>>> ethtool modify num QPs to 2. cur_num_vqs now equals 2.
>>>> reboot VM
>>>> features_ok set with MQ negotiated
>>>> RQT created with 8 entries in create_rqt
>>>> ethtool modify num QPs to 6.
>>>> cur_num_vqs was 2 while the actual RQT size is 8 (= 16 / 2)
>>>> change_num_qps would fail as the wrong increment branch (rather than the decrement) was taken
>>>>
>>>> and this case too, which calls out the need to validate the presence of
>>>> VIRTIO_NET_F_MQ in handle_ctrl_mq()
>>>>
>>>> create vdpa device with max data VQs = 16 (RQT size created with 8 entries)
>>>> boot VM
>>>> features_ok set with MQ negotiated
>>>> RQT created with 8 entries in create_rqt
>>>> ethtool modify num QPs to 2. cur_num_vqs now equals 2.
>>>> reboot VM
>>>> features_ok set with no MQ negotiated
>>>> RQT created with 8 entries in create_rqt
>>>> ethtool modify num QPs to 6. Suppose the guest runs a custom kernel that doesn't check the #channels to configure.
>>>> change_num_qps can succeed, and there is no host-side check prohibiting a single-queue guest from setting multi-queue.
>>>>
>>>> Thanks,
>>>> -Siwei
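
Modeled in userspace, the branch-selection problem in the two scenarios
above looks like this (all names are illustrative; the real logic lives in
change_num_qps()):

#include <stdio.h>

/* Model of the branch selection described in the scenarios above:
 * a stale cur_num_vqs picks the wrong path after a VM reboot. */
static void model_change_num_qps(unsigned int cur_num_vqs,
				 unsigned int newqps)
{
	unsigned int want = 2 * newqps;

	if (want > cur_num_vqs)
		printf("cur=%u want=%u: increment path\n", cur_num_vqs, want);
	else
		printf("cur=%u want=%u: decrement path\n", cur_num_vqs, want);
}

int main(void)
{
	model_change_num_qps(2, 6);	/* stale counter: increment (wrong) */
	model_change_num_qps(16, 6);	/* fresh counter: decrement (right) */
	return 0;
}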
>>>>>> Further, we need to validate VIRTIO_NET_F_MQ is present
>>>>>> in handle_ctrl_mq() before changing the number of queue pairs.
>>>>>>
>>>>>> So just disregard my previous R-b for this patch.
>>>>>>
>>>>>> Thanks,
>>>>>> -Siwei
>>>>>>
>>>>>>
>>>>>>>>            MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
>>>>>>>>          err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen,
>>>>>>>> &ndev->res.rqtn);
>>>>>>>> @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device
>>>>>>>> *vdev)
>>>>>>>>          clear_vqs_ready(ndev);
>>>>>>>>          mlx5_vdpa_destroy_mr(&ndev->mvdev);
>>>>>>>>          ndev->mvdev.status = 0;
>>>>>>>> -    memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
>>>>>>>> +    memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) *
>>>>>>>> (mvdev->max_vqs + 1));
>>>>>>>>          ndev->mvdev.actual_features = 0;
>>>>>>>>          ++mvdev->generation;
>>>>>>>>          if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
>>>>>>>> @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device
>>>>>>>> *vdev)
>>>>>>>>          }
>>>>>>>>          mlx5_vdpa_free_resources(&ndev->mvdev);
>>>>>>>>          mutex_destroy(&ndev->reslock);
>>>>>>>> +    kfree(ndev->event_cbs);
>>>>>>>> +    kfree(ndev->vqs);
>>>>>>>>      }
>>>>>>>>        static struct vdpa_notification_area
>>>>>>>> mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
>>>>>>>> @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct
>>>>>>>> vdpa_mgmt_dev *v_mdev, const char *name,
>>>>>>>>              return -EOPNOTSUPP;
>>>>>>>>          }
>>>>>>>>      -    /* we save one virtqueue for control virtqueue should we
>>>>>>>> require it */
>>>>>>>>          max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
>>>>>>>> max_num_virtio_queues);
>>>>>>>> -    max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
>>>>>>>> +    if (max_vqs < 2) {
>>>>>>>> +        dev_warn(mdev->device,
>>>>>>>> +             "%d virtqueues are supported. At least 2 are required\n",
>>>>>>>> +             max_vqs);
>>>>>>>> +        return -EAGAIN;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
>>>>>>>> +        if (add_config->net.max_vq_pairs > max_vqs / 2)
>>>>>>>> +            return -EINVAL;
>>>>>>>> +        max_vqs = min_t(u32, max_vqs, 2 *
>>>>>>>> add_config->net.max_vq_pairs);
>>>>>>>> +    } else {
>>>>>>>> +        max_vqs = 2;
>>>>>>>> +    }
>>>>>>>>            ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev,
>>>>>>>> mdev->device, &mlx5_vdpa_ops,
>>>>>>>>                       name, false);
>>>>>>>>          if (IS_ERR(ndev))
>>>>>>>>              return PTR_ERR(ndev);
>>>>>>>>      +    ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
>>>>>>>> +    ndev->event_cbs = kcalloc(max_vqs + 1,
>>>>>>>> sizeof(*ndev->event_cbs), GFP_KERNEL);
>>>>>>>> +    if (!ndev->vqs || !ndev->event_cbs) {
>>>>>>>> +        err = -ENOMEM;
>>>>>>>> +        goto err_alloc;
>>>>>>>> +    }
>>>>>>>>          ndev->mvdev.max_vqs = max_vqs;
>>>>>>>>          mvdev = &ndev->mvdev;
>>>>>>>>          mvdev->mdev = mdev;
>>>>>>>> @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct
>>>>>>>> vdpa_mgmt_dev *v_mdev, const char *name,
>>>>>>>>              mlx5_mpfs_del_mac(pfmdev, config->mac);
>>>>>>>>      err_mtu:
>>>>>>>>          mutex_destroy(&ndev->reslock);
>>>>>>>> +err_alloc:
>>>>>>>>          put_device(&mvdev->vdev.dev);
>>>>>>>>          return err;
>>>>>>>>      }
>>>>>>>> @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device
>>>>>>>> *adev,
>>>>>>>>          mgtdev->mgtdev.ops = &mdev_ops;
>>>>>>>>          mgtdev->mgtdev.device = mdev->device;
>>>>>>>>          mgtdev->mgtdev.id_table = id_table;
>>>>>>>> -    mgtdev->mgtdev.config_attr_mask =
>>>>>>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
>>>>>>>> +    mgtdev->mgtdev.config_attr_mask =
>>>>>>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
>>>>>>>> +                      BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
>>>>>>>>          mgtdev->madev = madev;
>>>>>>>>            err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
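
For reference, the RQT sizing math from create_rqt() in the patch above can
be sketched as a userspace program. roundup_p2() stands in for the kernel's
roundup_pow_of_two(), and the sample values are assumptions, not values read
from any device:

#include <stdio.h>

/* Userspace sketch of the RQT sizing in create_rqt() above. */
static unsigned int roundup_p2(unsigned int n)
{
	unsigned int p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned int log_max_rqt_size = 5;	/* assumed device cap */
	unsigned int num = 3;			/* negotiated VQ pairs */
	unsigned int cap = 1u << log_max_rqt_size;
	unsigned int max_rqt = roundup_p2(num);
	unsigned int i, j;

	if (max_rqt > cap)
		max_rqt = cap;
	/* entry i maps to the RX virtqueue of pair (i mod num) */
	for (i = 0, j = 0; i < max_rqt; i++, j += 2)
		printf("rqt[%u] -> data vq %u\n", i, j % (2 * num));
	return 0;
}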


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
  2022-01-11 12:05               ` Michael S. Tsirkin
@ 2022-01-12  2:36                 ` Jason Wang
  0 siblings, 0 replies; 40+ messages in thread
From: Jason Wang @ 2022-01-12  2:36 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Laurent Vivier, virtualization, eperezma, Si-Wei Liu, Eli Cohen

On Tue, Jan 11, 2022 at 8:06 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Tue, Jan 11, 2022 at 04:28:58PM +0800, Jason Wang wrote:
> > On Tue, Jan 11, 2022 at 3:34 PM Eli Cohen <elic@nvidia.com> wrote:
> > >
> > > On Mon, Jan 10, 2022 at 05:00:34PM -0800, Si-Wei Liu wrote:
> > > >
> > > >
> > > > On 1/9/2022 6:10 AM, Eli Cohen wrote:
> > > > > On Thu, Jan 06, 2022 at 05:50:24PM -0800, Si-Wei Liu wrote:
> > > > > >
> > > > > > On 1/6/2022 5:27 PM, Si-Wei Liu wrote:
> > > > > > >
> > > > > > > On 1/5/2022 3:46 AM, Eli Cohen wrote:
> > > > > > > > Check whether the max number of data virtqueue pairs was provided when
> > > > > > > > adding a new device and verify that the new value does not exceed device
> > > > > > > > capabilities.
> > > > > > > >
> > > > > > > > In addition, change the arrays holding virtqueue and callback contexts
> > > > > > > > to be dynamically allocated.
> > > > > > > >
> > > > > > > > Signed-off-by: Eli Cohen <elic@nvidia.com>
> > > > > > > > ---
> > > > > > > > v6 -> v7:
> > > > > > > > 1. Evaluate RQT table size based on config.max_virtqueue_pairs.
> > > > > > > >
> > > > > > > >    drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++---------
> > > > > > > >    1 file changed, 37 insertions(+), 14 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > index 4a2149f70f1e..d4720444bf78 100644
> > > > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue {
> > > > > > > >        struct mlx5_vq_restore_info ri;
> > > > > > > >    };
> > > > > > > >    -/* We will remove this limitation once mlx5_vdpa_alloc_resources()
> > > > > > > > - * provides for driver space allocation
> > > > > > > > - */
> > > > > > > > -#define MLX5_MAX_SUPPORTED_VQS 16
> > > > > > > > -
> > > > > > > >    static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
> > > > > > > >    {
> > > > > > > >        if (unlikely(idx > mvdev->max_idx))
> > > > > > > > @@ -148,8 +143,8 @@ struct mlx5_vdpa_net {
> > > > > > > >        struct mlx5_vdpa_dev mvdev;
> > > > > > > >        struct mlx5_vdpa_net_resources res;
> > > > > > > >        struct virtio_net_config config;
> > > > > > > > -    struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
> > > > > > > > -    struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
> > > > > > > > +    struct mlx5_vdpa_virtqueue *vqs;
> > > > > > > > +    struct vdpa_callback *event_cbs;
> > > > > > > >          /* Serialize vq resources creation and destruction. This is
> > > > > > > > required
> > > > > > > >         * since memory map might change and we need to destroy and create
> > > > > > > > @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net
> > > > > > > > *ndev)
> > > > > > > >    {
> > > > > > > >        int i;
> > > > > > > >    -    for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
> > > > > > > > +    for (i = 0; i < ndev->mvdev.max_vqs; i++)
> > > > > > > >            suspend_vq(ndev, &ndev->vqs[i]);
> > > > > > > >    }
> > > > > > > >    @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net
> > > > > > > > *ndev)
> > > > > > > >        void *in;
> > > > > > > >        int i, j;
> > > > > > > >        int err;
> > > > > > > > +    int num;
> > > > > > > >    -    max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
> > > > > > > > +    if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
> > > > > > > > +        num = 1;
> > > > > > > > +    else
> > > > > > > > +        num = le16_to_cpu(ndev->config.max_virtqueue_pairs);
> > > > > > > > +
> > > > > > > > +    max_rqt = min_t(int, roundup_pow_of_two(num),
> > > > > > > >                1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
> > > > > > > >        if (max_rqt < 1)
> > > > > > > >            return -EOPNOTSUPP;
> > > > > > > > @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
> > > > > > > >        MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
> > > > > > > >        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
> > > > > > > >        for (i = 0, j = 0; i < max_rqt; i++, j += 2)
> > > > > > > > -        list[i] = cpu_to_be32(ndev->vqs[j %
> > > > > > > > ndev->mvdev.max_vqs].virtq_id);
> > > > > > > > +        list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
> > > > > > > Good catch. LGTM.
> > > > > > >
> > > > > > > Reviewed-by: Si-Wei Liu<si-wei.liu@oracle.com>
> > > > > > >
> > > > > > Apologies for replying to myself. It looks to me like we need to set
> > > > > > cur_num_vqs to the negotiated number of queues. Otherwise any site
> > > > > > referencing cur_num_vqs won't work properly.
> > > > > You can't really use cur_num_vqs. Consider this scenario:
> > > > > create vdpa device with max VQs = 16 (RQT size created with 8 entries)
> > > > > boot VM
> > > > > ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> > > > > reboot VM.
> > > > Once VM is rebooted, the cur_num_vqs has to reset to 0 in the reset() op.
> > >
> > > Why reset to 0? The correct value to set here is
> > > config->max_virtqueue_pairs.
> >
> > Multiqueue is disabled by default, so I think it should be 1 here. This
> > is the behavior of QEMU.
> >
> > It looks to me like we need to clarify this in the spec.
> >
> > Thanks
>
> Not sure what you want to clarify.

Ok, I see this:

Multiqueue is disabled by setting virtqueue_pairs to 1 (this is the
default) and waiting for the device to use the command buffer.

So we are fine.

Thanks
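
Put as a small userspace model, the lifecycle settled on in this exchange
would look roughly like the following. The struct and function names are
purely illustrative, and the flow is an assumption based on this thread,
not driver code:

#include <stdio.h>

/* Illustrative model of the cur_num_vqs lifecycle discussed above. */
struct model_dev {
	int features_ok;
	int mq;
	unsigned int cur_num_vqs;
};

static void model_reset(struct model_dev *d)
{
	d->features_ok = 0;
	d->mq = 0;
	d->cur_num_vqs = 0;	/* no valid value until features negotiated */
}

static void model_features_ok(struct model_dev *d, int mq)
{
	d->features_ok = 1;
	d->mq = mq;
	d->cur_num_vqs = 2;	/* spec default: virtqueue_pairs == 1 */
}

static int model_ethtool_set_pairs(struct model_dev *d, unsigned int pairs,
				   unsigned int max_pairs)
{
	if (!d->features_ok || !d->mq || pairs > max_pairs)
		return -1;
	d->cur_num_vqs = 2 * pairs;
	return 0;
}

int main(void)
{
	struct model_dev d;

	model_reset(&d);
	model_features_ok(&d, 1);
	model_ethtool_set_pairs(&d, 4, 8);
	printf("cur_num_vqs: %u\n", d.cur_num_vqs);	/* prints 8 */
	return 0;
}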

>
>
> > > This is what happens when you add the
> > > device. Setting cur_num_vqs to config->max_virtqueue_pairs will address
> > > your concerns with respect to modify_rqt. Once you reset cur_num_vqs to
> > > config->max_virtqueue_pairs, your concerns with respect to changing
> > > number of QPs should be addressed. We could even leave create_rqt
> > > untouched or modify the code to use cur_num_vqs. It should work the
> > > same.
> > >
> > > > > create RQT with 1 entry only.
> > > > Here cur_num_vqs will be loaded with the newly negotiated value (max_rqt)
> > > > again.
> > > >
> > > > > ethtool modify num QPs to 4. modify RQT will fail since it was created
> > > > > with max QPs equal to 1.
> > > > It won't fail, as cur_num_vqs will be consistent with the number of
> > > > queues newly negotiated (i.e. the max_rqt in create_rqt) for modify.
> > > >
> > > > >
> > > > > I think it is ok to create the RQT always with the value configured when
> > > > > the device was created.
> > > > The problem of not setting cur_num_vqs in create_rqt (and resetting it in
> > > > mlx5_vdpa_reset) is that, once the VM is rebooted or the device is reset,
> > > > the value no longer matches the actual number of RQT entries and hence
> > > > breaks various assumptions in change_num_qps() or modify_rqt(). For instance:
> > > >
> > > > create vdpa device with max data VQs = 16
> > > > boot VM
> > > > features_ok set with MQ negotiated
> > > > RQT created with 8 entries in create_rqt
> > > > ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> > > > reboot VM
> > > > features_ok set with MQ negotiated
> > > > RQT created with 8 entries in create_rqt
> > > > ethtool modify num QPs to 6.
> > > > cur_num_vqs was 2 while the actual RQT size is 8 (= 16 / 2)
> > > > change_num_qps would fail as the wrong increment branch (rather than the decrement) was taken
> > > >
> > > > and this case too, which calls out the need to validate the presence of
> > > > VIRTIO_NET_F_MQ in handle_ctrl_mq()
> > > >
> > > > create vdpa device with max data VQs = 16 (RQT size created with 8 entries)
> > > > boot VM
> > > > features_ok set with MQ negotiated
> > > > RQT created with 8 entries in create_rqt
> > > > ethtool modify num QPs to 2. cur_num_vqs now equals 2.
> > > > reboot VM
> > > > features_ok set with no MQ negotiated
> > > > RQT created with 8 entries in create_rqt
> > > > ethtool modify num QPs to 6. Suppose the guest runs a custom kernel that doesn't check the #channels to configure.
> > > > change_num_qps can succeed, and there is no host-side check prohibiting a single-queue guest from setting multi-queue.
> > > >
> > > > Thanks,
> > > > -Siwei
> > > > >
> > > > > > Further, we need to validate VIRTIO_NET_F_MQ is present
> > > > > > in handle_ctrl_mq() before changing the number of queue pairs.
> > > > > >
> > > > > > So just disregard my previous R-b for this patch.
> > > > > >
> > > > > > Thanks,
> > > > > > -Siwei
> > > > > >
> > > > > >
> > > > > > > >          MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
> > > > > > > >        err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen,
> > > > > > > > &ndev->res.rqtn);
> > > > > > > > @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device
> > > > > > > > *vdev)
> > > > > > > >        clear_vqs_ready(ndev);
> > > > > > > >        mlx5_vdpa_destroy_mr(&ndev->mvdev);
> > > > > > > >        ndev->mvdev.status = 0;
> > > > > > > > -    memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
> > > > > > > > +    memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) *
> > > > > > > > (mvdev->max_vqs + 1));
> > > > > > > >        ndev->mvdev.actual_features = 0;
> > > > > > > >        ++mvdev->generation;
> > > > > > > >        if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
> > > > > > > > @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device
> > > > > > > > *vdev)
> > > > > > > >        }
> > > > > > > >        mlx5_vdpa_free_resources(&ndev->mvdev);
> > > > > > > >        mutex_destroy(&ndev->reslock);
> > > > > > > > +    kfree(ndev->event_cbs);
> > > > > > > > +    kfree(ndev->vqs);
> > > > > > > >    }
> > > > > > > >      static struct vdpa_notification_area
> > > > > > > > mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
> > > > > > > > @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct
> > > > > > > > vdpa_mgmt_dev *v_mdev, const char *name,
> > > > > > > >            return -EOPNOTSUPP;
> > > > > > > >        }
> > > > > > > >    -    /* we save one virtqueue for control virtqueue should we
> > > > > > > > require it */
> > > > > > > >        max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
> > > > > > > > max_num_virtio_queues);
> > > > > > > > -    max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
> > > > > > > > +    if (max_vqs < 2) {
> > > > > > > > +        dev_warn(mdev->device,
> > > > > > > > +             "%d virtqueues are supported. At least 2 are required\n",
> > > > > > > > +             max_vqs);
> > > > > > > > +        return -EAGAIN;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
> > > > > > > > +        if (add_config->net.max_vq_pairs > max_vqs / 2)
> > > > > > > > +            return -EINVAL;
> > > > > > > > +        max_vqs = min_t(u32, max_vqs, 2 *
> > > > > > > > add_config->net.max_vq_pairs);
> > > > > > > > +    } else {
> > > > > > > > +        max_vqs = 2;
> > > > > > > > +    }
> > > > > > > >          ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev,
> > > > > > > > mdev->device, &mlx5_vdpa_ops,
> > > > > > > >                     name, false);
> > > > > > > >        if (IS_ERR(ndev))
> > > > > > > >            return PTR_ERR(ndev);
> > > > > > > >    +    ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
> > > > > > > > +    ndev->event_cbs = kcalloc(max_vqs + 1,
> > > > > > > > sizeof(*ndev->event_cbs), GFP_KERNEL);
> > > > > > > > +    if (!ndev->vqs || !ndev->event_cbs) {
> > > > > > > > +        err = -ENOMEM;
> > > > > > > > +        goto err_alloc;
> > > > > > > > +    }
> > > > > > > >        ndev->mvdev.max_vqs = max_vqs;
> > > > > > > >        mvdev = &ndev->mvdev;
> > > > > > > >        mvdev->mdev = mdev;
> > > > > > > > @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct
> > > > > > > > vdpa_mgmt_dev *v_mdev, const char *name,
> > > > > > > >            mlx5_mpfs_del_mac(pfmdev, config->mac);
> > > > > > > >    err_mtu:
> > > > > > > >        mutex_destroy(&ndev->reslock);
> > > > > > > > +err_alloc:
> > > > > > > >        put_device(&mvdev->vdev.dev);
> > > > > > > >        return err;
> > > > > > > >    }
> > > > > > > > @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device
> > > > > > > > *adev,
> > > > > > > >        mgtdev->mgtdev.ops = &mdev_ops;
> > > > > > > >        mgtdev->mgtdev.device = mdev->device;
> > > > > > > >        mgtdev->mgtdev.id_table = id_table;
> > > > > > > > -    mgtdev->mgtdev.config_attr_mask =
> > > > > > > > BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
> > > > > > > > +    mgtdev->mgtdev.config_attr_mask =
> > > > > > > > BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
> > > > > > > > +                      BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
> > > > > > > >        mgtdev->madev = madev;
> > > > > > > >          err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
> > > >
> > >
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2022-01-12  2:37 UTC | newest]

Thread overview: 40+ messages
     [not found] <20220105114646.577224-1-elic@nvidia.com>
     [not found] ` <20220105114646.577224-4-elic@nvidia.com>
2022-01-07  0:33   ` [PATCH v7 03/14] vdpa: Sync calls set/get config/status with cf_mutex Si-Wei Liu
2022-01-07  5:08     ` Jason Wang
2022-01-08  1:23       ` Si-Wei Liu
2022-01-10  6:05         ` Jason Wang
2022-01-11  1:30           ` Si-Wei Liu
2022-01-11  4:46             ` Jason Wang
2022-01-11  6:26               ` Parav Pandit via Virtualization
2022-01-11  9:15                 ` Si-Wei Liu
2022-01-11  9:23               ` Si-Wei Liu
     [not found]     ` <20220109140956.GA70879@mtl-vdi-166.wap.labs.mlnx>
2022-01-11  1:14       ` Si-Wei Liu
     [not found] ` <20220105114646.577224-6-elic@nvidia.com>
2022-01-07  1:25   ` [PATCH v7 05/14] vdpa: Allow to configure max data virtqueues Si-Wei Liu
     [not found] ` <20220105114646.577224-8-elic@nvidia.com>
2022-01-07  1:27   ` [PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue Si-Wei Liu
2022-01-07  1:50     ` Si-Wei Liu
2022-01-07  5:43       ` Jason Wang
2022-01-08  1:38         ` Si-Wei Liu
     [not found]       ` <20220109141023.GB70879@mtl-vdi-166.wap.labs.mlnx>
2022-01-11  1:00         ` Si-Wei Liu
     [not found]           ` <20220111073416.GB149570@mtl-vdi-166.wap.labs.mlnx>
2022-01-11  8:28             ` Jason Wang
2022-01-11 12:05               ` Michael S. Tsirkin
2022-01-12  2:36                 ` Jason Wang
2022-01-11  8:52             ` Si-Wei Liu
     [not found]               ` <20220111152154.GA165838@mtl-vdi-166.wap.labs.mlnx>
2022-01-11 15:31                 ` Michael S. Tsirkin
2022-01-11 22:21                 ` Si-Wei Liu
2022-01-07 18:01     ` Nathan Chancellor
2022-01-08  1:43       ` Si-Wei Liu
2022-01-08  1:43         ` Si-Wei Liu
2022-01-10  6:53         ` Michael S. Tsirkin
2022-01-10  6:53           ` Michael S. Tsirkin
2022-01-10  6:58           ` Eli Cohen
     [not found] ` <20220105114646.577224-11-elic@nvidia.com>
2022-01-07  2:12   ` [PATCH v7 10/14] vdpa: Support reporting max device capabilities Si-Wei Liu
     [not found] ` <20220105114646.577224-12-elic@nvidia.com>
2022-01-07  2:12   ` [PATCH v7 11/14] vdpa/mlx5: Report " Si-Wei Liu
2022-01-07  5:49     ` Jason Wang
     [not found] ` <20220105114646.577224-5-elic@nvidia.com>
2022-01-07  5:14   ` [PATCH v7 04/14] vdpa: Read device configuration only if FEATURES_OK Jason Wang
     [not found] ` <20220105114646.577224-9-elic@nvidia.com>
2022-01-07  5:46   ` [PATCH v7 08/14] vdpa: Add support for returning device configuration information Jason Wang
     [not found] ` <20220105114646.577224-13-elic@nvidia.com>
2022-01-07  5:50   ` [PATCH v7 12/14] vdpa/vdpa_sim: Configure max supported virtqueues Jason Wang
     [not found] ` <20220105114646.577224-14-elic@nvidia.com>
2022-01-07  5:51   ` [PATCH v7 13/14] vdpa: Use BIT_ULL for bit operations Jason Wang
     [not found] ` <20220105114646.577224-15-elic@nvidia.com>
2022-01-07  5:51   ` [PATCH v7 14/14] vdpa/vdpa_sim_net: Report max device capabilities Jason Wang
2022-01-10  7:04 ` [PATCH v7 00/14] Allow for configuring max number of virtqueue pairs Michael S. Tsirkin
2022-01-10  7:09   ` Jason Wang
2022-01-11  1:59   ` Si-Wei Liu
     [not found]   ` <20220110074958.GA105688@mtl-vdi-166.wap.labs.mlnx>
2022-01-11  2:02     ` Si-Wei Liu
