* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
       [not found] ` <20220216080022.56707-2-elic@nvidia.com>
@ 2022-02-16 18:49   ` Si-Wei Liu
       [not found]     ` <20220217064619.GB86497@mtl-vdi-166.wap.labs.mlnx>
  2022-03-03  7:53   ` Jason Wang
  2022-03-07 11:03   ` Parav Pandit via Virtualization
  2 siblings, 1 reply; 20+ messages in thread
From: Si-Wei Liu @ 2022-02-16 18:49 UTC (permalink / raw)
  To: Eli Cohen, mst, jasowang, virtualization, eperezma, amorenoz,
	lvivier, sgarzare, parav



On 2/16/2022 12:00 AM, Eli Cohen wrote:
> Allow reading vendor statistics of a vdpa device. The specific statistics
> data is received by the upstream driver in the form of (attribute
> name, attribute value) pairs.
>
> An example of statistics for mlx5_vdpa device are:
>
> received_desc - number of descriptors received by the virtqueue
> completed_desc - number of descriptors completed by the virtqueue
>
> A descriptor using indirect buffers is still counted as 1. In addition,
> N chained descriptors are counted correctly N times as one would expect.
>
> A new callback was added to vdpa_config_ops which provides the means for
> the vdpa driver to return statistics results.
>
> The interface allows for reading all the supported virtqueues, including
> the control virtqueue if it exists.
>
> Below are some examples taken from mlx5_vdpa which are introduced in the
> following patch:
>
> 1. Read statistics for the virtqueue at index 1
>
> $ vdpa dev vstats show vdpa-a qidx 1
> vdpa-a:
> queue_type tx queue_index 1 received_desc 3844836 completed_desc 3844836
>
> 2. Read statistics for the virtqueue at index 32
> $ vdpa dev vstats show vdpa-a qidx 32
> vdpa-a:
> queue_type control_vq queue_index 32 received_desc 62 completed_desc 62
>
> 3. Read statistics for the virtqueue at index 0 with json output
> $ vdpa -j dev vstats show vdpa-a qidx 0
> {"vstats":{"vdpa-a":{
> "queue_type":"rx","queue_index":0,"name":"received_desc","value":417776,\
>   "name":"completed_desc","value":417548}}}
>
> 4. Read statistics for the virtqueue at index 0 with pretty json output
> $ vdpa -jp dev vstats show vdpa-a qidx 0
> {
>      "vstats": {
>          "vdpa-a": {
>
>              "queue_type": "rx",
I wonder where this info can be inferred from? I don't see a relevant change 
in the patch series that helps gather the VDPA_ATTR_DEV_QUEUE_TYPE. Is this 
an arbitrary string defined by the vendor as well? If so, how is the user 
expected to consume it?

>              "queue_index": 0,
>              "name": "received_desc",
>              "value": 417776,
>              "name": "completed_desc",
>              "value": 417548
Not for this kernel patch, but IMHO it's best to put the name & 
value pairs in an array instead of flat entries in json's 
hash/dictionary. The hash entries can be re-ordered deliberately by an 
external json parsing tool, ending up with inconsistent stat values.
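
For illustration, the array form could look something like the sketch 
below (hypothetical output, not what the tool prints today):

{
    "vstats": {
        "vdpa-a": {
            "queue_stats": [{
                "queue_index": 0,
                "queue_type": "rx",
                "stat_name": [ "received_desc", "completed_desc" ],
                "stat_value": [ 417776, 417548 ]
            }]
        }
    }
}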

Thanks,
-Siwei
>          }
>      }
> }
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
>   drivers/vdpa/vdpa.c       | 129 ++++++++++++++++++++++++++++++++++++++
>   include/linux/vdpa.h      |   5 ++
>   include/uapi/linux/vdpa.h |   7 +++
>   3 files changed, 141 insertions(+)
>
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> index 9846c9de4bfa..d0ff671baf88 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
>   	return err;
>   }
>   
> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
> +			       struct genl_info *info, u32 index)
> +{
> +	int err;
> +
> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
> +		return -EMSGSIZE;
> +
> +	err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info->extack);
> +	if (err)
> +		return err;
> +
> +	return 0;
> +}
> +
> +static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg,
> +			     struct genl_info *info, u32 index)
> +{
> +	int err;
> +
> +	if (!vdev->config->get_vendor_vq_stats)
> +		return -EOPNOTSUPP;
> +
> +	err = vdpa_fill_stats_rec(vdev, msg, info, index);
> +	if (err)
> +		return err;
> +
> +	return 0;
> +}
> +
> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
> +				      struct sk_buff *msg,
> +				      struct genl_info *info, u32 index)
> +{
> +	u32 device_id;
> +	void *hdr;
> +	int err;
> +	u32 portid = info->snd_portid;
> +	u32 seq = info->snd_seq;
> +	u32 flags = 0;
> +
> +	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
> +			  VDPA_CMD_DEV_VSTATS_GET);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
> +		err = -EMSGSIZE;
> +		goto undo_msg;
> +	}
> +
> +	device_id = vdev->config->get_device_id(vdev);
> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
> +		err = -EMSGSIZE;
> +		goto undo_msg;
> +	}
> +
> +	err = vendor_stats_fill(vdev, msg, info, index);
> +
> +	genlmsg_end(msg, hdr);
> +
> +	return err;
> +
> +undo_msg:
> +	genlmsg_cancel(msg, hdr);
> +	return err;
> +}
> +
>   static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info)
>   {
>   	struct vdpa_device *vdev;
> @@ -990,6 +1058,60 @@ vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *
>   	return msg->len;
>   }
>   
> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
> +					  struct genl_info *info)
> +{
> +	struct vdpa_device *vdev;
> +	struct sk_buff *msg;
> +	const char *devname;
> +	struct device *dev;
> +	u32 index;
> +	int err;
> +
> +	if (!info->attrs[VDPA_ATTR_DEV_NAME])
> +		return -EINVAL;
> +
> +	if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
> +		return -EINVAL;
> +
> +	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> +	if (!msg)
> +		return -ENOMEM;
> +
> +	index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
> +	mutex_lock(&vdpa_dev_mutex);
> +	dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
> +	if (!dev) {
> +		NL_SET_ERR_MSG_MOD(info->extack, "device not found");
> +		err = -ENODEV;
> +		goto dev_err;
> +	}
> +	vdev = container_of(dev, struct vdpa_device, dev);
> +	if (!vdev->mdev) {
> +		NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device");
> +		err = -EINVAL;
> +		goto mdev_err;
> +	}
> +	err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
> +	if (!err)
> +		err = genlmsg_reply(msg, info);
> +
> +	put_device(dev);
> +	mutex_unlock(&vdpa_dev_mutex);
> +
> +	if (err)
> +		nlmsg_free(msg);
> +
> +	return err;
> +
> +mdev_err:
> +	put_device(dev);
> +dev_err:
> +	mutex_unlock(&vdpa_dev_mutex);
> +	return err;
> +}
> +
>   static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>   	[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
>   	[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
> @@ -997,6 +1119,7 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>   	[VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
>   	/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
>   	[VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
> +	[VDPA_ATTR_DEV_QUEUE_INDEX] = NLA_POLICY_RANGE(NLA_U32, 0, 65535),
>   };
>   
>   static const struct genl_ops vdpa_nl_ops[] = {
> @@ -1030,6 +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
>   		.doit = vdpa_nl_cmd_dev_config_get_doit,
>   		.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
>   	},
> +	{
> +		.cmd = VDPA_CMD_DEV_VSTATS_GET,
> +		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
> +		.doit = vdpa_nl_cmd_dev_stats_get_doit,
> +		.flags = GENL_ADMIN_PERM,
> +	},
>   };
>   
>   static struct genl_family vdpa_nl_family __ro_after_init = {
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index 2de442ececae..274203845cfc 100644
> --- a/include/linux/vdpa.h
> +++ b/include/linux/vdpa.h
> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
>   			    const struct vdpa_vq_state *state);
>   	int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
>   			    struct vdpa_vq_state *state);
> +	int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx,
> +				   struct sk_buff *msg,
> +				   struct netlink_ext_ack *extack);
>   	struct vdpa_notification_area
>   	(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
>   	/* vq irq is not expected to be changed once DRIVER_OK is set */
> @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
>   int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
>   void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
>   
> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
> +
>   #endif /* _LINUX_VDPA_H */
> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
> index 1061d8d2d09d..c5f229a41dc2 100644
> --- a/include/uapi/linux/vdpa.h
> +++ b/include/uapi/linux/vdpa.h
> @@ -18,6 +18,7 @@ enum vdpa_command {
>   	VDPA_CMD_DEV_DEL,
>   	VDPA_CMD_DEV_GET,		/* can dump */
>   	VDPA_CMD_DEV_CONFIG_GET,	/* can dump */
> +	VDPA_CMD_DEV_VSTATS_GET,
>   };
>   
>   enum vdpa_attr {
> @@ -46,6 +47,12 @@ enum vdpa_attr {
>   	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
>   	VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,		/* u32 */
>   	VDPA_ATTR_DEV_SUPPORTED_FEATURES,	/* u64 */
> +
> +	VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
> +	VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
> +	VDPA_ATTR_DEV_VENDOR_ATTR_NAME,		/* string */
> +	VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
> +
>   	/* new attributes must be added above here */
>   	VDPA_ATTR_MAX,
>   };


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
       [not found] ` <20220216080022.56707-2-elic@nvidia.com>
  2022-02-16 18:49   ` [PATCH v1 1/2] vdpa: Add support for querying vendor statistics Si-Wei Liu
@ 2022-03-03  7:53   ` Jason Wang
  2022-03-07 11:03   ` Parav Pandit via Virtualization
  2 siblings, 0 replies; 20+ messages in thread
From: Jason Wang @ 2022-03-03  7:53 UTC (permalink / raw)
  To: Eli Cohen, mst, virtualization, si-wei.liu, eperezma, amorenoz,
	lvivier, sgarzare, parav


On 2/16/2022 4:00 PM, Eli Cohen wrote:
> Allow reading vendor statistics of a vdpa device. The specific statistics
> data is received by the upstream driver in the form of (attribute
> name, attribute value) pairs.
>
> An example of statistics for mlx5_vdpa device are:
>
> received_desc - number of descriptors received by the virtqueue
> completed_desc - number of descriptors completed by the virtqueue
>
> A descriptor using indirect buffers is still counted as 1. In addition,
> N chained descriptors are counted correctly N times as one would expect.
>
> A new callback was added to vdpa_config_ops which provides the means for
> the vdpa driver to return statistics results.
>
> The interface allows for reading all the supported virtqueues, including
> the control virtqueue if it exists.
>
> Below are some examples taken from mlx5_vdpa which are introduced in the
> following patch:
>
> 1. Read statistics for the virtqueue at index 1
>
> $ vdpa dev vstats show vdpa-a qidx 1
> vdpa-a:
> queue_type tx queue_index 1 received_desc 3844836 completed_desc 3844836
>
> 2. Read statistics for the virtqueue at index 32
> $ vdpa dev vstats show vdpa-a qidx 32
> vdpa-a:
> queue_type control_vq queue_index 32 received_desc 62 completed_desc 62
>
> 3. Read statistics for the virtqueue at index 0 with json output
> $ vdpa -j dev vstats show vdpa-a qidx 0
> {"vstats":{"vdpa-a":{
> "queue_type":"rx","queue_index":0,"name":"received_desc","value":417776,\
>   "name":"completed_desc","value":417548}}}
>
> 4. Read statistics for the virtqueue at index 0 with pretty json output
> $ vdpa -jp dev vstats show vdpa-a qidx 0
> {
>      "vstats": {
>          "vdpa-a": {
>
>              "queue_type": "rx",
>              "queue_index": 0,
>              "name": "received_desc",
>              "value": 417776,
>              "name": "completed_desc",
>              "value": 417548
>          }
>      }
> }
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> ---
>   drivers/vdpa/vdpa.c       | 129 ++++++++++++++++++++++++++++++++++++++
>   include/linux/vdpa.h      |   5 ++
>   include/uapi/linux/vdpa.h |   7 +++
>   3 files changed, 141 insertions(+)
>
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> index 9846c9de4bfa..d0ff671baf88 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
>   	return err;
>   }
>   
> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
> +			       struct genl_info *info, u32 index)
> +{
> +	int err;
> +
> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
> +		return -EMSGSIZE;
> +
> +	err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info->extack);
> +	if (err)
> +		return err;
> +
> +	return 0;
> +}
> +
> +static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg,
> +			     struct genl_info *info, u32 index)
> +{
> +	int err;
> +
> +	if (!vdev->config->get_vendor_vq_stats)
> +		return -EOPNOTSUPP;
> +
> +	err = vdpa_fill_stats_rec(vdev, msg, info, index);
> +	if (err)
> +		return err;
> +
> +	return 0;
> +}
> +
> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
> +				      struct sk_buff *msg,
> +				      struct genl_info *info, u32 index)
> +{
> +	u32 device_id;
> +	void *hdr;
> +	int err;
> +	u32 portid = info->snd_portid;
> +	u32 seq = info->snd_seq;
> +	u32 flags = 0;
> +
> +	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
> +			  VDPA_CMD_DEV_VSTATS_GET);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
> +		err = -EMSGSIZE;
> +		goto undo_msg;
> +	}
> +
> +	device_id = vdev->config->get_device_id(vdev);
> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
> +		err = -EMSGSIZE;
> +		goto undo_msg;
> +	}
> +
> +	err = vendor_stats_fill(vdev, msg, info, index);
> +
> +	genlmsg_end(msg, hdr);
> +
> +	return err;
> +
> +undo_msg:
> +	genlmsg_cancel(msg, hdr);
> +	return err;
> +}
> +
>   static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info)
>   {
>   	struct vdpa_device *vdev;
> @@ -990,6 +1058,60 @@ vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *
>   	return msg->len;
>   }
>   
> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
> +					  struct genl_info *info)
> +{
> +	struct vdpa_device *vdev;
> +	struct sk_buff *msg;
> +	const char *devname;
> +	struct device *dev;
> +	u32 index;
> +	int err;
> +
> +	if (!info->attrs[VDPA_ATTR_DEV_NAME])
> +		return -EINVAL;
> +
> +	if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
> +		return -EINVAL;
> +
> +	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> +	if (!msg)
> +		return -ENOMEM;
> +
> +	index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
> +	mutex_lock(&vdpa_dev_mutex);
> +	dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
> +	if (!dev) {
> +		NL_SET_ERR_MSG_MOD(info->extack, "device not found");
> +		err = -ENODEV;
> +		goto dev_err;
> +	}
> +	vdev = container_of(dev, struct vdpa_device, dev);
> +	if (!vdev->mdev) {
> +		NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device");
> +		err = -EINVAL;
> +		goto mdev_err;
> +	}
> +	err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
> +	if (!err)
> +		err = genlmsg_reply(msg, info);
> +
> +	put_device(dev);
> +	mutex_unlock(&vdpa_dev_mutex);
> +
> +	if (err)
> +		nlmsg_free(msg);
> +
> +	return err;
> +
> +mdev_err:
> +	put_device(dev);
> +dev_err:
> +	mutex_unlock(&vdpa_dev_mutex);
> +	return err;
> +}
> +
>   static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>   	[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
>   	[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
> @@ -997,6 +1119,7 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>   	[VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
>   	/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
>   	[VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
> +	[VDPA_ATTR_DEV_QUEUE_INDEX] = NLA_POLICY_RANGE(NLA_U32, 0, 65535),
>   };
>   
>   static const struct genl_ops vdpa_nl_ops[] = {
> @@ -1030,6 +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
>   		.doit = vdpa_nl_cmd_dev_config_get_doit,
>   		.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
>   	},
> +	{
> +		.cmd = VDPA_CMD_DEV_VSTATS_GET,
> +		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
> +		.doit = vdpa_nl_cmd_dev_stats_get_doit,
> +		.flags = GENL_ADMIN_PERM,
> +	},
>   };
>   
>   static struct genl_family vdpa_nl_family __ro_after_init = {
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index 2de442ececae..274203845cfc 100644
> --- a/include/linux/vdpa.h
> +++ b/include/linux/vdpa.h
> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
>   			    const struct vdpa_vq_state *state);
>   	int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
>   			    struct vdpa_vq_state *state);
> +	int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx,
> +				   struct sk_buff *msg,
> +				   struct netlink_ext_ack *extack);
>   	struct vdpa_notification_area
>   	(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
>   	/* vq irq is not expected to be changed once DRIVER_OK is set */
> @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
>   int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
>   void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
>   
> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
> +
>   #endif /* _LINUX_VDPA_H */
> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
> index 1061d8d2d09d..c5f229a41dc2 100644
> --- a/include/uapi/linux/vdpa.h
> +++ b/include/uapi/linux/vdpa.h
> @@ -18,6 +18,7 @@ enum vdpa_command {
>   	VDPA_CMD_DEV_DEL,
>   	VDPA_CMD_DEV_GET,		/* can dump */
>   	VDPA_CMD_DEV_CONFIG_GET,	/* can dump */
> +	VDPA_CMD_DEV_VSTATS_GET,
>   };
>   
>   enum vdpa_attr {
> @@ -46,6 +47,12 @@ enum vdpa_attr {
>   	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
>   	VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,		/* u32 */
>   	VDPA_ATTR_DEV_SUPPORTED_FEATURES,	/* u64 */
> +
> +	VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
> +	VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */


This is unused.

Otherwise looks good.

Thanks


> +	VDPA_ATTR_DEV_VENDOR_ATTR_NAME,		/* string */
> +	VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
> +
>   	/* new attributes must be added above here */
>   	VDPA_ATTR_MAX,
>   };


* Re: [PATCH v1 2/2] vdpa/mlx5: Add support for reading descriptor statistics
       [not found] ` <20220216080022.56707-3-elic@nvidia.com>
@ 2022-03-03  7:55   ` Jason Wang
  0 siblings, 0 replies; 20+ messages in thread
From: Jason Wang @ 2022-03-03  7:55 UTC (permalink / raw)
  To: Eli Cohen, mst, virtualization, si-wei.liu, eperezma, amorenoz,
	lvivier, sgarzare, parav


On 2/16/2022 4:00 PM, Eli Cohen wrote:
> Implement the get_vendor_vq_stats callback of vdpa_config_ops to return the
> statistics for a virtqueue.
>
> The statistics are provided as vendor specific statistics where the
> driver provides a pair of attribute name and attribute value.
>
> Currently supported are received descriptors and completed descriptors.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>


Acked-by: Jason Wang <jasowang@redhat.com>


> ---
>   drivers/vdpa/mlx5/core/mlx5_vdpa.h |   2 +
>   drivers/vdpa/mlx5/net/mlx5_vnet.c  | 156 +++++++++++++++++++++++++++++
>   include/linux/mlx5/mlx5_ifc.h      |   1 +
>   include/linux/mlx5/mlx5_ifc_vdpa.h |  39 ++++++++
>   4 files changed, 198 insertions(+)
>
> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> index daaf7b503677..44104093163b 100644
> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> @@ -61,6 +61,8 @@ struct mlx5_control_vq {
>   	struct vringh_kiov riov;
>   	struct vringh_kiov wiov;
>   	unsigned short head;
> +	unsigned int received_desc;
> +	unsigned int completed_desc;
>   };
>   
>   struct mlx5_vdpa_wq_ent {
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index b53603d94082..6156cf6e9377 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -119,6 +119,7 @@ struct mlx5_vdpa_virtqueue {
>   	struct mlx5_vdpa_umem umem2;
>   	struct mlx5_vdpa_umem umem3;
>   
> +	u32 counter_set_id;
>   	bool initialized;
>   	int index;
>   	u32 virtq_id;
> @@ -163,6 +164,8 @@ struct mlx5_vdpa_net {
>   	u32 cur_num_vqs;
>   	struct notifier_block nb;
>   	struct vdpa_callback config_cb;
> +	/* sync access to virtqueues statistics */
> +	struct mutex numq_lock;
>   };
>   
>   static void free_resources(struct mlx5_vdpa_net *ndev);
> @@ -821,6 +824,12 @@ static u16 get_features_12_3(u64 features)
>   	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
>   }
>   
> +static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
> +{
> +	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
> +	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
> +}
> +
>   static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
>   {
>   	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
> @@ -875,6 +884,8 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
>   	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
>   	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
>   	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
> +	if (counters_supported(&ndev->mvdev))
> +		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
>   
>   	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
>   	if (err)
> @@ -1138,6 +1149,47 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
>   	return err;
>   }
>   
> +static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
> +{
> +	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
> +	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
> +	void *cmd_hdr;
> +	int err;
> +
> +	if (!counters_supported(&ndev->mvdev))
> +		return 0;
> +
> +	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
> +
> +	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
> +	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
> +	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
> +
> +	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
> +	if (err)
> +		return err;
> +
> +	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> +
> +	return 0;
> +}
> +
> +static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
> +{
> +	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
> +	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
> +
> +	if (!counters_supported(&ndev->mvdev))
> +		return;
> +
> +	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
> +	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
> +	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
> +	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
> +	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
> +		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
> +}
> +
>   static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
>   {
>   	u16 idx = mvq->index;
> @@ -1165,6 +1217,10 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
>   	if (err)
>   		goto err_connect;
>   
> +	err = counter_set_alloc(ndev, mvq);
> +	if (err)
> +		goto err_counter;
> +
>   	err = create_virtqueue(ndev, mvq);
>   	if (err)
>   		goto err_connect;
> @@ -1182,6 +1238,8 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
>   	return 0;
>   
>   err_connect:
> +	counter_set_dealloc(ndev, mvq);
> +err_counter:
>   	qp_destroy(ndev, &mvq->vqqp);
>   err_vqqp:
>   	qp_destroy(ndev, &mvq->fwqp);
> @@ -1226,6 +1284,7 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
>   
>   	suspend_vq(ndev, mvq);
>   	destroy_virtqueue(ndev, mvq);
> +	counter_set_dealloc(ndev, mvq);
>   	qp_destroy(ndev, &mvq->vqqp);
>   	qp_destroy(ndev, &mvq->fwqp);
>   	cq_destroy(ndev, mvq->index);
> @@ -1573,8 +1632,10 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
>   			break;
>   		}
>   
> +		mutex_lock(&ndev->numq_lock);
>   		if (!change_num_qps(mvdev, newqps))
>   			status = VIRTIO_NET_OK;
> +		mutex_unlock(&ndev->numq_lock);
>   
>   		break;
>   	default:
> @@ -1615,6 +1676,7 @@ static void mlx5_cvq_kick_handler(struct work_struct *work)
>   		if (read != sizeof(ctrl))
>   			break;
>   
> +		cvq->received_desc++;
>   		switch (ctrl.class) {
>   		case VIRTIO_NET_CTRL_MAC:
>   			status = handle_ctrl_mac(mvdev, ctrl.cmd);
> @@ -1637,6 +1699,7 @@ static void mlx5_cvq_kick_handler(struct work_struct *work)
>   
>   		if (vringh_need_notify_iotlb(&cvq->vring))
>   			vringh_notify(&cvq->vring);
> +		cvq->completed_desc++;
>   	}
>   out:
>   	kfree(wqent);
> @@ -2238,6 +2301,8 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>   	mlx5_vdpa_destroy_mr(&ndev->mvdev);
>   	ndev->mvdev.status = 0;
>   	ndev->cur_num_vqs = 0;
> +	ndev->mvdev.cvq.received_desc = 0;
> +	ndev->mvdev.cvq.completed_desc = 0;
>   	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
>   	ndev->mvdev.actual_features = 0;
>   	++mvdev->generation;
> @@ -2310,6 +2375,7 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
>   		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
>   	}
>   	mlx5_vdpa_free_resources(&ndev->mvdev);
> +	mutex_destroy(&ndev->numq_lock);
>   	mutex_destroy(&ndev->reslock);
>   	kfree(ndev->event_cbs);
>   	kfree(ndev->vqs);
> @@ -2351,6 +2417,93 @@ static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
>   	return mvdev->actual_features;
>   }
>   
> +static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
> +			     u64 *received_desc, u64 *completed_desc)
> +{
> +	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
> +	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
> +	void *cmd_hdr;
> +	void *ctx;
> +	int err;
> +
> +	if (!counters_supported(&ndev->mvdev))
> +		return -EOPNOTSUPP;
> +
> +	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
> +		return -EAGAIN;
> +
> +	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
> +
> +	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
> +	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
> +	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
> +	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
> +
> +	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
> +	if (err)
> +		return err;
> +
> +	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
> +	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
> +	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
> +	return 0;
> +}
> +
> +static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> +					 struct sk_buff *msg,
> +					 struct netlink_ext_ack *extack)
> +{
> +	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> +	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> +	struct mlx5_vdpa_virtqueue *mvq;
> +	struct mlx5_control_vq *cvq;
> +	u64 received_desc;
> +	u64 completed_desc;
> +	int err = 0;
> +
> +	mutex_lock(&ndev->numq_lock);
> +	if (!is_index_valid(mvdev, idx)) {
> +		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
> +		err = -EINVAL;
> +		goto out_err;
> +	}
> +
> +	if (idx == ctrl_vq_idx(mvdev)) {
> +		cvq = &mvdev->cvq;
> +		received_desc = cvq->received_desc;
> +		completed_desc = cvq->completed_desc;
> +		goto out;
> +	}
> +
> +	mvq = &ndev->vqs[idx];
> +	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
> +	if (err) {
> +		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
> +		goto out_err;
> +	}
> +
> +out:
> +	err = -EMSGSIZE;
> +	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
> +		goto out_err;
> +
> +	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
> +			      VDPA_ATTR_PAD))
> +		goto out_err;
> +
> +	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
> +		goto out_err;
> +
> +	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
> +			      VDPA_ATTR_PAD))
> +		goto out_err;
> +
> +	err = 0;
> +out_err:
> +	mutex_unlock(&ndev->numq_lock);
> +	return err;
> +}
> +
>   static const struct vdpa_config_ops mlx5_vdpa_ops = {
>   	.set_vq_address = mlx5_vdpa_set_vq_address,
>   	.set_vq_num = mlx5_vdpa_set_vq_num,
> @@ -2360,6 +2513,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
>   	.get_vq_ready = mlx5_vdpa_get_vq_ready,
>   	.set_vq_state = mlx5_vdpa_set_vq_state,
>   	.get_vq_state = mlx5_vdpa_get_vq_state,
> +	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
>   	.get_vq_notification = mlx5_get_vq_notification,
>   	.get_vq_irq = mlx5_get_vq_irq,
>   	.get_vq_align = mlx5_vdpa_get_vq_align,
> @@ -2593,6 +2747,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>   
>   	init_mvqs(ndev);
>   	mutex_init(&ndev->reslock);
> +	mutex_init(&ndev->numq_lock);
>   	config = &ndev->config;
>   	err = query_mtu(mdev, &mtu);
>   	if (err)
> @@ -2666,6 +2821,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>   	if (!is_zero_ether_addr(config->mac))
>   		mlx5_mpfs_del_mac(pfmdev, config->mac);
>   err_mtu:
> +	mutex_destroy(&ndev->numq_lock);
>   	mutex_destroy(&ndev->reslock);
>   err_alloc:
>   	put_device(&mvdev->vdev.dev);
> diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
> index fbaab440a484..15f50becfcfb 100644
> --- a/include/linux/mlx5/mlx5_ifc.h
> +++ b/include/linux/mlx5/mlx5_ifc.h
> @@ -94,6 +94,7 @@ enum {
>   enum {
>   	MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b,
>   	MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d,
> +	MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS = 0x001c,
>   	MLX5_OBJ_TYPE_MATCH_DEFINER = 0x0018,
>   	MLX5_OBJ_TYPE_MKEY = 0xff01,
>   	MLX5_OBJ_TYPE_QP = 0xff02,
> diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> index 1a9c9d94cb59..4414ed5b6ed2 100644
> --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> @@ -165,4 +165,43 @@ struct mlx5_ifc_modify_virtio_net_q_out_bits {
>   	struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr;
>   };
>   
> +struct mlx5_ifc_virtio_q_counters_bits {
> +	u8    modify_field_select[0x40];
> +	u8    reserved_at_40[0x40];
> +	u8    received_desc[0x40];
> +	u8    completed_desc[0x40];
> +	u8    error_cqes[0x20];
> +	u8    bad_desc_errors[0x20];
> +	u8    exceed_max_chain[0x20];
> +	u8    invalid_buffer[0x20];
> +	u8    reserved_at_180[0x280];
> +};
> +
> +struct mlx5_ifc_create_virtio_q_counters_in_bits {
> +	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
> +	struct mlx5_ifc_virtio_q_counters_bits virtio_q_counters;
> +};
> +
> +struct mlx5_ifc_create_virtio_q_counters_out_bits {
> +	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
> +	struct mlx5_ifc_virtio_q_counters_bits virtio_q_counters;
> +};
> +
> +struct mlx5_ifc_destroy_virtio_q_counters_in_bits {
> +	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
> +};
> +
> +struct mlx5_ifc_destroy_virtio_q_counters_out_bits {
> +	struct mlx5_ifc_general_obj_out_cmd_hdr_bits hdr;
> +};
> +
> +struct mlx5_ifc_query_virtio_q_counters_in_bits {
> +	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
> +};
> +
> +struct mlx5_ifc_query_virtio_q_counters_out_bits {
> +	struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
> +	struct mlx5_ifc_virtio_q_counters_bits counters;
> +};
> +
>   #endif /* __MLX5_IFC_VDPA_H_ */


* Re: [PATCH v1 0/2] Show statistics for a vdpa device
       [not found] <20220216080022.56707-1-elic@nvidia.com>
       [not found] ` <20220216080022.56707-3-elic@nvidia.com>
@ 2022-03-04 16:06 ` Michael S. Tsirkin
       [not found] ` <20220216080022.56707-2-elic@nvidia.com>
  2 siblings, 0 replies; 20+ messages in thread
From: Michael S. Tsirkin @ 2022-03-04 16:06 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, virtualization, eperezma, si-wei.liu

On Wed, Feb 16, 2022 at 10:00:20AM +0200, Eli Cohen wrote:
> The following two-patch series adds support for reading vendor statistics
> for a vdpa device.
> 
> The first patch lays the groundwork to allow an upstream driver to provide
> statistics in the form of attribute name/attribute value pairs.
> 
> The second patch implements this for mlx5_vdpa which gives received
> descriptors and completed descriptors information for all the
> virtqueues. 
> 
> V0 -> V1:
> 1. Function name changes to emphasize the fact that this is for vendor
> statistics.
> 2. Increase the size of VDPA_ATTR_DEV_QUEUE_INDEX to U32 so it can
> handle the entire range of virtqueue indices. 
> 3. Change output string names to avoid abbreviations.

Jason had a minor comment. Were you going to address it?

> Eli Cohen (2):
>   vdpa: Add support for querying vendor statistics
>   vdpa/mlx5: Add support for reading descriptor statistics
> 
>  drivers/vdpa/mlx5/core/mlx5_vdpa.h |   2 +
>  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 156 +++++++++++++++++++++++++++++
>  drivers/vdpa/vdpa.c                | 129 ++++++++++++++++++++++++
>  include/linux/mlx5/mlx5_ifc.h      |   1 +
>  include/linux/mlx5/mlx5_ifc_vdpa.h |  39 ++++++++
>  include/linux/vdpa.h               |   5 +
>  include/uapi/linux/vdpa.h          |   7 ++
>  7 files changed, 339 insertions(+)
> 
> -- 
> 2.34.1


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
       [not found]     ` <20220217064619.GB86497@mtl-vdi-166.wap.labs.mlnx>
@ 2022-03-04 22:34       ` Si-Wei Liu
       [not found]         ` <DM8PR12MB5400E80073521E898056578BAB089@DM8PR12MB5400.namprd12.prod.outlook.com>
  0 siblings, 1 reply; 20+ messages in thread
From: Si-Wei Liu @ 2022-03-04 22:34 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma

Sorry, I somehow missed this after my break. Please see comments in line.

On 2/16/2022 10:46 PM, Eli Cohen wrote:
> On Wed, Feb 16, 2022 at 10:49:26AM -0800, Si-Wei Liu wrote:
>>
>> On 2/16/2022 12:00 AM, Eli Cohen wrote:
>>> Allow reading vendor statistics of a vdpa device. The specific statistics
>>> data is received by the upstream driver in the form of (attribute
>>> name, attribute value) pairs.
>>>
>>> An example of statistics for mlx5_vdpa device are:
>>>
>>> received_desc - number of descriptors received by the virtqueue
>>> completed_desc - number of descriptors completed by the virtqueue
>>>
>>> A descriptor using indirect buffers is still counted as 1. In addition,
>>> N chained descriptors are counted correctly N times as one would expect.
>>>
>>> A new callback was added to vdpa_config_ops which provides the means for
>>> the vdpa driver to return statistics results.
>>>
>>> The interface allows for reading all the supported virtqueues, including
>>> the control virtqueue if it exists.
>>>
>>> Below are some examples taken from mlx5_vdpa which are introduced in the
>>> following patch:
>>>
>>> 1. Read statistics for the virtqueue at index 1
>>>
>>> $ vdpa dev vstats show vdpa-a qidx 1
>>> vdpa-a:
>>> queue_type tx queue_index 1 received_desc 3844836 completed_desc 3844836
>>>
>>> 2. Read statistics for the virtqueue at index 32
>>> $ vdpa dev vstats show vdpa-a qidx 32
>>> vdpa-a:
>>> queue_type control_vq queue_index 32 received_desc 62 completed_desc 62
>>>
>>> 3. Read statistics for the virtqueue at index 0 with json output
>>> $ vdpa -j dev vstats show vdpa-a qidx 0
>>> {"vstats":{"vdpa-a":{
>>> "queue_type":"rx","queue_index":0,"name":"received_desc","value":417776,\
>>>    "name":"completed_desc","value":417548}}}
>>>
>>> 4. Read statistics for the virtqueue at index 0 with pretty json output
>>> $ vdpa -jp dev vstats show vdpa-a qidx 0
>>> {
>>>       "vstats": {
>>>           "vdpa-a": {
>>>
>>>               "queue_type": "rx",
>> I wonder where this info can be inferred from? I don't see a relevant change in the
>> patch series that helps gather the VDPA_ATTR_DEV_QUEUE_TYPE. Is this an
>> arbitrary string defined by the vendor as well? If so, how is the user
>> expected to consume it?
> The queue type is deduced from the index and whether we have a control
> virtqueue. Even numbers are rx, odd numbers are tx, and if there is CVQ,
> the last one is CVQ.
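
For reference, that deduction amounts to something like the sketch below, 
where queue_type_str() is a hypothetical helper for illustration, not part 
of the patch:

static const char *queue_type_str(u32 idx, bool has_cvq, u32 cvq_idx)
{
	/* the control VQ, when present, is the last (highest) index */
	if (has_cvq && idx == cvq_idx)
		return "control_vq";
	/* data VQs alternate: even indices are rx, odd are tx */
	return (idx & 1) ? "tx" : "rx";
}
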
OK, then the VDPA_ATTR_DEV_QUEUE_TYPE attribute introduced in this patch 
might not be useful at all? And how do you determine in the vdpa tool if 
CVQ is negotiated or not? Looks to me like there are still some loose ends 
I don't quite understand yet.


>
>>>               "queue_index": 0,
>>>               "name": "received_desc",
>>>               "value": 417776,
>>>               "name": "completed_desc",
>>>               "value": 417548
>> Not for this kernel patch, but IMHO it's best to put the name & value
>> pairs in an array instead of flat entries in json's hash/dictionary. The
>> hash entries can be re-ordered deliberately by an external json parsing tool,
>> ending up with inconsistent stat values.
This comment was missed for some reason. Please change the example in the 
log if you agree to address it in the vdpa tool, or justify why keeping the 
order of the json hash/dictionary is fine.

Thanks,
-Siwei

>>
>> Thanks,
>> -Siwei
>>>           }
>>>       }
>>> }
>>>
>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>> ---
>>>    drivers/vdpa/vdpa.c       | 129 ++++++++++++++++++++++++++++++++++++++
>>>    include/linux/vdpa.h      |   5 ++
>>>    include/uapi/linux/vdpa.h |   7 +++
>>>    3 files changed, 141 insertions(+)
>>>
>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
>>> index 9846c9de4bfa..d0ff671baf88 100644
>>> --- a/drivers/vdpa/vdpa.c
>>> +++ b/drivers/vdpa/vdpa.c
>>> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
>>>    	return err;
>>>    }
>>> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
>>> +			       struct genl_info *info, u32 index)
>>> +{
>>> +	int err;
>>> +
>>> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
>>> +		return -EMSGSIZE;
>>> +
>>> +	err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info->extack);
>>> +	if (err)
>>> +		return err;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg,
>>> +			     struct genl_info *info, u32 index)
>>> +{
>>> +	int err;
>>> +
>>> +	if (!vdev->config->get_vendor_vq_stats)
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	err = vdpa_fill_stats_rec(vdev, msg, info, index);
>>> +	if (err)
>>> +		return err;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
>>> +				      struct sk_buff *msg,
>>> +				      struct genl_info *info, u32 index)
>>> +{
>>> +	u32 device_id;
>>> +	void *hdr;
>>> +	int err;
>>> +	u32 portid = info->snd_portid;
>>> +	u32 seq = info->snd_seq;
>>> +	u32 flags = 0;
>>> +
>>> +	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
>>> +			  VDPA_CMD_DEV_VSTATS_GET);
>>> +	if (!hdr)
>>> +		return -EMSGSIZE;
>>> +
>>> +	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
>>> +		err = -EMSGSIZE;
>>> +		goto undo_msg;
>>> +	}
>>> +
>>> +	device_id = vdev->config->get_device_id(vdev);
>>> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
>>> +		err = -EMSGSIZE;
>>> +		goto undo_msg;
>>> +	}
>>> +
>>> +	err = vendor_stats_fill(vdev, msg, info, index);
>>> +
>>> +	genlmsg_end(msg, hdr);
>>> +
>>> +	return err;
>>> +
>>> +undo_msg:
>>> +	genlmsg_cancel(msg, hdr);
>>> +	return err;
>>> +}
>>> +
>>>    static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info)
>>>    {
>>>    	struct vdpa_device *vdev;
>>> @@ -990,6 +1058,60 @@ vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *
>>>    	return msg->len;
>>>    }
>>> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
>>> +					  struct genl_info *info)
>>> +{
>>> +	struct vdpa_device *vdev;
>>> +	struct sk_buff *msg;
>>> +	const char *devname;
>>> +	struct device *dev;
>>> +	u32 index;
>>> +	int err;
>>> +
>>> +	if (!info->attrs[VDPA_ATTR_DEV_NAME])
>>> +		return -EINVAL;
>>> +
>>> +	if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
>>> +		return -EINVAL;
>>> +
>>> +	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
>>> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
>>> +	if (!msg)
>>> +		return -ENOMEM;
>>> +
>>> +	index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
>>> +	mutex_lock(&vdpa_dev_mutex);
>>> +	dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
>>> +	if (!dev) {
>>> +		NL_SET_ERR_MSG_MOD(info->extack, "device not found");
>>> +		err = -ENODEV;
>>> +		goto dev_err;
>>> +	}
>>> +	vdev = container_of(dev, struct vdpa_device, dev);
>>> +	if (!vdev->mdev) {
>>> +		NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device");
>>> +		err = -EINVAL;
>>> +		goto mdev_err;
>>> +	}
>>> +	err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
>>> +	if (!err)
>>> +		err = genlmsg_reply(msg, info);
>>> +
>>> +	put_device(dev);
>>> +	mutex_unlock(&vdpa_dev_mutex);
>>> +
>>> +	if (err)
>>> +		nlmsg_free(msg);
>>> +
>>> +	return err;
>>> +
>>> +mdev_err:
>>> +	put_device(dev);
>>> +dev_err:
>>> +	mutex_unlock(&vdpa_dev_mutex);
>>> +	return err;
>>> +}
>>> +
>>>    static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>>>    	[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
>>>    	[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
>>> @@ -997,6 +1119,7 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>>>    	[VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
>>>    	/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
>>>    	[VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
>>> +	[VDPA_ATTR_DEV_QUEUE_INDEX] = NLA_POLICY_RANGE(NLA_U32, 0, 65535),
>>>    };
>>>    static const struct genl_ops vdpa_nl_ops[] = {
>>> @@ -1030,6 +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
>>>    		.doit = vdpa_nl_cmd_dev_config_get_doit,
>>>    		.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
>>>    	},
>>> +	{
>>> +		.cmd = VDPA_CMD_DEV_VSTATS_GET,
>>> +		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
>>> +		.doit = vdpa_nl_cmd_dev_stats_get_doit,
>>> +		.flags = GENL_ADMIN_PERM,
>>> +	},
>>>    };
>>>    static struct genl_family vdpa_nl_family __ro_after_init = {
>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>>> index 2de442ececae..274203845cfc 100644
>>> --- a/include/linux/vdpa.h
>>> +++ b/include/linux/vdpa.h
>>> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
>>>    			    const struct vdpa_vq_state *state);
>>>    	int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
>>>    			    struct vdpa_vq_state *state);
>>> +	int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx,
>>> +				   struct sk_buff *msg,
>>> +				   struct netlink_ext_ack *extack);
>>>    	struct vdpa_notification_area
>>>    	(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
>>>    	/* vq irq is not expected to be changed once DRIVER_OK is set */
>>> @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
>>>    int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
>>>    void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
>>> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
>>> +
>>>    #endif /* _LINUX_VDPA_H */
>>> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
>>> index 1061d8d2d09d..c5f229a41dc2 100644
>>> --- a/include/uapi/linux/vdpa.h
>>> +++ b/include/uapi/linux/vdpa.h
>>> @@ -18,6 +18,7 @@ enum vdpa_command {
>>>    	VDPA_CMD_DEV_DEL,
>>>    	VDPA_CMD_DEV_GET,		/* can dump */
>>>    	VDPA_CMD_DEV_CONFIG_GET,	/* can dump */
>>> +	VDPA_CMD_DEV_VSTATS_GET,
>>>    };
>>>    enum vdpa_attr {
>>> @@ -46,6 +47,12 @@ enum vdpa_attr {
>>>    	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
>>>    	VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,		/* u32 */
>>>    	VDPA_ATTR_DEV_SUPPORTED_FEATURES,	/* u64 */
>>> +
>>> +	VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
>>> +	VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
>>> +	VDPA_ATTR_DEV_VENDOR_ATTR_NAME,		/* string */
>>> +	VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
>>> +
>>>    	/* new attributes must be added above here */
>>>    	VDPA_ATTR_MAX,
>>>    };


* RE: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
       [not found] ` <20220216080022.56707-2-elic@nvidia.com>
  2022-02-16 18:49   ` [PATCH v1 1/2] vdpa: Add support for querying vendor statistics Si-Wei Liu
  2022-03-03  7:53   ` Jason Wang
@ 2022-03-07 11:03   ` Parav Pandit via Virtualization
  2 siblings, 0 replies; 20+ messages in thread
From: Parav Pandit via Virtualization @ 2022-03-07 11:03 UTC (permalink / raw)
  To: Eli Cohen, mst, jasowang, virtualization, si-wei.liu, eperezma,
	amorenoz, lvivier, sgarzare

Hi Eli,

Sorry for my very delayed response.
Please see below. 

> From: Eli Cohen <elic@nvidia.com>
> Sent: Wednesday, February 16, 2022 1:30 PM
> 
> Allow reading vendor statistics of a vdpa device. The specific statistics data is
> received by the upstream driver in the form of (attribute name, attribute
> value) pairs.
> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
> +			       struct genl_info *info, u32 index) {
> +	int err;
> +
> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
> +		return -EMSGSIZE;
> +
> +	err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info->extack);

There is an implicit assumption here about the placement of netlink attributes, like name1, value1, name2, value2, etc.
Things can break in the future with such message framing in place.

Netlink has a more generic way of addressing this that avoids the strict placement of fields.

A more elegant way in the kernel is to use nested, self-describing attributes.
We also want to use the standard netlink infrastructure built into iproute2 to parse them in a generic way.

So please change it to work like below.
This is a bit of a long response, as it contains a pseudo code example.
(All error checks are omitted to keep the code short.)

overview:
---------------
Each vendor stats entry is a nested entry.
This nested entry contains:
a. the stat name ("rx_desc", "cmpl_desc", etc.)
b. the value of this stat as a u64

All of these individual stats entries are put under a new vstats nested entry.
This enables us to reuse the existing netlink infrastructure for parsing nested
lists of entries, such as the iproute2 mnl_attr_for_each_nested() API.
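
The resulting message layout would then look roughly like this (attribute
names as defined in the pseudo code below):

VDPA_ATTR_DEV_NAME
VDPA_ATTR_DEV_QUEUE_INDEX
VDPA_ATTR_VSTAT_LIST (nested)
  VDPA_ATTR_VSTAT_ENTRY (nested)
    VDPA_ATTR_VSTAT_ENTRY_NAME
    VDPA_ATTR_VSTAT_ENTRY_DATA
  VDPA_ATTR_VSTAT_ENTRY (nested)
    VDPA_ATTR_VSTAT_ENTRY_NAME
    VDPA_ATTR_VSTAT_ENTRY_DATA
  ...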

pseudo code:
------------------
enum {
	[...]
	VDPA_ATTR_VSTAT_LIST,	/* nested, indicating a list of vstat entries */
	VDPA_ATTR_VSTAT_ENTRY,	/* nested, indicating each vstat entry is self-contained */
	VDPA_ATTR_VSTAT_ENTRY_NAME,	/* string name of the entry */
	VDPA_ATTR_VSTAT_ENTRY_DATA,	/* u64 value of the entry */
	MAX,
}

/**
 * vdpa_vstats_entry_fill - API exposed to the vendor driver to fill one
 * vendor-specific stats entry. A vendor driver should call this in a loop,
 * from its get_vendor_vq_stats() callback, once for every valid vendor
 * statistics entry of the specified queue.
 */
int vdpa_vstats_entry_fill(struct sk_buff *msg, const struct vdpa_vstat_entry *entry, u32 q_index)
{
	struct nlattr *vstats_nl_entry;

	/* create a nested attribute in the msg for this entry */
	vstats_nl_entry = nla_nest_start_noflag(msg, VDPA_ATTR_VSTAT_ENTRY);

	/* now fill in the name of the entry and its u64 value */
	nla_put_string(msg, VDPA_ATTR_VSTAT_ENTRY_NAME, entry->name);
	nla_put_u64_64bit(msg, VDPA_ATTR_VSTAT_ENTRY_DATA, entry->val.u64, VDPA_ATTR_PAD);

	/* end this entry's nested attribute */
	nla_nest_end(msg, vstats_nl_entry);
	return 0;
}
EXPORT_SYMBOL(vdpa_vstats_entry_fill);

static int vdpa_vstat_fill(struct sk_buff *msg, struct vdpa_device *vdev,
			   u32 q_index)
{
	struct nlattr *vstats;
	void *hdr;
	int ret;

	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_DEV_VSTATS_GET);

	/* put the device name also, so that the same routine can work
	 * for the dumpit too in the future, for all the queues
	 */
	nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev));

	nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, q_index);

	/* start the list attribute to indicate that a list of nested entries follows */
	vstats = nla_nest_start_noflag(msg, VDPA_ATTR_VSTAT_LIST);

	ret = vdev->config->get_vendor_vq_stats(msg, vdev, q_index);
	nla_nest_end(msg, vstats);

	genlmsg_end(msg, hdr);
	return ret;
}

iproute2 can then leverage mnl_attr_for_each_nested(), like below.

static void vstats_show(...)
{
	mnl_attr_for_each_nested(cur_attr, nla_param[VDPA_ATTR_VSTAT_LIST]) {
		vdpa_vstat_entry_parse(cur_attr);
	}
}

static void vdpa_vstat_entry_parse(struct nlattr *nl)
{
	mnl_attr_parse_nested(nl, cb, nla_value);
	/* get the value of each entry placed by the driver */
}
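
With that API, a vendor driver's get_vendor_vq_stats() callback reduces to
something like the sketch below (the entries[] table and its fields are
assumptions, shown only to illustrate the calling convention):

static int mlx5_vdpa_get_vendor_vq_stats(struct sk_buff *msg,
					 struct vdpa_device *vdev, u32 q_index)
{
	struct vdpa_vstat_entry entries[2] = {
		{ .name = "received_desc" },
		{ .name = "completed_desc" },
	};
	int i, err;

	/* query the device and fill entries[i].val for queue q_index here */

	for (i = 0; i < 2; i++) {
		err = vdpa_vstats_entry_fill(msg, &entries[i], q_index);
		if (err)
			return err;
	}
	return 0;
}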


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
       [not found]         ` <DM8PR12MB5400E80073521E898056578BAB089@DM8PR12MB5400.namprd12.prod.outlook.com>
@ 2022-03-08  6:15           ` Si-Wei Liu
       [not found]             ` <DM8PR12MB5400E03D7AD7833CEBF8DF9DAB099@DM8PR12MB5400.namprd12.prod.outlook.com>
  0 siblings, 1 reply; 20+ messages in thread
From: Si-Wei Liu @ 2022-03-08  6:15 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma



On 3/6/2022 11:57 PM, Eli Cohen wrote:
>
>> -----Original Message-----
>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>> Sent: Saturday, March 5, 2022 12:34 AM
>> To: Eli Cohen <elic@nvidia.com>
>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-
>> foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit <parav@nvidia.com>
>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
>>
>> Sorry, I somehow missed this after my break. Please see comments in line.
>>
>> On 2/16/2022 10:46 PM, Eli Cohen wrote:
>>> On Wed, Feb 16, 2022 at 10:49:26AM -0800, Si-Wei Liu wrote:
>>>> On 2/16/2022 12:00 AM, Eli Cohen wrote:
>>>>> Allow reading vendor statistics of a vdpa device. The specific
>>>>> statistics data is received by the upstream driver in the form of
>>>>> (attribute name, attribute value) pairs.
>>>>>
>>>>> An example of statistics for mlx5_vdpa device are:
>>>>>
>>>>> received_desc - number of descriptors received by the virtqueue
>>>>> completed_desc - number of descriptors completed by the virtqueue
>>>>>
>>>>> A descriptor using indirect buffers is still counted as 1. In
>>>>> addition, N chained descriptors are counted correctly N times as one
>>>>> would expect.
>>>>> A new callback was added to vdpa_config_ops which provides the means
>>>>> for the vdpa driver to return statistics results.
>>>>>
>>>>> The interface allows for reading all the supported virtqueues,
>>>>> including the control virtqueue if it exists.
>>>>>
>>>>> Below are some examples taken from mlx5_vdpa which are introduced in
>>>>> the following patch:
>>>>>
>>>>> 1. Read statistics for the virtqueue at index 1
>>>>>
>>>>> $ vdpa dev vstats show vdpa-a qidx 1
>>>>> vdpa-a:
>>>>> queue_type tx queue_index 1 received_desc 3844836 completed_desc
>>>>> 3844836
>>>>>
>>>>> 2. Read statistics for the virtqueue at index 32 $ vdpa dev vstats
>>>>> show vdpa-a qidx 32
>>>>> vdpa-a:
>>>>> queue_type control_vq queue_index 32 received_desc 62 completed_desc
>>>>> 62
>>>>>
>>>>> 3. Read statistics for the virtqueue at index 0 with json output $
>>>>> vdpa -j dev vstats show vdpa-a qidx 0 {"vstats":{"vdpa-a":{
>>>>>
>> "queue_type":"rx","queue_index":0,"name":"received_desc","value":417776,\
>>>>>     "name":"completed_desc","value":417548}}}
>>>>>
>>>>> 4. Read statistics for the virtqueue at index 0 with pretty json
>>>>> output $ vdpa -jp dev vstats show vdpa-a qidx 0 {
>>>>>        "vstats": {
>>>>>            "vdpa-a": {
>>>>>
>>>>>                "queue_type": "rx",
>>>> I wonder where this info can be inferred from? I don't see a relevant change
>>>> in the patch series that helps gather the VDPA_ATTR_DEV_QUEUE_TYPE.
>>>> Is this an arbitrary string defined by the vendor as well? If so, how
>>>> is the user expected to consume it?
>>> The queue type is deduced from the index and whether we have a control
>>> virtqueue. Even numbers are rx, odd numbers are tx, and if there is
>>> CVQ, the last one is CVQ.
>> OK, then the VDPA_ATTR_DEV_QUEUE_TYPE attribute introduced in this patch
>> might not be useful at all?
> Right, will remove.
>
>> And how do you determine in the vdpa tool if CVQ
>> is negotiated or not?
> I make a netlink call to get the same information as "vdpa dev config show" retrieves. I use the negotiated features to determine if a CVQ is available. If it is, the number of VQs equals the control VQ index. So there are two netlink calls under the hood.
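
For reference, that check amounts to something like the sketch below
(illustrative pseudo-C; the variable names are assumptions, with the values
taken from the netlink config reply rather than from the actual tool code):

	bool has_cvq = negotiated_features & (1ULL << VIRTIO_NET_F_CTRL_VQ);
	/* with F_MQ, data VQs = 2 * max_virtqueue_pairs; otherwise 2 */
	u32 num_data_vqs = (negotiated_features & (1ULL << VIRTIO_NET_F_MQ)) ?
			   2 * max_virtqueue_pairs : 2;
	u32 cvq_idx = has_cvq ? num_data_vqs : VDPA_INVAL_QUEUE_INDEX;
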
The vdpa_dev_mutex lock won't be held across the two separate netlink 
calls, and it may end up with inconsistent state - theoretically, the 
first call could see CVQ negotiated, but the later 
get_vendor_vq_stats() call on the cvq might get -EINVAL due to a 
device reset in between. Can the negotiated status and the stat query 
be done within one single netlink call?

What worries me is that a queue index that is dynamic and depends on the 
negotiation status would be quite hard for the host admin to follow. The 
guest may or may not advertise F_MQ and/or F_CTRL_VQ across various 
phases, e.g. firmware (UEFI), boot loader (grub), until the OS driver is 
up and running, all of which can be invisible to the host admin. For the 
most part it's not easy to script against and predict a queue index that 
can change from time to time. Can we define a host-predictable queue 
index order that is independent of any guest-negotiated state?

>
>> Looks to me like there are still some loose ends I don't quite
>> understand yet.
>>
>>
>>>>>                "queue_index": 0,
> I think this can be removed since the command is for a specific index.
>
>>>>>                "name": "received_desc",
>>>>>                "value": 417776,
>>>>>                "name": "completed_desc",
>>>>>                "value": 417548
>>>> Not for this kernel patch, but IMHO it's best to put the name &
>>>> value pairs in an array instead of flat entries in json's
>>>> hash/dictionary. The hash entries can be re-ordered deliberately by an
>>>> external json parsing tool, ending up with inconsistent stat values.
>> This comment was missed for some reason. Please change the example in the log
>> if you agree to address it in the vdpa tool, or justify why keeping the
>> order of the json hash/dictionary is fine.
> Sorry for skipping this comment.
> Do you mean to present the information like:
> "received_desc": 417776,
> "completed_desc": 417548,
I mean the following presentation:

$ vdpa -jp dev vstats show vdpa-a qidx 0
{
     "vstats": {
         "vdpa-a": {
             "queue_stats": [{
                 "queue_index": 0,
                 "queue_type": "rx",
                 "stat_name": [ "received_desc","completed_desc" ],
                 "stat_value": [ 417776,417548 ],
             }]
         }
     }
}

I think Parav had similar suggestion, too.

Thanks,
-Siwei

>
>> Thanks,
>> -Siwei
>>
>>>> Thanks,
>>>> -Siwei
>>>>>            }
>>>>>        }
>>>>> }
>>>>>
>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>> ---
>>>>>     drivers/vdpa/vdpa.c       | 129
>> ++++++++++++++++++++++++++++++++++++++
>>>>>     include/linux/vdpa.h      |   5 ++
>>>>>     include/uapi/linux/vdpa.h |   7 +++
>>>>>     3 files changed, 141 insertions(+)
>>>>>
>>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index
>>>>> 9846c9de4bfa..d0ff671baf88 100644
>>>>> --- a/drivers/vdpa/vdpa.c
>>>>> +++ b/drivers/vdpa/vdpa.c
>>>>> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device *vdev,
>> struct sk_buff *msg, u32 portid,
>>>>>     	return err;
>>>>>     }
>>>>> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff
>> *msg,
>>>>> +			       struct genl_info *info, u32 index) {
>>>>> +	int err;
>>>>> +
>>>>> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
>>>>> +		return -EMSGSIZE;
>>>>> +
>>>>> +	err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info-
>>> extack);
>>>>> +	if (err)
>>>>> +		return err;
>>>>> +
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>> +static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg,
>>>>> +			     struct genl_info *info, u32 index) {
>>>>> +	int err;
>>>>> +
>>>>> +	if (!vdev->config->get_vendor_vq_stats)
>>>>> +		return -EOPNOTSUPP;
>>>>> +
>>>>> +	err = vdpa_fill_stats_rec(vdev, msg, info, index);
>>>>> +	if (err)
>>>>> +		return err;
>>>>> +
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
>>>>> +				      struct sk_buff *msg,
>>>>> +				      struct genl_info *info, u32 index) {
>>>>> +	u32 device_id;
>>>>> +	void *hdr;
>>>>> +	int err;
>>>>> +	u32 portid = info->snd_portid;
>>>>> +	u32 seq = info->snd_seq;
>>>>> +	u32 flags = 0;
>>>>> +
>>>>> +	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
>>>>> +			  VDPA_CMD_DEV_VSTATS_GET);
>>>>> +	if (!hdr)
>>>>> +		return -EMSGSIZE;
>>>>> +
>>>>> +	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev-
>>> dev))) {
>>>>> +		err = -EMSGSIZE;
>>>>> +		goto undo_msg;
>>>>> +	}
>>>>> +
>>>>> +	device_id = vdev->config->get_device_id(vdev);
>>>>> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
>>>>> +		err = -EMSGSIZE;
>>>>> +		goto undo_msg;
>>>>> +	}
>>>>> +
>>>>> +	err = vendor_stats_fill(vdev, msg, info, index);
>>>>> +
>>>>> +	genlmsg_end(msg, hdr);
>>>>> +
>>>>> +	return err;
>>>>> +
>>>>> +undo_msg:
>>>>> +	genlmsg_cancel(msg, hdr);
>>>>> +	return err;
>>>>> +}
>>>>> +
>>>>>     static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct
>> genl_info *info)
>>>>>     {
>>>>>     	struct vdpa_device *vdev;
>>>>> @@ -990,6 +1058,60 @@ vdpa_nl_cmd_dev_config_get_dumpit(struct
>> sk_buff *msg, struct netlink_callback *
>>>>>     	return msg->len;
>>>>>     }
>>>>> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
>>>>> +					  struct genl_info *info)
>>>>> +{
>>>>> +	struct vdpa_device *vdev;
>>>>> +	struct sk_buff *msg;
>>>>> +	const char *devname;
>>>>> +	struct device *dev;
>>>>> +	u32 index;
>>>>> +	int err;
>>>>> +
>>>>> +	if (!info->attrs[VDPA_ATTR_DEV_NAME])
>>>>> +		return -EINVAL;
>>>>> +
>>>>> +	if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
>>>>> +		return -EINVAL;
>>>>> +
>>>>> +	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
>>>>> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
>>>>> +	if (!msg)
>>>>> +		return -ENOMEM;
>>>>> +
>>>>> +	index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
>>>>> +	mutex_lock(&vdpa_dev_mutex);
>>>>> +	dev = bus_find_device(&vdpa_bus, NULL, devname,
>> vdpa_name_match);
>>>>> +	if (!dev) {
>>>>> +		NL_SET_ERR_MSG_MOD(info->extack, "device not found");
>>>>> +		err = -ENODEV;
>>>>> +		goto dev_err;
>>>>> +	}
>>>>> +	vdev = container_of(dev, struct vdpa_device, dev);
>>>>> +	if (!vdev->mdev) {
>>>>> +		NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa
>> device");
>>>>> +		err = -EINVAL;
>>>>> +		goto mdev_err;
>>>>> +	}
>>>>> +	err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
>>>>> +	if (!err)
>>>>> +		err = genlmsg_reply(msg, info);
>>>>> +
>>>>> +	put_device(dev);
>>>>> +	mutex_unlock(&vdpa_dev_mutex);
>>>>> +
>>>>> +	if (err)
>>>>> +		nlmsg_free(msg);
>>>>> +
>>>>> +	return err;
>>>>> +
>>>>> +mdev_err:
>>>>> +	put_device(dev);
>>>>> +dev_err:
>>>>> +	mutex_unlock(&vdpa_dev_mutex);
>>>>> +	return err;
>>>>> +}
>>>>> +
>>>>>     static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>>>>>     	[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
>>>>>     	[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING }, @@ -
>> 997,6
>>>>> +1119,7 @@ static const struct nla_policy
>> vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>>>>>     	[VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
>>>>>     	/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
>>>>>     	[VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
>>>>> +	[VDPA_ATTR_DEV_QUEUE_INDEX] = NLA_POLICY_RANGE(NLA_U32, 0,
>> 65535),
>>>>>     };
>>>>>     static const struct genl_ops vdpa_nl_ops[] = { @@ -1030,6
>>>>> +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
>>>>>     		.doit = vdpa_nl_cmd_dev_config_get_doit,
>>>>>     		.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
>>>>>     	},
>>>>> +	{
>>>>> +		.cmd = VDPA_CMD_DEV_VSTATS_GET,
>>>>> +		.validate = GENL_DONT_VALIDATE_STRICT |
>> GENL_DONT_VALIDATE_DUMP,
>>>>> +		.doit = vdpa_nl_cmd_dev_stats_get_doit,
>>>>> +		.flags = GENL_ADMIN_PERM,
>>>>> +	},
>>>>>     };
>>>>>     static struct genl_family vdpa_nl_family __ro_after_init = { diff
>>>>> --git a/include/linux/vdpa.h b/include/linux/vdpa.h index
>>>>> 2de442ececae..274203845cfc 100644
>>>>> --- a/include/linux/vdpa.h
>>>>> +++ b/include/linux/vdpa.h
>>>>> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
>>>>>     			    const struct vdpa_vq_state *state);
>>>>>     	int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
>>>>>     			    struct vdpa_vq_state *state);
>>>>> +	int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx,
>>>>> +				   struct sk_buff *msg,
>>>>> +				   struct netlink_ext_ack *extack);
>>>>>     	struct vdpa_notification_area
>>>>>     	(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
>>>>>     	/* vq irq is not expected to be changed once DRIVER_OK is set */
>>>>> @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
>>>>>     int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
>>>>>     void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
>>>>> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
>>>>> +
>>>>>     #endif /* _LINUX_VDPA_H */
>>>>> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
>>>>> index 1061d8d2d09d..c5f229a41dc2 100644
>>>>> --- a/include/uapi/linux/vdpa.h
>>>>> +++ b/include/uapi/linux/vdpa.h
>>>>> @@ -18,6 +18,7 @@ enum vdpa_command {
>>>>>     	VDPA_CMD_DEV_DEL,
>>>>>     	VDPA_CMD_DEV_GET,		/* can dump */
>>>>>     	VDPA_CMD_DEV_CONFIG_GET,	/* can dump */
>>>>> +	VDPA_CMD_DEV_VSTATS_GET,
>>>>>     };
>>>>>     enum vdpa_attr {
>>>>> @@ -46,6 +47,12 @@ enum vdpa_attr {
>>>>>     	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
>>>>>     	VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,		/* u32 */
>>>>>     	VDPA_ATTR_DEV_SUPPORTED_FEATURES,	/* u64 */
>>>>> +
>>>>> +	VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
>>>>> +	VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
>>>>> +	VDPA_ATTR_DEV_VENDOR_ATTR_NAME,		/* string */
>>>>> +	VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
>>>>> +
>>>>>     	/* new attributes must be added above here */
>>>>>     	VDPA_ATTR_MAX,
>>>>>     };


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
       [not found]             ` <DM8PR12MB5400E03D7AD7833CEBF8DF9DAB099@DM8PR12MB5400.namprd12.prod.outlook.com>
@ 2022-03-09  2:39               ` Jason Wang
  2022-03-09  3:32               ` Si-Wei Liu
  1 sibling, 0 replies; 20+ messages in thread
From: Jason Wang @ 2022-03-09  2:39 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma, Si-Wei Liu



On Tue, Mar 8, 2022 at 10:13 PM Eli Cohen <elic@nvidia.com> wrote:

>
>
> > -----Original Message-----
> > From: Si-Wei Liu <si-wei.liu@oracle.com>
> > Sent: Tuesday, March 8, 2022 8:16 AM
> > To: Eli Cohen <elic@nvidia.com>
> > Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-
> > foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
> > lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit <parav@nvidia.com>
> > Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor
> statistics
> >
> >
> >
> > On 3/6/2022 11:57 PM, Eli Cohen wrote:
> > >
> > >> -----Original Message-----
> > >> From: Si-Wei Liu <si-wei.liu@oracle.com>
> > >> Sent: Saturday, March 5, 2022 12:34 AM
> > >> To: Eli Cohen <elic@nvidia.com>
> > >> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-
> > >> foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
> > >> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit
> > >> <parav@nvidia.com>
> > >> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor
> > >> statistics
> > >>
> > >> Sorry, I somehow missed this after my break. Please see comments in
> line.
> > >>
> > >> On 2/16/2022 10:46 PM, Eli Cohen wrote:
> > >>> On Wed, Feb 16, 2022 at 10:49:26AM -0800, Si-Wei Liu wrote:
> > >>>> On 2/16/2022 12:00 AM, Eli Cohen wrote:
> > >>>>> Allows to read vendor statistics of a vdpa device. The specific
> > >>>>> statistics data is received by the upstream driver in the form of
> > >>>>> an (attribute name, attribute value) pairs.
> > >>>>>
> > >>>>> An example of statistics for mlx5_vdpa device are:
> > >>>>>
> > >>>>> received_desc - number of descriptors received by the virtqueue
> > >>>>> completed_desc - number of descriptors completed by the virtqueue
> > >>>>>
> > >>>>> A descriptor using indirect buffers is still counted as 1. In
> > >>>>> addition, N chained descriptors are counted correctly N times as
> > >>>>> one
> > >> would expect.
> > >>>>> A new callback was added to vdpa_config_ops which provides the
> > >>>>> means for the vdpa driver to return statistics results.
> > >>>>>
> > >>>>> The interface allows for reading all the supported virtqueues,
> > >>>>> including the control virtqueue if it exists.
> > >>>>>
> > >>>>> Below are some examples taken from mlx5_vdpa which are introduced
> > >>>>> in the following patch:
> > >>>>>
> > >>>>> 1. Read statistics for the virtqueue at index 1
> > >>>>>
> > >>>>> $ vdpa dev vstats show vdpa-a qidx 1
> > >>>>> vdpa-a:
> > >>>>> queue_type tx queue_index 1 received_desc 3844836 completed_desc
> > >>>>> 3844836
> > >>>>>
> > >>>>> 2. Read statistics for the virtqueue at index 32 $ vdpa dev vstats
> > >>>>> show vdpa-a qidx 32
> > >>>>> vdpa-a:
> > >>>>> queue_type control_vq queue_index 32 received_desc 62
> > >>>>> completed_desc
> > >>>>> 62
> > >>>>>
> > >>>>> 3. Read statistics for the virtqueue at index 0 with json output
> > >>>>> $ vdpa -j dev vstats show vdpa-a qidx 0 {"vstats":{"vdpa-a":{
> > >>>>>
> > >> "queue_type":"rx","queue_index":0,"name":"received_desc","value":4177
> > >> 76,\
> > >>>>>     "name":"completed_desc","value":417548}}}
> > >>>>>
> > >>>>> 4. Read statistics for the virtqueue at index 0 with pretty json
> > >>>>> output $ vdpa -jp dev vstats show vdpa-a qidx 0 {
> > >>>>>        "vstats": {
> > >>>>>            "vdpa-a": {
> > >>>>>
> > >>>>>                "queue_type": "rx",
> > >>>> I wonder where this info can be inferred? I don't see relevant
> > >>>> change in the patch series that helps gather the
> > VDPA_ATTR_DEV_QUEUE_TYPE?
> > >>>> Is this an arbitrary string defined by the vendor as well? If so,
> > >>>> how does the user expect to consume it?
> > >>> The queue type is deduced from the index and whether we have a control
> > >>> virtqueue. Even numbers are rx, odd numbers are tx, and if there is a
> > >>> CVQ, the last one is CVQ.
> > >> OK, then VDPA_ATTR_DEV_QUEUE_TYPE attribute introduced in this patch
> > >> might not be useful at all?
> > > Right, will remove.
> > >
> > >> And how do you determine in the vdpa tool if CVQ is negotiated or
> > >> not?
> > > I make a netlink call to get the same information as " vdpa dev config
> show"
> > retrieves. I use the negotiated features to determine if a CVQ is
> available. If it
> > is, the number of VQs equals the control VQ index. So there are two
> netlink
> > calls under the hood.
> > The lock vdpa_dev_mutex won't hold across the two separate netlink
> calls, and
> > it may end up with inconsistent state - theoretically things could
> happen like
> > that the first call gets CVQ negotiated, but the later call for
> > get_vendor_vq_stats() on the cvq might get -EINVAL due to device reset.
> Can
> > the negotiated status and stat query be done within one single netlink
> call?
>
> I see your concern.
> The only reason I do the extra call is to know if we have a control VQ and
> what
> index it is, just to print a descriptive string telling whether it's an
> rx, tx or control VQ.
>
> So the cure can be simple. Let's have a new attribute that returns the
> type of
> virtqueue. I think Jason did not like the idea of communicating the kind
> of VQ
> from kernel to userspace but under these circumstances, maybe he would
> approve.
> Jason?
>


I may be missing something, but I still don't get the reason for this. I
think the atomicity could be achieved by using a single netlink call. Then
userspace could deduce the queue type based on the negotiated features.

As for returning the type, it would mean the vdpa core has device-specific
knowledge, which still seems sub-optimal.
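
Roughly, the deduction in the vdpa tool could then look like this
(hypothetical userspace sketch; both inputs would come out of the single
netlink reply):

#include <stdint.h>

#define VIRTIO_NET_F_CTRL_VQ	17	/* feature bit, per the virtio spec */

/* Classify a queue from the negotiated features and the number of data
 * VQs, following the even=rx / odd=tx / last=control convention.
 */
static const char *vq_type(uint16_t idx, uint64_t negotiated_features,
			   uint16_t num_data_vqs)
{
	if ((negotiated_features & (1ULL << VIRTIO_NET_F_CTRL_VQ)) &&
	    idx == num_data_vqs)
		return "control_vq";
	return (idx & 1) ? "tx" : "rx";
}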

Thanks



>
> >
> > What worried me is that the queue index being dynamic and depended on
> > negotiation status would make host admin user quite hard to follow. The
> guest
> > may or may not advertise F_MQ and/or F_CTRL_VQ across various phases,
> e.g.
> > firmware (UEFI), boot loader (grub) till OS driver is up and running,
> which can
> > be agnostic to host admin. For most of the part it's not easy to script
> and
> > predict the queue index which can change from time to time. Can we define
> > the order of host predictable queue index, which is independent from any
> > guest negotiated state?
> >
> > >
> > >> Looks to me there are still some loose ends I don't quite yet
> > >> understand.
> > >>
> > >>
> > >>>>>                "queue_index": 0,
> > > I think this can be removed since the command is for a specific index.
> > >
> > >>>>>                "name": "received_desc",
> > >>>>>                "value": 417776,
> > >>>>>                "name": "completed_desc",
> > >>>>>                "value": 417548
> > >>>> Not for this kernel patch, but IMHO it's the best to put the name &
> > >>>> value pairs in an array instead of flat entries in json's
> > >>>> hash/dictionary. The hash entries can be re-ordered deliberately by
> > >>>> external json parsing tool, ending up with inconsistent stat values.
> > >> This comment was missed for some reason. Please change the example in
> > >> the log if you agree to address it in the vdpa tool. Or justify why
> > >> relying on the order of the json hash/dictionary is fine.
> > > Sorry for skipping this comment.
> > > Do you mean to present the information like:
> > > "received_desc": 417776,
> > > "completed_desc": 417548,
> > I mean the following presentation:
> >
> > $ vdpa -jp dev vstats show vdpa-a qidx 0 {
> >      "vstats": {
> >          "vdpa-a": {
> >              "queue_stats": [{
> >                  "queue_index": 0,
> >                  "queue_type": "rx",
> >                  "stat_name": [ "received_desc","completed_desc" ],
> >                  "stat_value": [ 417776,417548 ]
> >              }]
> >          }
> >      }
> > }
> >
> > I think Parav had similar suggestion, too.
> >
> > Thanks,
> > -Siwei
> >
> > >
> > >> Thanks,
> > >> -Siwei
> > >>
> > >>>> Thanks,
> > >>>> -Siwei
> > >>>>>            }
> > >>>>>        }
> > >>>>> }
> > >>>>>
> > >>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
> > >>>>> ---
> > >>>>>     drivers/vdpa/vdpa.c       | 129
> > >> ++++++++++++++++++++++++++++++++++++++
> > >>>>>     include/linux/vdpa.h      |   5 ++
> > >>>>>     include/uapi/linux/vdpa.h |   7 +++
> > >>>>>     3 files changed, 141 insertions(+)
> > >>>>>
> > >>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index
> > >>>>> 9846c9de4bfa..d0ff671baf88 100644
> > >>>>> --- a/drivers/vdpa/vdpa.c
> > >>>>> +++ b/drivers/vdpa/vdpa.c
> > >>>>> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device
> > >>>>> *vdev,
> > >> struct sk_buff *msg, u32 portid,
> > >>>>>         return err;
> > >>>>>     }
> > >>>>> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct
> > >>>>> +sk_buff
> > >> *msg,
> > >>>>> +                              struct genl_info *info, u32 index) {
> > >>>>> +       int err;
> > >>>>> +
> > >>>>> +       if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
> > >>>>> +               return -EMSGSIZE;
> > >>>>> +
> > >>>>> +       err = vdev->config->get_vendor_vq_stats(vdev, index, msg,
> info-
> > >>> extack);
> > >>>>> +       if (err)
> > >>>>> +               return err;
> > >>>>> +
> > >>>>> +       return 0;
> > >>>>> +}
> > >>>>> +
> > >>>>> +static int vendor_stats_fill(struct vdpa_device *vdev, struct
> sk_buff
> > *msg,
> > >>>>> +                            struct genl_info *info, u32 index) {
> > >>>>> +       int err;
> > >>>>> +
> > >>>>> +       if (!vdev->config->get_vendor_vq_stats)
> > >>>>> +               return -EOPNOTSUPP;
> > >>>>> +
> > >>>>> +       err = vdpa_fill_stats_rec(vdev, msg, info, index);
> > >>>>> +       if (err)
> > >>>>> +               return err;
> > >>>>> +
> > >>>>> +       return 0;
> > >>>>> +}
> > >>>>> +
> > >>>>> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
> > >>>>> +                                     struct sk_buff *msg,
> > >>>>> +                                     struct genl_info *info, u32
> index) {
> > >>>>> +       u32 device_id;
> > >>>>> +       void *hdr;
> > >>>>> +       int err;
> > >>>>> +       u32 portid = info->snd_portid;
> > >>>>> +       u32 seq = info->snd_seq;
> > >>>>> +       u32 flags = 0;
> > >>>>> +
> > >>>>> +       hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
> > >>>>> +                         VDPA_CMD_DEV_VSTATS_GET);
> > >>>>> +       if (!hdr)
> > >>>>> +               return -EMSGSIZE;
> > >>>>> +
> > >>>>> +       if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev-
> > >>> dev))) {
> > >>>>> +               err = -EMSGSIZE;
> > >>>>> +               goto undo_msg;
> > >>>>> +       }
> > >>>>> +
> > >>>>> +       device_id = vdev->config->get_device_id(vdev);
> > >>>>> +       if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
> > >>>>> +               err = -EMSGSIZE;
> > >>>>> +               goto undo_msg;
> > >>>>> +       }
> > >>>>> +
> > >>>>> +       err = vendor_stats_fill(vdev, msg, info, index);
> > >>>>> +
> > >>>>> +       genlmsg_end(msg, hdr);
> > >>>>> +
> > >>>>> +       return err;
> > >>>>> +
> > >>>>> +undo_msg:
> > >>>>> +       genlmsg_cancel(msg, hdr);
> > >>>>> +       return err;
> > >>>>> +}
> > >>>>> +
> > >>>>>     static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff
> > >>>>> *skb, struct
> > >> genl_info *info)
> > >>>>>     {
> > >>>>>         struct vdpa_device *vdev;
> > >>>>> @@ -990,6 +1058,60 @@ vdpa_nl_cmd_dev_config_get_dumpit(struct
> > >> sk_buff *msg, struct netlink_callback *
> > >>>>>         return msg->len;
> > >>>>>     }
> > >>>>> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
> > >>>>> +                                         struct genl_info *info)
> > >>>>> +{
> > >>>>> +       struct vdpa_device *vdev;
> > >>>>> +       struct sk_buff *msg;
> > >>>>> +       const char *devname;
> > >>>>> +       struct device *dev;
> > >>>>> +       u32 index;
> > >>>>> +       int err;
> > >>>>> +
> > >>>>> +       if (!info->attrs[VDPA_ATTR_DEV_NAME])
> > >>>>> +               return -EINVAL;
> > >>>>> +
> > >>>>> +       if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
> > >>>>> +               return -EINVAL;
> > >>>>> +
> > >>>>> +       devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
> > >>>>> +       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > >>>>> +       if (!msg)
> > >>>>> +               return -ENOMEM;
> > >>>>> +
> > >>>>> +       index =
> nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
> > >>>>> +       mutex_lock(&vdpa_dev_mutex);
> > >>>>> +       dev = bus_find_device(&vdpa_bus, NULL, devname,
> > >> vdpa_name_match);
> > >>>>> +       if (!dev) {
> > >>>>> +               NL_SET_ERR_MSG_MOD(info->extack, "device not
> found");
> > >>>>> +               err = -ENODEV;
> > >>>>> +               goto dev_err;
> > >>>>> +       }
> > >>>>> +       vdev = container_of(dev, struct vdpa_device, dev);
> > >>>>> +       if (!vdev->mdev) {
> > >>>>> +               NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa
> > >> device");
> > >>>>> +               err = -EINVAL;
> > >>>>> +               goto mdev_err;
> > >>>>> +       }
> > >>>>> +       err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
> > >>>>> +       if (!err)
> > >>>>> +               err = genlmsg_reply(msg, info);
> > >>>>> +
> > >>>>> +       put_device(dev);
> > >>>>> +       mutex_unlock(&vdpa_dev_mutex);
> > >>>>> +
> > >>>>> +       if (err)
> > >>>>> +               nlmsg_free(msg);
> > >>>>> +
> > >>>>> +       return err;
> > >>>>> +
> > >>>>> +mdev_err:
> > >>>>> +       put_device(dev);
> > >>>>> +dev_err:
> > >>>>> +       mutex_unlock(&vdpa_dev_mutex);
> > >>>>> +       return err;
> > >>>>> +}
> > >>>>> +
> > >>>>>     static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX +
> 1] = {
> > >>>>>         [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type =
> > NLA_NUL_STRING },
> > >>>>>         [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING
> > }, @@ -
> > >> 997,6
> > >>>>> +1119,7 @@ static const struct nla_policy
> > >> vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
> > >>>>>         [VDPA_ATTR_DEV_NET_CFG_MACADDR] =
> > NLA_POLICY_ETH_ADDR,
> > >>>>>         /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
> > >>>>>         [VDPA_ATTR_DEV_NET_CFG_MTU] =
> > NLA_POLICY_MIN(NLA_U16, 68),
> > >>>>> +       [VDPA_ATTR_DEV_QUEUE_INDEX] = NLA_POLICY_RANGE(NLA_U32, 0,
> > >> 65535),
> > >>>>>     };
> > >>>>>     static const struct genl_ops vdpa_nl_ops[] = { @@ -1030,6
> > >>>>> +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
> > >>>>>                 .doit = vdpa_nl_cmd_dev_config_get_doit,
> > >>>>>                 .dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
> > >>>>>         },
> > >>>>> +       {
> > >>>>> +               .cmd = VDPA_CMD_DEV_VSTATS_GET,
> > >>>>> +               .validate = GENL_DONT_VALIDATE_STRICT |
> > >> GENL_DONT_VALIDATE_DUMP,
> > >>>>> +               .doit = vdpa_nl_cmd_dev_stats_get_doit,
> > >>>>> +               .flags = GENL_ADMIN_PERM,
> > >>>>> +       },
> > >>>>>     };
> > >>>>>     static struct genl_family vdpa_nl_family __ro_after_init = {
> > >>>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index
> > >>>>> 2de442ececae..274203845cfc 100644
> > >>>>> --- a/include/linux/vdpa.h
> > >>>>> +++ b/include/linux/vdpa.h
> > >>>>> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
> > >>>>>                             const struct vdpa_vq_state *state);
> > >>>>>         int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
> > >>>>>                             struct vdpa_vq_state *state);
> > >>>>> +       int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16
> idx,
> > >>>>> +                                  struct sk_buff *msg,
> > >>>>> +                                  struct netlink_ext_ack *extack);
> > >>>>>         struct vdpa_notification_area
> > >>>>>         (*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
> > >>>>>         /* vq irq is not expected to be changed once DRIVER_OK is
> set
> > >>>>> */ @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
> > >>>>>     int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
> > >>>>>     void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
> > >>>>> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
> > >>>>> +
> > >>>>>     #endif /* _LINUX_VDPA_H */
> > >>>>> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
> > >>>>> index 1061d8d2d09d..c5f229a41dc2 100644
> > >>>>> --- a/include/uapi/linux/vdpa.h
> > >>>>> +++ b/include/uapi/linux/vdpa.h
> > >>>>> @@ -18,6 +18,7 @@ enum vdpa_command {
> > >>>>>         VDPA_CMD_DEV_DEL,
> > >>>>>         VDPA_CMD_DEV_GET,               /* can dump */
> > >>>>>         VDPA_CMD_DEV_CONFIG_GET,        /* can dump */
> > >>>>> +       VDPA_CMD_DEV_VSTATS_GET,
> > >>>>>     };
> > >>>>>     enum vdpa_attr {
> > >>>>> @@ -46,6 +47,12 @@ enum vdpa_attr {
> > >>>>>         VDPA_ATTR_DEV_NEGOTIATED_FEATURES,      /* u64 */
> > >>>>>         VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,          /*
> > u32 */
> > >>>>>         VDPA_ATTR_DEV_SUPPORTED_FEATURES,       /* u64 */
> > >>>>> +
> > >>>>> +       VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
> > >>>>> +       VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
> > >>>>> +       VDPA_ATTR_DEV_VENDOR_ATTR_NAME,         /* string */
> > >>>>> +       VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
> > >>>>> +
> > >>>>>         /* new attributes must be added above here */
> > >>>>>         VDPA_ATTR_MAX,
> > >>>>>     };
>
>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
       [not found]             ` <DM8PR12MB5400E03D7AD7833CEBF8DF9DAB099@DM8PR12MB5400.namprd12.prod.outlook.com>
  2022-03-09  2:39               ` Jason Wang
@ 2022-03-09  3:32               ` Si-Wei Liu
       [not found]                 ` <DM8PR12MB540086CCD1F535668D05E546AB0A9@DM8PR12MB5400.namprd12.prod.outlook.com>
  1 sibling, 1 reply; 20+ messages in thread
From: Si-Wei Liu @ 2022-03-09  3:32 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma



On 3/8/2022 6:13 AM, Eli Cohen wrote:
>
>> -----Original Message-----
>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>> Sent: Tuesday, March 8, 2022 8:16 AM
>> To: Eli Cohen <elic@nvidia.com>
>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-
>> foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit <parav@nvidia.com>
>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
>>
>>
>>
>> On 3/6/2022 11:57 PM, Eli Cohen wrote:
>>>> -----Original Message-----
>>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>>>> Sent: Saturday, March 5, 2022 12:34 AM
>>>> To: Eli Cohen <elic@nvidia.com>
>>>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-
>>>> foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
>>>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit
>>>> <parav@nvidia.com>
>>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor
>>>> statistics
>>>>
>>>> Sorry, I somehow missed this after my break. Please see comments in line.
>>>>
>>>> On 2/16/2022 10:46 PM, Eli Cohen wrote:
>>>>> On Wed, Feb 16, 2022 at 10:49:26AM -0800, Si-Wei Liu wrote:
>>>>>> On 2/16/2022 12:00 AM, Eli Cohen wrote:
>>>>>>> Allows to read vendor statistics of a vdpa device. The specific
>>>>>>> statistics data is received by the upstream driver in the form of
>>>>>>> an (attribute name, attribute value) pairs.
>>>>>>>
>>>>>>> An example of statistics for mlx5_vdpa device are:
>>>>>>>
>>>>>>> received_desc - number of descriptors received by the virtqueue
>>>>>>> completed_desc - number of descriptors completed by the virtqueue
>>>>>>>
>>>>>>> A descriptor using indirect buffers is still counted as 1. In
>>>>>>> addition, N chained descriptors are counted correctly N times as
>>>>>>> one
>>>> would expect.
>>>>>>> A new callback was added to vdpa_config_ops which provides the
>>>>>>> means for the vdpa driver to return statistics results.
>>>>>>>
>>>>>>> The interface allows for reading all the supported virtqueues,
>>>>>>> including the control virtqueue if it exists.
>>>>>>>
>>>>>>> Below are some examples taken from mlx5_vdpa which are introduced
>>>>>>> in the following patch:
>>>>>>>
>>>>>>> 1. Read statistics for the virtqueue at index 1
>>>>>>>
>>>>>>> $ vdpa dev vstats show vdpa-a qidx 1
>>>>>>> vdpa-a:
>>>>>>> queue_type tx queue_index 1 received_desc 3844836 completed_desc
>>>>>>> 3844836
>>>>>>>
>>>>>>> 2. Read statistics for the virtqueue at index 32 $ vdpa dev vstats
>>>>>>> show vdpa-a qidx 32
>>>>>>> vdpa-a:
>>>>>>> queue_type control_vq queue_index 32 received_desc 62
>>>>>>> completed_desc
>>>>>>> 62
>>>>>>>
>>>>>>> 3. Read statistics for the virtqueue at index 0 with json output
>>>>>>> $ vdpa -j dev vstats show vdpa-a qidx 0 {"vstats":{"vdpa-a":{
>>>>>>>
>>>> "queue_type":"rx","queue_index":0,"name":"received_desc","value":4177
>>>> 76,\
>>>>>>>      "name":"completed_desc","value":417548}}}
>>>>>>>
>>>>>>> 4. Read statistics for the virtqueue at index 0 with pretty json
>>>>>>> output $ vdpa -jp dev vstats show vdpa-a qidx 0 {
>>>>>>>         "vstats": {
>>>>>>>             "vdpa-a": {
>>>>>>>
>>>>>>>                 "queue_type": "rx",
>>>>>> I wonder where this info can be inferred? I don't see relevant
>>>>>> change in the patch series that helps gather the
>> VDPA_ATTR_DEV_QUEUE_TYPE?
>>>>>> Is this an arbitrary string defined by the vendor as well? If so,
>>>>>> how does the user expect to consume it?
>>>>> The queue type is deduced from the index and whether we have a control
>>>>> virtqueue. Even numbers are rx, odd numbers are tx, and if there is a
>>>>> CVQ, the last one is CVQ.
>>>> OK, then VDPA_ATTR_DEV_QUEUE_TYPE attribute introduced in this patch
>>>> might not be useful at all?
>>> Right, will remove.
>>>
>>>> And how do you determine in the vdpa tool if CVQ is negotiated or
>>>> not?
>>> I make a netlink call to get the same information as " vdpa dev config show"
>> retrieves. I use the negotiated features to determine if a CVQ is available. If it
>> is, the number of VQs equals the control VQ index. So there are two netlink
>> calls under the hood.
>> The lock vdpa_dev_mutex won't hold across the two separate netlink calls, and
>> it may end up with inconsistent state - theoretically things could happen like
>> that the first call gets CVQ negotiated, but the later call for
>> get_vendor_vq_stats() on the cvq might get -EINVAL due to device reset. Can
>> the negotiated status and stat query be done within one single netlink call?
> I see your concern.
> The only reason I do the extra call is to know if we have a control VQ and what
> index it is, just to print a descriptive string telling whether it's an rx, tx or control VQ.
>
> So the cure can be simple. Let's have a new attribute that returns the type of
> virtqueue.
I am not sure I follow the cure. Wouldn't it be possible to get both the 
negotiated status and the queue stats in vdpa_nl_cmd_dev_stats_get_doit() 
under the same vdpa_dev_mutex lock? And I am not even sure displaying the 
queue type is a must - the output doesn't seem to include the vdpa class 
info, which makes it hard for a script to parse this field in a generic 
way.

>   I think Jason did not like the idea of communicating the kind of VQ
> from kernel to userspace but under these circumstances, maybe he would approve.
> Jason?
>
>> What worried me is that the queue index being dynamic and depended on
>> negotiation status would make host admin user quite hard to follow. The guest
>> may or may not advertise F_MQ and/or F_CTRL_VQ across various phases, e.g.
>> firmware (UEFI), boot loader (grub) till OS driver is up and running, which can
>> be agnostic to host admin. For most of the part it's not easy to script and
>> predict the queue index which can change from time to time. Can we define
>> the order of host predictable queue index, which is independent from any
>> guest negotiated state?
Here I think we can just use the plain queue index in the host view - 
say if a vdpa net device has 4 pairs of data vqs and 1 control vq, the 
user may use qindex 8 across the board to identify the control vq, 
regardless of whether the F_MQ feature is negotiated in the guest.


Regards,
-Siwei

>>
>>>> Looks to me there are still some loose ends I don't quite yet
>>>> understand.
>>>>
>>>>
>>>>>>>                 "queue_index": 0,
>>> I think this can be removed since the command is for a specific index.
>>>
>>>>>>>                 "name": "received_desc",
>>>>>>>                 "value": 417776,
>>>>>>>                 "name": "completed_desc",
>>>>>>>                 "value": 417548
>>>>>> Not for this kernel patch, but IMHO it's the best to put the name &
>>>>>> value pairs in an array instead of flat entries in json's
>>>>>> hash/dictionary. The hash entries can be re-ordered deliberately by
>>>>>> external json parsing tool, ending up with inconsistent stat values.
>>>> This comment was missed for some reason. Please change the example in
>>>> the log if you agree to address it in the vdpa tool. Or justify why
>>>> relying on the order of the json hash/dictionary is fine.
>>> Sorry for skipping this comment.
>>> Do you mean to present the information like:
>>> "received_desc": 417776,
>>> "completed_desc": 417548,
>> I mean the following presentation:
>>
>> $ vdpa -jp dev vstats show vdpa-a qidx 0 {
>>       "vstats": {
>>           "vdpa-a": {
>>               "queue_stats": [{
>>                   "queue_index": 0,
>>                   "queue_type": "rx",
>>                   "stat_name": [ "received_desc","completed_desc" ],
>>                   "stat_value": [ 417776,417548 ]
>>               }]
>>           }
>>       }
>> }
>>
>> I think Parav had similar suggestion, too.
>>
>> Thanks,
>> -Siwei
>>
>>>> Thanks,
>>>> -Siwei
>>>>
>>>>>> Thanks,
>>>>>> -Siwei
>>>>>>>             }
>>>>>>>         }
>>>>>>> }
>>>>>>>
>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>>>> ---
>>>>>>>      drivers/vdpa/vdpa.c       | 129
>>>> ++++++++++++++++++++++++++++++++++++++
>>>>>>>      include/linux/vdpa.h      |   5 ++
>>>>>>>      include/uapi/linux/vdpa.h |   7 +++
>>>>>>>      3 files changed, 141 insertions(+)
>>>>>>>
>>>>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index
>>>>>>> 9846c9de4bfa..d0ff671baf88 100644
>>>>>>> --- a/drivers/vdpa/vdpa.c
>>>>>>> +++ b/drivers/vdpa/vdpa.c
>>>>>>> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device
>>>>>>> *vdev,
>>>> struct sk_buff *msg, u32 portid,
>>>>>>>      	return err;
>>>>>>>      }
>>>>>>> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct
>>>>>>> +sk_buff
>>>> *msg,
>>>>>>> +			       struct genl_info *info, u32 index) {
>>>>>>> +	int err;
>>>>>>> +
>>>>>>> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
>>>>>>> +		return -EMSGSIZE;
>>>>>>> +
>>>>>>> +	err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info-
>>>>> extack);
>>>>>>> +	if (err)
>>>>>>> +		return err;
>>>>>>> +
>>>>>>> +	return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff
>> *msg,
>>>>>>> +			     struct genl_info *info, u32 index) {
>>>>>>> +	int err;
>>>>>>> +
>>>>>>> +	if (!vdev->config->get_vendor_vq_stats)
>>>>>>> +		return -EOPNOTSUPP;
>>>>>>> +
>>>>>>> +	err = vdpa_fill_stats_rec(vdev, msg, info, index);
>>>>>>> +	if (err)
>>>>>>> +		return err;
>>>>>>> +
>>>>>>> +	return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
>>>>>>> +				      struct sk_buff *msg,
>>>>>>> +				      struct genl_info *info, u32 index) {
>>>>>>> +	u32 device_id;
>>>>>>> +	void *hdr;
>>>>>>> +	int err;
>>>>>>> +	u32 portid = info->snd_portid;
>>>>>>> +	u32 seq = info->snd_seq;
>>>>>>> +	u32 flags = 0;
>>>>>>> +
>>>>>>> +	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
>>>>>>> +			  VDPA_CMD_DEV_VSTATS_GET);
>>>>>>> +	if (!hdr)
>>>>>>> +		return -EMSGSIZE;
>>>>>>> +
>>>>>>> +	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev-
>>>>> dev))) {
>>>>>>> +		err = -EMSGSIZE;
>>>>>>> +		goto undo_msg;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	device_id = vdev->config->get_device_id(vdev);
>>>>>>> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
>>>>>>> +		err = -EMSGSIZE;
>>>>>>> +		goto undo_msg;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	err = vendor_stats_fill(vdev, msg, info, index);
>>>>>>> +
>>>>>>> +	genlmsg_end(msg, hdr);
>>>>>>> +
>>>>>>> +	return err;
>>>>>>> +
>>>>>>> +undo_msg:
>>>>>>> +	genlmsg_cancel(msg, hdr);
>>>>>>> +	return err;
>>>>>>> +}
>>>>>>> +
>>>>>>>      static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff
>>>>>>> *skb, struct
>>>> genl_info *info)
>>>>>>>      {
>>>>>>>      	struct vdpa_device *vdev;
>>>>>>> @@ -990,6 +1058,60 @@ vdpa_nl_cmd_dev_config_get_dumpit(struct
>>>> sk_buff *msg, struct netlink_callback *
>>>>>>>      	return msg->len;
>>>>>>>      }
>>>>>>> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
>>>>>>> +					  struct genl_info *info)
>>>>>>> +{
>>>>>>> +	struct vdpa_device *vdev;
>>>>>>> +	struct sk_buff *msg;
>>>>>>> +	const char *devname;
>>>>>>> +	struct device *dev;
>>>>>>> +	u32 index;
>>>>>>> +	int err;
>>>>>>> +
>>>>>>> +	if (!info->attrs[VDPA_ATTR_DEV_NAME])
>>>>>>> +		return -EINVAL;
>>>>>>> +
>>>>>>> +	if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
>>>>>>> +		return -EINVAL;
>>>>>>> +
>>>>>>> +	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
>>>>>>> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
>>>>>>> +	if (!msg)
>>>>>>> +		return -ENOMEM;
>>>>>>> +
>>>>>>> +	index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
>>>>>>> +	mutex_lock(&vdpa_dev_mutex);
>>>>>>> +	dev = bus_find_device(&vdpa_bus, NULL, devname,
>>>> vdpa_name_match);
>>>>>>> +	if (!dev) {
>>>>>>> +		NL_SET_ERR_MSG_MOD(info->extack, "device not found");
>>>>>>> +		err = -ENODEV;
>>>>>>> +		goto dev_err;
>>>>>>> +	}
>>>>>>> +	vdev = container_of(dev, struct vdpa_device, dev);
>>>>>>> +	if (!vdev->mdev) {
>>>>>>> +		NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa
>>>> device");
>>>>>>> +		err = -EINVAL;
>>>>>>> +		goto mdev_err;
>>>>>>> +	}
>>>>>>> +	err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
>>>>>>> +	if (!err)
>>>>>>> +		err = genlmsg_reply(msg, info);
>>>>>>> +
>>>>>>> +	put_device(dev);
>>>>>>> +	mutex_unlock(&vdpa_dev_mutex);
>>>>>>> +
>>>>>>> +	if (err)
>>>>>>> +		nlmsg_free(msg);
>>>>>>> +
>>>>>>> +	return err;
>>>>>>> +
>>>>>>> +mdev_err:
>>>>>>> +	put_device(dev);
>>>>>>> +dev_err:
>>>>>>> +	mutex_unlock(&vdpa_dev_mutex);
>>>>>>> +	return err;
>>>>>>> +}
>>>>>>> +
>>>>>>>      static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>>>>>>>      	[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type =
>> NLA_NUL_STRING },
>>>>>>>      	[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING
>> }, @@ -
>>>> 997,6
>>>>>>> +1119,7 @@ static const struct nla_policy
>>>> vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>>>>>>>      	[VDPA_ATTR_DEV_NET_CFG_MACADDR] =
>> NLA_POLICY_ETH_ADDR,
>>>>>>>      	/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
>>>>>>>      	[VDPA_ATTR_DEV_NET_CFG_MTU] =
>> NLA_POLICY_MIN(NLA_U16, 68),
>>>>>>> +	[VDPA_ATTR_DEV_QUEUE_INDEX] = NLA_POLICY_RANGE(NLA_U32, 0,
>>>> 65535),
>>>>>>>      };
>>>>>>>      static const struct genl_ops vdpa_nl_ops[] = { @@ -1030,6
>>>>>>> +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
>>>>>>>      		.doit = vdpa_nl_cmd_dev_config_get_doit,
>>>>>>>      		.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
>>>>>>>      	},
>>>>>>> +	{
>>>>>>> +		.cmd = VDPA_CMD_DEV_VSTATS_GET,
>>>>>>> +		.validate = GENL_DONT_VALIDATE_STRICT |
>>>> GENL_DONT_VALIDATE_DUMP,
>>>>>>> +		.doit = vdpa_nl_cmd_dev_stats_get_doit,
>>>>>>> +		.flags = GENL_ADMIN_PERM,
>>>>>>> +	},
>>>>>>>      };
>>>>>>>      static struct genl_family vdpa_nl_family __ro_after_init = {
>>>>>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index
>>>>>>> 2de442ececae..274203845cfc 100644
>>>>>>> --- a/include/linux/vdpa.h
>>>>>>> +++ b/include/linux/vdpa.h
>>>>>>> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
>>>>>>>      			    const struct vdpa_vq_state *state);
>>>>>>>      	int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
>>>>>>>      			    struct vdpa_vq_state *state);
>>>>>>> +	int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx,
>>>>>>> +				   struct sk_buff *msg,
>>>>>>> +				   struct netlink_ext_ack *extack);
>>>>>>>      	struct vdpa_notification_area
>>>>>>>      	(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
>>>>>>>      	/* vq irq is not expected to be changed once DRIVER_OK is set
>>>>>>> */ @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
>>>>>>>      int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
>>>>>>>      void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
>>>>>>> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
>>>>>>> +
>>>>>>>      #endif /* _LINUX_VDPA_H */
>>>>>>> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
>>>>>>> index 1061d8d2d09d..c5f229a41dc2 100644
>>>>>>> --- a/include/uapi/linux/vdpa.h
>>>>>>> +++ b/include/uapi/linux/vdpa.h
>>>>>>> @@ -18,6 +18,7 @@ enum vdpa_command {
>>>>>>>      	VDPA_CMD_DEV_DEL,
>>>>>>>      	VDPA_CMD_DEV_GET,		/* can dump */
>>>>>>>      	VDPA_CMD_DEV_CONFIG_GET,	/* can dump */
>>>>>>> +	VDPA_CMD_DEV_VSTATS_GET,
>>>>>>>      };
>>>>>>>      enum vdpa_attr {
>>>>>>> @@ -46,6 +47,12 @@ enum vdpa_attr {
>>>>>>>      	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
>>>>>>>      	VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,		/*
>> u32 */
>>>>>>>      	VDPA_ATTR_DEV_SUPPORTED_FEATURES,	/* u64 */
>>>>>>> +
>>>>>>> +	VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
>>>>>>> +	VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
>>>>>>> +	VDPA_ATTR_DEV_VENDOR_ATTR_NAME,		/* string */
>>>>>>> +	VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
>>>>>>> +
>>>>>>>      	/* new attributes must be added above here */
>>>>>>>      	VDPA_ATTR_MAX,
>>>>>>>      };


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
       [not found]                 ` <DM8PR12MB540086CCD1F535668D05E546AB0A9@DM8PR12MB5400.namprd12.prod.outlook.com>
@ 2022-03-10  1:45                   ` Si-Wei Liu
       [not found]                     ` <DM8PR12MB54000042A48FDFA446EFE792AB0E9@DM8PR12MB5400.namprd12.prod.outlook.com>
  0 siblings, 1 reply; 20+ messages in thread
From: Si-Wei Liu @ 2022-03-10  1:45 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma



On 3/8/2022 9:07 PM, Eli Cohen wrote:
>
>> -----Original Message-----
>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>> Sent: Wednesday, March 9, 2022 5:33 AM
>> To: Eli Cohen <elic@nvidia.com>
>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-
>> foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit <parav@nvidia.com>
>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
>>
>>
>>
>> On 3/8/2022 6:13 AM, Eli Cohen wrote:
>>>> -----Original Message-----
>>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>>>> Sent: Tuesday, March 8, 2022 8:16 AM
>>>> To: Eli Cohen <elic@nvidia.com>
>>>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-
>>>> foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
>>>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit
>>>> <parav@nvidia.com>
>>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor
>>>> statistics
>>>>
>>>>
>>>>
>>>> On 3/6/2022 11:57 PM, Eli Cohen wrote:
>>>>>> -----Original Message-----
>>>>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>>>>>> Sent: Saturday, March 5, 2022 12:34 AM
>>>>>> To: Eli Cohen <elic@nvidia.com>
>>>>>> Cc: mst@redhat.com; jasowang@redhat.com;
>>>>>> virtualization@lists.linux- foundation.org; eperezma@redhat.com;
>>>>>> amorenoz@redhat.com; lvivier@redhat.com; sgarzare@redhat.com;
>> Parav
>>>>>> Pandit <parav@nvidia.com>
>>>>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor
>>>>>> statistics
>>>>>>
>>>>>> Sorry, I somehow missed this after my break. Please see comments in
>> line.
>>>>>> On 2/16/2022 10:46 PM, Eli Cohen wrote:
>>>>>>> On Wed, Feb 16, 2022 at 10:49:26AM -0800, Si-Wei Liu wrote:
>>>>>>>> On 2/16/2022 12:00 AM, Eli Cohen wrote:
>>>>>>>>> Allows to read vendor statistics of a vdpa device. The specific
>>>>>>>>> statistics data is received by the upstream driver in the form
>>>>>>>>> of an (attribute name, attribute value) pairs.
>>>>>>>>>
>>>>>>>>> An example of statistics for mlx5_vdpa device are:
>>>>>>>>>
>>>>>>>>> received_desc - number of descriptors received by the virtqueue
>>>>>>>>> completed_desc - number of descriptors completed by the
>>>>>>>>> virtqueue
>>>>>>>>>
>>>>>>>>> A descriptor using indirect buffers is still counted as 1. In
>>>>>>>>> addition, N chained descriptors are counted correctly N times as
>>>>>>>>> one
>>>>>> would expect.
>>>>>>>>> A new callback was added to vdpa_config_ops which provides the
>>>>>>>>> means for the vdpa driver to return statistics results.
>>>>>>>>>
>>>>>>>>> The interface allows for reading all the supported virtqueues,
>>>>>>>>> including the control virtqueue if it exists.
>>>>>>>>>
>>>>>>>>> Below are some examples taken from mlx5_vdpa which are
>>>>>>>>> introduced in the following patch:
>>>>>>>>>
>>>>>>>>> 1. Read statistics for the virtqueue at index 1
>>>>>>>>>
>>>>>>>>> $ vdpa dev vstats show vdpa-a qidx 1
>>>>>>>>> vdpa-a:
>>>>>>>>> queue_type tx queue_index 1 received_desc 3844836
>> completed_desc
>>>>>>>>> 3844836
>>>>>>>>>
>>>>>>>>> 2. Read statistics for the virtqueue at index 32 $ vdpa dev
>>>>>>>>> vstats show vdpa-a qidx 32
>>>>>>>>> vdpa-a:
>>>>>>>>> queue_type control_vq queue_index 32 received_desc 62
>>>>>>>>> completed_desc
>>>>>>>>> 62
>>>>>>>>>
>>>>>>>>> 3. Read statistics for the virtqueue at index 0 with json
>>>>>>>>> output $ vdpa -j dev vstats show vdpa-a qidx 0
>>>>>>>>> {"vstats":{"vdpa-a":{
>>>>>>>>>
>>>>>> "queue_type":"rx","queue_index":0,"name":"received_desc","value":41
>>>>>> 77
>>>>>> 76,\
>>>>>>>>>       "name":"completed_desc","value":417548}}}
>>>>>>>>>
>>>>>>>>> 4. Read statistics for the virtqueue at index 0 with pretty json
>>>>>>>>> output $ vdpa -jp dev vstats show vdpa-a qidx 0 {
>>>>>>>>>          "vstats": {
>>>>>>>>>              "vdpa-a": {
>>>>>>>>>
>>>>>>>>>                  "queue_type": "rx",
>>>>>>>> I wonder where this info can be inferred? I don't see relevant
>>>>>>>> change in the patch series that helps gather the
>>>> VDPA_ATTR_DEV_QUEUE_TYPE?
>>>>>>>> Is this an arbitrary string defined by the vendor as well? If so,
>>>>>>>> how does the user expect to consume it?
>>>>>>> The queue type is deduced from the index and whether we have a control
>>>>>>> virtqueue. Even numbers are rx, odd numbers are tx, and if there is a
>>>>>>> CVQ, the last one is CVQ.
>>>>>> OK, then VDPA_ATTR_DEV_QUEUE_TYPE attribute introduced in this
>>>>>> patch might not be useful at all?
>>>>> Right, will remove.
>>>>>
>>>>>> And how do you determine in the vdpa tool if CVQ is negotiated or
>>>>>> not?
>>>>> I make a netlink call to get the same information as " vdpa dev config
>> show"
>>>> retrieves. I use the negotiated features to determine if a CVQ is
>>>> available. If it is, the number of VQs equals the control VQ index.
>>>> So there are two netlink calls under the hood.
>>>> The lock vdpa_dev_mutex won't hold across the two separate netlink
>>>> calls, and it may end up with inconsistent state - theoretically
>>>> things could happen like that the first call gets CVQ negotiated, but
>>>> the later call for
>>>> get_vendor_vq_stats() on the cvq might get -EINVAL due to device
>>>> reset. Can the negotiated status and stat query be done within one single
>> netlink call?
>>> I see your concern.
>>> The only reason I do the extra call is to know if we have a control VQ
>>> and what index it is, just to print a descriptive string telling whether it's an
>> rx, tx or control VQ.
>>> So the cure can be simple. Let's have a new attribute that returns the
>>> type of virtqueue.
>> I am not sure I follow the cure. Wouldn't it be possible to get both negotiated
>> status and the queue stat in vdpa_nl_cmd_dev_stats_get_doit() under the
>> same vdpa_dev_mutex lock?
> Yes we can, but I suggested getting only the type of the queue as a new attribute.
> The kernel will digest the information and decide, for a given VQ, whether it's rx,
> tx or control, and return the result in that new attribute.
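
For illustration, that suggestion would amount to something like the 
sketch below (hypothetical; the helper and its parameters are assumed, 
with num_data_vqs taken from the device config):

static int vdpa_put_queue_type(struct sk_buff *msg, u16 idx,
			       u64 negotiated_features, u16 num_data_vqs)
{
	const char *type;

	/* Net-specific classification done inside the vdpa core */
	if ((negotiated_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) &&
	    idx == num_data_vqs)
		type = "control_vq";
	else
		type = (idx & 1) ? "tx" : "rx";

	return nla_put_string(msg, VDPA_ATTR_DEV_QUEUE_TYPE, type);
}
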
The rx, tx and control queue types are net-specific, while the vdpa core 
is currently agnostic to the vdpa class.

>
>> And I am not even sure if it is a must to display
>> the queue type - it doesn't seem the output includes the vdpa class info, which
>> makes it hard for a script to parse this field in a generic way.
> I don't get you. You say you don't think you need the queue type, and at the same
> time you're concerned that the lack of information makes it hard for scripts.
> BTW, class info is something you can get for the device through "vdpa dev show",
> so you know the class of your device.
Stepping back, may I ask if there's a case where a queue-type-specific 
stat may be defined by a vendor, such that deciphering a certain vendor 
stat would need type-specific knowledge? So far the received_desc and 
completed_desc stats offered through the mlx5_vdpa patch look to be 
general ones, not associated with any queue type in particular. Is 
there some future stat in your mind that needs specific knowledge of 
the queue type and vdpa class?

I'd prefer the vstat output to be self-contained and self-descriptive. 
You may argue that the vdpa class shown in "vdpa dev show" never changes 
after creation. This is true; the queue type, however, is not stable - 
say you got a control queue at qindex 2, but the next moment you may get 
an rx queue at the same qindex (e.g. once the guest negotiates F_MQ). In 
particular, you seem to want to tie this to the queue index in the guest 
view, which is quite dynamic for a host admin or a script running on the 
host to follow.

>
>>>    I think Jason did not like the idea of communicating the kind of VQ
>>> from kernel to userspace but under these circumstances, maybe he would
>> approve.
>>> Jason?
>>>
>>>> What worried me is that the queue index being dynamic and depended on
>>>> negotiation status would make host admin user quite hard to follow.
>>>> The guest may or may not advertise F_MQ and/or F_CTRL_VQ across
>> various phases, e.g.
>>>> firmware (UEFI), boot loader (grub) till OS driver is up and running,
>>>> which can be agnostic to host admin. For most of the part it's not
>>>> easy to script and predict the queue index which can change from time
>>>> to time. Can we define the order of host predictable queue index,
>>>> which is independent from any guest negotiated state?
>> Here I think we can just use the plain queue index in the host view - say if vdpa
>> net has 4 pairs of data vqs and 1 control vq, user may use qindex 8 across the
>> board to identify the control vq, regardless if the F_MQ feature is negotiated
>> or not in guest.
> Right, but the idea is that a userspace tool should provide useful information to
> the user so the user does not need to do complex logic to infer it from bare data.
The host-side qindex and qtype would never change regardless of guest 
feature negotiation; by nature they reflect the real constructs and 
objects in the hardware. I don't feel it's a simple task for host users 
to figure out the correct guest-side qindex for the control queue - it's 
always racy to check some other vdpa command's output if the vstat 
output is not self-contained.

Thanks,
-Siwei

>
>>
>> Regards,
>> -Siwei
>>
>>>>>> Looks to me there are still some loose ends I don't quite yet
>>>>>> understand.
>>>>>>
>>>>>>
>>>>>>>>>                  "queue_index": 0,
>>>>> I think this can be removed since the command is for a specific index.
>>>>>
>>>>>>>>>                  "name": "received_desc",
>>>>>>>>>                  "value": 417776,
>>>>>>>>>                  "name": "completed_desc",
>>>>>>>>>                  "value": 417548
>>>>>>>> Not for this kernel patch, but IMHO it's the best to put the name
>>>>>>>> & value pairs in an array instead of flat entries in json's
>>>>>>>> hash/dictionary. The hash entries can be re-ordered deliberately
>>>>>>>> by external json parsing tool, ending up with inconsistent stat values.
>>>>>> This comment was missed for some reason. Please change the example
>>>>>> in the log if you agree to address it in the vdpa tool. Or justify why
>>>>>> relying on the order of the json hash/dictionary is fine.
>>>>> Sorry for skipping this comment.
>>>>> Do you mean to present the information like:
>>>>> "received_desc": 417776,
>>>>> "completed_desc": 417548,
>>>> I mean the following presentation:
>>>>
>>>> $ vdpa -jp dev vstats show vdpa-a qidx 0 {
>>>>        "vstats": {
>>>>            "vdpa-a": {
>>>>                "queue_stats": [{
>>>>                    "queue_index": 0,
>>>>                    "queue_type": "rx",
>>>>                    "stat_name": [ "received_desc","completed_desc" ],
>>>>                    "stat_value": [ 417776,417548 ]
>>>>                }]
>>>>            }
>>>>        }
>>>> }
>>>>
>>>> I think Parav had similar suggestion, too.
>>>>
>>>> Thanks,
>>>> -Siwei
>>>>
>>>>>> Thanks,
>>>>>> -Siwei
>>>>>>
>>>>>>>> Thanks,
>>>>>>>> -Siwei
>>>>>>>>>              }
>>>>>>>>>          }
>>>>>>>>> }
>>>>>>>>>
>>>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>>>>>> ---
>>>>>>>>>       drivers/vdpa/vdpa.c       | 129
>>>>>> ++++++++++++++++++++++++++++++++++++++
>>>>>>>>>       include/linux/vdpa.h      |   5 ++
>>>>>>>>>       include/uapi/linux/vdpa.h |   7 +++
>>>>>>>>>       3 files changed, 141 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index
>>>>>>>>> 9846c9de4bfa..d0ff671baf88 100644
>>>>>>>>> --- a/drivers/vdpa/vdpa.c
>>>>>>>>> +++ b/drivers/vdpa/vdpa.c
>>>>>>>>> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device
>>>>>>>>> *vdev,
>>>>>> struct sk_buff *msg, u32 portid,
>>>>>>>>>       	return err;
>>>>>>>>>       }
>>>>>>>>> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct
>>>>>>>>> +sk_buff
>>>>>> *msg,
>>>>>>>>> +			       struct genl_info *info, u32 index) {
>>>>>>>>> +	int err;
>>>>>>>>> +
>>>>>>>>> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
>>>>>>>>> +		return -EMSGSIZE;
>>>>>>>>> +
>>>>>>>>> +	err = vdev->config->get_vendor_vq_stats(vdev, index, msg,
>>>>>>>>> +info-
>>>>>>> extack);
>>>>>>>>> +	if (err)
>>>>>>>>> +		return err;
>>>>>>>>> +
>>>>>>>>> +	return 0;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static int vendor_stats_fill(struct vdpa_device *vdev, struct
>>>>>>>>> +sk_buff
>>>> *msg,
>>>>>>>>> +			     struct genl_info *info, u32 index) {
>>>>>>>>> +	int err;
>>>>>>>>> +
>>>>>>>>> +	if (!vdev->config->get_vendor_vq_stats)
>>>>>>>>> +		return -EOPNOTSUPP;
>>>>>>>>> +
>>>>>>>>> +	err = vdpa_fill_stats_rec(vdev, msg, info, index);
>>>>>>>>> +	if (err)
>>>>>>>>> +		return err;
>>>>>>>>> +
>>>>>>>>> +	return 0;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
>>>>>>>>> +				      struct sk_buff *msg,
>>>>>>>>> +				      struct genl_info *info, u32 index) {
>>>>>>>>> +	u32 device_id;
>>>>>>>>> +	void *hdr;
>>>>>>>>> +	int err;
>>>>>>>>> +	u32 portid = info->snd_portid;
>>>>>>>>> +	u32 seq = info->snd_seq;
>>>>>>>>> +	u32 flags = 0;
>>>>>>>>> +
>>>>>>>>> +	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
>>>>>>>>> +			  VDPA_CMD_DEV_VSTATS_GET);
>>>>>>>>> +	if (!hdr)
>>>>>>>>> +		return -EMSGSIZE;
>>>>>>>>> +
>>>>>>>>> +	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME,
>> dev_name(&vdev-
>>>>>>> dev))) {
>>>>>>>>> +		err = -EMSGSIZE;
>>>>>>>>> +		goto undo_msg;
>>>>>>>>> +	}
>>>>>>>>> +
>>>>>>>>> +	device_id = vdev->config->get_device_id(vdev);
>>>>>>>>> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
>>>>>>>>> +		err = -EMSGSIZE;
>>>>>>>>> +		goto undo_msg;
>>>>>>>>> +	}
>>>>>>>>> +
>>>>>>>>> +	err = vendor_stats_fill(vdev, msg, info, index);
>>>>>>>>> +
>>>>>>>>> +	genlmsg_end(msg, hdr);
>>>>>>>>> +
>>>>>>>>> +	return err;
>>>>>>>>> +
>>>>>>>>> +undo_msg:
>>>>>>>>> +	genlmsg_cancel(msg, hdr);
>>>>>>>>> +	return err;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>       static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff
>>>>>>>>> *skb, struct
>>>>>> genl_info *info)
>>>>>>>>>       {
>>>>>>>>>       	struct vdpa_device *vdev;
>>>>>>>>> @@ -990,6 +1058,60 @@
>> vdpa_nl_cmd_dev_config_get_dumpit(struct
>>>>>> sk_buff *msg, struct netlink_callback *
>>>>>>>>>       	return msg->len;
>>>>>>>>>       }
>>>>>>>>> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
>>>>>>>>> +					  struct genl_info *info)
>>>>>>>>> +{
>>>>>>>>> +	struct vdpa_device *vdev;
>>>>>>>>> +	struct sk_buff *msg;
>>>>>>>>> +	const char *devname;
>>>>>>>>> +	struct device *dev;
>>>>>>>>> +	u32 index;
>>>>>>>>> +	int err;
>>>>>>>>> +
>>>>>>>>> +	if (!info->attrs[VDPA_ATTR_DEV_NAME])
>>>>>>>>> +		return -EINVAL;
>>>>>>>>> +
>>>>>>>>> +	if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
>>>>>>>>> +		return -EINVAL;
>>>>>>>>> +
>>>>>>>>> +	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
>>>>>>>>> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
>>>>>>>>> +	if (!msg)
>>>>>>>>> +		return -ENOMEM;
>>>>>>>>> +
>>>>>>>>> +	index = nla_get_u32(info-
>>> attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
>>>>>>>>> +	mutex_lock(&vdpa_dev_mutex);
>>>>>>>>> +	dev = bus_find_device(&vdpa_bus, NULL, devname,
>>>>>> vdpa_name_match);
>>>>>>>>> +	if (!dev) {
>>>>>>>>> +		NL_SET_ERR_MSG_MOD(info->extack, "device not
>> found");
>>>>>>>>> +		err = -ENODEV;
>>>>>>>>> +		goto dev_err;
>>>>>>>>> +	}
>>>>>>>>> +	vdev = container_of(dev, struct vdpa_device, dev);
>>>>>>>>> +	if (!vdev->mdev) {
>>>>>>>>> +		NL_SET_ERR_MSG_MOD(info->extack, "unmanaged
>> vdpa
>>>>>> device");
>>>>>>>>> +		err = -EINVAL;
>>>>>>>>> +		goto mdev_err;
>>>>>>>>> +	}
>>>>>>>>> +	err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
>>>>>>>>> +	if (!err)
>>>>>>>>> +		err = genlmsg_reply(msg, info);
>>>>>>>>> +
>>>>>>>>> +	put_device(dev);
>>>>>>>>> +	mutex_unlock(&vdpa_dev_mutex);
>>>>>>>>> +
>>>>>>>>> +	if (err)
>>>>>>>>> +		nlmsg_free(msg);
>>>>>>>>> +
>>>>>>>>> +	return err;
>>>>>>>>> +
>>>>>>>>> +mdev_err:
>>>>>>>>> +	put_device(dev);
>>>>>>>>> +dev_err:
>>>>>>>>> +	mutex_unlock(&vdpa_dev_mutex);
>>>>>>>>> +	return err;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>       static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1]
>> = {
>>>>>>>>>       	[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type =
>>>> NLA_NUL_STRING },
>>>>>>>>>       	[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING
>>>> }, @@ -
>>>>>> 997,6
>>>>>>>>> +1119,7 @@ static const struct nla_policy
>>>>>> vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>>>>>>>>>       	[VDPA_ATTR_DEV_NET_CFG_MACADDR] =
>>>> NLA_POLICY_ETH_ADDR,
>>>>>>>>>       	/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
>>>>>>>>>       	[VDPA_ATTR_DEV_NET_CFG_MTU] =
>>>> NLA_POLICY_MIN(NLA_U16, 68),
>>>>>>>>> +	[VDPA_ATTR_DEV_QUEUE_INDEX] =
>> NLA_POLICY_RANGE(NLA_U32, 0,
>>>>>> 65535),
>>>>>>>>>       };
>>>>>>>>>       static const struct genl_ops vdpa_nl_ops[] = { @@ -1030,6
>>>>>>>>> +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
>>>>>>>>>       		.doit = vdpa_nl_cmd_dev_config_get_doit,
>>>>>>>>>       		.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
>>>>>>>>>       	},
>>>>>>>>> +	{
>>>>>>>>> +		.cmd = VDPA_CMD_DEV_VSTATS_GET,
>>>>>>>>> +		.validate = GENL_DONT_VALIDATE_STRICT |
>>>>>> GENL_DONT_VALIDATE_DUMP,
>>>>>>>>> +		.doit = vdpa_nl_cmd_dev_stats_get_doit,
>>>>>>>>> +		.flags = GENL_ADMIN_PERM,
>>>>>>>>> +	},
>>>>>>>>>       };
>>>>>>>>>       static struct genl_family vdpa_nl_family __ro_after_init =
>>>>>>>>> { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index
>>>>>>>>> 2de442ececae..274203845cfc 100644
>>>>>>>>> --- a/include/linux/vdpa.h
>>>>>>>>> +++ b/include/linux/vdpa.h
>>>>>>>>> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
>>>>>>>>>       			    const struct vdpa_vq_state *state);
>>>>>>>>>       	int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
>>>>>>>>>       			    struct vdpa_vq_state *state);
>>>>>>>>> +	int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx,
>>>>>>>>> +				   struct sk_buff *msg,
>>>>>>>>> +				   struct netlink_ext_ack *extack);
>>>>>>>>>       	struct vdpa_notification_area
>>>>>>>>>       	(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
>>>>>>>>>       	/* vq irq is not expected to be changed once DRIVER_OK is
>>>>>>>>> set */ @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
>>>>>>>>>       int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
>>>>>>>>>       void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
>>>>>>>>> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
>>>>>>>>> +
>>>>>>>>>       #endif /* _LINUX_VDPA_H */
>>>>>>>>> diff --git a/include/uapi/linux/vdpa.h
>>>>>>>>> b/include/uapi/linux/vdpa.h index 1061d8d2d09d..c5f229a41dc2
>>>>>>>>> 100644
>>>>>>>>> --- a/include/uapi/linux/vdpa.h
>>>>>>>>> +++ b/include/uapi/linux/vdpa.h
>>>>>>>>> @@ -18,6 +18,7 @@ enum vdpa_command {
>>>>>>>>>       	VDPA_CMD_DEV_DEL,
>>>>>>>>>       	VDPA_CMD_DEV_GET,		/* can dump */
>>>>>>>>>       	VDPA_CMD_DEV_CONFIG_GET,	/* can dump */
>>>>>>>>> +	VDPA_CMD_DEV_VSTATS_GET,
>>>>>>>>>       };
>>>>>>>>>       enum vdpa_attr {
>>>>>>>>> @@ -46,6 +47,12 @@ enum vdpa_attr {
>>>>>>>>>       	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
>>>>>>>>>       	VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,		/*
>>>> u32 */
>>>>>>>>>       	VDPA_ATTR_DEV_SUPPORTED_FEATURES,	/* u64 */
>>>>>>>>> +
>>>>>>>>> +	VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
>>>>>>>>> +	VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
>>>>>>>>> +	VDPA_ATTR_DEV_VENDOR_ATTR_NAME,		/*
>> string */
>>>>>>>>> +	VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
>>>>>>>>> +
>>>>>>>>>       	/* new attributes must be added above here */
>>>>>>>>>       	VDPA_ATTR_MAX,
>>>>>>>>>       };


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
       [not found]                     ` <DM8PR12MB54000042A48FDFA446EFE792AB0E9@DM8PR12MB5400.namprd12.prod.outlook.com>
@ 2022-03-14  6:25                       ` Jason Wang
  2022-03-15  8:11                         ` Si-Wei Liu
  2022-03-15  7:53                       ` Si-Wei Liu
  1 sibling, 1 reply; 20+ messages in thread
From: Jason Wang @ 2022-03-14  6:25 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma, Si-Wei Liu



On Sun, Mar 13, 2022 at 11:26 PM Eli Cohen <elic@nvidia.com> wrote:

> > On 3/8/2022 9:07 PM, Eli Cohen wrote:
> > >
> > >> -----Original Message-----
> > >> From: Si-Wei Liu <si-wei.liu@oracle.com>
> > >> Sent: Wednesday, March 9, 2022 5:33 AM
> > >> To: Eli Cohen <elic@nvidia.com>
> > >> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-
> > >> foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
> > >> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit <parav@nvidia.com>
> > >> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
> > >>
> > >>
> > >>
> > >> On 3/8/2022 6:13 AM, Eli Cohen wrote:
> > >>>> -----Original Message-----
> > >>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
> > >>>> Sent: Tuesday, March 8, 2022 8:16 AM
> > >>>> To: Eli Cohen <elic@nvidia.com>
> > >>>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-
> > >>>> foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
> > >>>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit
> > >>>> <parav@nvidia.com>
> > >>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor
> > >>>> statistics
> > >>>>
> > >>>>
> > >>>>
> > >>>> On 3/6/2022 11:57 PM, Eli Cohen wrote:
> > >>>>>> -----Original Message-----
> > >>>>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
> > >>>>>> Sent: Saturday, March 5, 2022 12:34 AM
> > >>>>>> To: Eli Cohen <elic@nvidia.com>
> > >>>>>> Cc: mst@redhat.com; jasowang@redhat.com;
> > >>>>>> virtualization@lists.linux-foundation.org; eperezma@redhat.com;
> > >>>>>> amorenoz@redhat.com; lvivier@redhat.com; sgarzare@redhat.com; Parav
> > >>>>>> Pandit <parav@nvidia.com>
> > >>>>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor
> > >>>>>> statistics
> > >>>>>>
> > >>>>>> Sorry, I somehow missed this after my break. Please see comments in line.
> > >>>>>> On 2/16/2022 10:46 PM, Eli Cohen wrote:
> > >>>>>>> On Wed, Feb 16, 2022 at 10:49:26AM -0800, Si-Wei Liu wrote:
> > >>>>>>>> On 2/16/2022 12:00 AM, Eli Cohen wrote:
> > >>>>>>>>> Allows to read vendor statistics of a vdpa device. The specific
> > >>>>>>>>> statistics data is received by the upstream driver in the form
> > >>>>>>>>> of (attribute name, attribute value) pairs.
> > >>>>>>>>>
> > >>>>>>>>> An example of statistics for mlx5_vdpa device are:
> > >>>>>>>>>
> > >>>>>>>>> received_desc - number of descriptors received by the virtqueue
> > >>>>>>>>> completed_desc - number of descriptors completed by the
> > >>>>>>>>> virtqueue
> > >>>>>>>>>
> > >>>>>>>>> A descriptor using indirect buffers is still counted as 1. In
> > >>>>>>>>> addition, N chained descriptors are counted correctly N times as
> > >>>>>>>>> one would expect.
> > >>>>>>>>> A new callback was added to vdpa_config_ops which provides the
> > >>>>>>>>> means for the vdpa driver to return statistics results.
> > >>>>>>>>>
> > >>>>>>>>> The interface allows for reading all the supported virtqueues,
> > >>>>>>>>> including the control virtqueue if it exists.
> > >>>>>>>>>
> > >>>>>>>>> Below are some examples taken from mlx5_vdpa which are
> > >>>>>>>>> introduced in the following patch:
> > >>>>>>>>>
> > >>>>>>>>> 1. Read statistics for the virtqueue at index 1
> > >>>>>>>>>
> > >>>>>>>>> $ vdpa dev vstats show vdpa-a qidx 1
> > >>>>>>>>> vdpa-a:
> > >>>>>>>>> queue_type tx queue_index 1 received_desc 3844836 completed_desc 3844836
> > >>>>>>>>>
> > >>>>>>>>> 2. Read statistics for the virtqueue at index 32
> > >>>>>>>>> $ vdpa dev vstats show vdpa-a qidx 32
> > >>>>>>>>> vdpa-a:
> > >>>>>>>>> queue_type control_vq queue_index 32 received_desc 62 completed_desc 62
> > >>>>>>>>>
> > >>>>>>>>> 3. Read statistics for the virtqueue at index 0 with json output
> > >>>>>>>>> $ vdpa -j dev vstats show vdpa-a qidx 0
> > >>>>>>>>> {"vstats":{"vdpa-a":{
> > >>>>>>>>> "queue_type":"rx","queue_index":0,"name":"received_desc","value":417776,\
> > >>>>>>>>>       "name":"completed_desc","value":417548}}}
> > >>>>>>>>>
> > >>>>>>>>> 4. Read statistics for the virtqueue at index 0 with pretty json output
> > >>>>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 0
> > >>>>>>>>> {
> > >>>>>>>>>          "vstats": {
> > >>>>>>>>>              "vdpa-a": {
> > >>>>>>>>>
> > >>>>>>>>>                  "queue_type": "rx",
> > >>>>>>>> I wonder where this info can be inferred? I don't see relevant
> > >>>>>>>> change in the patch series that helps gather the
> > >>>>>>>> VDPA_ATTR_DEV_QUEUE_TYPE? Is this an arbitrary string defined by the
> > >>>>>>>> vendor as well? If so, how does the user expect to consume it?
> > >>>>>>>>> The queue type is deduced from the index and whether we have a
> > >>>>>>>>> control virtqueue. Even numbers are rx, odd numbers are tx and if
> > >>>>>>>>> there is a CVQ, the last one is the CVQ.
> > >>>>>> OK, then VDPA_ATTR_DEV_QUEUE_TYPE attribute introduced in this
> > >>>>>> patch might not be useful at all?
> > >>>>> Right, will remove.
> > >>>>>
> > >>>>>> And how do you determine in the vdpa tool if CVQ is negotiated or
> > >>>>>> not?
> > >>>>>>> I make a netlink call to get the same information as "vdpa dev config
> > >>>>>>> show" retrieves. I use the negotiated features to determine if a CVQ is
> > >>>>>>> available. If it is, the number of VQs equals the control VQ index.
> > >>>>>>> So there are two netlink calls under the hood.
> > >>>> The lock vdpa_dev_mutex won't hold across the two separate netlink
> > >>>> calls, and it may end up with inconsistent state - theoretically
> > >>>> things could happen like that the first call gets CVQ negotiated, but
> > >>>> the later call for get_vendor_vq_stats() on the cvq might get -EINVAL
> > >>>> due to device reset. Can the negotiated status and stat query be done
> > >>>> within one single netlink call?
> > >>> I see your concern.
> > >>> The only reason I do the extra call is to know if we have a control VQ
> > >>> and what index it is, just to print a descriptive string telling if it's
> > >>> either an rx, tx or control VQ.
> > >>> So the cure can be simple. Let's have a new attribute that returns the
> > >>> type of virtqueue.
> > >> I am not sure I follow the cure. Wouldn't it be possible to get both
> > >> negotiated status and the queue stat in vdpa_nl_cmd_dev_stats_get_doit()
> > >> under the same vdpa_dev_mutex lock?
> > > Yes we can, but I suggested to get only the type of the queue as a new
> > > attribute. The kernel will do the digest and decide per a given VQ if it's
> > > rx, tx or control and return the result in that new attribute.
> > The rx, tx and control queue type is net specific, while the vdpa core
> > is currently agnostic to the vdpa class.
> >
> > >
> > >> And I am not even sure if it is a must to display
> > >> the queue type - it doesn't seem the output includes the vdpa class info,
> > >> which makes it hard for a script to parse this field in a generic way.
> > > I don't get you. You say you don't think you need the queue type and at
> > > the same time you're concerned lack of information makes it hard for scripts.
> > > BTW, class info is something you can get for the device through "vdpa dev
> > > show" so you know the class of your device.
> > Stepping back, may I ask if there's a case that queue type specific stat
> > may be defined by vendor, such that deciphering of certain vendor stat
> > would need type specific knowledge? So far the received_desc and
> > completed_desc stats offered through the mlx5_vdpa patch look to be
> > general ones and not associated with any queue type in particular. Is
> > there some future stat in your mind that needs specific knowledge of
> > queue type and vdpa class?
>
> No, the only reason for displaying the queue type is to help users
> know what kind of queue they're looking at.
>
> >
> > I'd prefer the vstat output to be self-contained and self-descriptive.
> > You may argue the class of vdpa never changes in "vdpa dev show" after
> > creation. This is true, however the queue type is not - say you got a
> > control queue for qindex 2, but the next moment you may get an rx queue
> > with the same qindex.
>
> I don't think this is possible unless you destroyed the device and
> re-created it.
> What operation do you think could cause that?
>
> > Particularly you seem to want to tie this with queue
> > index in the guest view, which is quite dynamic for host admin or script
> > running on the host to follow.
>
> For rx and tx queues, some index may become invalid if the user changed
> the number of queues with ethtool -L but I don't think this is an issue.
> >
> > >
> > >>>    I think Jason did not like the idea of communicating the kind of VQ
> > >>> from kernel to userspace but under these circumstances, maybe he would
> > >>> approve.
> > >>> Jason?
> > >>>
> > >>>> What worried me is that the queue index being dynamic and dependent on
> > >>>> negotiation status would make it quite hard for the host admin user to
> > >>>> follow. The guest may or may not advertise F_MQ and/or F_CTRL_VQ across
> > >>>> various phases, e.g. firmware (UEFI), boot loader (grub) till the OS
> > >>>> driver is up and running, which can be agnostic to the host admin. For
> > >>>> the most part it's not easy to script and predict the queue index, which
> > >>>> can change from time to time. Can we define an order of host predictable
> > >>>> queue index which is independent from any guest negotiated state?
> > >> Here I think we can just use the plain queue index in the host view - say
> > >> if vdpa net has 4 pairs of data vqs and 1 control vq, user may use qindex
> > >> 8 across the board to identify the control vq, regardless if the F_MQ
> > >> feature is negotiated or not in guest.
> > > Right, but the idea is that a userspace tool should provide useful
> > > information to the user so it does not need to do complex logic to infer
> > > that from bare data.
> > The host side qindex and qtype would never change regardless of guest
> > feature negotiation, by nature it reflects the real construct and object
> > in the hardware.


This should be possible for vendor-specific stats. But I'm afraid it may
cause more confusion since the spec doesn't have a concept like "host
queue index". And to be self-descriptive the vendor would also need to
display the mapping between virtqueue index and host (vendor) queue index.
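
For illustration only, a minimal sketch of how such a mapping could be
emitted, assuming a hypothetical VDPA_ATTR_DEV_QUEUE_INDEX_DRIVER
attribute that is not defined by this patch:

static int vdpa_fill_queue_indices(struct sk_buff *msg, u16 host_idx,
				   u16 driver_idx)
{
	/* host (vendor) view of the queue index */
	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, host_idx))
		return -EMSGSIZE;
	/* guest (driver) view, dependent on negotiated features */
	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX_DRIVER, driver_idx))
		return -EMSGSIZE;
	return 0;
}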

Thanks


> > I don't feel it's a simple task for host users to
> > figure out the correct guest side qindex for the control queue -  it's
> > always racy for one to check some other vdpa command output if the vstat
> > output is not self-contained.
>
> So what are you actually proposing? Display received and completed
> descriptors
> per queue index without further interpretation?
>
> >
> > Thanks,
> > -Siwei
> >
> > >
> > >>
> > >> Regards,
> > >> -Siwei
> > >>
> > >>>>>> Looks to me there are still some loose ends I don't quite yet
> > >>>>>> understand.
> > >>>>>>
> > >>>>>>
> > >>>>>>>>>                  "queue_index": 0,
> > >>>>> I think this can be removed since the command is for a specific index.
> > >>>>>
> > >>>>>>>>>                  "name": "received_desc",
> > >>>>>>>>>                  "value": 417776,
> > >>>>>>>>>                  "name": "completed_desc",
> > >>>>>>>>>                  "value": 417548
> > >>>>>>>> Not for this kernel patch, but IMHO it's the best to put the name
> > >>>>>>>> & value pairs in an array instead of flat entries in json's
> > >>>>>>>> hash/dictionary. The hash entries can be re-ordered deliberately
> > >>>>>>>> by external json parsing tool, ending up with inconsistent stat values.
> > >>>>>> This comment is missed for some reason. Please change the example
> > >>>>>> in the log if you agree to address it in vdpa tool. Or justify why
> > >>>>>> keeping the order for json hash/dictionary is fine.
> > >>>>> Sorry for skipping this comment.
> > >>>>> Do you mean to present the information like:
> > >>>>> "received_desc": 417776,
> > >>>>> "completed_desc": 417548,
> > >>>> I mean the following presentation:
> > >>>>
> > >>>> $ vdpa -jp dev vstats show vdpa-a qidx 0
> > >>>> {
> > >>>>        "vstats": {
> > >>>>            "vdpa-a": {
> > >>>>                "queue_stats": [{
> > >>>>                    "queue_index": 0,
> > >>>>                    "queue_type": "rx",
> > >>>>                    "stat_name": [ "received_desc","completed_desc"
> ],
> > >>>>                    "stat_value": [ 417776,417548 ],
> > >>>>                }]
> > >>>>            }
> > >>>>        }
> > >>>> }
> > >>>>
> > >>>> I think Parav had similar suggestion, too.
> > >>>>
> > >>>> Thanks,
> > >>>> -Siwei
> > >>>>
> > >>>>>> Thanks,
> > >>>>>> -Siwei
> > >>>>>>
> > >>>>>>>> Thanks,
> > >>>>>>>> -Siwei
> > >>>>>>>>>              }
> > >>>>>>>>>          }
> > >>>>>>>>> }
> > >>>>>>>>>
> > >>>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
> > >>>>>>>>> ---
> > >>>>>>>>>       drivers/vdpa/vdpa.c       | 129 ++++++++++++++++++++++++++++++++++++++
> > >>>>>>>>>       include/linux/vdpa.h      |   5 ++
> > >>>>>>>>>       include/uapi/linux/vdpa.h |   7 +++
> > >>>>>>>>>       3 files changed, 141 insertions(+)
> > >>>>>>>>>
> > >>>>>>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index
> > >>>>>>>>> 9846c9de4bfa..d0ff671baf88 100644
> > >>>>>>>>> --- a/drivers/vdpa/vdpa.c
> > >>>>>>>>> +++ b/drivers/vdpa/vdpa.c
> > >>>>>>>>> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device
> > >>>>>>>>> *vdev,
> > >>>>>> struct sk_buff *msg, u32 portid,
> > >>>>>>>>>             return err;
> > >>>>>>>>>       }
> > >>>>>>>>> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev,
> struct
> > >>>>>>>>> +sk_buff
> > >>>>>> *msg,
> > >>>>>>>>> +                          struct genl_info *info, u32 index) {
> > >>>>>>>>> +   int err;
> > >>>>>>>>> +
> > >>>>>>>>> +   if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
> > >>>>>>>>> +           return -EMSGSIZE;
> > >>>>>>>>> +
> > >>>>>>>>> +   err = vdev->config->get_vendor_vq_stats(vdev, index, msg,
> > >>>>>>>>> +info-
> > >>>>>>> extack);
> > >>>>>>>>> +   if (err)
> > >>>>>>>>> +           return err;
> > >>>>>>>>> +
> > >>>>>>>>> +   return 0;
> > >>>>>>>>> +}
> > >>>>>>>>> +
> > >>>>>>>>> +static int vendor_stats_fill(struct vdpa_device *vdev, struct
> > >>>>>>>>> +sk_buff
> > >>>> *msg,
> > >>>>>>>>> +                        struct genl_info *info, u32 index) {
> > >>>>>>>>> +   int err;
> > >>>>>>>>> +
> > >>>>>>>>> +   if (!vdev->config->get_vendor_vq_stats)
> > >>>>>>>>> +           return -EOPNOTSUPP;
> > >>>>>>>>> +
> > >>>>>>>>> +   err = vdpa_fill_stats_rec(vdev, msg, info, index);
> > >>>>>>>>> +   if (err)
> > >>>>>>>>> +           return err;
> > >>>>>>>>> +
> > >>>>>>>>> +   return 0;
> > >>>>>>>>> +}
> > >>>>>>>>> +
> > >>>>>>>>> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device
> *vdev,
> > >>>>>>>>> +                                 struct sk_buff *msg,
> > >>>>>>>>> +                                 struct genl_info *info, u32
> index) {
> > >>>>>>>>> +   u32 device_id;
> > >>>>>>>>> +   void *hdr;
> > >>>>>>>>> +   int err;
> > >>>>>>>>> +   u32 portid = info->snd_portid;
> > >>>>>>>>> +   u32 seq = info->snd_seq;
> > >>>>>>>>> +   u32 flags = 0;
> > >>>>>>>>> +
> > >>>>>>>>> +   hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
> > >>>>>>>>> +                     VDPA_CMD_DEV_VSTATS_GET);
> > >>>>>>>>> +   if (!hdr)
> > >>>>>>>>> +           return -EMSGSIZE;
> > >>>>>>>>> +
> > >>>>>>>>> +   if (nla_put_string(msg, VDPA_ATTR_DEV_NAME,
> > >> dev_name(&vdev-
> > >>>>>>> dev))) {
> > >>>>>>>>> +           err = -EMSGSIZE;
> > >>>>>>>>> +           goto undo_msg;
> > >>>>>>>>> +   }
> > >>>>>>>>> +
> > >>>>>>>>> +   device_id = vdev->config->get_device_id(vdev);
> > >>>>>>>>> +   if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
> > >>>>>>>>> +           err = -EMSGSIZE;
> > >>>>>>>>> +           goto undo_msg;
> > >>>>>>>>> +   }
> > >>>>>>>>> +
> > >>>>>>>>> +   err = vendor_stats_fill(vdev, msg, info, index);
> > >>>>>>>>> +
> > >>>>>>>>> +   genlmsg_end(msg, hdr);
> > >>>>>>>>> +
> > >>>>>>>>> +   return err;
> > >>>>>>>>> +
> > >>>>>>>>> +undo_msg:
> > >>>>>>>>> +   genlmsg_cancel(msg, hdr);
> > >>>>>>>>> +   return err;
> > >>>>>>>>> +}
> > >>>>>>>>> +
> > >>>>>>>>>       static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff
> > >>>>>>>>> *skb, struct
> > >>>>>> genl_info *info)
> > >>>>>>>>>       {
> > >>>>>>>>>             struct vdpa_device *vdev;
> > >>>>>>>>> @@ -990,6 +1058,60 @@
> > >> vdpa_nl_cmd_dev_config_get_dumpit(struct
> > >>>>>> sk_buff *msg, struct netlink_callback *
> > >>>>>>>>>             return msg->len;
> > >>>>>>>>>       }
> > >>>>>>>>> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
> > >>>>>>>>> +                                     struct genl_info *info)
> > >>>>>>>>> +{
> > >>>>>>>>> +   struct vdpa_device *vdev;
> > >>>>>>>>> +   struct sk_buff *msg;
> > >>>>>>>>> +   const char *devname;
> > >>>>>>>>> +   struct device *dev;
> > >>>>>>>>> +   u32 index;
> > >>>>>>>>> +   int err;
> > >>>>>>>>> +
> > >>>>>>>>> +   if (!info->attrs[VDPA_ATTR_DEV_NAME])
> > >>>>>>>>> +           return -EINVAL;
> > >>>>>>>>> +
> > >>>>>>>>> +   if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
> > >>>>>>>>> +           return -EINVAL;
> > >>>>>>>>> +
> > >>>>>>>>> +   devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
> > >>>>>>>>> +   msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > >>>>>>>>> +   if (!msg)
> > >>>>>>>>> +           return -ENOMEM;
> > >>>>>>>>> +
> > >>>>>>>>> +   index = nla_get_u32(info-
> > >>> attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
> > >>>>>>>>> +   mutex_lock(&vdpa_dev_mutex);
> > >>>>>>>>> +   dev = bus_find_device(&vdpa_bus, NULL, devname,
> > >>>>>> vdpa_name_match);
> > >>>>>>>>> +   if (!dev) {
> > >>>>>>>>> +           NL_SET_ERR_MSG_MOD(info->extack, "device not
> > >> found");
> > >>>>>>>>> +           err = -ENODEV;
> > >>>>>>>>> +           goto dev_err;
> > >>>>>>>>> +   }
> > >>>>>>>>> +   vdev = container_of(dev, struct vdpa_device, dev);
> > >>>>>>>>> +   if (!vdev->mdev) {
> > >>>>>>>>> +           NL_SET_ERR_MSG_MOD(info->extack, "unmanaged
> > >> vdpa
> > >>>>>> device");
> > >>>>>>>>> +           err = -EINVAL;
> > >>>>>>>>> +           goto mdev_err;
> > >>>>>>>>> +   }
> > >>>>>>>>> +   err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
> > >>>>>>>>> +   if (!err)
> > >>>>>>>>> +           err = genlmsg_reply(msg, info);
> > >>>>>>>>> +
> > >>>>>>>>> +   put_device(dev);
> > >>>>>>>>> +   mutex_unlock(&vdpa_dev_mutex);
> > >>>>>>>>> +
> > >>>>>>>>> +   if (err)
> > >>>>>>>>> +           nlmsg_free(msg);
> > >>>>>>>>> +
> > >>>>>>>>> +   return err;
> > >>>>>>>>> +
> > >>>>>>>>> +mdev_err:
> > >>>>>>>>> +   put_device(dev);
> > >>>>>>>>> +dev_err:
> > >>>>>>>>> +   mutex_unlock(&vdpa_dev_mutex);
> > >>>>>>>>> +   return err;
> > >>>>>>>>> +}
> > >>>>>>>>> +
> > >>>>>>>>>       static const struct nla_policy
> vdpa_nl_policy[VDPA_ATTR_MAX + 1]
> > >> = {
> > >>>>>>>>>             [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type =
> > >>>> NLA_NUL_STRING },
> > >>>>>>>>>             [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING
> > >>>> }, @@ -
> > >>>>>> 997,6
> > >>>>>>>>> +1119,7 @@ static const struct nla_policy
> > >>>>>> vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
> > >>>>>>>>>             [VDPA_ATTR_DEV_NET_CFG_MACADDR] =
> > >>>> NLA_POLICY_ETH_ADDR,
> > >>>>>>>>>             /* virtio spec 1.1 section 5.1.4.1 for valid MTU
> range */
> > >>>>>>>>>             [VDPA_ATTR_DEV_NET_CFG_MTU] =
> > >>>> NLA_POLICY_MIN(NLA_U16, 68),
> > >>>>>>>>> +   [VDPA_ATTR_DEV_QUEUE_INDEX] =
> > >> NLA_POLICY_RANGE(NLA_U32, 0,
> > >>>>>> 65535),
> > >>>>>>>>>       };
> > >>>>>>>>>       static const struct genl_ops vdpa_nl_ops[] = { @@ -1030,6
> > >>>>>>>>> +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
> > >>>>>>>>>                     .doit = vdpa_nl_cmd_dev_config_get_doit,
> > >>>>>>>>>                     .dumpit =
> vdpa_nl_cmd_dev_config_get_dumpit,
> > >>>>>>>>>             },
> > >>>>>>>>> +   {
> > >>>>>>>>> +           .cmd = VDPA_CMD_DEV_VSTATS_GET,
> > >>>>>>>>> +           .validate = GENL_DONT_VALIDATE_STRICT |
> > >>>>>> GENL_DONT_VALIDATE_DUMP,
> > >>>>>>>>> +           .doit = vdpa_nl_cmd_dev_stats_get_doit,
> > >>>>>>>>> +           .flags = GENL_ADMIN_PERM,
> > >>>>>>>>> +   },
> > >>>>>>>>>       };
> > >>>>>>>>>       static struct genl_family vdpa_nl_family __ro_after_init
> =
> > >>>>>>>>> { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index
> > >>>>>>>>> 2de442ececae..274203845cfc 100644
> > >>>>>>>>> --- a/include/linux/vdpa.h
> > >>>>>>>>> +++ b/include/linux/vdpa.h
> > >>>>>>>>> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
> > >>>>>>>>>                                 const struct vdpa_vq_state
> *state);
> > >>>>>>>>>             int (*get_vq_state)(struct vdpa_device *vdev, u16
> idx,
> > >>>>>>>>>                                 struct vdpa_vq_state *state);
> > >>>>>>>>> +   int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16
> idx,
> > >>>>>>>>> +                              struct sk_buff *msg,
> > >>>>>>>>> +                              struct netlink_ext_ack *extack);
> > >>>>>>>>>             struct vdpa_notification_area
> > >>>>>>>>>             (*get_vq_notification)(struct vdpa_device *vdev,
> u16 idx);
> > >>>>>>>>>             /* vq irq is not expected to be changed once
> DRIVER_OK is
> > >>>>>>>>> set */ @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
> > >>>>>>>>>       int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
> > >>>>>>>>>       void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
> > >>>>>>>>> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
> > >>>>>>>>> +
> > >>>>>>>>>       #endif /* _LINUX_VDPA_H */
> > >>>>>>>>> diff --git a/include/uapi/linux/vdpa.h
> > >>>>>>>>> b/include/uapi/linux/vdpa.h index 1061d8d2d09d..c5f229a41dc2
> > >>>>>>>>> 100644
> > >>>>>>>>> --- a/include/uapi/linux/vdpa.h
> > >>>>>>>>> +++ b/include/uapi/linux/vdpa.h
> > >>>>>>>>> @@ -18,6 +18,7 @@ enum vdpa_command {
> > >>>>>>>>>             VDPA_CMD_DEV_DEL,
> > >>>>>>>>>             VDPA_CMD_DEV_GET,               /* can dump */
> > >>>>>>>>>             VDPA_CMD_DEV_CONFIG_GET,        /* can dump */
> > >>>>>>>>> +   VDPA_CMD_DEV_VSTATS_GET,
> > >>>>>>>>>       };
> > >>>>>>>>>       enum vdpa_attr {
> > >>>>>>>>> @@ -46,6 +47,12 @@ enum vdpa_attr {
> > >>>>>>>>>             VDPA_ATTR_DEV_NEGOTIATED_FEATURES,      /* u64 */
> > >>>>>>>>>             VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,          /*
> > >>>> u32 */
> > >>>>>>>>>             VDPA_ATTR_DEV_SUPPORTED_FEATURES,       /* u64 */
> > >>>>>>>>> +
> > >>>>>>>>> +   VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
> > >>>>>>>>> +   VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
> > >>>>>>>>> +   VDPA_ATTR_DEV_VENDOR_ATTR_NAME,         /*
> > >> string */
> > >>>>>>>>> +   VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
> > >>>>>>>>> +
> > >>>>>>>>>             /* new attributes must be added above here */
> > >>>>>>>>>             VDPA_ATTR_MAX,
> > >>>>>>>>>       };
>
>


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
       [not found]                     ` <DM8PR12MB54000042A48FDFA446EFE792AB0E9@DM8PR12MB5400.namprd12.prod.outlook.com>
  2022-03-14  6:25                       ` Jason Wang
@ 2022-03-15  7:53                       ` Si-Wei Liu
       [not found]                         ` <DM8PR12MB540054565515158F9209723EAB109@DM8PR12MB5400.namprd12.prod.outlook.com>
  1 sibling, 1 reply; 20+ messages in thread
From: Si-Wei Liu @ 2022-03-15  7:53 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma



On 3/13/2022 8:26 AM, Eli Cohen wrote:
>> On 3/8/2022 9:07 PM, Eli Cohen wrote:
>>>> -----Original Message-----
>>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>>>> Sent: Wednesday, March 9, 2022 5:33 AM
>>>> To: Eli Cohen <elic@nvidia.com>
>>>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-
>>>> foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
>>>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit <parav@nvidia.com>
>>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
>>>>
>>>>
>>>>
>>>> On 3/8/2022 6:13 AM, Eli Cohen wrote:
>>>>>> -----Original Message-----
>>>>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>>>>>> Sent: Tuesday, March 8, 2022 8:16 AM
>>>>>> To: Eli Cohen <elic@nvidia.com>
>>>>>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-
>>>>>> foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
>>>>>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit
>>>>>> <parav@nvidia.com>
>>>>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor
>>>>>> statistics
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 3/6/2022 11:57 PM, Eli Cohen wrote:
>>>>>>>> -----Original Message-----
>>>>>>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>>>>>>>> Sent: Saturday, March 5, 2022 12:34 AM
>>>>>>>> To: Eli Cohen <elic@nvidia.com>
>>>>>>>> Cc: mst@redhat.com; jasowang@redhat.com;
>>>>>>>> virtualization@lists.linux-foundation.org; eperezma@redhat.com;
>>>>>>>> amorenoz@redhat.com; lvivier@redhat.com; sgarzare@redhat.com; Parav
>>>>>>>> Pandit <parav@nvidia.com>
>>>>>>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor
>>>>>>>> statistics
>>>>>>>>
>>>>>>>> Sorry, I somehow missed this after my break. Please see comments in line.
>>>>>>>> On 2/16/2022 10:46 PM, Eli Cohen wrote:
>>>>>>>>> On Wed, Feb 16, 2022 at 10:49:26AM -0800, Si-Wei Liu wrote:
>>>>>>>>>> On 2/16/2022 12:00 AM, Eli Cohen wrote:
>>>>>>>>>>> Allows to read vendor statistics of a vdpa device. The specific
>>>>>>>>>>> statistics data is received by the upstream driver in the form
>>>>>>>>>>> of (attribute name, attribute value) pairs.
>>>>>>>>>>>
>>>>>>>>>>> An example of statistics for mlx5_vdpa device are:
>>>>>>>>>>>
>>>>>>>>>>> received_desc - number of descriptors received by the virtqueue
>>>>>>>>>>> completed_desc - number of descriptors completed by the
>>>>>>>>>>> virtqueue
>>>>>>>>>>>
>>>>>>>>>>> A descriptor using indirect buffers is still counted as 1. In
>>>>>>>>>>> addition, N chained descriptors are counted correctly N times as
>>>>>>>>>>> one would expect.
>>>>>>>>>>> A new callback was added to vdpa_config_ops which provides the
>>>>>>>>>>> means for the vdpa driver to return statistics results.
>>>>>>>>>>>
>>>>>>>>>>> The interface allows for reading all the supported virtqueues,
>>>>>>>>>>> including the control virtqueue if it exists.
>>>>>>>>>>>
>>>>>>>>>>> Below are some examples taken from mlx5_vdpa which are
>>>>>>>>>>> introduced in the following patch:
>>>>>>>>>>>
>>>>>>>>>>> 1. Read statistics for the virtqueue at index 1
>>>>>>>>>>>
>>>>>>>>>>> $ vdpa dev vstats show vdpa-a qidx 1
>>>>>>>>>>> vdpa-a:
>>>>>>>>>>> queue_type tx queue_index 1 received_desc 3844836 completed_desc 3844836
>>>>>>>>>>>
>>>>>>>>>>> 2. Read statistics for the virtqueue at index 32
>>>>>>>>>>> $ vdpa dev vstats show vdpa-a qidx 32
>>>>>>>>>>> vdpa-a:
>>>>>>>>>>> queue_type control_vq queue_index 32 received_desc 62 completed_desc 62
>>>>>>>>>>>
>>>>>>>>>>> 3. Read statistics for the virtqueue at index 0 with json output
>>>>>>>>>>> $ vdpa -j dev vstats show vdpa-a qidx 0
>>>>>>>>>>> {"vstats":{"vdpa-a":{
>>>>>>>>>>> "queue_type":"rx","queue_index":0,"name":"received_desc","value":417776,\
>>>>>>>>>>>        "name":"completed_desc","value":417548}}}
>>>>>>>>>>>
>>>>>>>>>>> 4. Read statistics for the virtqueue at index 0 with pretty json output
>>>>>>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 0
>>>>>>>>>>> {
>>>>>>>>>>>           "vstats": {
>>>>>>>>>>>               "vdpa-a": {
>>>>>>>>>>>
>>>>>>>>>>>                   "queue_type": "rx",
>>>>>>>>>> I wonder where this info can be inferred? I don't see relevant
>>>>>>>>>> change in the patch series that helps gather the
>>>>>>>>>> VDPA_ATTR_DEV_QUEUE_TYPE?
>>>>>>>>>> Is this an arbitrary string defined by the vendor as well? If so,
>>>>>>>>>> how does the user expect to consume it?
>>>>>>>>> The queue type is deduced from the index and whether we have a
>>>>>>>>> control virtqueue. Even numbers are rx, odd numbers are tx and if
>>>>>>>>> there is a CVQ, the last one is the CVQ.
>>>>>>>> OK, then VDPA_ATTR_DEV_QUEUE_TYPE attribute introduced in this
>>>>>>>> patch might not be useful at all?
>>>>>>> Right, will remove.
>>>>>>>
>>>>>>>> And how do you determine in the vdpa tool if CVQ is negotiated or
>>>>>>>> not?
>>>>>>> I make a netlink call to get the same information as "vdpa dev config
>>>>>>> show" retrieves. I use the negotiated features to determine if a CVQ is
>>>>>>> available. If it is, the number of VQs equals the control VQ index.
>>>>>>> So there are two netlink calls under the hood.
>>>>>> The lock vdpa_dev_mutex won't hold across the two separate netlink
>>>>>> calls, and it may end up with inconsistent state - theoretically
>>>>>> things could happen like that the first call gets CVQ negotiated, but
>>>>>> the later call for get_vendor_vq_stats() on the cvq might get -EINVAL
>>>>>> due to device reset. Can the negotiated status and stat query be done
>>>>>> within one single netlink call?
>>>>> I see your concern.
>>>>> The only reason I do the extra call is to know if we have a control VQ
>>>>> and what index it is, just to print a descriptive string telling if it's
>>>>> either an rx, tx or control VQ.
>>>>> So the cure can be simple. Let's have a new attribute that returns the
>>>>> type of virtqueue.
>>>> I am not sure I follow the cure. Wouldn't it be possible to get both negotiated
>>>> status and the queue stat in vdpa_nl_cmd_dev_stats_get_doit() under the
>>>> same vdpa_dev_mutex lock?
>>> Yes we can, but I suggested to get only the type of the queue as a new attribute.
>>> The kernel will do the digest and decide per a given VQ if it's rx, tx or control and
>>> return the result in that new attribute.
>> The rx, tx and control queue type is net specific, while the vdpa core
>> is currently agnostic to the vdpa class.
>>
>>>> And I am not even sure if it is a must to display
>>>> the queue type - it doesn't seem the output includes the vdpa class info, which
>>>> makes it hard for a script to parse this field in a generic way.
>>> I don't get you. You say you don't think you need the queue type and at the same
>>> time you're concerned lack of information makes it hard for scripts.
>>> BTW, class info is something you can get for the device through "vdpa dev show"
>>> so you know the class of your device.
>> Stepping back, may I ask if there's a case that queue type specific stat
>> may be defined by vendor, such that deciphering of certain vendor stat
>> would need type specific knowledge? So far the received_desc and
>> completed_desc stats offered through the mlx5_vdpa patch look to be
>> general ones and not associated with any queue type in particular. Is
>> there some future stat in your mind that needs specific knowledge of
>> queue type and vdpa class?
> No, the only reason for displaying the queue type is to help users
> know what kind of queue they're looking at.
>
>> I'd prefer the vstat output to be self-contained and self-descriptive.
>> You may argue the class of vdpa never changes in "vdpa dev show" after
>> creation. This is true, however the queue type is not - say you got a
>> control queue for qindex 2, but the next moment you may get an rx queue
>> with the same qindex.
> I don't think this is possible unless you destroyed the device and re-created it.
> What operation do you think could cause that?
Say you got a vdpa net device created with 4 data queue pairs and a 
control vq. On boot some guest firmware may support just F_CTRL_VQ but 
not F_MQ; the index for the control vq in the guest then ends up as 2, 
since in this case only a single queue pair is enabled for rx (index 0) 
and tx (index 1). From the host driver (e.g. mlx5_vdpa) perspective, the 
control vq is the last vq following the 8 data vqs of all 4 pairs, hence 
it gets index 8. Since F_MQ is not negotiated and only one data queue 
pair is enabled, in that case only host qindex 0, 1 and 8 have vendor 
stats available, and the remaining qindexes would return invalid/empty 
stats.

Later, say boot continues towards loading the Linux virtio driver, and 
the guest successfully negotiates both F_CTRL_VQ and F_MQ. In this case 
all 8 data virtqueues are fully enabled, and the index for the control 
vq ends up as 8, right after all 4 data queue pairs. Only once both 
features are negotiated do the guest and host share a consistent view 
in identifying the control vq. Since F_MQ is negotiated, all host 
queues, indexed from 0 through 8, should have vendor stats available.

That's why I said the guest qindex is ephemeral and hard to predict, 
subject to negotiated features, whereas the host qindex is reliable and 
better suited for command-line identification purposes.
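
To illustrate with a sketch (assuming the 4-data-queue-pair device from 
the example above; this is not code from the patch):

#include <stdbool.h>
#include <stdint.h>

#define DATA_VQ_PAIRS 4		/* fixed at device creation */

/* Host view: the control vq always follows all data vqs the device owns. */
static uint16_t host_cvq_index(void)
{
	return 2 * DATA_VQ_PAIRS;	/* always 8 in this example */
}

/* Guest view: depends on whether F_MQ was negotiated. */
static uint16_t guest_cvq_index(bool f_mq_negotiated)
{
	if (f_mq_negotiated)
		return 2 * DATA_VQ_PAIRS;	/* 8: all pairs visible */
	return 2;			/* one pair: rx = 0, tx = 1, cvq = 2 */
}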

>
>> Particularly you seem want to tie this with queue
>> index in the guest view, which is quite dynamic for host admin or script
>> running on the host to follow.
> For rx and tx queues, some index may become invalid if the user changed
> the number of queues with ethtool -L but I don't think this is an issue.
This is irrelevant. ethtool -L may only change the effective number of 
vqs in use (even if it ends up with a single queue pair), but it would 
never flip feature negotiation around F_MQ.

>>>>>     I think Jason did not like the idea of communicating the kind of VQ
>>>>> from kernel to userspace but under these circumstances, maybe he would
>>>>> approve.
>>>>> Jason?
>>>>>
>>>>>> What worried me is that the queue index being dynamic and dependent on
>>>>>> negotiation status would make it quite hard for the host admin user to
>>>>>> follow. The guest may or may not advertise F_MQ and/or F_CTRL_VQ across
>>>>>> various phases, e.g. firmware (UEFI), boot loader (grub) till the OS
>>>>>> driver is up and running, which can be agnostic to the host admin. For
>>>>>> the most part it's not easy to script and predict the queue index, which
>>>>>> can change from time to time. Can we define an order of host predictable
>>>>>> queue index which is independent from any guest negotiated state?
>>>> Here I think we can just use the plain queue index in the host view - say if vdpa
>>>> net has 4 pairs of data vqs and 1 control vq, user may use qindex 8 across the
>>>> board to identify the control vq, regardless if the F_MQ feature is negotiated
>>>> or not in guest.
>>> Right, but the idea is that a userspace tool should provide useful information to the
>>> user so it does not need to do complex logic to infer that from bare data.
>> The host side qindex and qtype would never change regardless of guest
>> feature negotiation, by nature it reflects the real construct and object
>> in the hardware. I don't feel it's a simple task for host users to
>> figure out the correct guest side qindex for the control queue -  it's
>> always racy for one to check some other vdpa command output if the vstat
>> output is not self-contained.
> So what are you actually proposing? Display received and completed descriptors
> per queue index without further interpretation?

I'd suggest using a more stable queue id, i.e. the host queue index, to 
represent the qidx (which seems to be what you're doing now?), and 
displaying both the host qindex (queue_index_device in the example 
below) and the guest's (queue_index_driver below) in the output:

$ vdpa -jp dev vstats show vdpa-a qidx 8
{
     "vstats": {
         "vdpa-a": {
             "queue_stats": [{
                 "queue_index_device": 8,
                 "queue_index_driver": 2,
                 "queue_type": "control_vq",
                 "stat_name": [ "received_desc","completed_desc" ],
                 "stat_value": [ 417776,417775 ],
             }]
         }
     }
}

Optionally, the user may use the guest queue index gqidx, which is an 
ephemeral ID dependent on F_MQ negotiation, to query the stats of a 
specific guest queue:

$ vdpa -jp dev vstats show vdpa-a gqidx 2
{
     "vstats": {
         "vdpa-a": {
             "queue_stats": [{
                 "queue_index_device": 8,
                 "queue_index_driver": 2,
                 "queue_type": "control_vq",
                 "stat_name": [ "received_desc","completed_desc" ],
                 "stat_value": [ 417776,417775 ],
             }]
         }
     }
}
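
With this layout a consumer can walk stat_name and stat_value pairwise, 
which stays correct even if a json tool reorders hash entries. A rough 
sketch using json-c (an assumed dependency, purely for illustration):

#include <stdio.h>
#include <json-c/json.h>

static void print_queue_stats(struct json_object *qstat)
{
	struct json_object *names, *values;
	size_t i, n;

	if (!json_object_object_get_ex(qstat, "stat_name", &names) ||
	    !json_object_object_get_ex(qstat, "stat_value", &values))
		return;

	n = json_object_array_length(names);
	for (i = 0; i < n; i++) {
		const char *name = json_object_get_string(
				json_object_array_get_idx(names, i));
		int64_t val = json_object_get_int64(
				json_object_array_get_idx(values, i));
		printf("%s: %lld\n", name, (long long)val);
	}
}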

Thanks,
-Siwei

>
>> Thanks,
>> -Siwei
>>
>>>> Regards,
>>>> -Siwei
>>>>
>>>>>>>> Looks to me there are still some loose ends I don't quite yet
>>>>>>>> understand.
>>>>>>>>
>>>>>>>>
>>>>>>>>>>>                   "queue_index": 0,
>>>>>>> I think this can be removed since the command is for a specific index.
>>>>>>>
>>>>>>>>>>>                   "name": "received_desc",
>>>>>>>>>>>                   "value": 417776,
>>>>>>>>>>>                   "name": "completed_desc",
>>>>>>>>>>>                   "value": 417548
>>>>>>>>>> Not for this kernel patch, but IMHO it's the best to put the name
>>>>>>>>>> & value pairs in an array instead of flat entries in json's
>>>>>>>>>> hash/dictionary. The hash entries can be re-ordered deliberately
>>>>>>>>>> by external json parsing tool, ending up with inconsistent stat values.
>>>>>>>> This comment is missed for some reason. Please change the example
>>>>>>>> in the log if you agree to address it in vdpa tool. Or justify why
>>>>>>>> keeping the order for json hash/dictionary is fine.
>>>>>>> Sorry for skipping this comment.
>>>>>>> Do you mean to present the information like:
>>>>>>> "received_desc": 417776,
>>>>>>> "completed_desc": 417548,
>>>>>> I mean the following presentation:
>>>>>>
>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 0
>>>>>> {
>>>>>>         "vstats": {
>>>>>>             "vdpa-a": {
>>>>>>                 "queue_stats": [{
>>>>>>                     "queue_index": 0,
>>>>>>                     "queue_type": "rx",
>>>>>>                     "stat_name": [ "received_desc","completed_desc" ],
>>>>>>                     "stat_value": [ 417776,417548 ],
>>>>>>                 }]
>>>>>>             }
>>>>>>         }
>>>>>> }
>>>>>>
>>>>>> I think Parav had similar suggestion, too.
>>>>>>
>>>>>> Thanks,
>>>>>> -Siwei
>>>>>>
>>>>>>>> Thanks,
>>>>>>>> -Siwei
>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>> -Siwei
>>>>>>>>>>>               }
>>>>>>>>>>>           }
>>>>>>>>>>> }
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>>>>>>>> ---
>>>>>>>>>>>        drivers/vdpa/vdpa.c       | 129 ++++++++++++++++++++++++++++++++++++++
>>>>>>>>>>>        include/linux/vdpa.h      |   5 ++
>>>>>>>>>>>        include/uapi/linux/vdpa.h |   7 +++
>>>>>>>>>>>        3 files changed, 141 insertions(+)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index
>>>>>>>>>>> 9846c9de4bfa..d0ff671baf88 100644
>>>>>>>>>>> --- a/drivers/vdpa/vdpa.c
>>>>>>>>>>> +++ b/drivers/vdpa/vdpa.c
>>>>>>>>>>> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device
>>>>>>>>>>> *vdev,
>>>>>>>> struct sk_buff *msg, u32 portid,
>>>>>>>>>>>        	return err;
>>>>>>>>>>>        }
>>>>>>>>>>> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct
>>>>>>>>>>> +sk_buff
>>>>>>>> *msg,
>>>>>>>>>>> +			       struct genl_info *info, u32 index) {
>>>>>>>>>>> +	int err;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
>>>>>>>>>>> +		return -EMSGSIZE;
>>>>>>>>>>> +
>>>>>>>>>>> +	err = vdev->config->get_vendor_vq_stats(vdev, index, msg,
>>>>>>>>>>> +info-
>>>>>>>>> extack);
>>>>>>>>>>> +	if (err)
>>>>>>>>>>> +		return err;
>>>>>>>>>>> +
>>>>>>>>>>> +	return 0;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static int vendor_stats_fill(struct vdpa_device *vdev, struct
>>>>>>>>>>> +sk_buff
>>>>>> *msg,
>>>>>>>>>>> +			     struct genl_info *info, u32 index) {
>>>>>>>>>>> +	int err;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!vdev->config->get_vendor_vq_stats)
>>>>>>>>>>> +		return -EOPNOTSUPP;
>>>>>>>>>>> +
>>>>>>>>>>> +	err = vdpa_fill_stats_rec(vdev, msg, info, index);
>>>>>>>>>>> +	if (err)
>>>>>>>>>>> +		return err;
>>>>>>>>>>> +
>>>>>>>>>>> +	return 0;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
>>>>>>>>>>> +				      struct sk_buff *msg,
>>>>>>>>>>> +				      struct genl_info *info, u32 index) {
>>>>>>>>>>> +	u32 device_id;
>>>>>>>>>>> +	void *hdr;
>>>>>>>>>>> +	int err;
>>>>>>>>>>> +	u32 portid = info->snd_portid;
>>>>>>>>>>> +	u32 seq = info->snd_seq;
>>>>>>>>>>> +	u32 flags = 0;
>>>>>>>>>>> +
>>>>>>>>>>> +	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
>>>>>>>>>>> +			  VDPA_CMD_DEV_VSTATS_GET);
>>>>>>>>>>> +	if (!hdr)
>>>>>>>>>>> +		return -EMSGSIZE;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME,
>>>> dev_name(&vdev-
>>>>>>>>> dev))) {
>>>>>>>>>>> +		err = -EMSGSIZE;
>>>>>>>>>>> +		goto undo_msg;
>>>>>>>>>>> +	}
>>>>>>>>>>> +
>>>>>>>>>>> +	device_id = vdev->config->get_device_id(vdev);
>>>>>>>>>>> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
>>>>>>>>>>> +		err = -EMSGSIZE;
>>>>>>>>>>> +		goto undo_msg;
>>>>>>>>>>> +	}
>>>>>>>>>>> +
>>>>>>>>>>> +	err = vendor_stats_fill(vdev, msg, info, index);
>>>>>>>>>>> +
>>>>>>>>>>> +	genlmsg_end(msg, hdr);
>>>>>>>>>>> +
>>>>>>>>>>> +	return err;
>>>>>>>>>>> +
>>>>>>>>>>> +undo_msg:
>>>>>>>>>>> +	genlmsg_cancel(msg, hdr);
>>>>>>>>>>> +	return err;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>>        static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff
>>>>>>>>>>> *skb, struct
>>>>>>>> genl_info *info)
>>>>>>>>>>>        {
>>>>>>>>>>>        	struct vdpa_device *vdev;
>>>>>>>>>>> @@ -990,6 +1058,60 @@
>>>> vdpa_nl_cmd_dev_config_get_dumpit(struct
>>>>>>>> sk_buff *msg, struct netlink_callback *
>>>>>>>>>>>        	return msg->len;
>>>>>>>>>>>        }
>>>>>>>>>>> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
>>>>>>>>>>> +					  struct genl_info *info)
>>>>>>>>>>> +{
>>>>>>>>>>> +	struct vdpa_device *vdev;
>>>>>>>>>>> +	struct sk_buff *msg;
>>>>>>>>>>> +	const char *devname;
>>>>>>>>>>> +	struct device *dev;
>>>>>>>>>>> +	u32 index;
>>>>>>>>>>> +	int err;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!info->attrs[VDPA_ATTR_DEV_NAME])
>>>>>>>>>>> +		return -EINVAL;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
>>>>>>>>>>> +		return -EINVAL;
>>>>>>>>>>> +
>>>>>>>>>>> +	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
>>>>>>>>>>> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
>>>>>>>>>>> +	if (!msg)
>>>>>>>>>>> +		return -ENOMEM;
>>>>>>>>>>> +
>>>>>>>>>>> +	index = nla_get_u32(info-
>>>>> attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
>>>>>>>>>>> +	mutex_lock(&vdpa_dev_mutex);
>>>>>>>>>>> +	dev = bus_find_device(&vdpa_bus, NULL, devname,
>>>>>>>> vdpa_name_match);
>>>>>>>>>>> +	if (!dev) {
>>>>>>>>>>> +		NL_SET_ERR_MSG_MOD(info->extack, "device not
>>>> found");
>>>>>>>>>>> +		err = -ENODEV;
>>>>>>>>>>> +		goto dev_err;
>>>>>>>>>>> +	}
>>>>>>>>>>> +	vdev = container_of(dev, struct vdpa_device, dev);
>>>>>>>>>>> +	if (!vdev->mdev) {
>>>>>>>>>>> +		NL_SET_ERR_MSG_MOD(info->extack, "unmanaged
>>>> vdpa
>>>>>>>> device");
>>>>>>>>>>> +		err = -EINVAL;
>>>>>>>>>>> +		goto mdev_err;
>>>>>>>>>>> +	}
>>>>>>>>>>> +	err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
>>>>>>>>>>> +	if (!err)
>>>>>>>>>>> +		err = genlmsg_reply(msg, info);
>>>>>>>>>>> +
>>>>>>>>>>> +	put_device(dev);
>>>>>>>>>>> +	mutex_unlock(&vdpa_dev_mutex);
>>>>>>>>>>> +
>>>>>>>>>>> +	if (err)
>>>>>>>>>>> +		nlmsg_free(msg);
>>>>>>>>>>> +
>>>>>>>>>>> +	return err;
>>>>>>>>>>> +
>>>>>>>>>>> +mdev_err:
>>>>>>>>>>> +	put_device(dev);
>>>>>>>>>>> +dev_err:
>>>>>>>>>>> +	mutex_unlock(&vdpa_dev_mutex);
>>>>>>>>>>> +	return err;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>>        static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1]
>>>> = {
>>>>>>>>>>>        	[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type =
>>>>>> NLA_NUL_STRING },
>>>>>>>>>>>        	[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING
>>>>>> }, @@ -
>>>>>>>> 997,6
>>>>>>>>>>> +1119,7 @@ static const struct nla_policy
>>>>>>>> vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>>>>>>>>>>>        	[VDPA_ATTR_DEV_NET_CFG_MACADDR] =
>>>>>> NLA_POLICY_ETH_ADDR,
>>>>>>>>>>>        	/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
>>>>>>>>>>>        	[VDPA_ATTR_DEV_NET_CFG_MTU] =
>>>>>> NLA_POLICY_MIN(NLA_U16, 68),
>>>>>>>>>>> +	[VDPA_ATTR_DEV_QUEUE_INDEX] =
>>>> NLA_POLICY_RANGE(NLA_U32, 0,
>>>>>>>> 65535),
>>>>>>>>>>>        };
>>>>>>>>>>>        static const struct genl_ops vdpa_nl_ops[] = { @@ -1030,6
>>>>>>>>>>> +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
>>>>>>>>>>>        		.doit = vdpa_nl_cmd_dev_config_get_doit,
>>>>>>>>>>>        		.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
>>>>>>>>>>>        	},
>>>>>>>>>>> +	{
>>>>>>>>>>> +		.cmd = VDPA_CMD_DEV_VSTATS_GET,
>>>>>>>>>>> +		.validate = GENL_DONT_VALIDATE_STRICT |
>>>>>>>> GENL_DONT_VALIDATE_DUMP,
>>>>>>>>>>> +		.doit = vdpa_nl_cmd_dev_stats_get_doit,
>>>>>>>>>>> +		.flags = GENL_ADMIN_PERM,
>>>>>>>>>>> +	},
>>>>>>>>>>>        };
>>>>>>>>>>>        static struct genl_family vdpa_nl_family __ro_after_init =
>>>>>>>>>>> { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index
>>>>>>>>>>> 2de442ececae..274203845cfc 100644
>>>>>>>>>>> --- a/include/linux/vdpa.h
>>>>>>>>>>> +++ b/include/linux/vdpa.h
>>>>>>>>>>> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
>>>>>>>>>>>        			    const struct vdpa_vq_state *state);
>>>>>>>>>>>        	int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
>>>>>>>>>>>        			    struct vdpa_vq_state *state);
>>>>>>>>>>> +	int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx,
>>>>>>>>>>> +				   struct sk_buff *msg,
>>>>>>>>>>> +				   struct netlink_ext_ack *extack);
>>>>>>>>>>>        	struct vdpa_notification_area
>>>>>>>>>>>        	(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
>>>>>>>>>>>        	/* vq irq is not expected to be changed once DRIVER_OK is
>>>>>>>>>>> set */ @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
>>>>>>>>>>>        int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
>>>>>>>>>>>        void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
>>>>>>>>>>> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
>>>>>>>>>>> +
>>>>>>>>>>>        #endif /* _LINUX_VDPA_H */
>>>>>>>>>>> diff --git a/include/uapi/linux/vdpa.h
>>>>>>>>>>> b/include/uapi/linux/vdpa.h index 1061d8d2d09d..c5f229a41dc2
>>>>>>>>>>> 100644
>>>>>>>>>>> --- a/include/uapi/linux/vdpa.h
>>>>>>>>>>> +++ b/include/uapi/linux/vdpa.h
>>>>>>>>>>> @@ -18,6 +18,7 @@ enum vdpa_command {
>>>>>>>>>>>        	VDPA_CMD_DEV_DEL,
>>>>>>>>>>>        	VDPA_CMD_DEV_GET,		/* can dump */
>>>>>>>>>>>        	VDPA_CMD_DEV_CONFIG_GET,	/* can dump */
>>>>>>>>>>> +	VDPA_CMD_DEV_VSTATS_GET,
>>>>>>>>>>>        };
>>>>>>>>>>>        enum vdpa_attr {
>>>>>>>>>>> @@ -46,6 +47,12 @@ enum vdpa_attr {
>>>>>>>>>>>        	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
>>>>>>>>>>>        	VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,		/*
>>>>>> u32 */
>>>>>>>>>>>        	VDPA_ATTR_DEV_SUPPORTED_FEATURES,	/* u64 */
>>>>>>>>>>> +
>>>>>>>>>>> +	VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
>>>>>>>>>>> +	VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
>>>>>>>>>>> +	VDPA_ATTR_DEV_VENDOR_ATTR_NAME,		/*
>>>> string */
>>>>>>>>>>> +	VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
>>>>>>>>>>> +
>>>>>>>>>>>        	/* new attributes must be added above here */
>>>>>>>>>>>        	VDPA_ATTR_MAX,
>>>>>>>>>>>        };


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
  2022-03-14  6:25                       ` Jason Wang
@ 2022-03-15  8:11                         ` Si-Wei Liu
  0 siblings, 0 replies; 20+ messages in thread
From: Si-Wei Liu @ 2022-03-15  8:11 UTC (permalink / raw)
  To: Jason Wang, Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma





On 3/13/2022 11:25 PM, Jason Wang wrote:
>
>
> On Sun, Mar 13, 2022 at 11:26 PM Eli Cohen <elic@nvidia.com> wrote:
>
>     > On 3/8/2022 9:07 PM, Eli Cohen wrote:
>     > >
>     > >> -----Original Message-----
>     > >> From: Si-Wei Liu <si-wei.liu@oracle.com>
>     > >> Sent: Wednesday, March 9, 2022 5:33 AM
>     > >> To: Eli Cohen <elic@nvidia.com>
>     > >> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-
>     > >> foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
>     > >> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit
>     <parav@nvidia.com>
>     > >> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
>     > >>
>     > >>
>     > >>
>     > >> On 3/8/2022 6:13 AM, Eli Cohen wrote:
>     > >>>> -----Original Message-----
>     > >>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>     > >>>> Sent: Tuesday, March 8, 2022 8:16 AM
>     > >>>> To: Eli Cohen <elic@nvidia.com>
>     > >>>> Cc: mst@redhat.com; jasowang@redhat.com;
>     virtualization@lists.linux-
>     > >>>> foundation.org
>     eperezma@redhat.com; amorenoz@redhat.com;
>     > >>>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit
>     > >>>> <parav@nvidia.com>
>     > >>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying
>     vendor
>     > >>>> statistics
>     > >>>>
>     > >>>>
>     > >>>>
>     > >>>> On 3/6/2022 11:57 PM, Eli Cohen wrote:
>     > >>>>>> -----Original Message-----
>     > >>>>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>     > >>>>>> Sent: Saturday, March 5, 2022 12:34 AM
>     > >>>>>> To: Eli Cohen <elic@nvidia.com>
>     > >>>>>> Cc: mst@redhat.com; jasowang@redhat.com;
>     > >>>>>> virtualization@lists.linux- foundation.org
>     eperezma@redhat.com;
>     > >>>>>> amorenoz@redhat.com; lvivier@redhat.com; sgarzare@redhat.com;
>     > >> Parav
>     > >>>>>> Pandit <parav@nvidia.com>
>     > >>>>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for
>     querying vendor
>     > >>>>>> statistics
>     > >>>>>>
>     > >>>>>> Sorry, I somehow missed this after my break. Please see
>     comments in
>     > >> line.
>     > >>>>>> On 2/16/2022 10:46 PM, Eli Cohen wrote:
>     > >>>>>>> On Wed, Feb 16, 2022 at 10:49:26AM -0800, Si-Wei Liu wrote:
>     > >>>>>>>> On 2/16/2022 12:00 AM, Eli Cohen wrote:
>     > >>>>>>>>> Allows to read vendor statistics of a vdpa device. The
>     specific
>     > >>>>>>>>> statistics data is received by the upstream driver in
>     the form
>     > >>>>>>>>> of an (attribute name, attribute value) pairs.
>     > >>>>>>>>>
>     > >>>>>>>>> An example of statistics for mlx5_vdpa device are:
>     > >>>>>>>>>
>     > >>>>>>>>> received_desc - number of descriptors received by the
>     virtqueue
>     > >>>>>>>>> completed_desc - number of descriptors completed by the
>     > >>>>>>>>> virtqueue
>     > >>>>>>>>>
>     > >>>>>>>>> A descriptor using indirect buffers is still counted
>     as 1. In
>     > >>>>>>>>> addition, N chained descriptors are counted correctly
>     N times as
>     > >>>>>>>>> one
>     > >>>>>> would expect.
>     > >>>>>>>>> A new callback was added to vdpa_config_ops which
>     provides the
>     > >>>>>>>>> means for the vdpa driver to return statistics results.
>     > >>>>>>>>>
>     > >>>>>>>>> The interface allows for reading all the supported
>     virtqueues,
>     > >>>>>>>>> including the control virtqueue if it exists.
>     > >>>>>>>>>
>     > >>>>>>>>> Below are some examples taken from mlx5_vdpa which are
>     > >>>>>>>>> introduced in the following patch:
>     > >>>>>>>>>
>     > >>>>>>>>> 1. Read statistics for the virtqueue at index 1
>     > >>>>>>>>>
>     > >>>>>>>>> $ vdpa dev vstats show vdpa-a qidx 1
>     > >>>>>>>>> vdpa-a:
>     > >>>>>>>>> queue_type tx queue_index 1 received_desc 3844836
>     > >> completed_desc
>     > >>>>>>>>> 3844836
>     > >>>>>>>>>
>     > >>>>>>>>> 2. Read statistics for the virtqueue at index 32 $
>     vdpa dev
>     > >>>>>>>>> vstats show vdpa-a qidx 32
>     > >>>>>>>>> vdpa-a:
>     > >>>>>>>>> queue_type control_vq queue_index 32 received_desc 62
>     > >>>>>>>>> completed_desc
>     > >>>>>>>>> 62
>     > >>>>>>>>>
>     > >>>>>>>>> 3. Read statistics for the virtqueue at index 0 with json
>     > >>>>>>>>> output $ vdpa -j dev vstats show vdpa-a qidx 0
>     > >>>>>>>>> {"vstats":{"vdpa-a":{
>     > >>>>>>>>>
>     > >>>>>>
>     "queue_type":"rx","queue_index":0,"name":"received_desc","value":41
>     > >>>>>> 77
>     > >>>>>> 76,\
>     > >>>>>>>>>  "name":"completed_desc","value":417548}}}
>     > >>>>>>>>>
>     > >>>>>>>>> 4. Read statistics for the virtqueue at index 0 with
>     pretty json
>     > >>>>>>>>> output $ vdpa -jp dev vstats show vdpa-a qidx 0 {
>     > >>>>>>>>>          "vstats": {
>     > >>>>>>>>> "vdpa-a": {
>     > >>>>>>>>>
>     > >>>>>>>>> "queue_type": "rx",
>     > >>>>>>>> I wonder where this info can be inferred? I don't see
>     relevant
>     > >>>>>>>> change in the patch series that helps gather the
>     > >>>> VDPA_ATTR_DEV_QUEUE_TYPE?
>     > >>>>>>>> Is this an arbitrary string defined by the vendor as
>     well? If so,
>     > >>>>>>>> how does the user expect to consume it?
>     > >>>>>>> The queue type is deduced from the index and whether we
>     have a
>     > >>>>>>> virtqueue. Even numbers are rx, odd numbers are tx and
>     if there is
>     > >>>>>>> CVQ, the last one is CVQ.
>     > >>>>>> OK, then VDPA_ATTR_DEV_QUEUE_TYPE attribute introduced in
>     this
>     > >>>>>> patch might not be useful at all?
>     > >>>>> Right, will remove.
>     > >>>>>
>     > >>>>>> And how do you determine in the vdpa tool if CVQ is
>     negotiated or
>     > >>>>>> not?
>     > >>>>> I make a netlink call to get the same information as "
>     vdpa dev config
>     > >> show"
>     > >>>> retrieves. I use the negotiated features to determine if a
>     CVQ is
>     > >>>> available. If it is, the number of VQs equals the control
>     VQ index.
>     > >>>> So there are two netlink calls under the hood.
>     > >>>> The lock vdpa_dev_mutex won't hold across the two separate
>     netlink
>     > >>>> calls, and it may end up with inconsistent state -
>     theoretically
>     > >>>> things could happen like that the first call gets CVQ
>     negotiated, but
>     > >>>> the later call for
>     > >>>> get_vendor_vq_stats() on the cvq might get -EINVAL due to
>     device
>     > >>>> reset. Can the negotiated status and stat query be done
>     within one single
>     > >> netlink call?
>     > >>> I see your concern.
>     > >>> The only reason I do the extra call is to know if we have a
>     control VQ
>     > >>> and what index it is, just to print a descriptive string
>     telling if it's either an rx,
>     > >> tx or control VQ.
>     > >>> So the cure can be simple. Let's have a new attribute that
>     returns the
>     > >>> type of virtqueue.
>     > >> I am not sure I follow the cure. Wouldn't it be possible to
>     get both negotiated
>     > >> status and the queue stat in vdpa_nl_cmd_dev_stats_get_doit()
>     under the
>     > >> same vdpa_dev_mutex lock?
>     > > Yes we can, but I suggested to get only the type of the queue
>     as a new attribute.
>     > > The kernel will do the digest and decide per a given VQ if
>     it's rx, tx or control and
>     > > return the result in that new attribute.
>     > The rx, tx and control queue type is net specific, while the
>     vdpa core
>     > is currently agnostic to the vdpa class.
>     >
>     > >
>     > >> And I am not even sure if it is a must to display
>     > >> the queue type - it doesn't seem the output includes the vdpa
>     class info, which
>     > >> makes it hard for scripts to parse this field in a generic way.
>     > > I don't get you. You say you don't think you need the queue
>     type and at the same
>     > > time you're concerned that lack of information makes it hard for
>     scripts.
>     > > BTW, class info is something you can get for the device
>     through "vdpa dev show"
>     > > so you know the class of your device.
>     > Stepping back, may I ask if there's a case that queue type
>     specific stat
>     > may be defined by vendor, such that deciphering of certain
>     vendor stat
>     > would need type specific knowledge? So far the received_desc and
>     > completed_desc stats offered through the mlx5_vdpa patch look to be
>     > general ones and not associated with any queue type in
>     particular. Is
>     > there some future stat in your mind that needs specific knowledge of
>     > queue type and vdpa class?
>
>     No, the only reason for displaying the queue type is to help users
>     know what kind of queue they're looking at.
>
>     >
>     > I'd prefer the vstat output to be self-contained and
>     self-descriptive.
>     > You may argue the class of vdpa never changes in "vdpa dev show"
>     after
>     > creation. This is true, however the queue type is not - say you
>     got a
>     > control queue for qindex 2, but the next moment you may get an rx
>     queue
>     > with the same qindex.
>
>     I don't think this is possible unless you destroyed the device and
>     re-created it.
>     What operation do you think could cause that?
>
>     > Particularly you seem to want to tie this with the queue
>     > index in the guest view, which is quite dynamic for host admin
>     or script
>     > running on the host to follow.
>
>     For rx and tx queues, some index may become invalid if the user
>     changed
>     the number of queues with ethtool -L but I don't think this is an
>     issue.
>     >
>     > >
>     > >>>    I think Jason did not like the idea of communicating the
>     kind of VQ
>     > >>> from kernel to userspace but under these circumstances,
>     maybe he would
>     > >> approve.
>     > >>> Jason?
>     > >>>
>     > >>>> What worried me is that the queue index being dynamic and
>     dependent on
>     > >>>> negotiation status would make it quite hard for the host admin
>     user to follow.
>     > >>>> The guest may or may not advertise F_MQ and/or F_CTRL_VQ across
>     > >> various phases, e.g.
>     > >>>> firmware (UEFI), boot loader (grub) till OS driver is up
>     and running,
>     > >>>> which can be agnostic to host admin. For the most part
>     it's not
>     > >>>> easy to script and predict the queue index which can change
>     from time
>     > >>>> to time. Can we define the order of host predictable queue
>     index,
>     > >>>> which is independent from any guest negotiated state?
>     > >> Here I think we can just use the plain queue index in the
>     host view - say if vdpa
>     > >> net has 4 pairs of data vqs and 1 control vq, user may use
>     qindex 8 across the
>     > >> board to identify the control vq, regardless if the F_MQ
>     feature is negotiated
>     > >> or not in guest.
>     > > Right, but the idea is that a userspace tool should provide
>     useful information to the
>     > > user so it does not need to do complex logic to infer that
>     from bare data.
>     > The host side qindex and qtype would never change regardless of
>     guest
>     > feature negotiation; by nature it reflects the real construct
>     and object
>     > in the hardware.
>
>
> This should be possible for vendor specific stats. But I'm afraid it 
> may cause more confusion since the spec doesn't have a concept like 
> "host queue index".
I am not sure it's worth involving spec changes, but think of it as a 
host side handle that can be used consistently to identify a certain 
vdpa device queue, which should be vdpa specific. Typically host admins 
need a consistent handle without having to know or blindly guess the 
guest qindex, or query the negotiated features with another command 
upfront (which, as said, can be racy and problematic).

> And to be self descriptive the vendor need also display the mappings 
> between virtqueue index and host(vendor) queue index.
That's exactly what I had in mind. To ensure atomicity and 
self-describability, it's a must to display both queue indexes in a 
single vstat query output for a specific queue. See the example at the 
bottom of my earlier reply to Eli's email.
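
To make this concrete, here is a minimal sketch of what the kernel-side 
fill function could look like. The VDPA_ATTR_DEV_QUEUE_INDEX_DRIVER 
attribute and the guest_idx parameter are assumptions for illustration 
only; they are not part of the posted patch:

	/* Sketch: emit both the host and the guest view of the queue index
	 * in the same netlink record, filled while vdpa_dev_mutex is held,
	 * so userspace never has to correlate two racy queries.
	 */
	static int vdpa_fill_stats_rec(struct vdpa_device *vdev,
				       struct sk_buff *msg,
				       struct genl_info *info,
				       u32 host_idx, u32 guest_idx)
	{
		if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, host_idx))
			return -EMSGSIZE;
		/* assumed new attribute carrying the driver (guest) index */
		if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX_DRIVER, guest_idx))
			return -EMSGSIZE;
		return vdev->config->get_vendor_vq_stats(vdev, host_idx, msg,
							 info->extack);
	}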

Thanks,
-Siwei

>
> Thanks
>
>     I don't feel it's a simple task for host users to
>     > figure out the correct guest side qindex for the control queue
>     - it's
>     > always racy for one to check some other vdpa command output if
>     the vstat
>     > output is not self-contained.
>
>     So what are you actually proposing? Display received and completed
>     descriptors
>     per queue index without further interpretation?
>
>     >
>     > Thanks,
>     > -Siwei
>     >
>     > >
>     > >>
>     > >> Regards,
>     > >> -Siwei
>     > >>
>     > >>>>>> Looks to me there are still some loose ends I don't quite yet
>     > >>>>>> understand.
>     > >>>>>>
>     > >>>>>>
>     > >>>>>>>>> "queue_index": 0,
>     > >>>>> I think this can be removed since the command is for a
>     specific index.
>     > >>>>>
>     > >>>>>>>>> "name": "received_desc",
>     > >>>>>>>>> "value": 417776,
>     > >>>>>>>>> "name": "completed_desc",
>     > >>>>>>>>> "value": 417548
>     > >>>>>>>> Not for this kernel patch, but IMHO it's the best to
>     put the name
>     > >>>>>>>> & value pairs in an array instead of flat entries in json's
>     > >>>>>>>> hash/dictionary. The hash entries can be re-ordered
>     deliberately
>     > >>>>>>>> by external json parsing tool, ending up with
>     inconsistent stat values.
>     > >>>>>> This comment is missed for some reason. Please change the
>     example
>     > >>>>>> in the log if you agree to address it in vdpa tool. Or
>     justify why
>     > >>>>>> keeping the order for json hash/dictionary is fine.
>     > >>>>> Sorry for skipping this comment.
>     > >>>>> Do you mean to present the information like:
>     > >>>>> "received_desc": 417776,
>     > >>>>> "completed_desc": 417548,
>     > >>>> I mean the following presentation:
>     > >>>>
>     > >>>> $ vdpa -jp dev vstats show vdpa-a qidx 0 {
>     > >>>>        "vstats": {
>     > >>>>            "vdpa-a": {
>     > >>>>                "queue_stats": [{
>     > >>>>                    "queue_index": 0,
>     > >>>>                    "queue_type": "rx",
>     > >>>>                    "stat_name": [
>     "received_desc","completed_desc" ],
>     > >>>>                    "stat_value": [ 417776,417548 ],
>     > >>>>                }]
>     > >>>>            }
>     > >>>>        }
>     > >>>> }
>     > >>>>
>     > >>>> I think Parav had similar suggestion, too.
>     > >>>>
>     > >>>> Thanks,
>     > >>>> -Siwei
>     > >>>>
>     > >>>>>> Thanks,
>     > >>>>>> -Siwei
>     > >>>>>>
>     > >>>>>>>> Thanks,
>     > >>>>>>>> -Siwei
>     > >>>>>>>>>              }
>     > >>>>>>>>>          }
>     > >>>>>>>>> }
>     > >>>>>>>>>
>     > >>>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>     > >>>>>>>>> ---
>     > >>>>>>>>>  drivers/vdpa/vdpa.c       | 129
>     > >>>>>> ++++++++++++++++++++++++++++++++++++++
>     > >>>>>>>>>  include/linux/vdpa.h      |   5 ++
>     > >>>>>>>>>  include/uapi/linux/vdpa.h |   7 +++
>     > >>>>>>>>>       3 files changed, 141 insertions(+)
>     > >>>>>>>>>
>     > >>>>>>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
>     index
>     > >>>>>>>>> 9846c9de4bfa..d0ff671baf88 100644
>     > >>>>>>>>> --- a/drivers/vdpa/vdpa.c
>     > >>>>>>>>> +++ b/drivers/vdpa/vdpa.c
>     > >>>>>>>>> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct
>     vdpa_device
>     > >>>>>>>>> *vdev,
>     > >>>>>> struct sk_buff *msg, u32 portid,
>     > >>>>>>>>>             return err;
>     > >>>>>>>>>       }
>     > >>>>>>>>> +static int vdpa_fill_stats_rec(struct vdpa_device
>     *vdev, struct
>     > >>>>>>>>> +sk_buff
>     > >>>>>> *msg,
>     > >>>>>>>>> +         struct genl_info *info, u32 index) {
>     > >>>>>>>>> +   int err;
>     > >>>>>>>>> +
>     > >>>>>>>>> +   if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX,
>     index))
>     > >>>>>>>>> +           return -EMSGSIZE;
>     > >>>>>>>>> +
>     > >>>>>>>>> +   err = vdev->config->get_vendor_vq_stats(vdev,
>     index, msg,
>     > >>>>>>>>> +info-
>     > >>>>>>> extack);
>     > >>>>>>>>> +   if (err)
>     > >>>>>>>>> +           return err;
>     > >>>>>>>>> +
>     > >>>>>>>>> +   return 0;
>     > >>>>>>>>> +}
>     > >>>>>>>>> +
>     > >>>>>>>>> +static int vendor_stats_fill(struct vdpa_device
>     *vdev, struct
>     > >>>>>>>>> +sk_buff
>     > >>>> *msg,
>     > >>>>>>>>> +       struct genl_info *info, u32 index) {
>     > >>>>>>>>> +   int err;
>     > >>>>>>>>> +
>     > >>>>>>>>> +   if (!vdev->config->get_vendor_vq_stats)
>     > >>>>>>>>> +           return -EOPNOTSUPP;
>     > >>>>>>>>> +
>     > >>>>>>>>> +   err = vdpa_fill_stats_rec(vdev, msg, info, index);
>     > >>>>>>>>> +   if (err)
>     > >>>>>>>>> +           return err;
>     > >>>>>>>>> +
>     > >>>>>>>>> +   return 0;
>     > >>>>>>>>> +}
>     > >>>>>>>>> +
>     > >>>>>>>>> +static int vdpa_dev_vendor_stats_fill(struct
>     vdpa_device *vdev,
>     > >>>>>>>>> +                struct sk_buff *msg,
>     > >>>>>>>>> +                struct genl_info *info, u32 index) {
>     > >>>>>>>>> +   u32 device_id;
>     > >>>>>>>>> +   void *hdr;
>     > >>>>>>>>> +   int err;
>     > >>>>>>>>> +   u32 portid = info->snd_portid;
>     > >>>>>>>>> +   u32 seq = info->snd_seq;
>     > >>>>>>>>> +   u32 flags = 0;
>     > >>>>>>>>> +
>     > >>>>>>>>> +   hdr = genlmsg_put(msg, portid, seq,
>     &vdpa_nl_family, flags,
>     > >>>>>>>>> +    VDPA_CMD_DEV_VSTATS_GET);
>     > >>>>>>>>> +   if (!hdr)
>     > >>>>>>>>> +           return -EMSGSIZE;
>     > >>>>>>>>> +
>     > >>>>>>>>> +   if (nla_put_string(msg, VDPA_ATTR_DEV_NAME,
>     > >> dev_name(&vdev-
>     > >>>>>>> dev))) {
>     > >>>>>>>>> +           err = -EMSGSIZE;
>     > >>>>>>>>> +           goto undo_msg;
>     > >>>>>>>>> +   }
>     > >>>>>>>>> +
>     > >>>>>>>>> +   device_id = vdev->config->get_device_id(vdev);
>     > >>>>>>>>> +   if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
>     > >>>>>>>>> +           err = -EMSGSIZE;
>     > >>>>>>>>> +           goto undo_msg;
>     > >>>>>>>>> +   }
>     > >>>>>>>>> +
>     > >>>>>>>>> +   err = vendor_stats_fill(vdev, msg, info, index);
>     > >>>>>>>>> +
>     > >>>>>>>>> +  genlmsg_end(msg, hdr);
>     > >>>>>>>>> +
>     > >>>>>>>>> +   return err;
>     > >>>>>>>>> +
>     > >>>>>>>>> +undo_msg:
>     > >>>>>>>>> +  genlmsg_cancel(msg, hdr);
>     > >>>>>>>>> +   return err;
>     > >>>>>>>>> +}
>     > >>>>>>>>> +
>     > >>>>>>>>>       static int
>     vdpa_nl_cmd_dev_config_get_doit(struct sk_buff
>     > >>>>>>>>> *skb, struct
>     > >>>>>> genl_info *info)
>     > >>>>>>>>>       {
>     > >>>>>>>>>             struct vdpa_device *vdev;
>     > >>>>>>>>> @@ -990,6 +1058,60 @@
>     > >> vdpa_nl_cmd_dev_config_get_dumpit(struct
>     > >>>>>> sk_buff *msg, struct netlink_callback *
>     > >>>>>>>>>             return msg->len;
>     > >>>>>>>>>       }
>     > >>>>>>>>> +static int vdpa_nl_cmd_dev_stats_get_doit(struct
>     sk_buff *skb,
>     > >>>>>>>>> +                    struct genl_info *info)
>     > >>>>>>>>> +{
>     > >>>>>>>>> +   struct vdpa_device *vdev;
>     > >>>>>>>>> +   struct sk_buff *msg;
>     > >>>>>>>>> +   const char *devname;
>     > >>>>>>>>> +   struct device *dev;
>     > >>>>>>>>> +   u32 index;
>     > >>>>>>>>> +   int err;
>     > >>>>>>>>> +
>     > >>>>>>>>> +   if (!info->attrs[VDPA_ATTR_DEV_NAME])
>     > >>>>>>>>> +           return -EINVAL;
>     > >>>>>>>>> +
>     > >>>>>>>>> +   if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
>     > >>>>>>>>> +           return -EINVAL;
>     > >>>>>>>>> +
>     > >>>>>>>>> +   devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
>     > >>>>>>>>> +   msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
>     > >>>>>>>>> +   if (!msg)
>     > >>>>>>>>> +           return -ENOMEM;
>     > >>>>>>>>> +
>     > >>>>>>>>> +   index = nla_get_u32(info-
>     > >>> attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
>     > >>>>>>>>> +  mutex_lock(&vdpa_dev_mutex);
>     > >>>>>>>>> +   dev = bus_find_device(&vdpa_bus, NULL, devname,
>     > >>>>>> vdpa_name_match);
>     > >>>>>>>>> +   if (!dev) {
>     > >>>>>>>>> +  NL_SET_ERR_MSG_MOD(info->extack, "device not
>     > >> found");
>     > >>>>>>>>> +           err = -ENODEV;
>     > >>>>>>>>> +           goto dev_err;
>     > >>>>>>>>> +   }
>     > >>>>>>>>> +   vdev = container_of(dev, struct vdpa_device, dev);
>     > >>>>>>>>> +   if (!vdev->mdev) {
>     > >>>>>>>>> +  NL_SET_ERR_MSG_MOD(info->extack, "unmanaged
>     > >> vdpa
>     > >>>>>> device");
>     > >>>>>>>>> +           err = -EINVAL;
>     > >>>>>>>>> +           goto mdev_err;
>     > >>>>>>>>> +   }
>     > >>>>>>>>> +   err = vdpa_dev_vendor_stats_fill(vdev, msg, info,
>     index);
>     > >>>>>>>>> +   if (!err)
>     > >>>>>>>>> +           err = genlmsg_reply(msg, info);
>     > >>>>>>>>> +
>     > >>>>>>>>> +  put_device(dev);
>     > >>>>>>>>> +  mutex_unlock(&vdpa_dev_mutex);
>     > >>>>>>>>> +
>     > >>>>>>>>> +   if (err)
>     > >>>>>>>>> +  nlmsg_free(msg);
>     > >>>>>>>>> +
>     > >>>>>>>>> +   return err;
>     > >>>>>>>>> +
>     > >>>>>>>>> +mdev_err:
>     > >>>>>>>>> +  put_device(dev);
>     > >>>>>>>>> +dev_err:
>     > >>>>>>>>> +  mutex_unlock(&vdpa_dev_mutex);
>     > >>>>>>>>> +   return err;
>     > >>>>>>>>> +}
>     > >>>>>>>>> +
>     > >>>>>>>>>       static const struct nla_policy
>     vdpa_nl_policy[VDPA_ATTR_MAX + 1]
>     > >> = {
>     > >>>>>>>>>  [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type =
>     > >>>> NLA_NUL_STRING },
>     > >>>>>>>>>  [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING
>     > >>>> }, @@ -
>     > >>>>>> 997,6
>     > >>>>>>>>> +1119,7 @@ static const struct nla_policy
>     > >>>>>> vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>     > >>>>>>>>>  [VDPA_ATTR_DEV_NET_CFG_MACADDR] =
>     > >>>> NLA_POLICY_ETH_ADDR,
>     > >>>>>>>>>             /* virtio spec 1.1 section 5.1.4.1 for
>     valid MTU range */
>     > >>>>>>>>>  [VDPA_ATTR_DEV_NET_CFG_MTU] =
>     > >>>> NLA_POLICY_MIN(NLA_U16, 68),
>     > >>>>>>>>> +  [VDPA_ATTR_DEV_QUEUE_INDEX] =
>     > >> NLA_POLICY_RANGE(NLA_U32, 0,
>     > >>>>>> 65535),
>     > >>>>>>>>>       };
>     > >>>>>>>>>       static const struct genl_ops vdpa_nl_ops[] = {
>     @@ -1030,6
>     > >>>>>>>>> +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
>     > >>>>>>>>>  .doit = vdpa_nl_cmd_dev_config_get_doit,
>     > >>>>>>>>>  .dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
>     > >>>>>>>>>             },
>     > >>>>>>>>> +   {
>     > >>>>>>>>> +           .cmd = VDPA_CMD_DEV_VSTATS_GET,
>     > >>>>>>>>> +  .validate = GENL_DONT_VALIDATE_STRICT |
>     > >>>>>> GENL_DONT_VALIDATE_DUMP,
>     > >>>>>>>>> +           .doit = vdpa_nl_cmd_dev_stats_get_doit,
>     > >>>>>>>>> +           .flags = GENL_ADMIN_PERM,
>     > >>>>>>>>> +   },
>     > >>>>>>>>>       };
>     > >>>>>>>>>       static struct genl_family vdpa_nl_family
>     __ro_after_init =
>     > >>>>>>>>> { diff --git a/include/linux/vdpa.h
>     b/include/linux/vdpa.h index
>     > >>>>>>>>> 2de442ececae..274203845cfc 100644
>     > >>>>>>>>> --- a/include/linux/vdpa.h
>     > >>>>>>>>> +++ b/include/linux/vdpa.h
>     > >>>>>>>>> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
>     > >>>>>>>>>              const struct vdpa_vq_state *state);
>     > >>>>>>>>>             int (*get_vq_state)(struct vdpa_device
>     *vdev, u16 idx,
>     > >>>>>>>>>              struct vdpa_vq_state *state);
>     > >>>>>>>>> +   int (*get_vendor_vq_stats)(struct vdpa_device
>     *vdev, u16 idx,
>     > >>>>>>>>> +             struct sk_buff *msg,
>     > >>>>>>>>> +             struct netlink_ext_ack *extack);
>     > >>>>>>>>>             struct vdpa_notification_area
>     > >>>>>>>>>  (*get_vq_notification)(struct vdpa_device *vdev, u16
>     idx);
>     > >>>>>>>>>             /* vq irq is not expected to be changed
>     once DRIVER_OK is
>     > >>>>>>>>> set */ @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
>     > >>>>>>>>>       int vdpa_mgmtdev_register(struct vdpa_mgmt_dev
>     *mdev);
>     > >>>>>>>>>       void vdpa_mgmtdev_unregister(struct
>     vdpa_mgmt_dev *mdev);
>     > >>>>>>>>> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
>     > >>>>>>>>> +
>     > >>>>>>>>>       #endif /* _LINUX_VDPA_H */
>     > >>>>>>>>> diff --git a/include/uapi/linux/vdpa.h
>     > >>>>>>>>> b/include/uapi/linux/vdpa.h index
>     1061d8d2d09d..c5f229a41dc2
>     > >>>>>>>>> 100644
>     > >>>>>>>>> --- a/include/uapi/linux/vdpa.h
>     > >>>>>>>>> +++ b/include/uapi/linux/vdpa.h
>     > >>>>>>>>> @@ -18,6 +18,7 @@ enum vdpa_command {
>     > >>>>>>>>>  VDPA_CMD_DEV_DEL,
>     > >>>>>>>>>  VDPA_CMD_DEV_GET,               /* can dump */
>     > >>>>>>>>>  VDPA_CMD_DEV_CONFIG_GET,        /* can dump */
>     > >>>>>>>>> +  VDPA_CMD_DEV_VSTATS_GET,
>     > >>>>>>>>>       };
>     > >>>>>>>>>       enum vdpa_attr {
>     > >>>>>>>>> @@ -46,6 +47,12 @@ enum vdpa_attr {
>     > >>>>>>>>>  VDPA_ATTR_DEV_NEGOTIATED_FEATURES,      /* u64 */
>     > >>>>>>>>>  VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,          /*
>     > >>>> u32 */
>     > >>>>>>>>>  VDPA_ATTR_DEV_SUPPORTED_FEATURES,       /* u64 */
>     > >>>>>>>>> +
>     > >>>>>>>>> +  VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
>     > >>>>>>>>> +  VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
>     > >>>>>>>>> +  VDPA_ATTR_DEV_VENDOR_ATTR_NAME,         /*
>     > >> string */
>     > >>>>>>>>> +  VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
>     > >>>>>>>>> +
>     > >>>>>>>>>             /* new attributes must be added above here */
>     > >>>>>>>>>  VDPA_ATTR_MAX,
>     > >>>>>>>>>       };
>


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
       [not found]                         ` <DM8PR12MB540054565515158F9209723EAB109@DM8PR12MB5400.namprd12.prod.outlook.com>
@ 2022-03-16  6:52                           ` Si-Wei Liu
       [not found]                             ` <DM8PR12MB5400E7B2359FE4797F190AC5AB119@DM8PR12MB5400.namprd12.prod.outlook.com>
  0 siblings, 1 reply; 20+ messages in thread
From: Si-Wei Liu @ 2022-03-16  6:52 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma



On 3/15/2022 2:10 AM, Eli Cohen wrote:

<...snip...>

>> Say you got a vdpa net device created with 4 data queue pairs and a
>> control vq. On boot some guest firmware may support just F_CTRL_VQ but
>> not F_MQ, then the index for the control vq in guest ends up with 2, as
>> in this case there's only a single queue pair enabled for rx (index 0)
>> and tx (index 1). From the host driver (e.g. mlx5_vdpa) perspective, the
>> control vq is the last vq following 8
> If the host sees F_MQ was not negotiated but F_CTRL_VQ was, then it knows
> that control VQ index is 2
Right, but I don't see this feature negotiation info getting returned 
from your vdpa_dev_vendor_stats_fill(), or did I miss something? How do 
you plan for the host user to get this info? If you meant another "vdpa 
dev show" command to query negotiated features ahead of time, that won't 
be protected by the same lock as the stat query. It's very easy to miss 
that ephemeral queue index.
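
For instance - just a sketch, assuming the negotiated features are read 
via the get_driver_features config op and reported with the existing 
VDPA_ATTR_DEV_NEGOTIATED_FEATURES attribute (using VDPA_ATTR_PAD as the 
pad attribute is an assumption as well) - the stats handler could fold 
both into the single reply it already builds under vdpa_dev_mutex:

	/* Sketch: report the negotiated features in the same reply as the
	 * vq stats, while vdpa_dev_mutex is still held, so the feature
	 * bits and the queue index can never go out of sync.
	 */
	u64 features = vdev->config->get_driver_features(vdev);

	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES,
			      features, VDPA_ATTR_PAD))
		return -EMSGSIZE;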

>> data vqs of all 4 pairs, hence got
>> the 8th index in the rank. Since F_MQ is not negotiated and only 1 data
>> queue pair enabled, in such event only host qindex 0,1 and 8 have vendor
>> stats available, and the rest of qindex would get invalid/empty stat.
>>
>> Later on say boot continues towards loading the Linux virtio driver,
>> then guest could successfully negotiate both F_CTRL_VQ and F_MQ
>> features. In this case, all 8 data virtqueues are fully enabled, the
>> index for the control vq ends up as 8, following tightly after all the 4
>> data queue pairs. Only until both features are negotiated, the guest and
>> host are able to see consistent view in identifying the control vq.
>> Since F_MQ is negotiated, all host queues, indexed from 0 through 8,
>> should have vendor stats available.
>>
>> That's why I said the guest qindex is ephemeral and hard to predict
>> subjected to negotiated features, but host qindex is reliable and more
>> eligible for command line identification purpose.
>>
<...snip...>
>>> So what are you actually proposing? Display received and completed descriptors
>>> per queue index without further interpretation?
>> I'd suggest using a more stable queue id i.e. the host queue index to
>> represent the qidx (which seems to be what you're doing now?), and
>> displaying both the host qindex (queue_index_device in the example
>> below), as well as the guest's (queue_index_driver as below) in the output:
>>
> Given that per vdpa device you can display statistics only after features have
> been negotiated, you can always know the correct queue index for the control
> VQ.
The stats can be displayed only after features are negotiated, and only 
when the corresponding queue is enabled. If you know from "vdpa dev 
show" on day 1 that the control vq and mq features are negotiated, but 
then on day 2 you get nothing for the predicted control vq index, what 
would you recommend the host admin do to get the right qindex again? 
Re-run the stat query on the same queue index, or check the "vdpa dev 
show" output again on day 3? This CLI design makes it really challenging 
for a cloud administrator to follow the dynamics of guest activities when 
managing hundreds or thousands of virtual machines...

It would be easier, in my opinion, to have some well-defined handle 
that is easily predictable or fixed across the board, for looking up the 
control virtqueue. This could be a constant host queue index, or a 
special magic keyword like "qidx ctrlvq". If a cloud admin runs a vstat 
query on the control vq using a determined handle but gets nothing back, 
then s/he knows *for sure* the control vq was not available for some 
reason at the point when the stat was being collected. S/he doesn't even 
need to care about the negotiated status via "vdpa dev show" at all. Why 
bother? A sketch of how the vdpa tool might parse such a keyword follows.
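
Something along these lines in the userspace vdpa tool would do; the 
"ctrlvq" keyword is only a proposal here, and reusing the patch's 
VDPA_INVAL_QUEUE_INDEX as a kernel-resolved selector is an assumption:

	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>

	#define VDPA_INVAL_QUEUE_INDEX 0xffff	/* from the posted patch */

	/* Map the qidx argument to the value sent in
	 * VDPA_ATTR_DEV_QUEUE_INDEX: either a plain number, or the magic
	 * "ctrlvq" keyword that the kernel would resolve to the real
	 * control vq index.
	 */
	static int parse_qidx(const char *arg, uint32_t *idx)
	{
		char *end;

		if (!strcmp(arg, "ctrlvq")) {
			*idx = VDPA_INVAL_QUEUE_INDEX;
			return 0;
		}
		*idx = strtoul(arg, &end, 0);
		return (end != arg && *end == '\0') ? 0 : -1;
	}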

>
> Do you still see your proposal as required?
Yes, this is essential to any cloud admin that runs stat queries on all 
of the queues on a periodic basis. You'd get something deterministic 
without blindly guessing or bothering with other irrelevant commands.


Thanks,
-Siwei
>
>> $ vdpa -jp dev vstats show vdpa-a qidx 8
>> {
>>       "vstats": {
>>           "vdpa-a": {
>>               "queue_stats": [{
>>                   "queue_index_device": 8,
>>                   "queue_index_driver": 2,
>>                   "queue_type": "control_vq",
>>                   "stat_name": [ "received_desc","completed_desc" ],
>>                   "stat_value": [ 417776,417775 ],
>>               }]
>>           }
>>       }
>> }
>>
>> Optionally, the user may use the guest queue index gqidx, which is kind
>> of an ephemeral ID dependent on F_MQ negotiation, to query the stat on a
>> specific guest queue:
>>
>> $ vdpa -jp dev vstats show vdpa-a gqidx 2
>> {
>>       "vstats": {
>>           "vdpa-a": {
>>               "queue_stats": [{
>>                   "queue_index_device": 8,
>>                   "queue_index_driver": 2,
>>                   "queue_type": "control_vq",
>>                   "stat_name": [ "received_desc","completed_desc" ],
>>                   "stat_value": [ 417776,417775 ],
>>               }]
>>           }
>>       }
>> }
>>
>> Thanks,
>> -Siwei
>>
>>>> Thanks,
>>>> -Siwei
>>>>
>>>>>> Regards,
>>>>>> -Siwei
>>>>>>
>>>>>>>>>> Looks to me there are still some loose ends I don't quite yet
>>>>>>>>>> understand.
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>>>>                    "queue_index": 0,
>>>>>>>>> I think this can be removed since the command is for a specific index.
>>>>>>>>>
>>>>>>>>>>>>>                    "name": "received_desc",
>>>>>>>>>>>>>                    "value": 417776,
>>>>>>>>>>>>>                    "name": "completed_desc",
>>>>>>>>>>>>>                    "value": 417548
>>>>>>>>>>>> Not for this kernel patch, but IMHO it's the best to put the name
>>>>>>>>>>>> & value pairs in an array instead of flat entries in json's
>>>>>>>>>>>> hash/dictionary. The hash entries can be re-ordered deliberately
>>>>>>>>>>>> by external json parsing tool, ending up with inconsistent stat values.
>>>>>>>>>> This comment is missed for some reason. Please change the example
>>>>>>>>>> in the log if you agree to address it in vdpa tool. Or justify why
>>>>>>>>>> keeping the order for json hash/dictionary is fine.
>>>>>>>>> Sorry for skipping this comment.
>>>>>>>>> Do you mean to present the information like:
>>>>>>>>> "received_desc": 417776,
>>>>>>>>> "completed_desc": 417548,
>>>>>>>> I mean the following presentation:
>>>>>>>>
>>>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 0 {
>>>>>>>>          "vstats": {
>>>>>>>>              "vdpa-a": {
>>>>>>>>                  "queue_stats": [{
>>>>>>>>                      "queue_index": 0,
>>>>>>>>                      "queue_type": "rx",
>>>>>>>>                      "stat_name": [ "received_desc","completed_desc" ],
>>>>>>>>                      "stat_value": [ 417776,417548 ],
>>>>>>>>                  }]
>>>>>>>>              }
>>>>>>>>          }
>>>>>>>> }
>>>>>>>>
>>>>>>>> I think Parav had similar suggestion, too.
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>> -Siwei
>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>> -Siwei
>>>>>>>>>>
>>>>>>>>>>>> Thanks,
>>>>>>>>>>>> -Siwei
>>>>>>>>>>>>>                }
>>>>>>>>>>>>>            }
>>>>>>>>>>>>> }
>>>>>>>>>>>>>
>>>>>>>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>>>>>>>>>> ---
>>>>>>>>>>>>>         drivers/vdpa/vdpa.c       | 129
>>>>>>>>>> ++++++++++++++++++++++++++++++++++++++
>>>>>>>>>>>>>         include/linux/vdpa.h      |   5 ++
>>>>>>>>>>>>>         include/uapi/linux/vdpa.h |   7 +++
>>>>>>>>>>>>>         3 files changed, 141 insertions(+)
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index
>>>>>>>>>>>>> 9846c9de4bfa..d0ff671baf88 100644
>>>>>>>>>>>>> --- a/drivers/vdpa/vdpa.c
>>>>>>>>>>>>> +++ b/drivers/vdpa/vdpa.c
>>>>>>>>>>>>> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device
>>>>>>>>>>>>> *vdev,
>>>>>>>>>> struct sk_buff *msg, u32 portid,
>>>>>>>>>>>>>         	return err;
>>>>>>>>>>>>>         }
>>>>>>>>>>>>> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct
>>>>>>>>>>>>> +sk_buff
>>>>>>>>>> *msg,
>>>>>>>>>>>>> +			       struct genl_info *info, u32 index) {
>>>>>>>>>>>>> +	int err;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
>>>>>>>>>>>>> +		return -EMSGSIZE;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	err = vdev->config->get_vendor_vq_stats(vdev, index, msg,
>>>>>>>>>>>>> +info-
>>>>>>>>>>> extack);
>>>>>>>>>>>>> +	if (err)
>>>>>>>>>>>>> +		return err;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	return 0;
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +static int vendor_stats_fill(struct vdpa_device *vdev, struct
>>>>>>>>>>>>> +sk_buff
>>>>>>>> *msg,
>>>>>>>>>>>>> +			     struct genl_info *info, u32 index) {
>>>>>>>>>>>>> +	int err;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	if (!vdev->config->get_vendor_vq_stats)
>>>>>>>>>>>>> +		return -EOPNOTSUPP;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	err = vdpa_fill_stats_rec(vdev, msg, info, index);
>>>>>>>>>>>>> +	if (err)
>>>>>>>>>>>>> +		return err;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	return 0;
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
>>>>>>>>>>>>> +				      struct sk_buff *msg,
>>>>>>>>>>>>> +				      struct genl_info *info, u32 index) {
>>>>>>>>>>>>> +	u32 device_id;
>>>>>>>>>>>>> +	void *hdr;
>>>>>>>>>>>>> +	int err;
>>>>>>>>>>>>> +	u32 portid = info->snd_portid;
>>>>>>>>>>>>> +	u32 seq = info->snd_seq;
>>>>>>>>>>>>> +	u32 flags = 0;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
>>>>>>>>>>>>> +			  VDPA_CMD_DEV_VSTATS_GET);
>>>>>>>>>>>>> +	if (!hdr)
>>>>>>>>>>>>> +		return -EMSGSIZE;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME,
>>>>>> dev_name(&vdev-
>>>>>>>>>>> dev))) {
>>>>>>>>>>>>> +		err = -EMSGSIZE;
>>>>>>>>>>>>> +		goto undo_msg;
>>>>>>>>>>>>> +	}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	device_id = vdev->config->get_device_id(vdev);
>>>>>>>>>>>>> +	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
>>>>>>>>>>>>> +		err = -EMSGSIZE;
>>>>>>>>>>>>> +		goto undo_msg;
>>>>>>>>>>>>> +	}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	err = vendor_stats_fill(vdev, msg, info, index);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	genlmsg_end(msg, hdr);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	return err;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +undo_msg:
>>>>>>>>>>>>> +	genlmsg_cancel(msg, hdr);
>>>>>>>>>>>>> +	return err;
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>>         static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff
>>>>>>>>>>>>> *skb, struct
>>>>>>>>>> genl_info *info)
>>>>>>>>>>>>>         {
>>>>>>>>>>>>>         	struct vdpa_device *vdev;
>>>>>>>>>>>>> @@ -990,6 +1058,60 @@
>>>>>> vdpa_nl_cmd_dev_config_get_dumpit(struct
>>>>>>>>>> sk_buff *msg, struct netlink_callback *
>>>>>>>>>>>>>         	return msg->len;
>>>>>>>>>>>>>         }
>>>>>>>>>>>>> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
>>>>>>>>>>>>> +					  struct genl_info *info)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +	struct vdpa_device *vdev;
>>>>>>>>>>>>> +	struct sk_buff *msg;
>>>>>>>>>>>>> +	const char *devname;
>>>>>>>>>>>>> +	struct device *dev;
>>>>>>>>>>>>> +	u32 index;
>>>>>>>>>>>>> +	int err;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	if (!info->attrs[VDPA_ATTR_DEV_NAME])
>>>>>>>>>>>>> +		return -EINVAL;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
>>>>>>>>>>>>> +		return -EINVAL;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
>>>>>>>>>>>>> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
>>>>>>>>>>>>> +	if (!msg)
>>>>>>>>>>>>> +		return -ENOMEM;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	index = nla_get_u32(info-
>>>>>>> attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
>>>>>>>>>>>>> +	mutex_lock(&vdpa_dev_mutex);
>>>>>>>>>>>>> +	dev = bus_find_device(&vdpa_bus, NULL, devname,
>>>>>>>>>> vdpa_name_match);
>>>>>>>>>>>>> +	if (!dev) {
>>>>>>>>>>>>> +		NL_SET_ERR_MSG_MOD(info->extack, "device not
>>>>>> found");
>>>>>>>>>>>>> +		err = -ENODEV;
>>>>>>>>>>>>> +		goto dev_err;
>>>>>>>>>>>>> +	}
>>>>>>>>>>>>> +	vdev = container_of(dev, struct vdpa_device, dev);
>>>>>>>>>>>>> +	if (!vdev->mdev) {
>>>>>>>>>>>>> +		NL_SET_ERR_MSG_MOD(info->extack, "unmanaged
>>>>>> vdpa
>>>>>>>>>> device");
>>>>>>>>>>>>> +		err = -EINVAL;
>>>>>>>>>>>>> +		goto mdev_err;
>>>>>>>>>>>>> +	}
>>>>>>>>>>>>> +	err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
>>>>>>>>>>>>> +	if (!err)
>>>>>>>>>>>>> +		err = genlmsg_reply(msg, info);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	put_device(dev);
>>>>>>>>>>>>> +	mutex_unlock(&vdpa_dev_mutex);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	if (err)
>>>>>>>>>>>>> +		nlmsg_free(msg);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	return err;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +mdev_err:
>>>>>>>>>>>>> +	put_device(dev);
>>>>>>>>>>>>> +dev_err:
>>>>>>>>>>>>> +	mutex_unlock(&vdpa_dev_mutex);
>>>>>>>>>>>>> +	return err;
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>>         static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1]
>>>>>> = {
>>>>>>>>>>>>>         	[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type =
>>>>>>>> NLA_NUL_STRING },
>>>>>>>>>>>>>         	[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING
>>>>>>>> }, @@ -
>>>>>>>>>> 997,6
>>>>>>>>>>>>> +1119,7 @@ static const struct nla_policy
>>>>>>>>>> vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>>>>>>>>>>>>>         	[VDPA_ATTR_DEV_NET_CFG_MACADDR] =
>>>>>>>> NLA_POLICY_ETH_ADDR,
>>>>>>>>>>>>>         	/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
>>>>>>>>>>>>>         	[VDPA_ATTR_DEV_NET_CFG_MTU] =
>>>>>>>> NLA_POLICY_MIN(NLA_U16, 68),
>>>>>>>>>>>>> +	[VDPA_ATTR_DEV_QUEUE_INDEX] =
>>>>>> NLA_POLICY_RANGE(NLA_U32, 0,
>>>>>>>>>> 65535),
>>>>>>>>>>>>>         };
>>>>>>>>>>>>>         static const struct genl_ops vdpa_nl_ops[] = { @@ -1030,6
>>>>>>>>>>>>> +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
>>>>>>>>>>>>>         		.doit = vdpa_nl_cmd_dev_config_get_doit,
>>>>>>>>>>>>>         		.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
>>>>>>>>>>>>>         	},
>>>>>>>>>>>>> +	{
>>>>>>>>>>>>> +		.cmd = VDPA_CMD_DEV_VSTATS_GET,
>>>>>>>>>>>>> +		.validate = GENL_DONT_VALIDATE_STRICT |
>>>>>>>>>> GENL_DONT_VALIDATE_DUMP,
>>>>>>>>>>>>> +		.doit = vdpa_nl_cmd_dev_stats_get_doit,
>>>>>>>>>>>>> +		.flags = GENL_ADMIN_PERM,
>>>>>>>>>>>>> +	},
>>>>>>>>>>>>>         };
>>>>>>>>>>>>>         static struct genl_family vdpa_nl_family __ro_after_init =
>>>>>>>>>>>>> { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index
>>>>>>>>>>>>> 2de442ececae..274203845cfc 100644
>>>>>>>>>>>>> --- a/include/linux/vdpa.h
>>>>>>>>>>>>> +++ b/include/linux/vdpa.h
>>>>>>>>>>>>> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
>>>>>>>>>>>>>         			    const struct vdpa_vq_state *state);
>>>>>>>>>>>>>         	int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
>>>>>>>>>>>>>         			    struct vdpa_vq_state *state);
>>>>>>>>>>>>> +	int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx,
>>>>>>>>>>>>> +				   struct sk_buff *msg,
>>>>>>>>>>>>> +				   struct netlink_ext_ack *extack);
>>>>>>>>>>>>>         	struct vdpa_notification_area
>>>>>>>>>>>>>         	(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
>>>>>>>>>>>>>         	/* vq irq is not expected to be changed once DRIVER_OK is
>>>>>>>>>>>>> set */ @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
>>>>>>>>>>>>>         int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
>>>>>>>>>>>>>         void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
>>>>>>>>>>>>> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
>>>>>>>>>>>>> +
>>>>>>>>>>>>>         #endif /* _LINUX_VDPA_H */
>>>>>>>>>>>>> diff --git a/include/uapi/linux/vdpa.h
>>>>>>>>>>>>> b/include/uapi/linux/vdpa.h index 1061d8d2d09d..c5f229a41dc2
>>>>>>>>>>>>> 100644
>>>>>>>>>>>>> --- a/include/uapi/linux/vdpa.h
>>>>>>>>>>>>> +++ b/include/uapi/linux/vdpa.h
>>>>>>>>>>>>> @@ -18,6 +18,7 @@ enum vdpa_command {
>>>>>>>>>>>>>         	VDPA_CMD_DEV_DEL,
>>>>>>>>>>>>>         	VDPA_CMD_DEV_GET,		/* can dump */
>>>>>>>>>>>>>         	VDPA_CMD_DEV_CONFIG_GET,	/* can dump */
>>>>>>>>>>>>> +	VDPA_CMD_DEV_VSTATS_GET,
>>>>>>>>>>>>>         };
>>>>>>>>>>>>>         enum vdpa_attr {
>>>>>>>>>>>>> @@ -46,6 +47,12 @@ enum vdpa_attr {
>>>>>>>>>>>>>         	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
>>>>>>>>>>>>>         	VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,		/*
>>>>>>>> u32 */
>>>>>>>>>>>>>         	VDPA_ATTR_DEV_SUPPORTED_FEATURES,	/* u64 */
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
>>>>>>>>>>>>> +	VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
>>>>>>>>>>>>> +	VDPA_ATTR_DEV_VENDOR_ATTR_NAME,		/*
>>>>>> string */
>>>>>>>>>>>>> +	VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
>>>>>>>>>>>>> +
>>>>>>>>>>>>>         	/* new attributes must be added above here */
>>>>>>>>>>>>>         	VDPA_ATTR_MAX,
>>>>>>>>>>>>>         };


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
       [not found]                             ` <DM8PR12MB5400E7B2359FE4797F190AC5AB119@DM8PR12MB5400.namprd12.prod.outlook.com>
@ 2022-03-16 22:00                               ` Si-Wei Liu
  2022-03-17  2:32                                 ` Jason Wang
  0 siblings, 1 reply; 20+ messages in thread
From: Si-Wei Liu @ 2022-03-16 22:00 UTC (permalink / raw)
  To: Eli Cohen; +Cc: lvivier, mst, virtualization, eperezma



On 3/16/2022 12:10 AM, Eli Cohen wrote:
>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>> Sent: Wednesday, March 16, 2022 8:52 AM
>> To: Eli Cohen <elic@nvidia.com>
>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit <parav@nvidia.com>
>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
>>
>>
>>
>> On 3/15/2022 2:10 AM, Eli Cohen wrote:
>>
>> <...snip...>
>>
>>>> Say you got a vdpa net device created with 4 data queue pairs and a
>>>> control vq. On boot some guest firmware may support just F_CTRL_VQ but
>>>> not F_MQ, then the index for the control vq in guest ends up with 2, as
>>>> in this case there's only a single queue pair enabled for rx (index 0)
>>>> and tx (index 1). From the host driver (e.g. mlx5_vdpa) perspective, the
>>>> control vq is the last vq following 8
>>> If the host sees F_MQ was not negotiated but F_CTRL_VQ was, then it knows
>>> that control VQ index is 2
>> Right, but I don't see this feature negotiation info getting returned
>> from your vdpa_dev_vendor_stats_fill(), or did I miss something? How do
>> you plan for the host user to get this info? If you meant another "vdpa
>> dev show" command to query negotiated features ahead of time, that won't
>> be protected by the same lock as the stat query. It's very easy to miss
>> that ephemeral queue index.
> Right, so I suggested to include the negotiated features in the netlink message
> for the statistics. That would save us from using two system calls to get the
> information required and it answers your concern with respect to locking.
> I think Jason was reluctant to add this attribute to the message but I can't
> find where he explained the reasoning.
Maybe Jason can clarify and correct me, but I just did not get the same 
impression as what you said? I just skimmed through all of the emails in 
the thread, only finding that he didn't want a device specific attribute 
such as queue type to be returned by the vdpa core, which I agree with. 
I'm not sure if he's explicitly against piggybacking negotiated features 
to aid userspace in parsing the index.

Alternatively, the vdpa tool may pass down -1 to get_vq_vstat() to 
represent the queue index for the control queue - but that's less 
favorable as the vdpa core would need to maintain device specific 
knowledge. A rough sketch of resolving such a selector is below.
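
On the kernel side, assuming a net-class device, the resolution could be 
as simple as the sketch below; the helper name and its placement are made 
up for illustration:

	/* Sketch: resolve a "control vq" selector to a concrete queue index
	 * from the negotiated features. virtio-net rules: the cvq follows
	 * all data vqs when F_MQ is negotiated, otherwise it is index 2.
	 */
	static u16 resolve_cvq_index(u64 negotiated_features, u16 max_vq_pairs)
	{
		if (!(negotiated_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return VDPA_INVAL_QUEUE_INDEX;	/* no cvq negotiated */

		if (negotiated_features & BIT_ULL(VIRTIO_NET_F_MQ))
			return 2 * max_vq_pairs;	/* after all data vqs */

		return 2;	/* single pair: rx = 0, tx = 1, cvq = 2 */
	}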



>
>>>> data vqs of all 4 pairs, hence got
>>>> the 8th index in the rank. Since F_MQ is not negotiated and only 1 data
>>>> queue pair enabled, in such event only host qindex 0,1 and 8 have vendor
>>>> stats available, and the rest of qindex would get invalid/empty stat.
>>>>
>>>> Later on say boot continues towards loading the Linux virtio driver,
>>>> then guest could successfully negotiate both F_CTRL_VQ and F_MQ
>>>> features. In this case, all 8 data virtqueues are fully enabled, the
>>>> index for the control vq ends up as 8, following tightly after all the 4
>>>> data queue pairs. Only until both features are negotiated, the guest and
>>>> host are able to see consistent view in identifying the control vq.
>>>> Since F_MQ is negotiated, all host queues, indexed from 0 through 8,
>>>> should have vendor stats available.
>>>>
>>>> That's why I said the guest qindex is ephemeral and hard to predict
>>>> subjected to negotiated features, but host qindex is reliable and more
>>>> eligible for command line identification purpose.
>>>>
>> <...snip...>
>>>>> So what are you actually proposing? Display received and completed descriptors
>>>>> per queue index without further interpretation?
>>>> I'd suggest using a more stable queue id i.e. the host queue index to
>>>> represent the qidx (which seems to be what you're doing now?), and
>>>> displaying both the host qindex (queue_index_device in the example
>>>> below), as well as the guest's (queue_index_driver as below) in the output:
>>>>
>>> Given that per vdpa device you can display statistics only after features have
>>> been negotiated, you can always know the correct queue index for the control
>>> VQ.
>> The stats can be displayed only after features are negotiated, and only
>> when the corresponding queue is enabled. If you know from "vdpa dev
>> show" on day 1 that the control vq and mq features are negotiated, but
>> then on day 2 you get nothing for the predicted control vq index, what
>> would you recommend the host admin do to get the right qindex again?
>> Re-run the stat query on the same queue index, or check the "vdpa dev
>> show" output again on day 3? This CLI design makes it really challenging
>> for a cloud administrator to follow the dynamics of guest activities when
>> managing hundreds or thousands of virtual machines...
>>
>> It would be easier, in my opinion, to have some well-defined handle
>> that is easily predictable or fixed across the board, for looking up the
>> control virtqueue. This could be a constant host queue index, or a
>> special magic keyword like "qidx ctrlvq". If a cloud admin runs a vstat
>> query on the control vq using a determined handle but gets nothing back,
>> then s/he knows *for sure* the control vq was not available for some
>> reason at the point when the stat was being collected. S/he doesn't even
>> need to care about the negotiated status via "vdpa dev show" at all. Why bother?
> So, per my suggestion above, passing the negotiated attribute in the netlink
> message would satisfy the requirements for atomicity, right?
Yes, it satisfies the atomicity requirement, though I'm not sure how you 
want to represent the queue index for the control vq. Basically, if a 
cloud admin wants to dump control queue stats explicitly with a fixed 
handle or identifier, how can that be done with the negotiated attribute?

Thanks,
-Siwei
>
>>> Do you still see your proposal as required?
>> Yes, this is essential to any cloud admin that runs stat queries on all of
>> the queues on a periodic basis. You'd get something deterministic without
>> blindly guessing or bothering with other irrelevant commands.
>>
>>
>> Thanks,
>> -Siwei
>>>> $ vdpa -jp dev vstats show vdpa-a qidx 8
>>>> {
>>>>        "vstats": {
>>>>            "vdpa-a": {
>>>>                "queue_stats": [{
>>>>                    "queue_index_device": 8,
>>>>                    "queue_index_driver": 2,
>>>>                    "queue_type": "control_vq",
>>>>                    "stat_name": [ "received_desc","completed_desc" ],
>>>>                    "stat_value": [ 417776,417775 ],
>>>>                }]
>>>>            }
>>>>        }
>>>> }
>>>>
>>>> Optionally, the user may use the guest queue index gqidx, which is kind
>>>> of an ephemeral ID dependent on F_MQ negotiation, to query the stat on a
>>>> specific guest queue:
>>>>
>>>> $ vdpa -jp dev vstats show vdpa-a gqidx 2
>>>> {
>>>>        "vstats": {
>>>>            "vdpa-a": {
>>>>                "queue_stats": [{
>>>>                    "queue_index_device": 8,
>>>>                    "queue_index_driver": 2,
>>>>                    "queue_type": "control_vq",
>>>>                    "stat_name": [ "received_desc","completed_desc" ],
>>>>                    "stat_value": [ 417776,417775 ],
>>>>                }]
>>>>            }
>>>>        }
>>>> }
>>>>
>>>> Thanks,
>>>> -Siwei
>>>>
>>>>>> Thanks,
>>>>>> -Siwei
>>>>>>
>>>>>>>> Regards,
>>>>>>>> -Siwei
>>>>>>>>
>>>>>>>>>>>> Looks to me there are still some loose ends I don't quite yet
>>>>>>>>>>>> understand.
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>>>>                     "queue_index": 0,
>>>>>>>>>>> I think this can be removed since the command is for a specific index.
>>>>>>>>>>>
>>>>>>>>>>>>>>>                     "name": "received_desc",
>>>>>>>>>>>>>>>                     "value": 417776,
>>>>>>>>>>>>>>>                     "name": "completed_desc",
>>>>>>>>>>>>>>>                     "value": 417548
>>>>>>>>>>>>>> Not for this kernel patch, but IMHO it's the best to put the name
>>>>>>>>>>>>>> & value pairs in an array instead of flat entries in json's
>>>>>>>>>>>>>> hash/dictionary. The hash entries can be re-ordered deliberately
>>>>>>>>>>>>>> by external json parsing tool, ending up with inconsistent stat values.
>>>>>>>>>>>> This comment is missed for some reason. Please change the example
>>>>>>>>>>>> in the log if you agree to address it in vdpa tool. Or justify why
>>>>>>>>>>>> keeping the order for json hash/dictionary is fine.
>>>>>>>>>>> Sorry for skipping this comment.
>>>>>>>>>>> Do you mean to present the information like:
>>>>>>>>>>> "received_desc": 417776,
>>>>>>>>>>> "completed_desc": 417548,
>>>>>>>>>> I mean the following presentation:
>>>>>>>>>>
>>>>>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 0 {
>>>>>>>>>>           "vstats": {
>>>>>>>>>>               "vdpa-a": {
>>>>>>>>>>                   "queue_stats": [{
>>>>>>>>>>                       "queue_index": 0,
>>>>>>>>>>                       "queue_type": "rx",
>>>>>>>>>>                       "stat_name": [ "received_desc","completed_desc" ],
>>>>>>>>>>                       "stat_value": [ 417776,417548 ],
>>>>>>>>>>                   }]
>>>>>>>>>>               }
>>>>>>>>>>           }
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> I think Parav had similar suggestion, too.
>>>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>> -Siwei
>>>>>>>>>>
>>>>>>>>>>>> Thanks,
>>>>>>>>>>>> -Siwei
>>>>>>>>>>>>
>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>> -Siwei
>>>>>>>>>>>>>>>                 }
>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>          drivers/vdpa/vdpa.c       | 129
>>>>>>>>>>>>>>> <...remainder of the quoted patch trimmed; see the original patch posting for the full diff...>


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
  2022-03-16 22:00                               ` Si-Wei Liu
@ 2022-03-17  2:32                                 ` Jason Wang
  2022-03-18  0:58                                   ` Si-Wei Liu
  0 siblings, 1 reply; 20+ messages in thread
From: Jason Wang @ 2022-03-17  2:32 UTC (permalink / raw)
  To: Si-Wei Liu; +Cc: lvivier, mst, virtualization, eperezma, Eli Cohen

On Thu, Mar 17, 2022 at 6:00 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 3/16/2022 12:10 AM, Eli Cohen wrote:
> >> From: Si-Wei Liu <si-wei.liu@oracle.com>
> >> Sent: Wednesday, March 16, 2022 8:52 AM
> >> To: Eli Cohen <elic@nvidia.com>
> >> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
> >> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit <parav@nvidia.com>
> >> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
> >>
> >>
> >>
> >> On 3/15/2022 2:10 AM, Eli Cohen wrote:
> >>
> >> <...snip...>
> >>
> >>>> Say you got a vdpa net device created with 4 data queue pairs and a
> >>>> control vq. On boot some guest firmware may support just F_CTRL_VQ but
> >>>> not F_MQ, then the index for the control vq in guest ends up with 2, as
> >>>> in this case there's only a single queue pair enabled for rx (index 0)
> >>>> and tx (index 1). From the host driver (e.g. mlx5_vdpa) perspective, the
> >>>> control vq is the last vq following 8
> >>> If the host sees F_MQ was not negotiated but F_CTRL_VQ was, then it knows
> >>> that control VQ index is 2
> >> Right, but I don't see this feature negotiation info getting returned
> >> from your vdpa_dev_vendor_stats_fill(), or did I miss something? How do
> >> you plan for the host user to get this info? If you meant another "vdpa dev
> >> show" command to query negotiated features ahead of time, this won't be
> >> protected by the same lock as when you run the stat query. It's very easy
> >> to miss that ephemeral queue index.
> > Right, so I suggested to include the negotiated features in the netlink message
> > for the statistics. That would save us from using two system calls to get the
> > information required and it answers your concern with respect to locking.
> > I think Jason was reluctant to add this attribute to the message but I can't
> > find where he explained the reasoning.
> Maybe Jason can clarify and correct me, but I just did not get the same
> impression as what you said? I just skimmed through all of the emails in
> the thread, only finding that he didn't want a device-specific attribute
> such as queue type to be returned by the vdpa core, which I agree with. I'm
> not sure if he's explicitly against piggybacking negotiated features to aid
> userspace in parsing the index.

I think we need to piggyback the negotiated features; otherwise, as you
mentioned, we will probably get an inconsistency.
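
For illustration only, roughly what that piggybacking could look like
inside the stats-fill path. Note that the getter for the negotiated
features and the PAD attribute are my assumptions here, not something
this series adds:

	/* Sketch: piggyback the negotiated features in the same VSTATS
	 * reply so userspace can interpret queue indices atomically.
	 * Assumes a config op returning the negotiated (driver) features
	 * and a VDPA_ATTR_PAD alias for u64 padding.
	 */
	u64 features = vdev->config->get_driver_features(vdev);

	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES,
			      features, VDPA_ATTR_PAD))
		return -EMSGSIZE;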

But a question remains for the "host queue index", as mentioned before. It's
something that is not defined in the spec, so technically a vendor can
do any mapping between it and the index the guest can see. I feel
like we need to clarify it in the spec first.

Thanks

>
> Another way around, vdpa tool may pass down -1 to get_vq_vstat() to
> represent the queue index for the control queue - but that's less
> favorable as the vdpa core needs to maintain device specific knowledge.
>
>
>
> >
> >>>> data vqs of all 4 pairs, hence got
> >>>> the 8th index in the rank. Since F_MQ is not negotiated and only 1 data
> >>>> queue pair enabled, in such event only host qindex 0,1 and 8 have vendor
> >>>> stats available, and the rest of qindex would get invalid/empty stat.
> >>>>
> >>>> Later on say boot continues towards loading the Linux virtio driver,
> >>>> then guest could successfully negotiate both F_CTRL_VQ and F_MQ
> >>>> features. In this case, all 8 data virtqueues are fully enabled, the
> >>>> index for the control vq ends up as 8, following tightly after all the 4
> >>>> data queue pairs. Only until both features are negotiated, the guest and
> >>>> host are able to see consistent view in identifying the control vq.
> >>>> Since F_MQ is negotiated, all host queues, indexed from 0 through 8,
> >>>> should have vendor stats available.
> >>>>
> >>>> That's why I said the guest qindex is ephemeral and hard to predict,
> >>>> subject to negotiated features, but the host qindex is reliable and more
> >>>> suitable for command-line identification purposes.
> >>>>
> >> <...snip...>
> >>>>> So what are you actually proposing? Display received and completed descriptors
> >>>>> per queue index without further interpretation?
> >>>> I'd suggest using a more stable queue id i.e. the host queue index to
> >>>> represent the qidx (which seems to be what you're doing now?), and
> >>>> displaying both the host qindex (queue_index_device in the example
> >>>> below), as well as the guest's (queue_index_driver as below) in the output:
> >>>>
> >>> Given that per vdpa device you can display statistics only after features have
> >>> been negotiated, you can always know the correct queue index for the control
> >>> VQ.
> >> The stats can be displayed only after features are negotiated, and only
> >> when the corresponding queue is enabled. If you know it from "vdpa dev
> >> show" on day 1 that the control vq and mq features are negotiated, but
> >> then on day2 you got nothing for the predicted control vq index, what
> >> would you recommend the host admin to do to get the right qindex again?
> >> Re-run the stat query on the same queue index, or check the "vdpa dev
> >> show" output again on day 3? This CLI design makes cloud administrator
> >> really challenging to follow the dynamics of guest activities were to
> >> manage hundreds or thousands of virtual machines...
> >>
> >> It would be easier, in my opinion, to grasp some well-defined handle
> >> that is easily predictable or fixed across the board, for looking up the
> >> control virtqueue. This could be a constant host queue index, or a
> >> special magic keyword like "qidx ctrlvq". If the cloud admin runs a vstat
> >> query on the control vq using a determined handle but gets nothing back,
> >> then s/he knows *for sure* the control vq was not available for some
> >> reason at the point when the stat was being collected. S/he doesn't even
> >> need to check the negotiated status via "vdpa dev show" at all. Why bother?
> > So, per my suggestion above, passing the negotiated attribute in the netlink
> > message would satisfy the requirements for atomicity, right?
> Yes, it satisfies the atomicity requirement, though I'm not sure how you
> want to represent the queue index for the control vq. Basically, if the cloud
> admin wants to dump control queue stats explicitly with a fixed handle
> or identifier, how can that be done with the negotiated attribute?
>
> Thanks,
> -Siwei
> >
> >>> Do you still see your proposal as required?
> >> Yes, this is essential to any cloud admin who runs stat queries on all of
> >> the queues on a periodic basis. You'd get something deterministic without
> >> blindly guessing or bothering with other irrelevant commands.
> >>
> >>
> >> Thanks,
> >> -Siwei
> >>>> $ vdpa -jp dev vstats show vdpa-a qidx 8
> >>>> {
> >>>>        "vstats": {
> >>>>            "vdpa-a": {
> >>>>                "queue_stats": [{
> >>>>                    "queue_index_device": 8,
> >>>>                    "queue_index_driver": 2,
> >>>>                    "queue_type": "control_vq",
> >>>>                    "stat_name": [ "received_desc","completed_desc" ],
> >>>>                    "stat_value": [ 417776, 417775 ]
> >>>>                }]
> >>>>            }
> >>>>        }
> >>>> }
> >>>>
> >>>> Optionally, user may use guest queue index gqidx, which is kind of an
> >>>> ephemeral ID and F_MQ negotiation depended, to query the stat on a
> >>>> specific guest queue:
> >>>>
> >>>> $ vdpa -jp dev vstats show vdpa-a gqidx 2
> >>>> {
> >>>>        "vstats": {
> >>>>            "vdpa-a": {
> >>>>                "queue_stats": [{
> >>>>                    "queue_index_device": 8,
> >>>>                    "queue_index_driver": 2,
> >>>>                    "queue_type": "control_vq",
> >>>>                    "stat_name": [ "received_desc","completed_desc" ],
> >>>>                    "stat_value": [ 417776, 417775 ]
> >>>>                }]
> >>>>            }
> >>>>        }
> >>>> }
> >>>>
> >>>> Thanks,
> >>>> -Siwei
> >>>>
> >>>>>> Thanks,
> >>>>>> -Siwei
> >>>>>>
> >>>>>>>> Regards,
> >>>>>>>> -Siwei
> >>>>>>>>
> >>>>>>>>>>>> Looks to me there are still some loose end I don't quite yet
> >>>>>>>>>>>> understand.
> >>>>>>>>>>>>
> >>>>>>>>>>>>
> >>>>>>>>>>>>>>>                     "queue_index": 0,
> >>>>>>>>>>> I think this can be removed since the command is for a specific index.
> >>>>>>>>>>>
> >>>>>>>>>>>>>>>                     "name": "received_desc",
> >>>>>>>>>>>>>>>                     "value": 417776,
> >>>>>>>>>>>>>>>                     "name": "completed_desc",
> >>>>>>>>>>>>>>>                     "value": 417548
> >>>>>>>>>>>>>> Not for this kernel patch, but IMHO it's the best to put the name
> >>>>>>>>>>>>>> & value pairs in an array instead of flat entries in json's
> >>>>>>>>>>>>>> hash/dictionary. The hash entries can be re-ordered deliberately
> >>>>>>>>>>>>>> by external json parsing tool, ending up with inconsistent stat values.
> >>>>>>>>>>>> This comment is missed for some reason. Please change the example
> >>>>>>>>>>>> in the log if you agree to address it in vdpa tool. Or justify why
> >>>>>>>>>>>> keeping the order for json hash/dictionary is fine.
> >>>>>>>>>>> Sorry for skipping this comment.
> >>>>>>>>>>> Do you mean to present the information like:
> >>>>>>>>>>> "received_desc": 417776,
> >>>>>>>>>>> "completed_desc": 417548,
> >>>>>>>>>> I mean the following presentation:
> >>>>>>>>>>
> >>>>>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 0 {
> >>>>>>>>>>           "vstats": {
> >>>>>>>>>>               "vdpa-a": {
> >>>>>>>>>>                   "queue_stats": [{
> >>>>>>>>>>                       "queue_index": 0,
> >>>>>>>>>>                       "queue_type": "rx",
> >>>>>>>>>>                       "stat_name": [ "received_desc","completed_desc" ],
> >>>>>>>>>>                       "stat_value": [ 417776, 417548 ]
> >>>>>>>>>>                   }]
> >>>>>>>>>>               }
> >>>>>>>>>>           }
> >>>>>>>>>> }
> >>>>>>>>>>
> >>>>>>>>>> I think Parav had similar suggestion, too.
> >>>>>>>>>>
> >>>>>>>>>> Thanks,
> >>>>>>>>>> -Siwei
> >>>>>>>>>>
> >>>>>>>>>>>> Thanks,
> >>>>>>>>>>>> -Siwei
> >>>>>>>>>>>>
> >>>>>>>>>>>>>> Thanks,
> >>>>>>>>>>>>>> -Siwei
> >>>>>>>>>>>>>>>                 }
> >>>>>>>>>>>>>>>             }
> >>>>>>>>>>>>>>> }
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
> >>>>>>>>>>>>>>>>> <...remainder of the quoted patch trimmed; it is identical to the diff quoted earlier in the thread...>
>


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
  2022-03-17  2:32                                 ` Jason Wang
@ 2022-03-18  0:58                                   ` Si-Wei Liu
  2022-03-18  2:27                                     ` Jason Wang
  0 siblings, 1 reply; 20+ messages in thread
From: Si-Wei Liu @ 2022-03-18  0:58 UTC (permalink / raw)
  To: Jason Wang; +Cc: lvivier, mst, virtualization, eperezma, Eli Cohen



On 3/16/2022 7:32 PM, Jason Wang wrote:
> On Thu, Mar 17, 2022 at 6:00 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 3/16/2022 12:10 AM, Eli Cohen wrote:
>>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>>>> Sent: Wednesday, March 16, 2022 8:52 AM
>>>> To: Eli Cohen <elic@nvidia.com>
>>>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
>>>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit <parav@nvidia.com>
>>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
>>>>
>>>>
>>>>
>>>> On 3/15/2022 2:10 AM, Eli Cohen wrote:
>>>>
>>>> <...snip...>
>>>>
>>>>>> Say you got a vdpa net device created with 4 data queue pairs and a
>>>>>> control vq. On boot some guest firmware may support just F_CTRL_VQ but
>>>>>> not F_MQ, then the index for the control vq in guest ends up with 2, as
>>>>>> in this case there's only a single queue pair enabled for rx (index 0)
>>>>>> and tx (index 1). From the host driver (e.g. mlx5_vdpa) perspective, the
>>>>>> control vq is the last vq following 8
>>>>> If the host sees F_MQ was not negotiated but F_CTRL_VQ was, then it knows
>>>>> that control VQ index is 2
>>>> Right, but I don't see this feature negotiation info getting returned
>>>> from your vdpa_dev_vendor_stats_fill(), or did I miss something? How do
>>>> you plan for the host user to get this info? If you meant another "vdpa dev
>>>> show" command to query negotiated features ahead of time, this won't be
>>>> protected by the same lock as when you run the stat query. It's very easy
>>>> to miss that ephemeral queue index.
>>> Right, so I suggested to include the negotiated features in the netlink message
>>> for the statistics. That would save us from using two system calls to get the
>>> information required and it answers your concern with respect to locking.
>>> I think Jason was reluctant to add this attribute to the message but I can't
>>> find where he explained the reasoning.
>> Maybe Jason can clarify and correct me, but I just did not get the same
>> impression as what you said? I just skimmed through all of the emails in
>> the thread, only finding that he didn't want a device-specific attribute
>> such as queue type to be returned by the vdpa core, which I agree with. I'm
>> not sure if he's explicitly against piggybacking negotiated features to aid
>> userspace in parsing the index.
> I think we need to piggyback the negotiated features; otherwise, as you
> mentioned, we will probably get an inconsistency.
Great. Thanks for confirming it.

>
> But a question remains for the "host queue index", as mentioned before. It's
> something that is not defined in the spec, so technically a vendor can
> do any mapping between it and the index the guest can see. I feel
> like we need to clarify it in the spec first.
I have been thinking about this for some while today. Actually I am not 
against exposing the host queue index to the spec, as we know it's 
somewhat implicitly defined in the QEMU device model for multiqueue. The 
thing is, I'm not sure if there's extra benefit beyond this minor
requirement (*), given that all of the other vDPA kAPIs take the
guest queue index rather than the host queue index. It works for
mlx5_vdpa as the control vq is implemented in the software, so it can 
map to whatever guest qindex it wishes to. But would it cause extra 
trouble for some other emulated vDPA device or other vendor's vDPA such 
as ifcvf to fabricate a fake mapping between the host queue index and 
the one guest can see? I would have to send a heads-up ahead that the 
current vhost-vdpa mq implementation in upstream QEMU has an issue in
mapping the host qindex to the guest one. This would become a problem
with an MQ-enabled vdpa device and a non-MQ-supporting guest, e.g. OVMF, for
which I'm about to share some RFC patches shortly to demonstrate the 
issue. If exposing the host queue index to the spec turns out to be essential
to resolving this issue, and maybe helps with the software virtio QEMU
implementation too, I won't hesitate to expose this important 
implementation detail to the spec.

(*) Another means that may somehow address my use case is to use some
magic keyword, e.g. "ctrlvq", to identify the control vq. Implementation-wise,
we can internally pass -1 to the get_vq_vstat() API to indicate the last
guest qindex, given that we know for sure the ctrlvq is the last queue
in the array when the relevant features are present. Since the negotiated
features are piggybacked, it's not hard for the vdpa tool to tell
whether the last queue is a control vq or not.
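
To make this concrete, here is a hypothetical vdpa-tool snippet; "arg"
and "nlh" stand in for the tool's real argument-parsing and libmnl
message-building context, which I'm not quoting here:

	/* Map the "ctrlvq" keyword to the sentinel from this series so
	 * the kernel can resolve it to the last (control) virtqueue;
	 * plain numeric indices pass through unchanged.
	 */
	uint32_t qidx;

	if (strcmp(arg, "ctrlvq") == 0)
		qidx = VDPA_INVAL_QUEUE_INDEX;	/* 0xffff */
	else
		qidx = strtoul(arg, NULL, 0);

	mnl_attr_put_u32(nlh, VDPA_ATTR_DEV_QUEUE_INDEX, qidx);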

I'd also welcome other ideas that can make virtqueue identification
easier and more predictable from the CLI.

Thanks,
-Siwei

>
> Thanks
>
>> Another way around, vdpa tool may pass down -1 to get_vq_vstat() to
>> represent the queue index for the control queue - but that's less
>> favorable as the vdpa core needs to maintain device specific knowledge.
>>
>>
>>
>>>>>> data vqs of all 4 pairs, hence got
>>>>>> the 8th index in the rank. Since F_MQ is not negotiated and only 1 data
>>>>>> queue pair enabled, in such event only host qindex 0,1 and 8 have vendor
>>>>>> stats available, and the rest of qindex would get invalid/empty stat.
>>>>>>
>>>>>> Later on say boot continues towards loading the Linux virtio driver,
>>>>>> then guest could successfully negotiate both F_CTRL_VQ and F_MQ
>>>>>> features. In this case, all 8 data virtqueues are fully enabled, the
>>>>>> index for the control vq ends up as 8, following tightly after all the 4
>>>>>> data queue pairs. Only until both features are negotiated, the guest and
>>>>>> host are able to see consistent view in identifying the control vq.
>>>>>> Since F_MQ is negotiated, all host queues, indexed from 0 through 8,
>>>>>> should have vendor stats available.
>>>>>>
>>>>>> That's why I said the guest qindex is ephemeral and hard to predict,
>>>>>> subject to negotiated features, but the host qindex is reliable and more
>>>>>> suitable for command-line identification purposes.
>>>>>>
>>>> <...snip...>
>>>>>>> So what are you actually proposing? Display received and completed descriptors
>>>>>>> per queue index without further interpretation?
>>>>>> I'd suggest using a more stable queue id i.e. the host queue index to
>>>>>> represent the qidx (which seems to be what you're doing now?), and
>>>>>> displaying both the host qindex (queue_index_device in the example
>>>>>> below), as well as the guest's (queue_index_driver as below) in the output:
>>>>>>
>>>>> Given that per vdpa device you can display statistics only after features have
>>>>> been negotiated, you can always know the correct queue index for the control
>>>>> VQ.
>>>> The stats can be displayed only after features are negotiated, and only
>>>> when the corresponding queue is enabled. If you know it from "vdpa dev
>>>> show" on day 1 that the control vq and mq features are negotiated, but
>>>> then on day2 you got nothing for the predicted control vq index, what
>>>> would you recommend the host admin to do to get the right qindex again?
>>>> Re-run the stat query on the same queue index, or check the "vdpa dev
>>>> show" output again on day 3? This CLI design makes cloud administrator
>>>> really challenging to follow the dynamics of guest activities were to
>>>> manage hundreds or thousands of virtual machines...
>>>>
>>>> It would be easier, in my opinion, to grasp some well-defined handle
>>>> that is easily predictable or fixed across the board, for looking up the
>>>> control virtqueue. This could be a constant host queue index, or a
>>>> special magic keyword like "qidx ctrlvq". If the cloud admin runs a vstat
>>>> query on the control vq using a determined handle but gets nothing back,
>>>> then s/he knows *for sure* the control vq was not available for some
>>>> reason at the point when the stat was being collected. S/he doesn't even
>>>> need to check the negotiated status via "vdpa dev show" at all. Why bother?
>>> So, per my suggestion above, passing the negotiated attribute in the netlink
>>> message would satisfy the requirements for atomicity, right?
>> Yes, it satisfies the atomicity requirement, though I'm not sure how you
>> want to represent the queue index for the control vq. Basically, if the cloud
>> admin wants to dump control queue stats explicitly with a fixed handle
>> or identifier, how can that be done with the negotiated attribute?
>>
>> Thanks,
>> -Siwei
>>>>> Do you still see your proposal as required?
>>>> Yes, this is essential to any cloud admin who runs stat queries on all of
>>>> the queues on a periodic basis. You'd get something deterministic without
>>>> blindly guessing or bothering with other irrelevant commands.
>>>>
>>>>
>>>> Thanks,
>>>> -Siwei
>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 8
>>>>>> {
>>>>>>         "vstats": {
>>>>>>             "vdpa-a": {
>>>>>>                 "queue_stats": [{
>>>>>>                     "queue_index_device": 8,
>>>>>>                     "queue_index_driver": 2,
>>>>>>                     "queue_type": "control_vq",
>>>>>>                     "stat_name": [ "received_desc","completed_desc" ],
>>>>>>                     "stat_value": [ 417776, 417775 ]
>>>>>>                 }]
>>>>>>             }
>>>>>>         }
>>>>>> }
>>>>>>
>>>>>> Optionally, user may use guest queue index gqidx, which is kind of an
>>>>>> ephemeral ID and F_MQ negotiation depended, to query the stat on a
>>>>>> specific guest queue:
>>>>>>
>>>>>> $ vdpa -jp dev vstats show vdpa-a gqidx 2
>>>>>> {
>>>>>>         "vstats": {
>>>>>>             "vdpa-a": {
>>>>>>                 "queue_stats": [{
>>>>>>                     "queue_index_device": 8,
>>>>>>                     "queue_index_driver": 2,
>>>>>>                     "queue_type": "control_vq",
>>>>>>                     "stat_name": [ "received_desc","completed_desc" ],
>>>>>>                     "stat_value": [ 417776, 417775 ]
>>>>>>                 }]
>>>>>>             }
>>>>>>         }
>>>>>> }
>>>>>>
>>>>>> Thanks,
>>>>>> -Siwei
>>>>>>
>>>>>>>> Thanks,
>>>>>>>> -Siwei
>>>>>>>>
>>>>>>>>>> Regards,
>>>>>>>>>> -Siwei
>>>>>>>>>>
>>>>>>>>>>>>>> Looks to me there are still some loose end I don't quite yet
>>>>>>>>>>>>>> understand.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>                      "queue_index": 0,
>>>>>>>>>>>>> I think this can be removed since the command is for a specific index.
>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>                      "name": "received_desc",
>>>>>>>>>>>>>>>>>                      "value": 417776,
>>>>>>>>>>>>>>>>>                      "name": "completed_desc",
>>>>>>>>>>>>>>>>>                      "value": 417548
>>>>>>>>>>>>>>>> Not for this kernel patch, but IMHO it's the best to put the name
>>>>>>>>>>>>>>>> & value pairs in an array instead of flat entries in json's
>>>>>>>>>>>>>>>> hash/dictionary. The hash entries can be re-ordered deliberately
>>>>>>>>>>>>>>>> by external json parsing tool, ending up with inconsistent stat values.
>>>>>>>>>>>>>> This comment is missed for some reason. Please change the example
>>>>>>>>>>>>>> in the log if you agree to address it in vdpa tool. Or justify why
>>>>>>>>>>>>>> keeping the order for json hash/dictionary is fine.
>>>>>>>>>>>>> Sorry for skipping this comment.
>>>>>>>>>>>>> Do you mean to present the information like:
>>>>>>>>>>>>> "received_desc": 417776,
>>>>>>>>>>>>> "completed_desc": 417548,
>>>>>>>>>>>> I mean the following presentation:
>>>>>>>>>>>>
>>>>>>>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 0 {
>>>>>>>>>>>>            "vstats": {
>>>>>>>>>>>>                "vdpa-a": {
>>>>>>>>>>>>                    "queue_stats": [{
>>>>>>>>>>>>                        "queue_index": 0,
>>>>>>>>>>>>                        "queue_type": "rx",
>>>>>>>>>>>>                        "stat_name": [ "received_desc","completed_desc" ],
>>>>>>>>>>>>                        "stat_value": [ 417776, 417548 ]
>>>>>>>>>>>>                    }]
>>>>>>>>>>>>                }
>>>>>>>>>>>>            }
>>>>>>>>>>>> }
>>>>>>>>>>>>
>>>>>>>>>>>> I think Parav had similar suggestion, too.
>>>>>>>>>>>>
>>>>>>>>>>>> Thanks,
>>>>>>>>>>>> -Siwei
>>>>>>>>>>>>
>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>> -Siwei
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>> -Siwei
>>>>>>>>>>>>>>>>>                  }
>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>>>>>>>>>>>>>> <...remainder of the quoted patch trimmed; it is identical to the diff quoted earlier in the thread...>


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
  2022-03-18  0:58                                   ` Si-Wei Liu
@ 2022-03-18  2:27                                     ` Jason Wang
  2022-03-19  5:18                                       ` Si-Wei Liu
  0 siblings, 1 reply; 20+ messages in thread
From: Jason Wang @ 2022-03-18  2:27 UTC (permalink / raw)
  To: Si-Wei Liu; +Cc: lvivier, mst, virtualization, eperezma, Eli Cohen

On Fri, Mar 18, 2022 at 8:59 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 3/16/2022 7:32 PM, Jason Wang wrote:
> > On Thu, Mar 17, 2022 at 6:00 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>
> >>
> >> On 3/16/2022 12:10 AM, Eli Cohen wrote:
> >>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
> >>>> Sent: Wednesday, March 16, 2022 8:52 AM
> >>>> To: Eli Cohen <elic@nvidia.com>
> >>>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
> >>>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit <parav@nvidia.com>
> >>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
> >>>>
> >>>>
> >>>>
> >>>> On 3/15/2022 2:10 AM, Eli Cohen wrote:
> >>>>
> >>>> <...snip...>
> >>>>
> >>>>>> Say you got a vdpa net device created with 4 data queue pairs and a
> >>>>>> control vq. On boot some guest firmware may support just F_CTRL_VQ but
> >>>>>> not F_MQ, then the index for the control vq in guest ends up with 2, as
> >>>>>> in this case there's only a single queue pair enabled for rx (index 0)
> >>>>>> and tx (index 1). From the host driver (e.g. mlx5_vdpa) perspective, the
> >>>>>> control vq is the last vq following 8
> >>>>> If the host sees F_MQ was not negotiated but F_CTRL_VQ was, then it knows
> >>>>> that control VQ index is 2
> >>>> Right, but I don't see this feature negotiation info getting returned
> >>>> from your vdpa_dev_vendor_stats_fill(), or did I miss something? How do
> >>>> you plan for the host user to get this info? If you meant another "vdpa dev
> >>>> show" command to query negotiated features ahead of time, this won't be
> >>>> protected by the same lock as when you run the stat query. It's very easy
> >>>> to miss that ephemeral queue index.
> >>> Right, so I suggested to include the negotiated features in the netlink message
> >>> for the statistics. That would save us from using two system calls to get the
> >>> information required and it answers your concern with respect to locking.
> >>> I think Jason was reluctant to add this attribute to the message but I can't
> >>> find where he explained the reasoning.
> >> Maybe Jason can clarify and correct me, but I just did not get the same
> >> impression as what you said? I just skimmed through all of the emails in
> >> the thread, only finding that he didn't want device specific attribute
> >> such as queue type to get returned by the vdpa core, which I agree. I'm
> >> not sure if he's explicitly against piggyback negotiated features to aid
> >> userspace parsing the index.
> > I think we need to piggyback the negotiated features; otherwise, as you
> > mentioned, we will probably get inconsistency.
> Great. Thanks for confirming it.
>
> >
> > But a question for the "host queue index", as mentioned before. It's
> > something that is not defined in the spec, so technically, vendor can
> > do any mappings between it and the index what guest can see. I feel
> > like we need to clarify it in the spec first.
> I have been thinking about this for some while today. Actually I am not
> against exposing the host queue index to the spec, as we know it's
> somewhat implicitly defined in the QEMU device model for multiqueue. The
> thing is, I'm not sure if there's extra benefit than this minor
> requirement (*) given that all of the other vDPA kAPI are taking the
> guest queue index rather than the host queue index.

Rethinking this: considering we currently do this via vendor stats, it's
probably fine. Maybe we can have a better netlink API like
"vendor_queue_index" etc., and then everything should be fine.

> It works for
> mlx5_vdpa as the control vq is implemented in the software, so it can
> map to whatever guest qindex it wishes to. But would it cause extra
> trouble for some other emulated vDPA device or other vendor's vDPA such
> as ifcvf to fabricate a fake mapping between the host queue index and
> the one guest can see? I would have to send a heads-up ahead that the
> current vhost-vdpa mq implementation in upstream QEMU has some issue in
> mapping the host qindex to the guest one. This would become a problem
> with MQ enabled vdpa device and a non-MQ supporting guest e.g. OVMF, for
> which I'm about to share some RFC patches shortly to demonstrate the
> issue.

Sure.

> If exposing the host queue index to the spec turns out to be essential
> to resolving this issue and maybe help with software virtio QEMU
> implementation too, I won't hesitate to expose this important
> implementation detail to the spec.
>
> (*) another means that may somehow address my use case is to use some
> magic keyword e.g. "ctrlvq" to identify the control vq. Implementation
> wise, we can extend the get_vq_vstat() API to take -1 to indicate the
> last guest qindex, given that we know for sure the ctrlvq is the
> last queue in the array when the relevant features are present. Since
> the negotiated features are piggybacked, it's not hard for the vdpa tool
> to tell apart whether the last queue is a control vq or not.
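
A minimal sketch of that "-1 means the last guest queue" convention,
with purely illustrative names, not from the patch:

    /* Sketch, not from the series: resolve a magic "last queue" request.
     * With F_CTRL_VQ negotiated, the last guest queue is the control vq. */
    #define VDPA_VSTAT_QIDX_LAST    U32_MAX

    static u32 vdpa_vstats_resolve_qidx(u32 requested, u16 last_guest_qidx)
    {
            return requested == VDPA_VSTAT_QIDX_LAST ? last_guest_qidx
                                                     : requested;
    }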

For the virtqueue index (guest index) defined in the spec, I'd let
userspace deduce it.
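
For example, a userspace sketch of that deduction, assuming the
negotiated features are piggybacked in the reply; the feature bit names
are the standard ones from linux/virtio_net.h, and the queue layout is
the one discussed in this thread:

    /* Userspace sketch: deduce the guest index of the control vq from
     * the negotiated features. */
    #include <stdint.h>
    #include <linux/virtio_net.h>

    static int ctrl_vq_guest_index(uint64_t negotiated, uint16_t max_vq_pairs)
    {
            if (!(negotiated & (1ULL << VIRTIO_NET_F_CTRL_VQ)))
                    return -1;               /* no control vq */
            if (negotiated & (1ULL << VIRTIO_NET_F_MQ))
                    return 2 * max_vq_pairs; /* e.g. 8 with 4 data queue pairs */
            return 2;                        /* rx = 0, tx = 1, ctrl = 2 */
    }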

But for the host or vendor index, we probably can do this.

(Btw, I feel like we need to separate the features: if we agree to go
with the host/vendor index, we can let the guest index part go in first.)

Thanks

>
> I'd also welcome other ideas that can make virtqueue identification
> easier and predictable from the CLI.
>
> Thanks,
> -Siwei
>
> >
> > Thanks
> >
> >> Another way around, vdpa tool may pass down -1 to get_vq_vstat() to
> >> represent the queue index for the control queue - but that's less
> >> favorable as the vdpa core needs to maintain device specific knowledge.
> >>
> >>
> >>
> >>>>>> data vqs of all 4 pairs, hence got
> >>>>>> the 8th index in the rank. Since F_MQ is not negotiated and only 1 data
> >>>>>> queue pair enabled, in such event only host qindex 0,1 and 8 have vendor
> >>>>>> stats available, and the rest of qindex would get invalid/empty stat.
> >>>>>>
> >>>>>> Later on say boot continues towards loading the Linux virtio driver,
> >>>>>> then guest could successfully negotiate both F_CTRL_VQ and F_MQ
> >>>>>> features. In this case, all 8 data virtqueues are fully enabled, the
> >>>>>> index for the control vq ends up as 8, following tightly after all the 4
> >>>>>> data queue pairs. Only until both features are negotiated, the guest and
> >>>>>> host are able to see consistent view in identifying the control vq.
> >>>>>> Since F_MQ is negotiated, all host queues, indexed from 0 through 8,
> >>>>>> should have vendor stats available.
> >>>>>>
> >>>>>> That's why I said the guest qindex is ephemeral and hard to predict
> >>>>>> subject to negotiated features, but host qindex is reliable and more
> >>>>>> eligible for command line identification purpose.
> >>>>>>
> >>>> <...snip...>
> >>>>>>> So what are you actually proposing? Display received and completed descriptors
> >>>>>>> per queue index without further interpretation?
> >>>>>> I'd suggest using a more stable queue id i.e. the host queue index to
> >>>>>> represent the qidx (which seems to be what you're doing now?), and
> >>>>>> displaying both the host qindex (queue_index_device in the example
> >>>>>> below), as well as the guest's (queue_index_driver as below) in the output:
> >>>>>>
> >>>>> Given that per vdpa device you can display statistics only after features have
> >>>>> been negotiated, you can always know the correct queue index for the control
> >>>>> VQ.
> >>>> The stats can be displayed only after features are negotiated, and only
> >>>> when the corresponding queue is enabled. If you know it from "vdpa dev
> >>>> show" on day 1 that the control vq and mq features are negotiated, but
> >>>> then on day2 you got nothing for the predicted control vq index, what
> >>>> would you recommend the host admin to do to get the right qindex again?
> >>>> Re-run the stat query on the same queue index, or check the "vdpa dev
> >>>> show" output again on day 3? This CLI design makes cloud administrator
> >>>> really challenging to follow the dynamics of guest activities were to
> >>>> manage hundreds or thousands of virtual machines...
> >>>>
> >>>> It would be easier, in my opinion, to grasp some well-defined handle
> >>>> that is easily predictable or fixed across the board, for looking up the
> >>>> control virtqueue. This could be a constant host queue index, or a
> >>>> special magic keyword like "qidx ctrlvq". If cloud admin runs vstat
> >>>> query on the control vq using a determined handle but get nothing back,
> >>>> then s/he knows *for sure* the control vq was not available for some
> >>>> reason at the point when the stat was being collected. S/he doesn't even
> >>>> need to care negotiated status via "vdpa dev show" at all. Why bother?
> >>> So, per my suggestion above, passing the negotiated attribute in the netlink
> >>> message would satisfy the requirements for atomicity, right?
> >> Yes, it satisfied the atomicity requirement, though not sure how you
> >> want to represent the queue index for control vq? Basically if cloud
> >> admin wants to dump control queue stats explicitly with a fixed handle
> >> or identifier, how that can be done with the negotiated attribute?
> >>
> >> Thanks,
> >> -Siwei
> >>>>> Do you still see your proposal as required?
> >>>> Yes, this is essential to any cloud admin that runs stat queries on all of
> >>>> the queues on a periodic basis. You'd get something deterministic without
> >>>> blindly guessing or bothering with other irrelevant commands.
> >>>>
> >>>>
> >>>> Thanks,
> >>>> -Siwei
> >>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 8
> >>>>>> {
> >>>>>>         "vstats": {
> >>>>>>             "vdpa-a": {
> >>>>>>                 "queue_stats": [{
> >>>>>>                     "queue_index_device": 8,
> >>>>>>                     "queue_index_driver": 2,
> >>>>>>                     "queue_type": "control_vq",
> >>>>>>                     "stat_name": [ "received_desc","completed_desc" ],
> >>>>>>                     "stat_value": [ 417776,417775 ]
> >>>>>>                 }]
> >>>>>>             }
> >>>>>>         }
> >>>>>> }
> >>>>>>
> >>>>>> Optionally, user may use guest queue index gqidx, which is kind of an
> >>>>>> ephemeral ID and F_MQ negotiation depended, to query the stat on a
> >>>>>> specific guest queue:
> >>>>>>
> >>>>>> $ vdpa -jp dev vstats show vdpa-a gqidx 2
> >>>>>> {
> >>>>>>         "vstats": {
> >>>>>>             "vdpa-a": {
> >>>>>>                 "queue_stats": [{
> >>>>>>                     "queue_index_device": 8,
> >>>>>>                     "queue_index_driver": 2,
> >>>>>>                     "queue_type": "control_vq",
> >>>>>>                     "stat_name": [ "received_desc","completed_desc" ],
> >>>>>>                     "stat_value": [ 417776,417775 ]
> >>>>>>                 }]
> >>>>>>             }
> >>>>>>         }
> >>>>>> }
> >>>>>>
> >>>>>> Thanks,
> >>>>>> -Siwei
> >>>>>>
> >>>>>>>> Thanks,
> >>>>>>>> -Siwei
> >>>>>>>>
> >>>>>>>>>> Regards,
> >>>>>>>>>> -Siwei
> >>>>>>>>>>
> >>>>>>>>>>>>>> Looks to me there are still some loose ends I don't quite
> >>>>>>>>>>>>>> understand yet.
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>                      "queue_index": 0,
> >>>>>>>>>>>>> I think this can be removed since the command is for a specific index.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>                      "name": "received_desc",
> >>>>>>>>>>>>>>>>>                      "value": 417776,
> >>>>>>>>>>>>>>>>>                      "name": "completed_desc",
> >>>>>>>>>>>>>>>>>                      "value": 417548
> >>>>>>>>>>>>>>>> Not for this kernel patch, but IMHO it's the best to put the name
> >>>>>>>>>>>>>>>> & value pairs in an array instead of flat entries in json's
> >>>>>>>>>>>>>>>> hash/dictionary. The hash entries can be re-ordered deliberately
> >>>>>>>>>>>>>>>> by external json parsing tool, ending up with inconsistent stat values.
> >>>>>>>>>>>>>> This comment is missed for some reason. Please change the example
> >>>>>>>>>>>>>> in the log if you agree to address it in vdpa tool. Or justify why
> >>>>>>>>>>>>>> keeping the order for json hash/dictionary is fine.
> >>>>>>>>>>>>> Sorry for skipping this comment.
> >>>>>>>>>>>>> Do you mean to present the information like:
> >>>>>>>>>>>>> "received_desc": 417776,
> >>>>>>>>>>>>> "completed_desc": 417548,
> >>>>>>>>>>>> I mean the following presentation:
> >>>>>>>>>>>>
> >>>>>>>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 0
> >>>>>>>>>>>> {
> >>>>>>>>>>>>            "vstats": {
> >>>>>>>>>>>>                "vdpa-a": {
> >>>>>>>>>>>>                    "queue_stats": [{
> >>>>>>>>>>>>                        "queue_index": 0,
> >>>>>>>>>>>>                        "queue_type": "rx",
> >>>>>>>>>>>>                        "stat_name": [ "received_desc","completed_desc" ],
> >>>>>>>>>>>>                        "stat_value": [ 417776,417548 ]
> >>>>>>>>>>>>                    }]
> >>>>>>>>>>>>                }
> >>>>>>>>>>>>            }
> >>>>>>>>>>>> }
> >>>>>>>>>>>>
> >>>>>>>>>>>> I think Parav had similar suggestion, too.
> >>>>>>>>>>>>
> >>>>>>>>>>>> Thanks,
> >>>>>>>>>>>> -Siwei
> >>>>>>>>>>>>
> >>>>>>>>>>>>>> Thanks,
> >>>>>>>>>>>>>> -Siwei
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>> Thanks,
> >>>>>>>>>>>>>>>> -Siwei
> >>>>>>>>>>>>>>>>>                  }
> >>>>>>>>>>>>>>>>>              }
> >>>>>>>>>>>>>>>>> }
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
> >>>>>>>>>>>>>>>>> ---
> >>>>>>>>>>>>>>>>>           drivers/vdpa/vdpa.c       | 129 ++++++++++++++++++++++++++++++++++++++
> >>>>>>>>>>>>>>>>>           include/linux/vdpa.h      |   5 ++
> >>>>>>>>>>>>>>>>>           include/uapi/linux/vdpa.h |   7 +++
> >>>>>>>>>>>>>>>>>           3 files changed, 141 insertions(+)
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> >>>>>>>>>>>>>>>>> index 9846c9de4bfa..d0ff671baf88 100644
> >>>>>>>>>>>>>>>>> --- a/drivers/vdpa/vdpa.c
> >>>>>>>>>>>>>>>>> +++ b/drivers/vdpa/vdpa.c
> >>>>>>>>>>>>>>>>> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
> >>>>>>>>>>>>>>>>>                  return err;
> >>>>>>>>>>>>>>>>>           }
> >>>>>>>>>>>>>>>>> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
> >>>>>>>>>>>>>>>>> +                              struct genl_info *info, u32 index)
> >>>>>>>>>>>>>>>>> +{
> >>>>>>>>>>>>>>>>> +       int err;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
> >>>>>>>>>>>>>>>>> +               return -EMSGSIZE;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       err = vdev->config->get_vendor_vq_stats(vdev, index, msg,
> >>>>>>>>>>>>>>>>> +                                                info->extack);
> >>>>>>>>>>>>>>>>> +       if (err)
> >>>>>>>>>>>>>>>>> +               return err;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       return 0;
> >>>>>>>>>>>>>>>>> +}
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg,
> >>>>>>>>>>>>>>>>> +                            struct genl_info *info, u32 index)
> >>>>>>>>>>>>>>>>> +{
> >>>>>>>>>>>>>>>>> +       int err;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       if (!vdev->config->get_vendor_vq_stats)
> >>>>>>>>>>>>>>>>> +               return -EOPNOTSUPP;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       err = vdpa_fill_stats_rec(vdev, msg, info, index);
> >>>>>>>>>>>>>>>>> +       if (err)
> >>>>>>>>>>>>>>>>> +               return err;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       return 0;
> >>>>>>>>>>>>>>>>> +}
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
> >>>>>>>>>>>>>>>>> +                                     struct sk_buff *msg,
> >>>>>>>>>>>>>>>>> +                                     struct genl_info *info, u32 index)
> >>>>>>>>>>>>>>>>> +{
> >>>>>>>>>>>>>>>>> +       u32 device_id;
> >>>>>>>>>>>>>>>>> +       void *hdr;
> >>>>>>>>>>>>>>>>> +       int err;
> >>>>>>>>>>>>>>>>> +       u32 portid = info->snd_portid;
> >>>>>>>>>>>>>>>>> +       u32 seq = info->snd_seq;
> >>>>>>>>>>>>>>>>> +       u32 flags = 0;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
> >>>>>>>>>>>>>>>>> +                         VDPA_CMD_DEV_VSTATS_GET);
> >>>>>>>>>>>>>>>>> +       if (!hdr)
> >>>>>>>>>>>>>>>>> +               return -EMSGSIZE;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
> >>>>>>>>>>>>>>>>> +               err = -EMSGSIZE;
> >>>>>>>>>>>>>>>>> +               goto undo_msg;
> >>>>>>>>>>>>>>>>> +       }
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       device_id = vdev->config->get_device_id(vdev);
> >>>>>>>>>>>>>>>>> +       if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
> >>>>>>>>>>>>>>>>> +               err = -EMSGSIZE;
> >>>>>>>>>>>>>>>>> +               goto undo_msg;
> >>>>>>>>>>>>>>>>> +       }
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       err = vendor_stats_fill(vdev, msg, info, index);
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       genlmsg_end(msg, hdr);
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       return err;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +undo_msg:
> >>>>>>>>>>>>>>>>> +       genlmsg_cancel(msg, hdr);
> >>>>>>>>>>>>>>>>> +       return err;
> >>>>>>>>>>>>>>>>> +}
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>           static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info)
> >>>>>>>>>>>>>>>>>           {
> >>>>>>>>>>>>>>>>>                  struct vdpa_device *vdev;
> >>>>>>>>>>>>>>>>> @@ -990,6 +1058,60 @@ vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *
> >>>>>>>>>>>>>>>>>                  return msg->len;
> >>>>>>>>>>>>>>>>>           }
> >>>>>>>>>>>>>>>>> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
> >>>>>>>>>>>>>>>>> +                                         struct genl_info *info)
> >>>>>>>>>>>>>>>>> +{
> >>>>>>>>>>>>>>>>> +       struct vdpa_device *vdev;
> >>>>>>>>>>>>>>>>> +       struct sk_buff *msg;
> >>>>>>>>>>>>>>>>> +       const char *devname;
> >>>>>>>>>>>>>>>>> +       struct device *dev;
> >>>>>>>>>>>>>>>>> +       u32 index;
> >>>>>>>>>>>>>>>>> +       int err;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       if (!info->attrs[VDPA_ATTR_DEV_NAME])
> >>>>>>>>>>>>>>>>> +               return -EINVAL;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
> >>>>>>>>>>>>>>>>> +               return -EINVAL;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
> >>>>>>>>>>>>>>>>> +       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> >>>>>>>>>>>>>>>>> +       if (!msg)
> >>>>>>>>>>>>>>>>> +               return -ENOMEM;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
> >>>>>>>>>>>>>>>>> +       mutex_lock(&vdpa_dev_mutex);
> >>>>>>>>>>>>>>>>> +       dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
> >>>>>>>>>>>>>>>>> +       if (!dev) {
> >>>>>>>>>>>>>>>>> +               NL_SET_ERR_MSG_MOD(info->extack, "device not
> >>>>>>>>>> found");
> >>>>>>>>>>>>>>>>> +               err = -ENODEV;
> >>>>>>>>>>>>>>>>> +               goto dev_err;
> >>>>>>>>>>>>>>>>> +       }
> >>>>>>>>>>>>>>>>> +       vdev = container_of(dev, struct vdpa_device, dev);
> >>>>>>>>>>>>>>>>> +       if (!vdev->mdev) {
> >>>>>>>>>>>>>>>>> +               NL_SET_ERR_MSG_MOD(info->extack, "unmanaged
> >>>>>>>>>> vdpa
> >>>>>>>>>>>>>> device");
> >>>>>>>>>>>>>>>>> +               err = -EINVAL;
> >>>>>>>>>>>>>>>>> +               goto mdev_err;
> >>>>>>>>>>>>>>>>> +       }
> >>>>>>>>>>>>>>>>> +       err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
> >>>>>>>>>>>>>>>>> +       if (!err)
> >>>>>>>>>>>>>>>>> +               err = genlmsg_reply(msg, info);
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       put_device(dev);
> >>>>>>>>>>>>>>>>> +       mutex_unlock(&vdpa_dev_mutex);
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       if (err)
> >>>>>>>>>>>>>>>>> +               nlmsg_free(msg);
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       return err;
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +mdev_err:
> >>>>>>>>>>>>>>>>> +       put_device(dev);
> >>>>>>>>>>>>>>>>> +dev_err:
> >>>>>>>>>>>>>>>>> +       mutex_unlock(&vdpa_dev_mutex);
> >>>>>>>>>>>>>>>>> +       return err;
> >>>>>>>>>>>>>>>>> +}
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>           static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
> >>>>>>>>>>>>>>>>>                  [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
> >>>>>>>>>>>>>>>>>                  [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
> >>>>>>>>>>>>>>>>> @@ -997,6 +1119,7 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
> >>>>>>>>>>>>>>>>>                  [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
> >>>>>>>>>>>>>>>>>                  /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
> >>>>>>>>>>>>>>>>>                  [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
> >>>>>>>>>>>>>>>>> +       [VDPA_ATTR_DEV_QUEUE_INDEX] = NLA_POLICY_RANGE(NLA_U32, 0, 65535),
> >>>>>>>>>>>>>>>>>           };
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>           static const struct genl_ops vdpa_nl_ops[] = {
> >>>>>>>>>>>>>>>>> @@ -1030,6 +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
> >>>>>>>>>>>>>>>>>                          .doit = vdpa_nl_cmd_dev_config_get_doit,
> >>>>>>>>>>>>>>>>>                          .dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
> >>>>>>>>>>>>>>>>>                  },
> >>>>>>>>>>>>>>>>> +       {
> >>>>>>>>>>>>>>>>> +               .cmd = VDPA_CMD_DEV_VSTATS_GET,
> >>>>>>>>>>>>>>>>> +               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
> >>>>>>>>>>>>>>>>> +               .doit = vdpa_nl_cmd_dev_stats_get_doit,
> >>>>>>>>>>>>>>>>> +               .flags = GENL_ADMIN_PERM,
> >>>>>>>>>>>>>>>>> +       },
> >>>>>>>>>>>>>>>>>           };
> >>>>>>>>>>>>>>>>>           static struct genl_family vdpa_nl_family __ro_after_init = {
> >>>>>>>>>>>>>>>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> >>>>>>>>>>>>>>>>> index 2de442ececae..274203845cfc 100644
> >>>>>>>>>>>>>>>>> --- a/include/linux/vdpa.h
> >>>>>>>>>>>>>>>>> +++ b/include/linux/vdpa.h
> >>>>>>>>>>>>>>>>> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
> >>>>>>>>>>>>>>>>>                                      const struct vdpa_vq_state *state);
> >>>>>>>>>>>>>>>>>                  int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
> >>>>>>>>>>>>>>>>>                                      struct vdpa_vq_state *state);
> >>>>>>>>>>>>>>>>> +       int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx,
> >>>>>>>>>>>>>>>>> +                                  struct sk_buff *msg,
> >>>>>>>>>>>>>>>>> +                                  struct netlink_ext_ack *extack);
> >>>>>>>>>>>>>>>>>                  struct vdpa_notification_area
> >>>>>>>>>>>>>>>>>                  (*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
> >>>>>>>>>>>>>>>>>                  /* vq irq is not expected to be changed once DRIVER_OK is set */
> >>>>>>>>>>>>>>>>> @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
> >>>>>>>>>>>>>>>>>           int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
> >>>>>>>>>>>>>>>>>           void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
> >>>>>>>>>>>>>>>>> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>           #endif /* _LINUX_VDPA_H */
> >>>>>>>>>>>>>>>>> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
> >>>>>>>>>>>>>>>>> index 1061d8d2d09d..c5f229a41dc2 100644
> >>>>>>>>>>>>>>>>> --- a/include/uapi/linux/vdpa.h
> >>>>>>>>>>>>>>>>> +++ b/include/uapi/linux/vdpa.h
> >>>>>>>>>>>>>>>>> @@ -18,6 +18,7 @@ enum vdpa_command {
> >>>>>>>>>>>>>>>>>                  VDPA_CMD_DEV_DEL,
> >>>>>>>>>>>>>>>>>                  VDPA_CMD_DEV_GET,               /* can dump */
> >>>>>>>>>>>>>>>>>                  VDPA_CMD_DEV_CONFIG_GET,        /* can dump */
> >>>>>>>>>>>>>>>>> +       VDPA_CMD_DEV_VSTATS_GET,
> >>>>>>>>>>>>>>>>>           };
> >>>>>>>>>>>>>>>>>           enum vdpa_attr {
> >>>>>>>>>>>>>>>>> @@ -46,6 +47,12 @@ enum vdpa_attr {
> >>>>>>>>>>>>>>>>>                  VDPA_ATTR_DEV_NEGOTIATED_FEATURES,      /* u64 */
> >>>>>>>>>>>>>>>>>                  VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,          /* u32 */
> >>>>>>>>>>>>>>>>>                  VDPA_ATTR_DEV_SUPPORTED_FEATURES,       /* u64 */
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +       VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
> >>>>>>>>>>>>>>>>> +       VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
> >>>>>>>>>>>>>>>>> +       VDPA_ATTR_DEV_VENDOR_ATTR_NAME,         /* string */
> >>>>>>>>>>>>>>>>> +       VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>                  /* new attributes must be added above here */
> >>>>>>>>>>>>>>>>>                  VDPA_ATTR_MAX,
> >>>>>>>>>>>>>>>>>           };
>


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
  2022-03-18  2:27                                     ` Jason Wang
@ 2022-03-19  5:18                                       ` Si-Wei Liu
  2022-03-22  3:51                                         ` Jason Wang
  0 siblings, 1 reply; 20+ messages in thread
From: Si-Wei Liu @ 2022-03-19  5:18 UTC (permalink / raw)
  To: Jason Wang; +Cc: lvivier, mst, virtualization, eperezma, Eli Cohen



On 3/17/2022 7:27 PM, Jason Wang wrote:
> On Fri, Mar 18, 2022 at 8:59 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 3/16/2022 7:32 PM, Jason Wang wrote:
>>> On Thu, Mar 17, 2022 at 6:00 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>>
>>>> On 3/16/2022 12:10 AM, Eli Cohen wrote:
>>>>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>>>>>> Sent: Wednesday, March 16, 2022 8:52 AM
>>>>>> To: Eli Cohen <elic@nvidia.com>
>>>>>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
>>>>>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit <parav@nvidia.com>
>>>>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 3/15/2022 2:10 AM, Eli Cohen wrote:
>>>>>>
>>>>>> <...snip...>
>>>>>>
>>>>>>>> Say you got a vdpa net device created with 4 data queue pairs and a
>>>>>>>> control vq. On boot some guest firmware may support just F_CTRL_VQ but
>>>>>>>> not F_MQ, then the index for the control vq in guest ends up with 2, as
>>>>>>>> in this case there's only a single queue pair enabled for rx (index 0)
>>>>>>>> and tx (index 1). From the host driver (e.g. mlx5_vdpa) perspective, the
>>>>>>>> control vq is the last vq following 8
>>>>>>> If the host sees F_MQ was not negotiated but F_CTRL_VQ was, then it knows
>>>>>>> that control VQ index is 2
>>>>>> Right, but I don't see this feature negotiation info getting returned
>>>>>> from your vdpa_dev_vendor_stats_fill(), or did I miss something? How do
>>>>>> you plan for host user to get this info? If you meant another "vdpa dev
>>>>>> show" command to query negotiated features ahead, this won't get the
>>>>>> same lock protected as the time you run the stat query. It's very easy
>>>>>> to miss that ephemeral queue index.
>>>>> Right, so I suggested to include the negotiated features in the netlink message
>>>>> for the statistics. That would save us from using two system calls to get the
>>>>> information required and it answers your concern with respect to locking.
>>>>> I think Jason was reluctant to add this attribute to the message, but I
>>>>> can't find where he explained the reasoning.
>>>> Maybe Jason can clarify and correct me, but I just did not get the same
>>>> impression as what you said? I just skimmed through all of the emails in
>>>> the thread, only finding that he didn't want device specific attribute
>>>> such as queue type to get returned by the vdpa core, which I agree. I'm
>>>> not sure if he's explicitly against piggyback negotiated features to aid
>>>> userspace parsing the index.
>>> I think we need to piggyback the negotiated features; otherwise, as you
>>> mentioned, we will probably get inconsistency.
>> Great. Thanks for confirming it.
>>
>>> But a question for the "host queue index", as mentioned before. It's
>>> something that is not defined in the spec, so technically, vendor can
>>> do any mappings between it and the index what guest can see. I feel
>>> like we need to clarify it in the spec first.
>> I have been thinking about this for some while today. Actually I am not
>> against exposing the host queue index to the spec, as we know it's
>> somewhat implicitly defined in the QEMU device model for multiqueue. The
>> thing is, I'm not sure if there's extra benefit than this minor
>> requirement (*) given that all of the other vDPA kAPI are taking the
>> guest queue index rather than the host queue index.
> Rethinking this: considering we currently do this via vendor stats, it's
> probably fine. Maybe we can have a better netlink API like
> "vendor_queue_index" etc., and then everything should be fine.
True. Or if there's a netlink API that simply dumps the stats for all of
the available queues in one shot, that would serve our cloud use case 
quite well. :)
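
A hypothetical sketch of such a one-shot dump, reusing the per-queue
fill from this patch; the wrapper itself is not part of the series, and
a real implementation would more likely be a netlink dumpit handler:

    /* Sketch only: append vendor stats for every queue of the device. */
    static int vdpa_vstats_dump_all(struct vdpa_device *vdev, struct sk_buff *msg,
                                    struct genl_info *info, u16 nvqs)
    {
            u16 i;
            int err;

            /* walk every queue the device exposes and append its stats */
            for (i = 0; i < nvqs; i++) {
                    err = vdpa_fill_stats_rec(vdev, msg, info, i);
                    if (err)
                            return err;
            }
            return 0;
    }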

>
>> It works for
>> mlx5_vdpa as the control vq is implemented in the software, so it can
>> map to whatever guest qindex it wishes to. But would it cause extra
>> trouble for some other emulated vDPA device or other vendor's vDPA such
>> as ifcvf to fabricate a fake mapping between the host queue index and
>> the one guest can see? I would have to send a heads-up ahead that the
>> current vhost-vdpa mq implementation in upstream QEMU has some issue in
>> mapping the host qindex to the guest one. This would become a problem
>> with MQ enabled vdpa device and a non-MQ supporting guest e.g. OVMF, for
>> which I'm about to share some RFC patches shortly to demonstrate the
>> issue.
> Sure.
Please see the RFC patch just sent with the subject "vhost_net: should
not use max_queue_pairs for non-mq guest"; option #3 is to leverage the
host queue index.

>
>> If exposing the host queue index to the spec turns out to be essential
>> to resolving this issue and maybe help with software virtio QEMU
>> implementation too, I won't hesitate to expose this important
>> implementation detail to the spec.
>>
>> (*) another means that may somehow address my use case is to use some
>> magic keyword e.g. "ctrlvq" to identify the control vq. Implementation
>> wise, we can extend the get_vq_vstat() API to take -1 to indicate the
>> last guest qindex, given that we know for sure the ctrlvq is the
>> last queue in the array when the relevant features are present. Since
>> the negotiated features are piggybacked, it's not hard for the vdpa tool
>> to tell apart whether the last queue is a control vq or not.
> For the virtqueue index (guest index) defined in the spec, I'd let
> userspace deduce it.
OK, that'll be fine. Although I thought that by extending
get_vendor_vq_vstat() a bit, the virtqueue index would still be guest
based, from which userspace can deduce the control vq on its own.

> But for the host or vendor index, we probably can do this.
Does vendor index mean it's optional and vendor specific, while host index
means it is mandated and universal to all vendors? I hope we can define
some generic indexing scheme for virtio stats defined in the spec across
all vendors' devices, while limiting a vendor's flexibility to define its
own index mapping to only those vendor stats.
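
Purely as an illustration of that split (nothing like this exists in the
series or the spec today): the spec-defined stats would be keyed by the
universal guest index, with vendor-private numbering confined to the
vendor stats:

    /* Hypothetical sketch: spec stats keyed by the universal guest index,
     * vendor stats free to use a vendor-defined index mapping. */
    struct virtio_stat_key  { u16 guest_qidx;  };  /* spec-defined, universal */
    struct vendor_stat_key  { u16 vendor_qidx; };  /* vendor-defined mapping  */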

> (Btw, I feel like we need to separate the features: if we agree to go
> with the host/vendor index, we can let the guest index part go in first.)
OK. Sounds like a plan. Thanks Jason.

Thanks,
-Siwei

>
> Thanks
>
>> I'd also welcome other ideas that can make virtqueue identification
>> easier and predictable from the CLI.
>>
>> Thanks,
>> -Siwei
>>
>>> Thanks
>>>
>>>> Another way around, vdpa tool may pass down -1 to get_vq_vstat() to
>>>> represent the queue index for the control queue - but that's less
>>>> favorable as the vdpa core needs to maintain device specific knowledge.
>>>>
>>>>
>>>>
>>>>>>>> data vqs of all 4 pairs, hence got
>>>>>>>> the 8th index in the rank. Since F_MQ is not negotiated and only 1 data
>>>>>>>> queue pair enabled, in such event only host qindex 0,1 and 8 have vendor
>>>>>>>> stats available, and the rest of qindex would get invalid/empty stat.
>>>>>>>>
>>>>>>>> Later on say boot continues towards loading the Linux virtio driver,
>>>>>>>> then guest could successfully negotiate both F_CTRL_VQ and F_MQ
>>>>>>>> features. In this case, all 8 data virtqueues are fully enabled, the
>>>>>>>> index for the control vq ends up as 8, following tightly after all the 4
>>>>>>>> data queue pairs. Only until both features are negotiated, the guest and
>>>>>>>> host are able to see consistent view in identifying the control vq.
>>>>>>>> Since F_MQ is negotiated, all host queues, indexed from 0 through 8,
>>>>>>>> should have vendor stats available.
>>>>>>>>
>>>>>>>> That's why I said the guest qindex is ephemeral and hard to predict
>>>>>>>> subject to negotiated features, but host qindex is reliable and more
>>>>>>>> eligible for command line identification purpose.
>>>>>>>>
>>>>>> <...snip...>
>>>>>>>>> So what are you actually proposing? Display received and completed descriptors
>>>>>>>>> per queue index without further interpretation?
>>>>>>>> I'd suggest using a more stable queue id i.e. the host queue index to
>>>>>>>> represent the qidx (which seems to be what you're doing now?), and
>>>>>>>> displaying both the host qindex (queue_index_device in the example
>>>>>>>> below), as well as the guest's (queue_index_driver as below) in the output:
>>>>>>>>
>>>>>>> Given that per vdpa device you can display statistics only after features have
>>>>>>> been negotiated, you can always know the correct queue index for the control
>>>>>>> VQ.
>>>>>> The stats can be displayed only after features are negotiated, and only
>>>>>> when the corresponding queue is enabled. If you know it from "vdpa dev
>>>>>> show" on day 1 that the control vq and mq features are negotiated, but
>>>>>> then on day2 you got nothing for the predicted control vq index, what
>>>>>> would you recommend the host admin to do to get the right qindex again?
>>>>>> Re-run the stat query on the same queue index, or check the "vdpa dev
>>>>>> show" output again on day 3? This CLI design makes cloud administrator
>>>>>> really challenging to follow the dynamics of guest activities were to
>>>>>> manage hundreds or thousands of virtual machines...
>>>>>>
>>>>>> It would be easier, in my opinion, to grasp some well-defined handle
>>>>>> that is easily predictable or fixed across the board, for looking up the
>>>>>> control virtqueue. This could be a constant host queue index, or a
>>>>>> special magic keyword like "qidx ctrlvq". If cloud admin runs vstat
>>>>>> query on the control vq using a determined handle but get nothing back,
>>>>>> then s/he knows *for sure* the control vq was not available for some
>>>>>> reason at the point when the stat was being collected. S/he doesn't even
>>>>>> need to care negotiated status via "vdpa dev show" at all. Why bother?
>>>>> So, per my suggestion above, passing the negotiated attribute in the netlink
>>>>> message would satisfy the requirements for atomicity, right?
>>>> Yes, it satisfied the atomicity requirement, though not sure how you
>>>> want to represent the queue index for control vq? Basically if cloud
>>>> admin wants to dump control queue stats explicitly with a fixed handle
>>>> or identifier, how that can be done with the negotiated attribute?
>>>>
>>>> Thanks,
>>>> -Siwei
>>>>>>> Do you still see your proposal as required?
>>>>>> Yes, this is essential to any cloud admin that runs stat queries on all of
>>>>>> the queues on a periodic basis. You'd get something deterministic without
>>>>>> blindly guessing or bothering with other irrelevant commands.
>>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>> -Siwei
>>>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 8
>>>>>>>> {
>>>>>>>>          "vstats": {
>>>>>>>>              "vdpa-a": {
>>>>>>>>                  "queue_stats": [{
>>>>>>>>                      "queue_index_device": 8,
>>>>>>>>                      "queue_index_driver": 2,
>>>>>>>>                      "queue_type": "control_vq",
>>>>>>>>                      "stat_name": [ "received_desc","completed_desc" ],
>>>>>>>>                      "stat_value": [ 417776,417775 ]
>>>>>>>>                  }]
>>>>>>>>              }
>>>>>>>>          }
>>>>>>>> }
>>>>>>>>
>>>>>>>> Optionally, user may use guest queue index gqidx, which is kind of an
>>>>>>>> ephemeral ID and F_MQ negotiation depended, to query the stat on a
>>>>>>>> specific guest queue:
>>>>>>>>
>>>>>>>> $ vdpa -jp dev vstats show vdpa-a gqidx 2
>>>>>>>> {
>>>>>>>>          "vstats": {
>>>>>>>>              "vdpa-a": {
>>>>>>>>                  "queue_stats": [{
>>>>>>>>                      "queue_index_device": 8,
>>>>>>>>                      "queue_index_driver": 2,
>>>>>>>>                      "queue_type": "control_vq",
>>>>>>>>                      "stat_name": [ "received_desc","completed_desc" ],
>>>>>>>>                      "stat_value": [ 417776,417775 ]
>>>>>>>>                  }]
>>>>>>>>              }
>>>>>>>>          }
>>>>>>>> }
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>> -Siwei
>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>> -Siwei
>>>>>>>>>>
>>>>>>>>>>>> Regards,
>>>>>>>>>>>> -Siwei
>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Looks to me there are still some loose ends I don't quite
>>>>>>>>>>>>>>>> understand yet.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>                       "queue_index": 0,
>>>>>>>>>>>>>>> I think this can be removed since the command is for a specific index.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>                       "name": "received_desc",
>>>>>>>>>>>>>>>>>>>                       "value": 417776,
>>>>>>>>>>>>>>>>>>>                       "name": "completed_desc",
>>>>>>>>>>>>>>>>>>>                       "value": 417548
>>>>>>>>>>>>>>>>>> Not for this kernel patch, but IMHO it's the best to put the name
>>>>>>>>>>>>>>>>>> & value pairs in an array instead of flat entries in json's
>>>>>>>>>>>>>>>>>> hash/dictionary. The hash entries can be re-ordered deliberately
>>>>>>>>>>>>>>>>>> by external json parsing tool, ending up with inconsistent stat values.
>>>>>>>>>>>>>>>> This comment is missed for some reason. Please change the example
>>>>>>>>>>>>>>>> in the log if you agree to address it in vdpa tool. Or justify why
>>>>>>>>>>>>>>>> keeping the order for json hash/dictionary is fine.
>>>>>>>>>>>>>>> Sorry for skipping this comment.
>>>>>>>>>>>>>>> Do you mean to present the information like:
>>>>>>>>>>>>>>> "received_desc": 417776,
>>>>>>>>>>>>>>> "completed_desc": 417548,
>>>>>>>>>>>>>> I mean the following presentation:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 0
>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>             "vstats": {
>>>>>>>>>>>>>>                 "vdpa-a": {
>>>>>>>>>>>>>>                     "queue_stats": [{
>>>>>>>>>>>>>>                         "queue_index": 0,
>>>>>>>>>>>>>>                         "queue_type": "rx",
>>>>>>>>>>>>>>                         "stat_name": [ "received_desc","completed_desc" ],
>>>>>>>>>>>>>>                         "stat_value": [ 417776,417548 ]
>>>>>>>>>>>>>>                     }]
>>>>>>>>>>>>>>                 }
>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I think Parav had similar suggestion, too.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>> -Siwei
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>> -Siwei
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>>> -Siwei
>>>>>>>>>>>>>>>>>>>                   }
>>>>>>>>>>>>>>>>>>>               }
>>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
>>>>>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>>>>>            drivers/vdpa/vdpa.c       | 129 ++++++++++++++++++++++++++++++++++++++
>>>>>>>>>>>>>>>>>>>            include/linux/vdpa.h      |   5 ++
>>>>>>>>>>>>>>>>>>>            include/uapi/linux/vdpa.h |   7 +++
>>>>>>>>>>>>>>>>>>>            3 files changed, 141 insertions(+)
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
>>>>>>>>>>>>>>>>>>> index 9846c9de4bfa..d0ff671baf88 100644
>>>>>>>>>>>>>>>>>>> --- a/drivers/vdpa/vdpa.c
>>>>>>>>>>>>>>>>>>> +++ b/drivers/vdpa/vdpa.c
>>>>>>>>>>>>>>>>>>> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
>>>>>>>>>>>>>>>>>>>                   return err;
>>>>>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>>>>> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
>>>>>>>>>>>>>>>>>>> +                              struct genl_info *info, u32 index)
>>>>>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>>>>>> +       int err;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
>>>>>>>>>>>>>>>>>>> +               return -EMSGSIZE;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       err = vdev->config->get_vendor_vq_stats(vdev, index, msg,
>>>>>>>>>>>>>>>>>>> +                                                info->extack);
>>>>>>>>>>>>>>>>>>> +       if (err)
>>>>>>>>>>>>>>>>>>> +               return err;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       return 0;
>>>>>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg,
>>>>>>>>>>>>>>>>>>> +                            struct genl_info *info, u32 index)
>>>>>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>>>>>> +       int err;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       if (!vdev->config->get_vendor_vq_stats)
>>>>>>>>>>>>>>>>>>> +               return -EOPNOTSUPP;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       err = vdpa_fill_stats_rec(vdev, msg, info, index);
>>>>>>>>>>>>>>>>>>> +       if (err)
>>>>>>>>>>>>>>>>>>> +               return err;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       return 0;
>>>>>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
>>>>>>>>>>>>>>>>>>> +                                     struct sk_buff *msg,
>>>>>>>>>>>>>>>>>>> +                                     struct genl_info *info, u32 index)
>>>>>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>>>>>> +       u32 device_id;
>>>>>>>>>>>>>>>>>>> +       void *hdr;
>>>>>>>>>>>>>>>>>>> +       int err;
>>>>>>>>>>>>>>>>>>> +       u32 portid = info->snd_portid;
>>>>>>>>>>>>>>>>>>> +       u32 seq = info->snd_seq;
>>>>>>>>>>>>>>>>>>> +       u32 flags = 0;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
>>>>>>>>>>>>>>>>>>> +                         VDPA_CMD_DEV_VSTATS_GET);
>>>>>>>>>>>>>>>>>>> +       if (!hdr)
>>>>>>>>>>>>>>>>>>> +               return -EMSGSIZE;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
>>>>>>>>>>>>>>>>>>> +               err = -EMSGSIZE;
>>>>>>>>>>>>>>>>>>> +               goto undo_msg;
>>>>>>>>>>>>>>>>>>> +       }
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       device_id = vdev->config->get_device_id(vdev);
>>>>>>>>>>>>>>>>>>> +       if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
>>>>>>>>>>>>>>>>>>> +               err = -EMSGSIZE;
>>>>>>>>>>>>>>>>>>> +               goto undo_msg;
>>>>>>>>>>>>>>>>>>> +       }
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       err = vendor_stats_fill(vdev, msg, info, index);
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       genlmsg_end(msg, hdr);
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       return err;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +undo_msg:
>>>>>>>>>>>>>>>>>>> +       genlmsg_cancel(msg, hdr);
>>>>>>>>>>>>>>>>>>> +       return err;
>>>>>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>>            static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info)
>>>>>>>>>>>>>>>>>>>            {
>>>>>>>>>>>>>>>>>>>                   struct vdpa_device *vdev;
>>>>>>>>>>>>>>>>>>> @@ -990,6 +1058,60 @@ vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *
>>>>>>>>>>>>>>>>>>>                   return msg->len;
>>>>>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>>>>> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
>>>>>>>>>>>>>>>>>>> +                                         struct genl_info *info)
>>>>>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>>>>>> +       struct vdpa_device *vdev;
>>>>>>>>>>>>>>>>>>> +       struct sk_buff *msg;
>>>>>>>>>>>>>>>>>>> +       const char *devname;
>>>>>>>>>>>>>>>>>>> +       struct device *dev;
>>>>>>>>>>>>>>>>>>> +       u32 index;
>>>>>>>>>>>>>>>>>>> +       int err;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       if (!info->attrs[VDPA_ATTR_DEV_NAME])
>>>>>>>>>>>>>>>>>>> +               return -EINVAL;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
>>>>>>>>>>>>>>>>>>> +               return -EINVAL;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
>>>>>>>>>>>>>>>>>>> +       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
>>>>>>>>>>>>>>>>>>> +       if (!msg)
>>>>>>>>>>>>>>>>>>> +               return -ENOMEM;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
>>>>>>>>>>>>>>>>>>> +       mutex_lock(&vdpa_dev_mutex);
>>>>>>>>>>>>>>>>>>> +       dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
>>>>>>>>>>>>>>>>>>> +       if (!dev) {
>>>>>>>>>>>>>>>>>>> +               NL_SET_ERR_MSG_MOD(info->extack, "device not
>>>>>>>>>>>> found");
>>>>>>>>>>>>>>>>>>> +               err = -ENODEV;
>>>>>>>>>>>>>>>>>>> +               goto dev_err;
>>>>>>>>>>>>>>>>>>> +       }
>>>>>>>>>>>>>>>>>>> +       vdev = container_of(dev, struct vdpa_device, dev);
>>>>>>>>>>>>>>>>>>> +       if (!vdev->mdev) {
>>>>>>>>>>>>>>>>>>> +               NL_SET_ERR_MSG_MOD(info->extack, "unmanaged
>>>>>>>>>>>> vdpa
>>>>>>>>>>>>>>>> device");
>>>>>>>>>>>>>>>>>>> +               err = -EINVAL;
>>>>>>>>>>>>>>>>>>> +               goto mdev_err;
>>>>>>>>>>>>>>>>>>> +       }
>>>>>>>>>>>>>>>>>>> +       err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
>>>>>>>>>>>>>>>>>>> +       if (!err)
>>>>>>>>>>>>>>>>>>> +               err = genlmsg_reply(msg, info);
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       put_device(dev);
>>>>>>>>>>>>>>>>>>> +       mutex_unlock(&vdpa_dev_mutex);
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       if (err)
>>>>>>>>>>>>>>>>>>> +               nlmsg_free(msg);
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       return err;
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +mdev_err:
>>>>>>>>>>>>>>>>>>> +       put_device(dev);
>>>>>>>>>>>>>>>>>>> +dev_err:
>>>>>>>>>>>>>>>>>>> +       mutex_unlock(&vdpa_dev_mutex);
>>>>>>>>>>>>>>>>>>> +       return err;
>>>>>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>>            static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>>>>>>>>>>>>>>>>>>>                   [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
>>>>>>>>>>>>>>>>>>>                   [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
>>>>>>>>>>>>>>>>>>> @@ -997,6 +1119,7 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
>>>>>>>>>>>>>>>>>>>                   [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
>>>>>>>>>>>>>>>>>>>                   /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
>>>>>>>>>>>>>>>>>>>                   [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
>>>>>>>>>>>>>>>>>>> +       [VDPA_ATTR_DEV_QUEUE_INDEX] = NLA_POLICY_RANGE(NLA_U32, 0, 65535),
>>>>>>>>>>>>>>>>>>>            };
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>            static const struct genl_ops vdpa_nl_ops[] = {
>>>>>>>>>>>>>>>>>>> @@ -1030,6 +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
>>>>>>>>>>>>>>>>>>>                           .doit = vdpa_nl_cmd_dev_config_get_doit,
>>>>>>>>>>>>>>>>>>>                           .dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
>>>>>>>>>>>>>>>>>>>                   },
>>>>>>>>>>>>>>>>>>> +       {
>>>>>>>>>>>>>>>>>>> +               .cmd = VDPA_CMD_DEV_VSTATS_GET,
>>>>>>>>>>>>>>>>>>> +               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
>>>>>>>>>>>>>>>>>>> +               .doit = vdpa_nl_cmd_dev_stats_get_doit,
>>>>>>>>>>>>>>>>>>> +               .flags = GENL_ADMIN_PERM,
>>>>>>>>>>>>>>>>>>> +       },
>>>>>>>>>>>>>>>>>>>            };
>>>>>>>>>>>>>>>>>>>            static struct genl_family vdpa_nl_family __ro_after_init = {
>>>>>>>>>>>>>>>>>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>>>>>>>>>>>>>>>>>>> index 2de442ececae..274203845cfc 100644
>>>>>>>>>>>>>>>>>>> --- a/include/linux/vdpa.h
>>>>>>>>>>>>>>>>>>> +++ b/include/linux/vdpa.h
>>>>>>>>>>>>>>>>>>> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
>>>>>>>>>>>>>>>>>>>                                       const struct vdpa_vq_state *state);
>>>>>>>>>>>>>>>>>>>                   int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
>>>>>>>>>>>>>>>>>>>                                       struct vdpa_vq_state *state);
>>>>>>>>>>>>>>>>>>> +       int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx,
>>>>>>>>>>>>>>>>>>> +                                  struct sk_buff *msg,
>>>>>>>>>>>>>>>>>>> +                                  struct netlink_ext_ack *extack);
>>>>>>>>>>>>>>>>>>>                   struct vdpa_notification_area
>>>>>>>>>>>>>>>>>>>                   (*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
>>>>>>>>>>>>>>>>>>>                   /* vq irq is not expected to be changed once DRIVER_OK is set */
>>>>>>>>>>>>>>>>>>> @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
>>>>>>>>>>>>>>>>>>>            int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
>>>>>>>>>>>>>>>>>>>            void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
>>>>>>>>>>>>>>>>>>> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>>            #endif /* _LINUX_VDPA_H */
>>>>>>>>>>>>>>>>>>> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
>>>>>>>>>>>>>>>>>>> index 1061d8d2d09d..c5f229a41dc2 100644
>>>>>>>>>>>>>>>>>>> --- a/include/uapi/linux/vdpa.h
>>>>>>>>>>>>>>>>>>> +++ b/include/uapi/linux/vdpa.h
>>>>>>>>>>>>>>>>>>> @@ -18,6 +18,7 @@ enum vdpa_command {
>>>>>>>>>>>>>>>>>>>                   VDPA_CMD_DEV_DEL,
>>>>>>>>>>>>>>>>>>>                   VDPA_CMD_DEV_GET,               /* can dump */
>>>>>>>>>>>>>>>>>>>                   VDPA_CMD_DEV_CONFIG_GET,        /* can dump */
>>>>>>>>>>>>>>>>>>> +       VDPA_CMD_DEV_VSTATS_GET,
>>>>>>>>>>>>>>>>>>>            };
>>>>>>>>>>>>>>>>>>>            enum vdpa_attr {
>>>>>>>>>>>>>>>>>>> @@ -46,6 +47,12 @@ enum vdpa_attr {
>>>>>>>>>>>>>>>>>>>                   VDPA_ATTR_DEV_NEGOTIATED_FEATURES,      /* u64 */
>>>>>>>>>>>>>>>>>>>                   VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,          /* u32 */
>>>>>>>>>>>>>>>>>>>                   VDPA_ATTR_DEV_SUPPORTED_FEATURES,       /* u64 */
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +       VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
>>>>>>>>>>>>>>>>>>> +       VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
>>>>>>>>>>>>>>>>>>> +       VDPA_ATTR_DEV_VENDOR_ATTR_NAME,         /* string */
>>>>>>>>>>>>>>>>>>> +       VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>>                   /* new attributes must be added above here */
>>>>>>>>>>>>>>>>>>>                   VDPA_ATTR_MAX,
>>>>>>>>>>>>>>>>>>>            };


* Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
  2022-03-19  5:18                                       ` Si-Wei Liu
@ 2022-03-22  3:51                                         ` Jason Wang
  0 siblings, 0 replies; 20+ messages in thread
From: Jason Wang @ 2022-03-22  3:51 UTC (permalink / raw)
  To: Si-Wei Liu; +Cc: lvivier, mst, virtualization, eperezma, Eli Cohen

On Sat, Mar 19, 2022 at 1:18 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 3/17/2022 7:27 PM, Jason Wang wrote:
> > On Fri, Mar 18, 2022 at 8:59 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>
> >>
> >> On 3/16/2022 7:32 PM, Jason Wang wrote:
> >>> On Thu, Mar 17, 2022 at 6:00 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>>>
> >>>> On 3/16/2022 12:10 AM, Eli Cohen wrote:
> >>>>>> From: Si-Wei Liu <si-wei.liu@oracle.com>
> >>>>>> Sent: Wednesday, March 16, 2022 8:52 AM
> >>>>>> To: Eli Cohen <elic@nvidia.com>
> >>>>>> Cc: mst@redhat.com; jasowang@redhat.com; virtualization@lists.linux-foundation.org; eperezma@redhat.com; amorenoz@redhat.com;
> >>>>>> lvivier@redhat.com; sgarzare@redhat.com; Parav Pandit <parav@nvidia.com>
> >>>>>> Subject: Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>> On 3/15/2022 2:10 AM, Eli Cohen wrote:
> >>>>>>
> >>>>>> <...snip...>
> >>>>>>
> >>>>>>>> Say you got a vdpa net device created with 4 data queue pairs and a
> >>>>>>>> control vq. On boot some guest firmware may support just F_CTRL_VQ but
> >>>>>>>> not F_MQ, then the index for the control vq in guest ends up with 2, as
> >>>>>>>> in this case there's only a single queue pair enabled for rx (index 0)
> >>>>>>>> and tx (index 1). From the host driver (e.g. mlx5_vdpa) perspective, the
> >>>>>>>> control vq is the last vq following 8
> >>>>>>> If the host sees F_MQ was not negotiated but F_CTRL_VQ was, then it knows
> >>>>>>> that control VQ index is 2
> >>>>>> Right, but I don't see this feature negotiation info getting returned
> >>>>>> from your vdpa_dev_vendor_stats_fill(), or did I miss something? How do
> >>>>>> you plan for host user to get this info? If you meant another "vdpa dev
> >>>>>> show" command to query negotiated features ahead, this won't get the
> >>>>>> same lock protected as the time you run the stat query. It's very easy
> >>>>>> to miss that ephemeral queue index.
> >>>>> Right, so I suggested to include the negotiated features in the netlink message
> >>>>> for the statistics. That would save us from using two system calls to get the
> >>>>> information required and it answers your concern with respect to locking.
> >>>>> I think Jason was reluctant to add this attribute to the message, but I
> >>>>> can't find where he explained the reasoning.
> >>>> Maybe Jason can clarify and correct me, but I just did not get the same
> >>>> impression as what you said? I just skimmed through all of the emails in
> >>>> the thread, only finding that he didn't want device specific attribute
> >>>> such as queue type to get returned by the vdpa core, which I agree. I'm
> >>>> not sure if he's explicitly against piggyback negotiated features to aid
> >>>> userspace parsing the index.
> >>> I think we need to piggyback the negotiated features; otherwise, as you
> >>> mentioned, we will probably get inconsistency.
> >> Great. Thanks for confirming it.
> >>
> >>> But a question for the "host queue index", as mentioned before. It's
> >>> something that is not defined in the spec, so technically, vendor can
> >>> do any mappings between it and the index what guest can see. I feel
> >>> like we need to clarify it in the spec first.
> >> I have been thinking about this for some while today. Actually I am not
> >> against exposing the host queue index to the spec, as we know it's
> >> somewhat implicitly defined in the QEMU device model for multiqueue. The
> >> thing is, I'm not sure if there's extra benefit than this minor
> >> requirement (*) given that all of the other vDPA kAPI are taking the
> >> guest queue index rather than the host queue index.
> > Rethinking this: considering we currently do this via vendor stats, it's
> > probably fine. Maybe we can have a better netlink API like
> > "vendor_queue_index" etc., and then everything should be fine.
> True. Or if there's a netlink API that simply dumps the stats for all of
> the available queues in one shot, that would serve our cloud use case
> quite well. :)

This might be another option.

>
> >
> >> It works for
> >> mlx5_vdpa as the control vq is implemented in the software, so it can
> >> map to whatever guest qindex it wishes to. But would it cause extra
> >> trouble for some other emulated vDPA device or other vendor's vDPA such
> >> as ifcvf to fabricate a fake mapping between the host queue index and
> >> the one guest can see? I would have to send a heads-up ahead that the
> >> current vhost-vdpa mq implementation in upstream QEMU has some issue in
> >> mapping the host qindex to the guest one. This would become a problem
> >> with MQ enabled vdpa device and a non-MQ supporting guest e.g. OVMF, for
> >> which I'm about to share some RFC patches shortly to demonstrate the
> >> issue.
> > Sure.
> Please see the RFC patch just sent with the subject "vhost_net: should
> not use max_queue_pairs for non-mq guest"; option #3 is to leverage the
> host queue index.

Right. If we take Qemu as a kind of vDPA implementation, it's a good
example that something like "vendor queue index" or "host queue index"
is not even implemented.

>
> >
> >> If exposing the host queue index to the spec turns out to be essential
> >> to resolving this issue and maybe help with software virtio QEMU
> >> implementation too, I won't hesitate to expose this important
> >> implementation detail to the spec.
> >>
> >> (*) Another means that may address my use case is to use some magic
> >> keyword, e.g. "ctrlvq", to identify the control vq. Implementation-wise,
> >> we can simply pass -1 to the get_vq_vstat() API to indicate the last
> >> guest qindex, given that we know for sure the ctrlvq is the last queue
> >> in the array when the relevant features are present. Since the negotiated
> >> features are piggybacked, it's not hard for the vdpa tool to tell
> >> whether the last queue is a control vq or not.
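
To illustrate that magic-index idea in driver terms -- only a sketch; ndev
and ctrl_vq_idx() are hypothetical stand-ins for a driver's own state and
helper, while VDPA_INVAL_QUEUE_INDEX is the 0xffff sentinel the patch
already defines in include/linux/vdpa.h:

	/* inside a driver's get_vendor_vq_stats() */
	if (idx == VDPA_INVAL_QUEUE_INDEX) {
		if (!(ndev->negotiated_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return -EINVAL;
		idx = ctrl_vq_idx(ndev);	/* the last queue in the array */
	}
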
> > For the virtqueue index (guest index) defined in the spec, I'd let
> > userspace deduce it.
> OK, that'll be fine. Although I thought that by extending
> get_vendor_vq_vstat() a bit, the virtqueue index would still be guest
> based, from which userspace can deduce the control vq on its own.

Yes.
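
To spell out that deduction, a minimal userspace sketch (assuming the
netlink reply carries the negotiated features, and that the tool already
knows max_virtqueue_pairs from the device config space):

	#include <stdint.h>
	#include <linux/virtio_net.h>

	/* Per the virtio-net queue layout: data queues occupy indexes
	 * 0..2N-1, and the control vq, when VIRTIO_NET_F_CTRL_VQ is
	 * negotiated, follows at index 2N, where N is max_virtqueue_pairs
	 * if VIRTIO_NET_F_MQ was negotiated, else 1.
	 */
	static uint16_t ctrl_vq_index(uint64_t features, uint16_t max_vq_pairs)
	{
		uint16_t n = (features & (1ULL << VIRTIO_NET_F_MQ)) ?
			     max_vq_pairs : 1;

		return 2 * n;
	}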

>
> > But for the host or vendor index, we can probably do this.
> Does "vendor index" mean it's optional and vendor specific, while "host
> index" means it's mandated and universal across all vendors?

It's probably too late to mandate this, considering we already have 3
or 4 vDPA vendors.

> I hope we can define
> some generic indexing scheme for the virtio stats defined in the spec across
> all vendors' devices, while limiting a vendor's flexibility to define its
> own index mapping to only those vendor-specific stats.

Right.
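
One conceivable uapi convention for that -- purely a sketch of the idea,
not a concrete proposal:

	/* 0x0000..0x7fff: virtqueue indexes as seen by the driver (spec
	 *                 defined, stable across vendors)
	 * 0x8000..0xfffe: vendor-specific counters; the mapping is left
	 *                 to the vendor
	 * 0xffff:         reserved (VDPA_INVAL_QUEUE_INDEX)
	 */
	#define VDPA_QINDEX_VENDOR_BASE	0x8000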

Thanks

>
> > (Btw, I feel like we need to separate the features: if we agree to go
> > with a host/vendor index, we can let the guest index part go in first.)
> OK. Sounds like a plan. Thanks Jason.
>
> Thanks,
> -Siwei
>
> >
> > Thanks
> >
> >> I'd also welcome other ideas that can make virtqueue identification
> >> easier and predictable from the CLI.
> >>
> >> Thanks,
> >> -Siwei
> >>
> >>> Thanks
> >>>
> >>>> The other way around, the vdpa tool may pass down -1 to get_vq_vstat() to
> >>>> represent the queue index of the control queue - but that's less
> >>>> favorable, as the vdpa core would need to maintain device-specific knowledge.
> >>>>
> >>>>
> >>>>
> >>>>>>>> data vqs of all 4 pairs, hence gets the 8th index in the rank. Since F_MQ
> >>>>>>>> is not negotiated and only 1 data queue pair is enabled, in that event
> >>>>>>>> only host qindex 0, 1 and 8 have vendor stats available, and the rest of
> >>>>>>>> the qindexes would get invalid/empty stats.
> >>>>>>>>
> >>>>>>>> Later on, say boot continues to loading the Linux virtio driver; the
> >>>>>>>> guest then successfully negotiates both the F_CTRL_VQ and F_MQ
> >>>>>>>> features. In this case, all 8 data virtqueues are fully enabled, and the
> >>>>>>>> index for the control vq ends up as 8, right after all 4 data queue
> >>>>>>>> pairs. Only once both features are negotiated are the guest and
> >>>>>>>> host able to see a consistent view in identifying the control vq.
> >>>>>>>> Since F_MQ is negotiated, all host queues, indexed from 0 through 8,
> >>>>>>>> should have vendor stats available.
> >>>>>>>>
> >>>>>>>> That's why I said the guest qindex is ephemeral and hard to predict,
> >>>>>>>> being subject to the negotiated features, while the host qindex is
> >>>>>>>> reliable and better suited for command-line identification purposes.
> >>>>>>>>
> >>>>>> <...snip...>
> >>>>>>>>> So what are you actually proposing? Display received and completed descriptors
> >>>>>>>>> per queue index without further interpretation?
> >>>>>>>> I'd suggest using a more stable queue id, i.e. the host queue index, to
> >>>>>>>> represent the qidx (which seems to be what you're doing now?), and
> >>>>>>>> displaying both the host qindex (queue_index_device in the example
> >>>>>>>> below) and the guest's (queue_index_driver below) in the output:
> >>>>>>>>
> >>>>>>> Given that per vdpa device you can display statistics only after features have
> >>>>>>> been negotiated, you can always know the correct queue index for the control
> >>>>>>> VQ.
> >>>>>> The stats can be displayed only after features are negotiated, and only
> >>>>>> when the corresponding queue is enabled. If you learn from "vdpa dev
> >>>>>> show" on day 1 that the control vq and mq features are negotiated, but
> >>>>>> then on day 2 you get nothing for the predicted control vq index, what
> >>>>>> would you recommend the host admin do to get the right qindex again?
> >>>>>> Re-run the stat query on the same queue index, or check the "vdpa dev
> >>>>>> show" output again on day 3? This CLI design makes it really challenging
> >>>>>> for a cloud administrator to follow the dynamics of guest activity when
> >>>>>> managing hundreds or thousands of virtual machines...
> >>>>>>
> >>>>>> It would be easier, in my opinion, to have some well-defined handle
> >>>>>> that is easily predictable or fixed across the board for looking up the
> >>>>>> control virtqueue. This could be a constant host queue index, or a
> >>>>>> special magic keyword like "qidx ctrlvq". If a cloud admin runs a vstat
> >>>>>> query on the control vq using a deterministic handle but gets nothing
> >>>>>> back, then s/he knows *for sure* the control vq was not available for
> >>>>>> some reason at the point when the stats were being collected. S/he doesn't
> >>>>>> even need to check the negotiated status via "vdpa dev show" at all.
> >>>>>> Why bother?
> >>>>> So, per my suggestion above, passing the negotiated attribute in the netlink
> >>>>> message would satisfy the requirements for atomicity, right?
> >>>> Yes, it satisfies the atomicity requirement, though I'm not sure how you
> >>>> want to represent the queue index for the control vq. Basically, if a
> >>>> cloud admin wants to dump the control queue stats explicitly with a fixed
> >>>> handle or identifier, how can that be done with the negotiated attribute?
> >>>>
> >>>> Thanks,
> >>>> -Siwei
> >>>>>>> Do you still see your proposal as required?
> >>>>>> Yes, this is essential to any cloud admin that runs stat queries on all
> >>>>>> of the queues on a periodic basis. You'd get something deterministic
> >>>>>> without blindly guessing or bothering with other, irrelevant commands.
> >>>>>>
> >>>>>>
> >>>>>> Thanks,
> >>>>>> -Siwei
> >>>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 8
> >>>>>>>> {
> >>>>>>>>          "vstats": {
> >>>>>>>>              "vdpa-a": {
> >>>>>>>>                  "queue_stats": [{
> >>>>>>>>                      "queue_index_device": 8,
> >>>>>>>>                      "queue_index_driver": 2,
> >>>>>>>>                      "queue_type": "control_vq",
> >>>>>>>>                      "stat_name": [ "received_desc","completed_desc" ],
> >>>>>>>>                      "stat_value": [ 417776,417775 ]
> >>>>>>>>                  }]
> >>>>>>>>              }
> >>>>>>>>          }
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> Optionally, the user may use the guest queue index gqidx, which is kind
> >>>>>>>> of an ephemeral ID dependent on F_MQ negotiation, to query the stats of
> >>>>>>>> a specific guest queue:
> >>>>>>>>
> >>>>>>>> $ vdpa -jp dev vstats show vdpa-a gqidx 2
> >>>>>>>> {
> >>>>>>>>          "vstats": {
> >>>>>>>>              "vdpa-a": {
> >>>>>>>>                  "queue_stats": [{
> >>>>>>>>                      "queue_index_device": 8,
> >>>>>>>>                      "queue_index_driver": 2,
> >>>>>>>>                      "queue_type": "control_vq",
> >>>>>>>>                      "stat_name": [ "received_desc","completed_desc" ],
> >>>>>>>>                      "stat_value": [ 417776,417775 ]
> >>>>>>>>                  }]
> >>>>>>>>              }
> >>>>>>>>          }
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> Thanks,
> >>>>>>>> -Siwei
> >>>>>>>>
> >>>>>>>>>> Thanks,
> >>>>>>>>>> -Siwei
> >>>>>>>>>>
> >>>>>>>>>>>> Regards,
> >>>>>>>>>>>> -Siwei
> >>>>>>>>>>>>
> >>>>>>>>>>>>>>>> Looks to me there are still some loose ends I don't quite yet
> >>>>>>>>>>>>>>>> understand.
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>                       "queue_index": 0,
> >>>>>>>>>>>>>>> I think this can be removed since the command is for a specific index.
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>                       "name": "received_desc",
> >>>>>>>>>>>>>>>>>>>                       "value": 417776,
> >>>>>>>>>>>>>>>>>>>                       "name": "completed_desc",
> >>>>>>>>>>>>>>>>>>>                       "value": 417548
> >>>>>>>>>>>>>>>>>> Not for this kernel patch, but IMHO it's the best to put the name
> >>>>>>>>>>>>>>>>>> & value pairs in an array instead of flat entries in json's
> >>>>>>>>>>>>>>>>>> hash/dictionary. The hash entries can be re-ordered deliberately
> >>>>>>>>>>>>>>>>>> by external json parsing tool, ending up with inconsistent stat values.
> >>>>>>>>>>>>>>>> This comment was missed for some reason. Please change the example
> >>>>>>>>>>>>>>>> in the log if you agree to address it in the vdpa tool, or justify
> >>>>>>>>>>>>>>>> why keeping the order of the json hash/dictionary is fine.
> >>>>>>>>>>>>>>> Sorry for skipping this comment.
> >>>>>>>>>>>>>>> Do you mean to present the information like:
> >>>>>>>>>>>>>>> "received_desc": 417776,
> >>>>>>>>>>>>>>> "completed_desc": 417548,
> >>>>>>>>>>>>>> I mean the following presentation:
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> $ vdpa -jp dev vstats show vdpa-a qidx 0
> >>>>>>>>>>>>>> {
> >>>>>>>>>>>>>>             "vstats": {
> >>>>>>>>>>>>>>                 "vdpa-a": {
> >>>>>>>>>>>>>>                     "queue_stats": [{
> >>>>>>>>>>>>>>                         "queue_index": 0,
> >>>>>>>>>>>>>>                         "queue_type": "rx",
> >>>>>>>>>>>>>>                         "stat_name": [ "received_desc","completed_desc" ],
> >>>>>>>>>>>>>>                         "stat_value": [ 417776,417548 ]
> >>>>>>>>>>>>>>                     }]
> >>>>>>>>>>>>>>                 }
> >>>>>>>>>>>>>>             }
> >>>>>>>>>>>>>> }
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> I think Parav had similar suggestion, too.
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> Thanks,
> >>>>>>>>>>>>>> -Siwei
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>> Thanks,
> >>>>>>>>>>>>>>>> -Siwei
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>> Thanks,
> >>>>>>>>>>>>>>>>>> -Siwei
> >>>>>>>>>>>>>>>>>>>                   }
> >>>>>>>>>>>>>>>>>>>               }
> >>>>>>>>>>>>>>>>>>> }
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
> >>>>>>>>>>>>>>>>>>> ---
> >>>>>>>>>>>>>>>>>>>            drivers/vdpa/vdpa.c       | 129 ++++++++++++++++++++++++++++++++++++++
> >>>>>>>>>>>>>>>>>>>            include/linux/vdpa.h      |   5 ++
> >>>>>>>>>>>>>>>>>>>            include/uapi/linux/vdpa.h |   7 +++
> >>>>>>>>>>>>>>>>>>>            3 files changed, 141 insertions(+)
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> >>>>>>>>>>>>>>>>>>> index 9846c9de4bfa..d0ff671baf88 100644
> >>>>>>>>>>>>>>>>>>> --- a/drivers/vdpa/vdpa.c
> >>>>>>>>>>>>>>>>>>> +++ b/drivers/vdpa/vdpa.c
> >>>>>>>>>>>>>>>>>>> @@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
> >>>>>>>>>>>>>>>>>>>                   return err;
> >>>>>>>>>>>>>>>>>>>            }
> >>>>>>>>>>>>>>>>>>> +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
> >>>>>>>>>>>>>>>>>>> +                              struct genl_info *info, u32 index)
> >>>>>>>>>>>>>>>>>>> +{
> >>>>>>>>>>>>>>>>>>> +       int err;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
> >>>>>>>>>>>>>>>>>>> +               return -EMSGSIZE;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info->extack);
> >>>>>>>>>>>>>>>>>>> +       if (err)
> >>>>>>>>>>>>>>>>>>> +               return err;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       return 0;
> >>>>>>>>>>>>>>>>>>> +}
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg,
> >>>>>>>>>>>>>>>>>>> +                            struct genl_info *info, u32 index)
> >>>>>>>>>>>>>>>>>>> +{
> >>>>>>>>>>>>>>>>>>> +       int err;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       if (!vdev->config->get_vendor_vq_stats)
> >>>>>>>>>>>>>>>>>>> +               return -EOPNOTSUPP;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       err = vdpa_fill_stats_rec(vdev, msg, info, index);
> >>>>>>>>>>>>>>>>>>> +       if (err)
> >>>>>>>>>>>>>>>>>>> +               return err;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       return 0;
> >>>>>>>>>>>>>>>>>>> +}
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
> >>>>>>>>>>>>>>>>>>> +                                     struct sk_buff *msg,
> >>>>>>>>>>>>>>>>>>> +                                     struct genl_info *info, u32 index)
> >>>>>>>>>>>>>>>>>>> +{
> >>>>>>>>>>>>>>>>>>> +       u32 device_id;
> >>>>>>>>>>>>>>>>>>> +       void *hdr;
> >>>>>>>>>>>>>>>>>>> +       int err;
> >>>>>>>>>>>>>>>>>>> +       u32 portid = info->snd_portid;
> >>>>>>>>>>>>>>>>>>> +       u32 seq = info->snd_seq;
> >>>>>>>>>>>>>>>>>>> +       u32 flags = 0;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
> >>>>>>>>>>>>>>>>>>> +                         VDPA_CMD_DEV_VSTATS_GET);
> >>>>>>>>>>>>>>>>>>> +       if (!hdr)
> >>>>>>>>>>>>>>>>>>> +               return -EMSGSIZE;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
> >>>>>>>>>>>>>>>>>>> +               err = -EMSGSIZE;
> >>>>>>>>>>>>>>>>>>> +               goto undo_msg;
> >>>>>>>>>>>>>>>>>>> +       }
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       device_id = vdev->config->get_device_id(vdev);
> >>>>>>>>>>>>>>>>>>> +       if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
> >>>>>>>>>>>>>>>>>>> +               err = -EMSGSIZE;
> >>>>>>>>>>>>>>>>>>> +               goto undo_msg;
> >>>>>>>>>>>>>>>>>>> +       }
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       err = vendor_stats_fill(vdev, msg, info, index);
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       genlmsg_end(msg, hdr);
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       return err;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +undo_msg:
> >>>>>>>>>>>>>>>>>>> +       genlmsg_cancel(msg, hdr);
> >>>>>>>>>>>>>>>>>>> +       return err;
> >>>>>>>>>>>>>>>>>>> +}
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>>            static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info)
> >>>>>>>>>>>>>>>>>>>            {
> >>>>>>>>>>>>>>>>>>>                   struct vdpa_device *vdev;
> >>>>>>>>>>>>>>>>>>> @@ -990,6 +1058,60 @@ vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *
> >>>>>>>>>>>>>>>>>>>                   return msg->len;
> >>>>>>>>>>>>>>>>>>>            }
> >>>>>>>>>>>>>>>>>>> +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
> >>>>>>>>>>>>>>>>>>> +                                         struct genl_info *info)
> >>>>>>>>>>>>>>>>>>> +{
> >>>>>>>>>>>>>>>>>>> +       struct vdpa_device *vdev;
> >>>>>>>>>>>>>>>>>>> +       struct sk_buff *msg;
> >>>>>>>>>>>>>>>>>>> +       const char *devname;
> >>>>>>>>>>>>>>>>>>> +       struct device *dev;
> >>>>>>>>>>>>>>>>>>> +       u32 index;
> >>>>>>>>>>>>>>>>>>> +       int err;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       if (!info->attrs[VDPA_ATTR_DEV_NAME])
> >>>>>>>>>>>>>>>>>>> +               return -EINVAL;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
> >>>>>>>>>>>>>>>>>>> +               return -EINVAL;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
> >>>>>>>>>>>>>>>>>>> +       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> >>>>>>>>>>>>>>>>>>> +       if (!msg)
> >>>>>>>>>>>>>>>>>>> +               return -ENOMEM;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
> >>>>>>>>>>>>>>>>>>> +       mutex_lock(&vdpa_dev_mutex);
> >>>>>>>>>>>>>>>>>>> +       dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
> >>>>>>>>>>>>>>>>>>> +       if (!dev) {
> >>>>>>>>>>>>>>>>>>> +               NL_SET_ERR_MSG_MOD(info->extack, "device not found");
> >>>>>>>>>>>>>>>>>>> +               err = -ENODEV;
> >>>>>>>>>>>>>>>>>>> +               goto dev_err;
> >>>>>>>>>>>>>>>>>>> +       }
> >>>>>>>>>>>>>>>>>>> +       vdev = container_of(dev, struct vdpa_device, dev);
> >>>>>>>>>>>>>>>>>>> +       if (!vdev->mdev) {
> >>>>>>>>>>>>>>>>>>> +               NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device");
> >>>>>>>>>>>>>>>>>>> +               err = -EINVAL;
> >>>>>>>>>>>>>>>>>>> +               goto mdev_err;
> >>>>>>>>>>>>>>>>>>> +       }
> >>>>>>>>>>>>>>>>>>> +       err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
> >>>>>>>>>>>>>>>>>>> +       if (!err)
> >>>>>>>>>>>>>>>>>>> +               err = genlmsg_reply(msg, info);
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       put_device(dev);
> >>>>>>>>>>>>>>>>>>> +       mutex_unlock(&vdpa_dev_mutex);
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       if (err)
> >>>>>>>>>>>>>>>>>>> +               nlmsg_free(msg);
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       return err;
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +mdev_err:
> >>>>>>>>>>>>>>>>>>> +       put_device(dev);
> >>>>>>>>>>>>>>>>>>> +dev_err:
> >>>>>>>>>>>>>>>>>>> +       mutex_unlock(&vdpa_dev_mutex);
> >>>>>>>>>>>>>>>>>>> +       return err;
> >>>>>>>>>>>>>>>>>>> +}
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>>            static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
> >>>>>>>>>>>>>>>>>>>                   [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
> >>>>>>>>>>>>>>>>>>>                   [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
> >>>>>>>>>>>>>>>>>>> @@ -997,6 +1119,7 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
> >>>>>>>>>>>>>>>>>>>                   [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
> >>>>>>>>>>>>>>>>>>>                   /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
> >>>>>>>>>>>>>>>>>>>                   [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
> >>>>>>>>>>>>>>>>>>> +       [VDPA_ATTR_DEV_QUEUE_INDEX] = NLA_POLICY_RANGE(NLA_U32, 0, 65535),
> >>>>>>>>>>>>>>>>>>>            };
> >>>>>>>>>>>>>>>>>>>            static const struct genl_ops vdpa_nl_ops[] = {
> >>>>>>>>>>>>>>>>>>> @@ -1030,6 +1153,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
> >>>>>>>>>>>>>>>>>>>                           .doit = vdpa_nl_cmd_dev_config_get_doit,
> >>>>>>>>>>>>>>>>>>>                           .dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
> >>>>>>>>>>>>>>>>>>>                   },
> >>>>>>>>>>>>>>>>>>> +       {
> >>>>>>>>>>>>>>>>>>> +               .cmd = VDPA_CMD_DEV_VSTATS_GET,
> >>>>>>>>>>>>>>>>>>> +               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
> >>>>>>>>>>>>>>>>>>> +               .doit = vdpa_nl_cmd_dev_stats_get_doit,
> >>>>>>>>>>>>>>>>>>> +               .flags = GENL_ADMIN_PERM,
> >>>>>>>>>>>>>>>>>>> +       },
> >>>>>>>>>>>>>>>>>>>            };
> >>>>>>>>>>>>>>>>>>>            static struct genl_family vdpa_nl_family __ro_after_init = {
> >>>>>>>>>>>>>>>>>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> >>>>>>>>>>>>>>>>>>> index 2de442ececae..274203845cfc 100644
> >>>>>>>>>>>>>>>>>>> --- a/include/linux/vdpa.h
> >>>>>>>>>>>>>>>>>>> +++ b/include/linux/vdpa.h
> >>>>>>>>>>>>>>>>>>> @@ -275,6 +275,9 @@ struct vdpa_config_ops {
> >>>>>>>>>>>>>>>>>>>                                       const struct vdpa_vq_state *state);
> >>>>>>>>>>>>>>>>>>>                   int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
> >>>>>>>>>>>>>>>>>>>                                       struct vdpa_vq_state *state);
> >>>>>>>>>>>>>>>>>>> +       int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx,
> >>>>>>>>>>>>>>>>>>> +                                  struct sk_buff *msg,
> >>>>>>>>>>>>>>>>>>> +                                  struct netlink_ext_ack *extack);
> >>>>>>>>>>>>>>>>>>>                   struct vdpa_notification_area
> >>>>>>>>>>>>>>>>>>>                   (*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
> >>>>>>>>>>>>>>>>>>>                   /* vq irq is not expected to be changed once DRIVER_OK is set */
> >>>>>>>>>>>>>>>>>>> @@ -466,4 +469,6 @@ struct vdpa_mgmt_dev {
> >>>>>>>>>>>>>>>>>>>            int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
> >>>>>>>>>>>>>>>>>>>            void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
> >>>>>>>>>>>>>>>>>>> +#define VDPA_INVAL_QUEUE_INDEX 0xffff
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>>            #endif /* _LINUX_VDPA_H */
> >>>>>>>>>>>>>>>>>>> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
> >>>>>>>>>>>>>>>>>>> index 1061d8d2d09d..c5f229a41dc2 100644
> >>>>>>>>>>>>>>>>>>> --- a/include/uapi/linux/vdpa.h
> >>>>>>>>>>>>>>>>>>> +++ b/include/uapi/linux/vdpa.h
> >>>>>>>>>>>>>>>>>>> @@ -18,6 +18,7 @@ enum vdpa_command {
> >>>>>>>>>>>>>>>>>>>                   VDPA_CMD_DEV_DEL,
> >>>>>>>>>>>>>>>>>>>                   VDPA_CMD_DEV_GET,               /* can dump */
> >>>>>>>>>>>>>>>>>>>                   VDPA_CMD_DEV_CONFIG_GET,        /* can dump */
> >>>>>>>>>>>>>>>>>>> +       VDPA_CMD_DEV_VSTATS_GET,
> >>>>>>>>>>>>>>>>>>>            };
> >>>>>>>>>>>>>>>>>>>            enum vdpa_attr {
> >>>>>>>>>>>>>>>>>>> @@ -46,6 +47,12 @@ enum vdpa_attr {
> >>>>>>>>>>>>>>>>>>>                   VDPA_ATTR_DEV_NEGOTIATED_FEATURES,      /* u64 */
> >>>>>>>>>>>>>>>>>>>                   VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,          /* u32 */
> >>>>>>>>>>>>>>>>>>>                   VDPA_ATTR_DEV_SUPPORTED_FEATURES,       /* u64 */
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>> +       VDPA_ATTR_DEV_QUEUE_INDEX,              /* u16 */
> >>>>>>>>>>>>>>>>>>> +       VDPA_ATTR_DEV_QUEUE_TYPE,               /* string */
> >>>>>>>>>>>>>>>>>>> +       VDPA_ATTR_DEV_VENDOR_ATTR_NAME,         /* string */
> >>>>>>>>>>>>>>>>>>> +       VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
> >>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>>                   /* new attributes must be added above here */
> >>>>>>>>>>>>>>>>>>>                   VDPA_ATTR_MAX,
> >>>>>>>>>>>>>>>>>>>            };
>
