All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] virtio-net: don't busy poll for cvq command
@ 2022-12-26  7:49 ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-26  7:49 UTC (permalink / raw)
  To: mst, jasowang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

Hi all:

The code used to busy poll for the cvq command, which turns out to have
several side effects:

1) infinite poll for buggy devices
2) bad interaction with scheduler

So this series tries to use sleep + timeout instead of busy polling.

Please review.

Thanks

Changes since RFC:

- switch to use BAD_RING in virtio_break_device()
- check virtqueue_is_broken() after being woken up
- use more_used() instead of virtqueue_get_buf() to allow caller to
  get buffers afterwards
- break the virtio-net device when timeout
- get buffer manually since the virtio core check more_used() instead

Jason Wang (4):
  virtio-net: convert rx mode setting to use workqueue
  virtio_ring: switch to use BAD_RING()
  virtio_ring: introduce a per virtqueue waitqueue
  virtio-net: sleep instead of busy waiting for cvq command

 drivers/net/virtio_net.c     | 90 +++++++++++++++++++++++++++++++-----
 drivers/virtio/virtio_ring.c | 37 +++++++++++++--
 include/linux/virtio.h       |  3 ++
 3 files changed, 115 insertions(+), 15 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH 0/4] virtio-net: don't busy poll for cvq command
@ 2022-12-26  7:49 ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-26  7:49 UTC (permalink / raw)
  To: mst, jasowang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

Hi all:

The code used to busy poll for the cvq command, which turns out to have
several side effects:

1) infinite poll for buggy devices
2) bad interaction with scheduler

So this series tries to use sleep + timeout instead of busy polling.

Please review.

Thanks

Changes since RFC:

- switch to use BAD_RING in virtio_break_device()
- check virtqueue_is_broken() after being woken up
- use more_used() instead of virtqueue_get_buf() to allow caller to
  get buffers afterwards
- break the virtio-net device when timeout
- get buffer manually since the virtio core check more_used() instead

Jason Wang (4):
  virtio-net: convert rx mode setting to use workqueue
  virtio_ring: switch to use BAD_RING()
  virtio_ring: introduce a per virtqueue waitqueue
  virtio-net: sleep instead of busy waiting for cvq command

 drivers/net/virtio_net.c     | 90 +++++++++++++++++++++++++++++++-----
 drivers/virtio/virtio_ring.c | 37 +++++++++++++--
 include/linux/virtio.h       |  3 ++
 3 files changed, 115 insertions(+), 15 deletions(-)

-- 
2.25.1

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH 1/4] virtio-net: convert rx mode setting to use workqueue
  2022-12-26  7:49 ` Jason Wang
@ 2022-12-26  7:49   ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-26  7:49 UTC (permalink / raw)
  To: mst, jasowang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

This patch converts rx mode setting to be done in a workqueue. This is
needed to allow sleeping while waiting for the cvq command to be
responded to, since the current code is executed under the addr spin lock.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c | 66 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 63 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 86e52454b5b5..efd9dd55828b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -260,6 +260,15 @@ struct virtnet_info {
 	/* Work struct for config space updates */
 	struct work_struct config_work;
 
+	/* Work struct for config rx mode */
+	struct work_struct rx_mode_work;
+
+	/* Is rx_mode_work enabled? */
+	bool rx_mode_work_enabled;
+
+	/* The lock to synchronize the access to refill_enabled */
+	spinlock_t rx_mode_lock;
+
 	/* Does the affinity hint is set for virtqueues? */
 	bool affinity_hint_set;
 
@@ -383,6 +392,22 @@ static void disable_delayed_refill(struct virtnet_info *vi)
 	spin_unlock_bh(&vi->refill_lock);
 }
 
+static void enable_rx_mode_work(struct virtnet_info *vi)
+{
+	spin_lock_bh(&vi->rx_mode_lock);
+	vi->rx_mode_work_enabled = true;
+	spin_unlock_bh(&vi->rx_mode_lock);
+}
+
+static void disable_rx_mode_work(struct virtnet_info *vi)
+{
+	spin_lock_bh(&vi->rx_mode_lock);
+	vi->rx_mode_work_enabled = false;
+	spin_unlock_bh(&vi->rx_mode_lock);
+
+	flush_work(&vi->rx_mode_work);
+}
+
 static void virtqueue_napi_schedule(struct napi_struct *napi,
 				    struct virtqueue *vq)
 {
@@ -1974,6 +1999,8 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 	/* Caller should know better */
 	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
 
+	ASSERT_RTNL();
+
 	vi->ctrl->status = ~0;
 	vi->ctrl->hdr.class = class;
 	vi->ctrl->hdr.cmd = cmd;
@@ -2160,9 +2187,11 @@ static int virtnet_close(struct net_device *dev)
 	return 0;
 }
 
-static void virtnet_set_rx_mode(struct net_device *dev)
+static void virtnet_rx_mode_work(struct work_struct *work)
 {
-	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtnet_info *vi =
+		container_of(work, struct virtnet_info, rx_mode_work);
+	struct net_device *dev = vi->dev;
 	struct scatterlist sg[2];
 	struct virtio_net_ctrl_mac *mac_data;
 	struct netdev_hw_addr *ha;
@@ -2175,8 +2204,12 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 		return;
 
+	rtnl_lock();
+
+	netif_addr_lock_bh(dev);
 	vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
 	vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
+	netif_addr_unlock_bh(dev);
 
 	sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc));
 
@@ -2192,14 +2225,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
 			 vi->ctrl->allmulti ? "en" : "dis");
 
+	netif_addr_lock_bh(dev);
+
 	uc_count = netdev_uc_count(dev);
 	mc_count = netdev_mc_count(dev);
 	/* MAC filter - use one buffer for both lists */
 	buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
 		      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
 	mac_data = buf;
-	if (!buf)
+	if (!buf) {
+		netif_addr_unlock_bh(dev);
+		rtnl_unlock();
 		return;
+	}
 
 	sg_init_table(sg, 2);
 
@@ -2220,6 +2258,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	netdev_for_each_mc_addr(ha, dev)
 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 
+	netif_addr_unlock_bh(dev);
+
 	sg_set_buf(&sg[1], mac_data,
 		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
 
@@ -2227,9 +2267,21 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
 		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
 
+	rtnl_unlock();
+
 	kfree(buf);
 }
 
+static void virtnet_set_rx_mode(struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+
+	spin_lock(&vi->rx_mode_lock);
+	if (vi->rx_mode_work_enabled)
+		schedule_work(&vi->rx_mode_work);
+	spin_unlock(&vi->rx_mode_lock);
+}
+
 static int virtnet_vlan_rx_add_vid(struct net_device *dev,
 				   __be16 proto, u16 vid)
 {
@@ -3000,6 +3052,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 
 	/* Make sure no work handler is accessing the device */
 	flush_work(&vi->config_work);
+	disable_rx_mode_work(vi);
 
 	netif_tx_lock_bh(vi->dev);
 	netif_device_detach(vi->dev);
@@ -3022,6 +3075,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
 	virtio_device_ready(vdev);
 
 	enable_delayed_refill(vi);
+	enable_rx_mode_work(vi);
+	virtnet_set_rx_mode(vi->dev);
 
 	if (netif_running(vi->dev)) {
 		err = virtnet_open(vi->dev);
@@ -3799,7 +3854,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 	vdev->priv = vi;
 
 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+	INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
 	spin_lock_init(&vi->refill_lock);
+	spin_lock_init(&vi->rx_mode_lock);
 
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
@@ -3905,6 +3962,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (vi->has_rss || vi->has_rss_hash_report)
 		virtnet_init_default_rss(vi);
 
+	enable_rx_mode_work(vi);
+
 	/* serialize netdev register + virtio_device_ready() with ndo_open() */
 	rtnl_lock();
 
@@ -3984,6 +4043,7 @@ static void virtnet_remove(struct virtio_device *vdev)
 
 	/* Make sure no work handler is accessing the device. */
 	flush_work(&vi->config_work);
+	disable_rx_mode_work(vi);
 
 	unregister_netdev(vi->dev);
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH 1/4] virtio-net: convert rx mode setting to use workqueue
@ 2022-12-26  7:49   ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-26  7:49 UTC (permalink / raw)
  To: mst, jasowang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

This patch converts rx mode setting to be done in a workqueue. This is
needed to allow sleeping while waiting for the cvq command to be
responded to, since the current code is executed under the addr spin lock.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c | 66 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 63 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 86e52454b5b5..efd9dd55828b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -260,6 +260,15 @@ struct virtnet_info {
 	/* Work struct for config space updates */
 	struct work_struct config_work;
 
+	/* Work struct for config rx mode */
+	struct work_struct rx_mode_work;
+
+	/* Is rx_mode_work enabled? */
+	bool rx_mode_work_enabled;
+
+	/* The lock to synchronize the access to refill_enabled */
+	spinlock_t rx_mode_lock;
+
 	/* Does the affinity hint is set for virtqueues? */
 	bool affinity_hint_set;
 
@@ -383,6 +392,22 @@ static void disable_delayed_refill(struct virtnet_info *vi)
 	spin_unlock_bh(&vi->refill_lock);
 }
 
+static void enable_rx_mode_work(struct virtnet_info *vi)
+{
+	spin_lock_bh(&vi->rx_mode_lock);
+	vi->rx_mode_work_enabled = true;
+	spin_unlock_bh(&vi->rx_mode_lock);
+}
+
+static void disable_rx_mode_work(struct virtnet_info *vi)
+{
+	spin_lock_bh(&vi->rx_mode_lock);
+	vi->rx_mode_work_enabled = false;
+	spin_unlock_bh(&vi->rx_mode_lock);
+
+	flush_work(&vi->rx_mode_work);
+}
+
 static void virtqueue_napi_schedule(struct napi_struct *napi,
 				    struct virtqueue *vq)
 {
@@ -1974,6 +1999,8 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 	/* Caller should know better */
 	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
 
+	ASSERT_RTNL();
+
 	vi->ctrl->status = ~0;
 	vi->ctrl->hdr.class = class;
 	vi->ctrl->hdr.cmd = cmd;
@@ -2160,9 +2187,11 @@ static int virtnet_close(struct net_device *dev)
 	return 0;
 }
 
-static void virtnet_set_rx_mode(struct net_device *dev)
+static void virtnet_rx_mode_work(struct work_struct *work)
 {
-	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtnet_info *vi =
+		container_of(work, struct virtnet_info, rx_mode_work);
+	struct net_device *dev = vi->dev;
 	struct scatterlist sg[2];
 	struct virtio_net_ctrl_mac *mac_data;
 	struct netdev_hw_addr *ha;
@@ -2175,8 +2204,12 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 		return;
 
+	rtnl_lock();
+
+	netif_addr_lock_bh(dev);
 	vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
 	vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
+	netif_addr_unlock_bh(dev);
 
 	sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc));
 
@@ -2192,14 +2225,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
 			 vi->ctrl->allmulti ? "en" : "dis");
 
+	netif_addr_lock_bh(dev);
+
 	uc_count = netdev_uc_count(dev);
 	mc_count = netdev_mc_count(dev);
 	/* MAC filter - use one buffer for both lists */
 	buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
 		      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
 	mac_data = buf;
-	if (!buf)
+	if (!buf) {
+		netif_addr_unlock_bh(dev);
+		rtnl_unlock();
 		return;
+	}
 
 	sg_init_table(sg, 2);
 
@@ -2220,6 +2258,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	netdev_for_each_mc_addr(ha, dev)
 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 
+	netif_addr_unlock_bh(dev);
+
 	sg_set_buf(&sg[1], mac_data,
 		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
 
@@ -2227,9 +2267,21 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
 		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
 
+	rtnl_unlock();
+
 	kfree(buf);
 }
 
+static void virtnet_set_rx_mode(struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+
+	spin_lock(&vi->rx_mode_lock);
+	if (vi->rx_mode_work_enabled)
+		schedule_work(&vi->rx_mode_work);
+	spin_unlock(&vi->rx_mode_lock);
+}
+
 static int virtnet_vlan_rx_add_vid(struct net_device *dev,
 				   __be16 proto, u16 vid)
 {
@@ -3000,6 +3052,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 
 	/* Make sure no work handler is accessing the device */
 	flush_work(&vi->config_work);
+	disable_rx_mode_work(vi);
 
 	netif_tx_lock_bh(vi->dev);
 	netif_device_detach(vi->dev);
@@ -3022,6 +3075,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
 	virtio_device_ready(vdev);
 
 	enable_delayed_refill(vi);
+	enable_rx_mode_work(vi);
+	virtnet_set_rx_mode(vi->dev);
 
 	if (netif_running(vi->dev)) {
 		err = virtnet_open(vi->dev);
@@ -3799,7 +3854,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 	vdev->priv = vi;
 
 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+	INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
 	spin_lock_init(&vi->refill_lock);
+	spin_lock_init(&vi->rx_mode_lock);
 
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
@@ -3905,6 +3962,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (vi->has_rss || vi->has_rss_hash_report)
 		virtnet_init_default_rss(vi);
 
+	enable_rx_mode_work(vi);
+
 	/* serialize netdev register + virtio_device_ready() with ndo_open() */
 	rtnl_lock();
 
@@ -3984,6 +4043,7 @@ static void virtnet_remove(struct virtio_device *vdev)
 
 	/* Make sure no work handler is accessing the device. */
 	flush_work(&vi->config_work);
+	disable_rx_mode_work(vi);
 
 	unregister_netdev(vi->dev);
 
-- 
2.25.1

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH 2/4] virtio_ring: switch to use BAD_RING()
  2022-12-26  7:49 ` Jason Wang
@ 2022-12-26  7:49   ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-26  7:49 UTC (permalink / raw)
  To: mst, jasowang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

Switch to reuse BAD_RING() to allow common logic to be implemented in
BAD_RING().

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes since V1:
- switch to use BAD_RING in virtio_break_device()
---
 drivers/virtio/virtio_ring.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 2e7689bb933b..5cfb2fa8abee 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -58,7 +58,8 @@
 	do {							\
 		dev_err(&_vq->vq.vdev->dev,			\
 			"%s:"fmt, (_vq)->vq.name, ##args);	\
-		(_vq)->broken = true;				\
+		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
+		WRITE_ONCE((_vq)->broken, true);		       \
 	} while (0)
 #define START_USE(vq)
 #define END_USE(vq)
@@ -2237,7 +2238,7 @@ bool virtqueue_notify(struct virtqueue *_vq)
 
 	/* Prod other side to tell it about changes. */
 	if (!vq->notify(_vq)) {
-		vq->broken = true;
+		BAD_RING(vq, "vq %d is broken\n", vq->vq.index);
 		return false;
 	}
 	return true;
@@ -2786,8 +2787,7 @@ void virtio_break_device(struct virtio_device *dev)
 	list_for_each_entry(_vq, &dev->vqs, list) {
 		struct vring_virtqueue *vq = to_vvq(_vq);
 
-		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
-		WRITE_ONCE(vq->broken, true);
+		BAD_RING(vq, "Device break vq %d", _vq->index);
 	}
 	spin_unlock(&dev->vqs_list_lock);
 }
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH 2/4] virtio_ring: switch to use BAD_RING()
@ 2022-12-26  7:49   ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-26  7:49 UTC (permalink / raw)
  To: mst, jasowang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

Switch to reuse BAD_RING() to allow common logic to be implemented in
BAD_RING().

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes since V1:
- switch to use BAD_RING in virtio_break_device()
---
 drivers/virtio/virtio_ring.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 2e7689bb933b..5cfb2fa8abee 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -58,7 +58,8 @@
 	do {							\
 		dev_err(&_vq->vq.vdev->dev,			\
 			"%s:"fmt, (_vq)->vq.name, ##args);	\
-		(_vq)->broken = true;				\
+		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
+		WRITE_ONCE((_vq)->broken, true);		       \
 	} while (0)
 #define START_USE(vq)
 #define END_USE(vq)
@@ -2237,7 +2238,7 @@ bool virtqueue_notify(struct virtqueue *_vq)
 
 	/* Prod other side to tell it about changes. */
 	if (!vq->notify(_vq)) {
-		vq->broken = true;
+		BAD_RING(vq, "vq %d is broken\n", vq->vq.index);
 		return false;
 	}
 	return true;
@@ -2786,8 +2787,7 @@ void virtio_break_device(struct virtio_device *dev)
 	list_for_each_entry(_vq, &dev->vqs, list) {
 		struct vring_virtqueue *vq = to_vvq(_vq);
 
-		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
-		WRITE_ONCE(vq->broken, true);
+		BAD_RING(vq, "Device break vq %d", _vq->index);
 	}
 	spin_unlock(&dev->vqs_list_lock);
 }
-- 
2.25.1

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-26  7:49 ` Jason Wang
@ 2022-12-26  7:49   ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-26  7:49 UTC (permalink / raw)
  To: mst, jasowang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

This patch introduces a per virtqueue waitqueue to allow driver to
sleep and wait for more used. Two new helpers are introduced to allow
driver to sleep and wake up.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes since V1:
- check virtqueue_is_broken() as well
- use more_used() instead of virtqueue_get_buf() to allow caller to
  get buffers afterwards
---
 drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
 include/linux/virtio.h       |  3 +++
 2 files changed, 32 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5cfb2fa8abee..9c83eb945493 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -13,6 +13,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/kmsan.h>
 #include <linux/spinlock.h>
+#include <linux/wait.h>
 #include <xen/xen.h>
 
 #ifdef DEBUG
@@ -60,6 +61,7 @@
 			"%s:"fmt, (_vq)->vq.name, ##args);	\
 		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
 		WRITE_ONCE((_vq)->broken, true);		       \
+		wake_up_interruptible(&(_vq)->wq);		       \
 	} while (0)
 #define START_USE(vq)
 #define END_USE(vq)
@@ -203,6 +205,9 @@ struct vring_virtqueue {
 	/* DMA, allocation, and size information */
 	bool we_own_ring;
 
+	/* Wait for buffer to be used */
+	wait_queue_head_t wq;
+
 #ifdef DEBUG
 	/* They're supposed to lock for us. */
 	unsigned int in_use;
@@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
 		vq->weak_barriers = false;
 
+	init_waitqueue_head(&vq->wq);
+
 	err = vring_alloc_state_extra_packed(&vring_packed);
 	if (err)
 		goto err_state_extra;
@@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
 		vq->weak_barriers = false;
 
+	init_waitqueue_head(&vq->wq);
+
 	err = vring_alloc_state_extra_split(vring_split);
 	if (err) {
 		kfree(vq);
@@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
+	wake_up_interruptible(&vq->wq);
+
 	if (vq->we_own_ring) {
 		if (vq->packed_ring) {
 			vring_free_queue(vq->vq.vdev,
@@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
 
+int virtqueue_wait_for_used(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	/* TODO: Tweak the timeout. */
+	return wait_event_interruptible_timeout(vq->wq,
+	       virtqueue_is_broken(_vq) || more_used(vq), HZ);
+}
+EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
+
+void virtqueue_wake_up(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	wake_up_interruptible(&vq->wq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_wake_up);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index dcab9c7e8784..2eb62c774895 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
 void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
 			    void **ctx);
 
+int virtqueue_wait_for_used(struct virtqueue *vq);
+void virtqueue_wake_up(struct virtqueue *vq);
+
 void virtqueue_disable_cb(struct virtqueue *vq);
 
 bool virtqueue_enable_cb(struct virtqueue *vq);
-- 
2.25.1

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-26  7:49   ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-26  7:49 UTC (permalink / raw)
  To: mst, jasowang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

This patch introduces a per virtqueue waitqueue to allow driver to
sleep and wait for more used. Two new helpers are introduced to allow
driver to sleep and wake up.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes since V1:
- check virtqueue_is_broken() as well
- use more_used() instead of virtqueue_get_buf() to allow caller to
  get buffers afterwards
---
 drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
 include/linux/virtio.h       |  3 +++
 2 files changed, 32 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5cfb2fa8abee..9c83eb945493 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -13,6 +13,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/kmsan.h>
 #include <linux/spinlock.h>
+#include <linux/wait.h>
 #include <xen/xen.h>
 
 #ifdef DEBUG
@@ -60,6 +61,7 @@
 			"%s:"fmt, (_vq)->vq.name, ##args);	\
 		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
 		WRITE_ONCE((_vq)->broken, true);		       \
+		wake_up_interruptible(&(_vq)->wq);		       \
 	} while (0)
 #define START_USE(vq)
 #define END_USE(vq)
@@ -203,6 +205,9 @@ struct vring_virtqueue {
 	/* DMA, allocation, and size information */
 	bool we_own_ring;
 
+	/* Wait for buffer to be used */
+	wait_queue_head_t wq;
+
 #ifdef DEBUG
 	/* They're supposed to lock for us. */
 	unsigned int in_use;
@@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
 		vq->weak_barriers = false;
 
+	init_waitqueue_head(&vq->wq);
+
 	err = vring_alloc_state_extra_packed(&vring_packed);
 	if (err)
 		goto err_state_extra;
@@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
 		vq->weak_barriers = false;
 
+	init_waitqueue_head(&vq->wq);
+
 	err = vring_alloc_state_extra_split(vring_split);
 	if (err) {
 		kfree(vq);
@@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
+	wake_up_interruptible(&vq->wq);
+
 	if (vq->we_own_ring) {
 		if (vq->packed_ring) {
 			vring_free_queue(vq->vq.vdev,
@@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
 
+int virtqueue_wait_for_used(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	/* TODO: Tweak the timeout. */
+	return wait_event_interruptible_timeout(vq->wq,
+	       virtqueue_is_broken(_vq) || more_used(vq), HZ);
+}
+EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
+
+void virtqueue_wake_up(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	wake_up_interruptible(&vq->wq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_wake_up);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index dcab9c7e8784..2eb62c774895 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
 void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
 			    void **ctx);
 
+int virtqueue_wait_for_used(struct virtqueue *vq);
+void virtqueue_wake_up(struct virtqueue *vq);
+
 void virtqueue_disable_cb(struct virtqueue *vq);
 
 bool virtqueue_enable_cb(struct virtqueue *vq);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-26  7:49 ` Jason Wang
@ 2022-12-26  7:49   ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-26  7:49 UTC (permalink / raw)
  To: mst, jasowang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

We used to busy wait on the cvq command; this tends to be
problematic since:

1) The CPU could wait forever on a buggy/malicious device
2) There's no way to terminate the process that triggers the cvq
   command

So this patch switches to using virtqueue_wait_for_used() to sleep with a
timeout (1s) instead of busy polling for the cvq command forever. This
gives the scheduler a breath and lets the process respond to
a signal. If the device doesn't respond within the timeout, break the
device.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes since V1:
- break the device when timeout
- get buffer manually since the virtio core check more_used() instead
---
 drivers/net/virtio_net.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index efd9dd55828b..6a2ea64cfcb5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
 	vi->rx_mode_work_enabled = false;
 	spin_unlock_bh(&vi->rx_mode_lock);
 
+	virtqueue_wake_up(vi->cvq);
 	flush_work(&vi->rx_mode_work);
 }
 
@@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
 	return !oom;
 }
 
+static void virtnet_cvq_done(struct virtqueue *cvq)
+{
+	virtqueue_wake_up(cvq);
+}
+
 static void skb_recv_done(struct virtqueue *rvq)
 {
 	struct virtnet_info *vi = rvq->vdev->priv;
@@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
 	return err;
 }
 
+static int virtnet_close(struct net_device *dev);
+
 /*
  * Send command via the control virtqueue and check status.  Commands
  * supported by the hypervisor, as indicated by feature bits, should
@@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 	if (unlikely(!virtqueue_kick(vi->cvq)))
 		return vi->ctrl->status == VIRTIO_NET_OK;
 
-	/* Spin for a response, the kick causes an ioport write, trapping
-	 * into the hypervisor, so the request should be handled immediately.
-	 */
-	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
-	       !virtqueue_is_broken(vi->cvq))
-		cpu_relax();
+	if (virtqueue_wait_for_used(vi->cvq)) {
+		virtqueue_get_buf(vi->cvq, &tmp);
+		return vi->ctrl->status == VIRTIO_NET_OK;
+	}
 
-	return vi->ctrl->status == VIRTIO_NET_OK;
+	netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
+	virtio_break_device(vi->vdev);
+	return VIRTIO_NET_ERR;
 }
 
 static int virtnet_set_mac_address(struct net_device *dev, void *p)
@@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 
 	/* Parameters for control virtqueue, if any */
 	if (vi->has_cvq) {
-		callbacks[total_vqs - 1] = NULL;
+		callbacks[total_vqs - 1] = virtnet_cvq_done;
 		names[total_vqs - 1] = "control";
 	}
 
-- 
2.25.1

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-26  7:49   ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-26  7:49 UTC (permalink / raw)
  To: mst, jasowang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

We used to busy wait on the cvq command; this tends to be
problematic since:

1) The CPU could wait forever on a buggy/malicious device
2) There's no way to terminate the process that triggers the cvq
   command

So this patch switches to using virtqueue_wait_for_used() to sleep with a
timeout (1s) instead of busy polling for the cvq command forever. This
gives the scheduler a breath and lets the process respond to
a signal. If the device doesn't respond within the timeout, break the
device.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes since V1:
- break the device when timeout
- get buffer manually since the virtio core check more_used() instead
---
 drivers/net/virtio_net.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index efd9dd55828b..6a2ea64cfcb5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
 	vi->rx_mode_work_enabled = false;
 	spin_unlock_bh(&vi->rx_mode_lock);
 
+	virtqueue_wake_up(vi->cvq);
 	flush_work(&vi->rx_mode_work);
 }
 
@@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
 	return !oom;
 }
 
+static void virtnet_cvq_done(struct virtqueue *cvq)
+{
+	virtqueue_wake_up(cvq);
+}
+
 static void skb_recv_done(struct virtqueue *rvq)
 {
 	struct virtnet_info *vi = rvq->vdev->priv;
@@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
 	return err;
 }
 
+static int virtnet_close(struct net_device *dev);
+
 /*
  * Send command via the control virtqueue and check status.  Commands
  * supported by the hypervisor, as indicated by feature bits, should
@@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 	if (unlikely(!virtqueue_kick(vi->cvq)))
 		return vi->ctrl->status == VIRTIO_NET_OK;
 
-	/* Spin for a response, the kick causes an ioport write, trapping
-	 * into the hypervisor, so the request should be handled immediately.
-	 */
-	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
-	       !virtqueue_is_broken(vi->cvq))
-		cpu_relax();
+	if (virtqueue_wait_for_used(vi->cvq)) {
+		virtqueue_get_buf(vi->cvq, &tmp);
+		return vi->ctrl->status == VIRTIO_NET_OK;
+	}
 
-	return vi->ctrl->status == VIRTIO_NET_OK;
+	netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
+	virtio_break_device(vi->vdev);
+	return VIRTIO_NET_ERR;
 }
 
 static int virtnet_set_mac_address(struct net_device *dev, void *p)
@@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 
 	/* Parameters for control virtqueue, if any */
 	if (vi->has_cvq) {
-		callbacks[total_vqs - 1] = NULL;
+		callbacks[total_vqs - 1] = virtnet_cvq_done;
 		names[total_vqs - 1] = "control";
 	}
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-26  7:49   ` Jason Wang
@ 2022-12-26 23:34     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-26 23:34 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Mon, Dec 26, 2022 at 03:49:07PM +0800, Jason Wang wrote:
> This patch introduces a per virtqueue waitqueue to allow driver to
> sleep and wait for more used. Two new helpers are introduced to allow
> driver to sleep and wake up.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes since V1:
> - check virtqueue_is_broken() as well
> - use more_used() instead of virtqueue_get_buf() to allow caller to
>   get buffers afterwards
> ---
>  drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
>  include/linux/virtio.h       |  3 +++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 5cfb2fa8abee..9c83eb945493 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -13,6 +13,7 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/kmsan.h>
>  #include <linux/spinlock.h>
> +#include <linux/wait.h>
>  #include <xen/xen.h>
>  
>  #ifdef DEBUG
> @@ -60,6 +61,7 @@
>  			"%s:"fmt, (_vq)->vq.name, ##args);	\
>  		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
>  		WRITE_ONCE((_vq)->broken, true);		       \
> +		wake_up_interruptible(&(_vq)->wq);		       \
>  	} while (0)
>  #define START_USE(vq)
>  #define END_USE(vq)
> @@ -203,6 +205,9 @@ struct vring_virtqueue {
>  	/* DMA, allocation, and size information */
>  	bool we_own_ring;
>  
> +	/* Wait for buffer to be used */
> +	wait_queue_head_t wq;
> +
>  #ifdef DEBUG
>  	/* They're supposed to lock for us. */
>  	unsigned int in_use;
> @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
>  	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>  		vq->weak_barriers = false;
>  
> +	init_waitqueue_head(&vq->wq);
> +
>  	err = vring_alloc_state_extra_packed(&vring_packed);
>  	if (err)
>  		goto err_state_extra;
> @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
>  	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>  		vq->weak_barriers = false;
>  
> +	init_waitqueue_head(&vq->wq);
> +
>  	err = vring_alloc_state_extra_split(vring_split);
>  	if (err) {
>  		kfree(vq);
> @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
>  {
>  	struct vring_virtqueue *vq = to_vvq(_vq);
>  
> +	wake_up_interruptible(&vq->wq);
> +
>  	if (vq->we_own_ring) {
>  		if (vq->packed_ring) {
>  			vring_free_queue(vq->vq.vdev,
> @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
>  
> +int virtqueue_wait_for_used(struct virtqueue *_vq)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +	/* TODO: Tweak the timeout. */
> +	return wait_event_interruptible_timeout(vq->wq,
> +	       virtqueue_is_broken(_vq) || more_used(vq), HZ);

There's no good timeout. Let's not even go there, if device goes
bad it should set the need reset bit.


> +}
> +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
> +
> +void virtqueue_wake_up(struct virtqueue *_vq)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +	wake_up_interruptible(&vq->wq);
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_wake_up);
> +
>  MODULE_LICENSE("GPL");
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index dcab9c7e8784..2eb62c774895 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
>  void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
>  			    void **ctx);
>  
> +int virtqueue_wait_for_used(struct virtqueue *vq);
> +void virtqueue_wake_up(struct virtqueue *vq);
> +
>  void virtqueue_disable_cb(struct virtqueue *vq);
>  
>  bool virtqueue_enable_cb(struct virtqueue *vq);
> -- 
> 2.25.1

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-26 23:34     ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-26 23:34 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Mon, Dec 26, 2022 at 03:49:07PM +0800, Jason Wang wrote:
> This patch introduces a per virtqueue waitqueue to allow driver to
> sleep and wait for more used. Two new helpers are introduced to allow
> driver to sleep and wake up.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes since V1:
> - check virtqueue_is_broken() as well
> - use more_used() instead of virtqueue_get_buf() to allow caller to
>   get buffers afterwards
> ---
>  drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
>  include/linux/virtio.h       |  3 +++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 5cfb2fa8abee..9c83eb945493 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -13,6 +13,7 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/kmsan.h>
>  #include <linux/spinlock.h>
> +#include <linux/wait.h>
>  #include <xen/xen.h>
>  
>  #ifdef DEBUG
> @@ -60,6 +61,7 @@
>  			"%s:"fmt, (_vq)->vq.name, ##args);	\
>  		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
>  		WRITE_ONCE((_vq)->broken, true);		       \
> +		wake_up_interruptible(&(_vq)->wq);		       \
>  	} while (0)
>  #define START_USE(vq)
>  #define END_USE(vq)
> @@ -203,6 +205,9 @@ struct vring_virtqueue {
>  	/* DMA, allocation, and size information */
>  	bool we_own_ring;
>  
> +	/* Wait for buffer to be used */
> +	wait_queue_head_t wq;
> +
>  #ifdef DEBUG
>  	/* They're supposed to lock for us. */
>  	unsigned int in_use;
> @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
>  	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>  		vq->weak_barriers = false;
>  
> +	init_waitqueue_head(&vq->wq);
> +
>  	err = vring_alloc_state_extra_packed(&vring_packed);
>  	if (err)
>  		goto err_state_extra;
> @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
>  	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>  		vq->weak_barriers = false;
>  
> +	init_waitqueue_head(&vq->wq);
> +
>  	err = vring_alloc_state_extra_split(vring_split);
>  	if (err) {
>  		kfree(vq);
> @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
>  {
>  	struct vring_virtqueue *vq = to_vvq(_vq);
>  
> +	wake_up_interruptible(&vq->wq);
> +
>  	if (vq->we_own_ring) {
>  		if (vq->packed_ring) {
>  			vring_free_queue(vq->vq.vdev,
> @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
>  
> +int virtqueue_wait_for_used(struct virtqueue *_vq)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +	/* TODO: Tweak the timeout. */
> +	return wait_event_interruptible_timeout(vq->wq,
> +	       virtqueue_is_broken(_vq) || more_used(vq), HZ);

There's no good timeout. Let's not even go there, if device goes
bad it should set the need reset bit.


> +}
> +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
> +
> +void virtqueue_wake_up(struct virtqueue *_vq)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +	wake_up_interruptible(&vq->wq);
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_wake_up);
> +
>  MODULE_LICENSE("GPL");
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index dcab9c7e8784..2eb62c774895 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
>  void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
>  			    void **ctx);
>  
> +int virtqueue_wait_for_used(struct virtqueue *vq);
> +void virtqueue_wake_up(struct virtqueue *vq);
> +
>  void virtqueue_disable_cb(struct virtqueue *vq);
>  
>  bool virtqueue_enable_cb(struct virtqueue *vq);
> -- 
> 2.25.1


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 2/4] virtio_ring: switch to use BAD_RING()
  2022-12-26  7:49   ` Jason Wang
@ 2022-12-26 23:36     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-26 23:36 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Mon, Dec 26, 2022 at 03:49:06PM +0800, Jason Wang wrote:
> Switch to reuse BAD_RING() to allow common logic to be implemented in
> BAD_RING().
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes since V1:
> - switch to use BAD_RING in virtio_break_device()
> ---
>  drivers/virtio/virtio_ring.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 2e7689bb933b..5cfb2fa8abee 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -58,7 +58,8 @@
>  	do {							\
>  		dev_err(&_vq->vq.vdev->dev,			\
>  			"%s:"fmt, (_vq)->vq.name, ##args);	\
> -		(_vq)->broken = true;				\
> +		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \

I don't think WRITE_ONCE/READ_ONCE pair as such. Can you point
me at documentation of such pairing?

> +		WRITE_ONCE((_vq)->broken, true);		       \
>  	} while (0)
>  #define START_USE(vq)
>  #define END_USE(vq)
> @@ -2237,7 +2238,7 @@ bool virtqueue_notify(struct virtqueue *_vq)
>  
>  	/* Prod other side to tell it about changes. */
>  	if (!vq->notify(_vq)) {
> -		vq->broken = true;
> +		BAD_RING(vq, "vq %d is broken\n", vq->vq.index);
>  		return false;
>  	}
>  	return true;
> @@ -2786,8 +2787,7 @@ void virtio_break_device(struct virtio_device *dev)
>  	list_for_each_entry(_vq, &dev->vqs, list) {
>  		struct vring_virtqueue *vq = to_vvq(_vq);
>  
> -		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
> -		WRITE_ONCE(vq->broken, true);
> +		BAD_RING(vq, "Device break vq %d", _vq->index);
>  	}
>  	spin_unlock(&dev->vqs_list_lock);
>  }
> -- 
> 2.25.1

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 2/4] virtio_ring: switch to use BAD_RING()
@ 2022-12-26 23:36     ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-26 23:36 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Mon, Dec 26, 2022 at 03:49:06PM +0800, Jason Wang wrote:
> Switch to reuse BAD_RING() to allow common logic to be implemented in
> BAD_RING().
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes since V1:
> - switch to use BAD_RING in virtio_break_device()
> ---
>  drivers/virtio/virtio_ring.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 2e7689bb933b..5cfb2fa8abee 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -58,7 +58,8 @@
>  	do {							\
>  		dev_err(&_vq->vq.vdev->dev,			\
>  			"%s:"fmt, (_vq)->vq.name, ##args);	\
> -		(_vq)->broken = true;				\
> +		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \

I don't think WRITE_ONCE/READ_ONCE pair as such. Can you point
me at documentation of such pairing?

> +		WRITE_ONCE((_vq)->broken, true);		       \
>  	} while (0)
>  #define START_USE(vq)
>  #define END_USE(vq)
> @@ -2237,7 +2238,7 @@ bool virtqueue_notify(struct virtqueue *_vq)
>  
>  	/* Prod other side to tell it about changes. */
>  	if (!vq->notify(_vq)) {
> -		vq->broken = true;
> +		BAD_RING(vq, "vq %d is broken\n", vq->vq.index);
>  		return false;
>  	}
>  	return true;
> @@ -2786,8 +2787,7 @@ void virtio_break_device(struct virtio_device *dev)
>  	list_for_each_entry(_vq, &dev->vqs, list) {
>  		struct vring_virtqueue *vq = to_vvq(_vq);
>  
> -		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
> -		WRITE_ONCE(vq->broken, true);
> +		BAD_RING(vq, "Device break vq %d", _vq->index);
>  	}
>  	spin_unlock(&dev->vqs_list_lock);
>  }
> -- 
> 2.25.1


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-26  7:49   ` Jason Wang
@ 2022-12-26 23:38     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-26 23:38 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Mon, Dec 26, 2022 at 03:49:07PM +0800, Jason Wang wrote:
> This patch introduces a per virtqueue waitqueue to allow driver to
> sleep and wait for more used. Two new helpers are introduced to allow
> driver to sleep and wake up.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes since V1:
> - check virtqueue_is_broken() as well
> - use more_used() instead of virtqueue_get_buf() to allow caller to
>   get buffers afterwards
> ---
>  drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
>  include/linux/virtio.h       |  3 +++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 5cfb2fa8abee..9c83eb945493 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -13,6 +13,7 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/kmsan.h>
>  #include <linux/spinlock.h>
> +#include <linux/wait.h>
>  #include <xen/xen.h>
>  
>  #ifdef DEBUG
> @@ -60,6 +61,7 @@
>  			"%s:"fmt, (_vq)->vq.name, ##args);	\
>  		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
>  		WRITE_ONCE((_vq)->broken, true);		       \
> +		wake_up_interruptible(&(_vq)->wq);		       \
>  	} while (0)
>  #define START_USE(vq)
>  #define END_USE(vq)
> @@ -203,6 +205,9 @@ struct vring_virtqueue {
>  	/* DMA, allocation, and size information */
>  	bool we_own_ring;
>  
> +	/* Wait for buffer to be used */
> +	wait_queue_head_t wq;
> +
>  #ifdef DEBUG
>  	/* They're supposed to lock for us. */
>  	unsigned int in_use;
> @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
>  	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>  		vq->weak_barriers = false;
>  
> +	init_waitqueue_head(&vq->wq);
> +
>  	err = vring_alloc_state_extra_packed(&vring_packed);
>  	if (err)
>  		goto err_state_extra;
> @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
>  	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>  		vq->weak_barriers = false;
>  
> +	init_waitqueue_head(&vq->wq);
> +
>  	err = vring_alloc_state_extra_split(vring_split);
>  	if (err) {
>  		kfree(vq);
> @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
>  {
>  	struct vring_virtqueue *vq = to_vvq(_vq);
>  
> +	wake_up_interruptible(&vq->wq);
> +
>  	if (vq->we_own_ring) {
>  		if (vq->packed_ring) {
>  			vring_free_queue(vq->vq.vdev,
> @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
>  
> +int virtqueue_wait_for_used(struct virtqueue *_vq)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +	/* TODO: Tweak the timeout. */
> +	return wait_event_interruptible_timeout(vq->wq,
> +	       virtqueue_is_broken(_vq) || more_used(vq), HZ);

BTW undocumented that you also make it interruptible.
So if we get an interrupt then this will fail.
But device is still going and will later use the buffers.

Same for timeout really.



> +}
> +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
> +
> +void virtqueue_wake_up(struct virtqueue *_vq)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +	wake_up_interruptible(&vq->wq);
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_wake_up);
> +
>  MODULE_LICENSE("GPL");
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index dcab9c7e8784..2eb62c774895 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
>  void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
>  			    void **ctx);
>  
> +int virtqueue_wait_for_used(struct virtqueue *vq);
> +void virtqueue_wake_up(struct virtqueue *vq);
> +
>  void virtqueue_disable_cb(struct virtqueue *vq);
>  
>  bool virtqueue_enable_cb(struct virtqueue *vq);
> -- 
> 2.25.1

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-26 23:38     ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-26 23:38 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Mon, Dec 26, 2022 at 03:49:07PM +0800, Jason Wang wrote:
> This patch introduces a per virtqueue waitqueue to allow driver to
> sleep and wait for more used. Two new helpers are introduced to allow
> driver to sleep and wake up.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes since V1:
> - check virtqueue_is_broken() as well
> - use more_used() instead of virtqueue_get_buf() to allow caller to
>   get buffers afterwards
> ---
>  drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
>  include/linux/virtio.h       |  3 +++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 5cfb2fa8abee..9c83eb945493 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -13,6 +13,7 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/kmsan.h>
>  #include <linux/spinlock.h>
> +#include <linux/wait.h>
>  #include <xen/xen.h>
>  
>  #ifdef DEBUG
> @@ -60,6 +61,7 @@
>  			"%s:"fmt, (_vq)->vq.name, ##args);	\
>  		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
>  		WRITE_ONCE((_vq)->broken, true);		       \
> +		wake_up_interruptible(&(_vq)->wq);		       \
>  	} while (0)
>  #define START_USE(vq)
>  #define END_USE(vq)
> @@ -203,6 +205,9 @@ struct vring_virtqueue {
>  	/* DMA, allocation, and size information */
>  	bool we_own_ring;
>  
> +	/* Wait for buffer to be used */
> +	wait_queue_head_t wq;
> +
>  #ifdef DEBUG
>  	/* They're supposed to lock for us. */
>  	unsigned int in_use;
> @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
>  	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>  		vq->weak_barriers = false;
>  
> +	init_waitqueue_head(&vq->wq);
> +
>  	err = vring_alloc_state_extra_packed(&vring_packed);
>  	if (err)
>  		goto err_state_extra;
> @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
>  	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>  		vq->weak_barriers = false;
>  
> +	init_waitqueue_head(&vq->wq);
> +
>  	err = vring_alloc_state_extra_split(vring_split);
>  	if (err) {
>  		kfree(vq);
> @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
>  {
>  	struct vring_virtqueue *vq = to_vvq(_vq);
>  
> +	wake_up_interruptible(&vq->wq);
> +
>  	if (vq->we_own_ring) {
>  		if (vq->packed_ring) {
>  			vring_free_queue(vq->vq.vdev,
> @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
>  
> +int virtqueue_wait_for_used(struct virtqueue *_vq)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +	/* TODO: Tweak the timeout. */
> +	return wait_event_interruptible_timeout(vq->wq,
> +	       virtqueue_is_broken(_vq) || more_used(vq), HZ);

BTW undocumented that you also make it interruptible.
So if we get an interrupt then this will fail.
But device is still going and will later use the buffers.

Same for timeout really.



> +}
> +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
> +
> +void virtqueue_wake_up(struct virtqueue *_vq)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +	wake_up_interruptible(&vq->wq);
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_wake_up);
> +
>  MODULE_LICENSE("GPL");
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index dcab9c7e8784..2eb62c774895 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
>  void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
>  			    void **ctx);
>  
> +int virtqueue_wait_for_used(struct virtqueue *vq);
> +void virtqueue_wake_up(struct virtqueue *vq);
> +
>  void virtqueue_disable_cb(struct virtqueue *vq);
>  
>  bool virtqueue_enable_cb(struct virtqueue *vq);
> -- 
> 2.25.1


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-26  7:49   ` Jason Wang
@ 2022-12-27  2:19     ` Xuan Zhuo
  -1 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-27  2:19 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem, mst, jasowang

On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> We used to busy wait on the cvq command; this tends to be
> problematic since:
>
> 1) CPU could wait forever on a buggy/malicious device
> 2) There's no way to terminate the process that triggers the cvq
>    command
>
> So this patch switches to using virtqueue_wait_for_used() to sleep with a
> timeout (1s) instead of busy polling for the cvq command forever. This

I don't think that a fixed 1s is a good choice. Some of the DPUs are very
lazy in handling cvq commands. In particular, we will also directly break the device.

I think it is necessary to add a virtio-net parameter to allow users to define
this timeout by themselves. Although I don't think this is a good way.

Thanks.


> gives the scheduler a breath and lets the process respond to
> a signal. If the device doesn't respond within the timeout, break the
> device.
>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes since V1:
> - break the device when timeout
> - get buffer manually since the virtio core check more_used() instead
> ---
>  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index efd9dd55828b..6a2ea64cfcb5 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
>  	vi->rx_mode_work_enabled = false;
>  	spin_unlock_bh(&vi->rx_mode_lock);
>
> +	virtqueue_wake_up(vi->cvq);
>  	flush_work(&vi->rx_mode_work);
>  }
>
> @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
>  	return !oom;
>  }
>
> +static void virtnet_cvq_done(struct virtqueue *cvq)
> +{
> +	virtqueue_wake_up(cvq);
> +}
> +
>  static void skb_recv_done(struct virtqueue *rvq)
>  {
>  	struct virtnet_info *vi = rvq->vdev->priv;
> @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
>  	return err;
>  }
>
> +static int virtnet_close(struct net_device *dev);
> +
>  /*
>   * Send command via the control virtqueue and check status.  Commands
>   * supported by the hypervisor, as indicated by feature bits, should
> @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>  	if (unlikely(!virtqueue_kick(vi->cvq)))
>  		return vi->ctrl->status == VIRTIO_NET_OK;
>
> -	/* Spin for a response, the kick causes an ioport write, trapping
> -	 * into the hypervisor, so the request should be handled immediately.
> -	 */
> -	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> -	       !virtqueue_is_broken(vi->cvq))
> -		cpu_relax();
> +	if (virtqueue_wait_for_used(vi->cvq)) {
> +		virtqueue_get_buf(vi->cvq, &tmp);
> +		return vi->ctrl->status == VIRTIO_NET_OK;
> +	}
>
> -	return vi->ctrl->status == VIRTIO_NET_OK;
> +	netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> +	virtio_break_device(vi->vdev);
> +	return VIRTIO_NET_ERR;
>  }
>
>  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
>
>  	/* Parameters for control virtqueue, if any */
>  	if (vi->has_cvq) {
> -		callbacks[total_vqs - 1] = NULL;
> +		callbacks[total_vqs - 1] = virtnet_cvq_done;
>  		names[total_vqs - 1] = "control";
>  	}
>
> --
> 2.25.1
>
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-27  2:19     ` Xuan Zhuo
  0 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-27  2:19 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, netdev, linux-kernel, virtualization, eperezma, edumazet,
	kuba, maxime.coquelin, pabeni, davem

On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> We used to busy wait on the cvq command; this tends to be
> problematic since:
>
> 1) CPU could wait forever on a buggy/malicious device
> 2) There's no way to terminate the process that triggers the cvq
>    command
>
> So this patch switches to using virtqueue_wait_for_used() to sleep with a
> timeout (1s) instead of busy polling for the cvq command forever. This

I don't think that a fixed 1s is a good choice. Some of the DPUs are very
lazy in handling cvq commands. In particular, we will also directly break the device.

I think it is necessary to add a virtio-net parameter to allow users to define
this timeout by themselves. Although I don't think this is a good way.

Thanks.


> gives the scheduler a breath and lets the process respond to
> a signal. If the device doesn't respond within the timeout, break the
> device.
>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes since V1:
> - break the device when timeout
> - get buffer manually since the virtio core check more_used() instead
> ---
>  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index efd9dd55828b..6a2ea64cfcb5 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
>  	vi->rx_mode_work_enabled = false;
>  	spin_unlock_bh(&vi->rx_mode_lock);
>
> +	virtqueue_wake_up(vi->cvq);
>  	flush_work(&vi->rx_mode_work);
>  }
>
> @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
>  	return !oom;
>  }
>
> +static void virtnet_cvq_done(struct virtqueue *cvq)
> +{
> +	virtqueue_wake_up(cvq);
> +}
> +
>  static void skb_recv_done(struct virtqueue *rvq)
>  {
>  	struct virtnet_info *vi = rvq->vdev->priv;
> @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
>  	return err;
>  }
>
> +static int virtnet_close(struct net_device *dev);
> +
>  /*
>   * Send command via the control virtqueue and check status.  Commands
>   * supported by the hypervisor, as indicated by feature bits, should
> @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>  	if (unlikely(!virtqueue_kick(vi->cvq)))
>  		return vi->ctrl->status == VIRTIO_NET_OK;
>
> -	/* Spin for a response, the kick causes an ioport write, trapping
> -	 * into the hypervisor, so the request should be handled immediately.
> -	 */
> -	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> -	       !virtqueue_is_broken(vi->cvq))
> -		cpu_relax();
> +	if (virtqueue_wait_for_used(vi->cvq)) {
> +		virtqueue_get_buf(vi->cvq, &tmp);
> +		return vi->ctrl->status == VIRTIO_NET_OK;
> +	}
>
> -	return vi->ctrl->status == VIRTIO_NET_OK;
> +	netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> +	virtio_break_device(vi->vdev);
> +	return VIRTIO_NET_ERR;
>  }
>
>  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
>
>  	/* Parameters for control virtqueue, if any */
>  	if (vi->has_cvq) {
> -		callbacks[total_vqs - 1] = NULL;
> +		callbacks[total_vqs - 1] = virtnet_cvq_done;
>  		names[total_vqs - 1] = "control";
>  	}
>
> --
> 2.25.1
>
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-26 23:34     ` Michael S. Tsirkin
@ 2022-12-27  3:47       ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  3:47 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Tue, Dec 27, 2022 at 7:34 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Dec 26, 2022 at 03:49:07PM +0800, Jason Wang wrote:
> > This patch introduces a per virtqueue waitqueue to allow driver to
> > sleep and wait for more used. Two new helpers are introduced to allow
> > driver to sleep and wake up.
> >
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > Changes since V1:
> > - check virtqueue_is_broken() as well
> > - use more_used() instead of virtqueue_get_buf() to allow caller to
> >   get buffers afterwards
> > ---
> >  drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
> >  include/linux/virtio.h       |  3 +++
> >  2 files changed, 32 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 5cfb2fa8abee..9c83eb945493 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -13,6 +13,7 @@
> >  #include <linux/dma-mapping.h>
> >  #include <linux/kmsan.h>
> >  #include <linux/spinlock.h>
> > +#include <linux/wait.h>
> >  #include <xen/xen.h>
> >
> >  #ifdef DEBUG
> > @@ -60,6 +61,7 @@
> >                       "%s:"fmt, (_vq)->vq.name, ##args);      \
> >               /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
> >               WRITE_ONCE((_vq)->broken, true);                       \
> > +             wake_up_interruptible(&(_vq)->wq);                     \
> >       } while (0)
> >  #define START_USE(vq)
> >  #define END_USE(vq)
> > @@ -203,6 +205,9 @@ struct vring_virtqueue {
> >       /* DMA, allocation, and size information */
> >       bool we_own_ring;
> >
> > +     /* Wait for buffer to be used */
> > +     wait_queue_head_t wq;
> > +
> >  #ifdef DEBUG
> >       /* They're supposed to lock for us. */
> >       unsigned int in_use;
> > @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
> >       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> >               vq->weak_barriers = false;
> >
> > +     init_waitqueue_head(&vq->wq);
> > +
> >       err = vring_alloc_state_extra_packed(&vring_packed);
> >       if (err)
> >               goto err_state_extra;
> > @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
> >       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> >               vq->weak_barriers = false;
> >
> > +     init_waitqueue_head(&vq->wq);
> > +
> >       err = vring_alloc_state_extra_split(vring_split);
> >       if (err) {
> >               kfree(vq);
> > @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
> >  {
> >       struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > +     wake_up_interruptible(&vq->wq);
> > +
> >       if (vq->we_own_ring) {
> >               if (vq->packed_ring) {
> >                       vring_free_queue(vq->vq.vdev,
> > @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> >  }
> >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> >
> > +int virtqueue_wait_for_used(struct virtqueue *_vq)
> > +{
> > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +     /* TODO: Tweak the timeout. */
> > +     return wait_event_interruptible_timeout(vq->wq,
> > +            virtqueue_is_broken(_vq) || more_used(vq), HZ);
>
> There's no good timeout. Let's not even go there, if device goes
> bad it should set the need reset bit.

The problem is that we can't depend on the device. If it takes too
long for the device to respond to a cvq command, there's a high
possibility that the device is buggy or even malicious. We can use a
longer timeout here, and it should still be better than waiting
forever (the cvq commands need to be serialized, so the RTNL lock
needs to be held anyway).

Thanks

>
>
> > +}
> > +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
> > +
> > +void virtqueue_wake_up(struct virtqueue *_vq)
> > +{
> > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +     wake_up_interruptible(&vq->wq);
> > +}
> > +EXPORT_SYMBOL_GPL(virtqueue_wake_up);
> > +
> >  MODULE_LICENSE("GPL");
> > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > index dcab9c7e8784..2eb62c774895 100644
> > --- a/include/linux/virtio.h
> > +++ b/include/linux/virtio.h
> > @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
> >  void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
> >                           void **ctx);
> >
> > +int virtqueue_wait_for_used(struct virtqueue *vq);
> > +void virtqueue_wake_up(struct virtqueue *vq);
> > +
> >  void virtqueue_disable_cb(struct virtqueue *vq);
> >
> >  bool virtqueue_enable_cb(struct virtqueue *vq);
> > --
> > 2.25.1
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-27  3:47       ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  3:47 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Tue, Dec 27, 2022 at 7:34 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Dec 26, 2022 at 03:49:07PM +0800, Jason Wang wrote:
> > This patch introduces a per virtqueue waitqueue to allow driver to
> > sleep and wait for more used. Two new helpers are introduced to allow
> > driver to sleep and wake up.
> >
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > Changes since V1:
> > - check virtqueue_is_broken() as well
> > - use more_used() instead of virtqueue_get_buf() to allow caller to
> >   get buffers afterwards
> > ---
> >  drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
> >  include/linux/virtio.h       |  3 +++
> >  2 files changed, 32 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 5cfb2fa8abee..9c83eb945493 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -13,6 +13,7 @@
> >  #include <linux/dma-mapping.h>
> >  #include <linux/kmsan.h>
> >  #include <linux/spinlock.h>
> > +#include <linux/wait.h>
> >  #include <xen/xen.h>
> >
> >  #ifdef DEBUG
> > @@ -60,6 +61,7 @@
> >                       "%s:"fmt, (_vq)->vq.name, ##args);      \
> >               /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
> >               WRITE_ONCE((_vq)->broken, true);                       \
> > +             wake_up_interruptible(&(_vq)->wq);                     \
> >       } while (0)
> >  #define START_USE(vq)
> >  #define END_USE(vq)
> > @@ -203,6 +205,9 @@ struct vring_virtqueue {
> >       /* DMA, allocation, and size information */
> >       bool we_own_ring;
> >
> > +     /* Wait for buffer to be used */
> > +     wait_queue_head_t wq;
> > +
> >  #ifdef DEBUG
> >       /* They're supposed to lock for us. */
> >       unsigned int in_use;
> > @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
> >       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> >               vq->weak_barriers = false;
> >
> > +     init_waitqueue_head(&vq->wq);
> > +
> >       err = vring_alloc_state_extra_packed(&vring_packed);
> >       if (err)
> >               goto err_state_extra;
> > @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
> >       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> >               vq->weak_barriers = false;
> >
> > +     init_waitqueue_head(&vq->wq);
> > +
> >       err = vring_alloc_state_extra_split(vring_split);
> >       if (err) {
> >               kfree(vq);
> > @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
> >  {
> >       struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > +     wake_up_interruptible(&vq->wq);
> > +
> >       if (vq->we_own_ring) {
> >               if (vq->packed_ring) {
> >                       vring_free_queue(vq->vq.vdev,
> > @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> >  }
> >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> >
> > +int virtqueue_wait_for_used(struct virtqueue *_vq)
> > +{
> > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +     /* TODO: Tweak the timeout. */
> > +     return wait_event_interruptible_timeout(vq->wq,
> > +            virtqueue_is_broken(_vq) || more_used(vq), HZ);
>
> There's no good timeout. Let's not even go there, if device goes
> bad it should set the need reset bit.

The problem is that we can't depend on the device. If it takes too
long for the device to respond to a cvq command, there's a high
possibility that the device is buggy or even malicious. We can use a
longer timeout here, and it should still be better than waiting
forever (the cvq commands need to be serialized, so the RTNL lock
needs to be held anyway).

Thanks

>
>
> > +}
> > +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
> > +
> > +void virtqueue_wake_up(struct virtqueue *_vq)
> > +{
> > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +     wake_up_interruptible(&vq->wq);
> > +}
> > +EXPORT_SYMBOL_GPL(virtqueue_wake_up);
> > +
> >  MODULE_LICENSE("GPL");
> > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > index dcab9c7e8784..2eb62c774895 100644
> > --- a/include/linux/virtio.h
> > +++ b/include/linux/virtio.h
> > @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
> >  void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
> >                           void **ctx);
> >
> > +int virtqueue_wait_for_used(struct virtqueue *vq);
> > +void virtqueue_wake_up(struct virtqueue *vq);
> > +
> >  void virtqueue_disable_cb(struct virtqueue *vq);
> >
> >  bool virtqueue_enable_cb(struct virtqueue *vq);
> > --
> > 2.25.1
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 2/4] virtio_ring: switch to use BAD_RING()
  2022-12-26 23:36     ` Michael S. Tsirkin
@ 2022-12-27  3:51       ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  3:51 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Tue, Dec 27, 2022 at 7:36 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Dec 26, 2022 at 03:49:06PM +0800, Jason Wang wrote:
> > Switch to reuse BAD_RING() to allow common logic to be implemented in
> > BAD_RING().
> >
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > Changes since V1:
> > - switch to use BAD_RING in virtio_break_device()
> > ---
> >  drivers/virtio/virtio_ring.c | 8 ++++----
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 2e7689bb933b..5cfb2fa8abee 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -58,7 +58,8 @@
> >       do {                                                    \
> >               dev_err(&_vq->vq.vdev->dev,                     \
> >                       "%s:"fmt, (_vq)->vq.name, ##args);      \
> > -             (_vq)->broken = true;                           \
> > +             /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
>
> I don't think WRITE_ONCE/READ_ONCE pair as such. Can you point
> me at documentation of such pairing?

Introduced by:

commit 60f0779862e4ab943810187752c462e85f5fa371
Author: Parav Pandit <parav@nvidia.com>
Date:   Wed Jul 21 17:26:45 2021 +0300

    virtio: Improve vq->broken access to avoid any compiler optimization

I think it might still apply here since virtqueue_is_broken() is still
put into a loop inside wait_event().

Thanks

>
> > +             WRITE_ONCE((_vq)->broken, true);                       \
> >       } while (0)
> >  #define START_USE(vq)
> >  #define END_USE(vq)
> > @@ -2237,7 +2238,7 @@ bool virtqueue_notify(struct virtqueue *_vq)
> >
> >       /* Prod other side to tell it about changes. */
> >       if (!vq->notify(_vq)) {
> > -             vq->broken = true;
> > +             BAD_RING(vq, "vq %d is broken\n", vq->vq.index);
> >               return false;
> >       }
> >       return true;
> > @@ -2786,8 +2787,7 @@ void virtio_break_device(struct virtio_device *dev)
> >       list_for_each_entry(_vq, &dev->vqs, list) {
> >               struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > -             /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
> > -             WRITE_ONCE(vq->broken, true);
> > +             BAD_RING(vq, "Device break vq %d", _vq->index);
> >       }
> >       spin_unlock(&dev->vqs_list_lock);
> >  }
> > --
> > 2.25.1
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 2/4] virtio_ring: switch to use BAD_RING()
@ 2022-12-27  3:51       ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  3:51 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Tue, Dec 27, 2022 at 7:36 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Dec 26, 2022 at 03:49:06PM +0800, Jason Wang wrote:
> > Switch to reuse BAD_RING() to allow common logic to be implemented in
> > BAD_RING().
> >
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > Changes since V1:
> > - switch to use BAD_RING in virtio_break_device()
> > ---
> >  drivers/virtio/virtio_ring.c | 8 ++++----
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 2e7689bb933b..5cfb2fa8abee 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -58,7 +58,8 @@
> >       do {                                                    \
> >               dev_err(&_vq->vq.vdev->dev,                     \
> >                       "%s:"fmt, (_vq)->vq.name, ##args);      \
> > -             (_vq)->broken = true;                           \
> > +             /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
>
> I don't think WRITE_ONCE/READ_ONCE pair as such. Can you point
> me at documentation of such pairing?

Introduced by:

commit 60f0779862e4ab943810187752c462e85f5fa371
Author: Parav Pandit <parav@nvidia.com>
Date:   Wed Jul 21 17:26:45 2021 +0300

    virtio: Improve vq->broken access to avoid any compiler optimization

I think it might still apply here since virtqueue_is_broken() is still
put into a loop inside wait_event().

Thanks

>
> > +             WRITE_ONCE((_vq)->broken, true);                       \
> >       } while (0)
> >  #define START_USE(vq)
> >  #define END_USE(vq)
> > @@ -2237,7 +2238,7 @@ bool virtqueue_notify(struct virtqueue *_vq)
> >
> >       /* Prod other side to tell it about changes. */
> >       if (!vq->notify(_vq)) {
> > -             vq->broken = true;
> > +             BAD_RING(vq, "vq %d is broken\n", vq->vq.index);
> >               return false;
> >       }
> >       return true;
> > @@ -2786,8 +2787,7 @@ void virtio_break_device(struct virtio_device *dev)
> >       list_for_each_entry(_vq, &dev->vqs, list) {
> >               struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > -             /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
> > -             WRITE_ONCE(vq->broken, true);
> > +             BAD_RING(vq, "Device break vq %d", _vq->index);
> >       }
> >       spin_unlock(&dev->vqs_list_lock);
> >  }
> > --
> > 2.25.1
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-26 23:38     ` Michael S. Tsirkin
@ 2022-12-27  4:30       ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  4:30 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Tue, Dec 27, 2022 at 7:38 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Dec 26, 2022 at 03:49:07PM +0800, Jason Wang wrote:
> > This patch introduces a per virtqueue waitqueue to allow driver to
> > sleep and wait for more used. Two new helpers are introduced to allow
> > driver to sleep and wake up.
> >
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > Changes since V1:
> > - check virtqueue_is_broken() as well
> > - use more_used() instead of virtqueue_get_buf() to allow caller to
> >   get buffers afterwards
> > ---
> >  drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
> >  include/linux/virtio.h       |  3 +++
> >  2 files changed, 32 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 5cfb2fa8abee..9c83eb945493 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -13,6 +13,7 @@
> >  #include <linux/dma-mapping.h>
> >  #include <linux/kmsan.h>
> >  #include <linux/spinlock.h>
> > +#include <linux/wait.h>
> >  #include <xen/xen.h>
> >
> >  #ifdef DEBUG
> > @@ -60,6 +61,7 @@
> >                       "%s:"fmt, (_vq)->vq.name, ##args);      \
> >               /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
> >               WRITE_ONCE((_vq)->broken, true);                       \
> > +             wake_up_interruptible(&(_vq)->wq);                     \
> >       } while (0)
> >  #define START_USE(vq)
> >  #define END_USE(vq)
> > @@ -203,6 +205,9 @@ struct vring_virtqueue {
> >       /* DMA, allocation, and size information */
> >       bool we_own_ring;
> >
> > +     /* Wait for buffer to be used */
> > +     wait_queue_head_t wq;
> > +
> >  #ifdef DEBUG
> >       /* They're supposed to lock for us. */
> >       unsigned int in_use;
> > @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
> >       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> >               vq->weak_barriers = false;
> >
> > +     init_waitqueue_head(&vq->wq);
> > +
> >       err = vring_alloc_state_extra_packed(&vring_packed);
> >       if (err)
> >               goto err_state_extra;
> > @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
> >       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> >               vq->weak_barriers = false;
> >
> > +     init_waitqueue_head(&vq->wq);
> > +
> >       err = vring_alloc_state_extra_split(vring_split);
> >       if (err) {
> >               kfree(vq);
> > @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
> >  {
> >       struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > +     wake_up_interruptible(&vq->wq);
> > +
> >       if (vq->we_own_ring) {
> >               if (vq->packed_ring) {
> >                       vring_free_queue(vq->vq.vdev,
> > @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> >  }
> >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> >
> > +int virtqueue_wait_for_used(struct virtqueue *_vq)
> > +{
> > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +     /* TODO: Tweak the timeout. */
> > +     return wait_event_interruptible_timeout(vq->wq,
> > +            virtqueue_is_broken(_vq) || more_used(vq), HZ);
>
> BTW undocumented that you also make it interruptible.
> So if we get an interrupt then this will fail.

Yes, this is sub-optimal.


> But device is still going and will later use the buffers.
>
> Same for timeout really.

Avoiding infinite wait/poll is one of the goals, another is to sleep.
If we think the timeout is hard, we can start from the wait.

Thanks

>
>
>
> > +}
> > +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
> > +
> > +void virtqueue_wake_up(struct virtqueue *_vq)
> > +{
> > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +     wake_up_interruptible(&vq->wq);
> > +}
> > +EXPORT_SYMBOL_GPL(virtqueue_wake_up);
> > +
> >  MODULE_LICENSE("GPL");
> > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > index dcab9c7e8784..2eb62c774895 100644
> > --- a/include/linux/virtio.h
> > +++ b/include/linux/virtio.h
> > @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
> >  void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
> >                           void **ctx);
> >
> > +int virtqueue_wait_for_used(struct virtqueue *vq);
> > +void virtqueue_wake_up(struct virtqueue *vq);
> > +
> >  void virtqueue_disable_cb(struct virtqueue *vq);
> >
> >  bool virtqueue_enable_cb(struct virtqueue *vq);
> > --
> > 2.25.1
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-27  4:30       ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  4:30 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Tue, Dec 27, 2022 at 7:38 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Dec 26, 2022 at 03:49:07PM +0800, Jason Wang wrote:
> > This patch introduces a per virtqueue waitqueue to allow driver to
> > sleep and wait for more used. Two new helpers are introduced to allow
> > driver to sleep and wake up.
> >
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > Changes since V1:
> > - check virtqueue_is_broken() as well
> > - use more_used() instead of virtqueue_get_buf() to allow caller to
> >   get buffers afterwards
> > ---
> >  drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
> >  include/linux/virtio.h       |  3 +++
> >  2 files changed, 32 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 5cfb2fa8abee..9c83eb945493 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -13,6 +13,7 @@
> >  #include <linux/dma-mapping.h>
> >  #include <linux/kmsan.h>
> >  #include <linux/spinlock.h>
> > +#include <linux/wait.h>
> >  #include <xen/xen.h>
> >
> >  #ifdef DEBUG
> > @@ -60,6 +61,7 @@
> >                       "%s:"fmt, (_vq)->vq.name, ##args);      \
> >               /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
> >               WRITE_ONCE((_vq)->broken, true);                       \
> > +             wake_up_interruptible(&(_vq)->wq);                     \
> >       } while (0)
> >  #define START_USE(vq)
> >  #define END_USE(vq)
> > @@ -203,6 +205,9 @@ struct vring_virtqueue {
> >       /* DMA, allocation, and size information */
> >       bool we_own_ring;
> >
> > +     /* Wait for buffer to be used */
> > +     wait_queue_head_t wq;
> > +
> >  #ifdef DEBUG
> >       /* They're supposed to lock for us. */
> >       unsigned int in_use;
> > @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
> >       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> >               vq->weak_barriers = false;
> >
> > +     init_waitqueue_head(&vq->wq);
> > +
> >       err = vring_alloc_state_extra_packed(&vring_packed);
> >       if (err)
> >               goto err_state_extra;
> > @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
> >       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> >               vq->weak_barriers = false;
> >
> > +     init_waitqueue_head(&vq->wq);
> > +
> >       err = vring_alloc_state_extra_split(vring_split);
> >       if (err) {
> >               kfree(vq);
> > @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
> >  {
> >       struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > +     wake_up_interruptible(&vq->wq);
> > +
> >       if (vq->we_own_ring) {
> >               if (vq->packed_ring) {
> >                       vring_free_queue(vq->vq.vdev,
> > @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> >  }
> >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> >
> > +int virtqueue_wait_for_used(struct virtqueue *_vq)
> > +{
> > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +     /* TODO: Tweak the timeout. */
> > +     return wait_event_interruptible_timeout(vq->wq,
> > +            virtqueue_is_broken(_vq) || more_used(vq), HZ);
>
> BTW undocumented that you also make it interruptible.
> So if we get an interrupt then this will fail.

Yes, this is sub-optimal.


> But device is still going and will later use the buffers.
>
> Same for timeout really.

Avoiding infinite wait/poll is one of the goals, another is to sleep.
If we think the timeout is hard, we can start from the wait.

Thanks

>
>
>
> > +}
> > +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
> > +
> > +void virtqueue_wake_up(struct virtqueue *_vq)
> > +{
> > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +     wake_up_interruptible(&vq->wq);
> > +}
> > +EXPORT_SYMBOL_GPL(virtqueue_wake_up);
> > +
> >  MODULE_LICENSE("GPL");
> > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > index dcab9c7e8784..2eb62c774895 100644
> > --- a/include/linux/virtio.h
> > +++ b/include/linux/virtio.h
> > @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
> >  void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
> >                           void **ctx);
> >
> > +int virtqueue_wait_for_used(struct virtqueue *vq);
> > +void virtqueue_wake_up(struct virtqueue *vq);
> > +
> >  void virtqueue_disable_cb(struct virtqueue *vq);
> >
> >  bool virtqueue_enable_cb(struct virtqueue *vq);
> > --
> > 2.25.1
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-27  2:19     ` Xuan Zhuo
@ 2022-12-27  4:33       ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  4:33 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem, mst

On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > We used to busy wait on the cvq command; this tends to be
> > problematic since:
> >
> > 1) the CPU could wait forever on a buggy/malicious device
> > 2) there's no way to terminate the process that triggers the cvq
> >    command
> >
> > So this patch switches to using virtqueue_wait_for_used() to sleep with a
> > timeout (1s) instead of busy polling for the cvq command forever. This
>
> I don't think that a fixed 1S is a good choice.

Well, it could be tweaked to be a little bit longer.

One way, as discussed, is to let the device advertise a timeout then
the driver can validate if it's valid and use that timeout. But it
needs extension to the spec.

> Some of the DPUs are very
> lazy for cvq handle.

Such design needs to be revisited, cvq (control path) should have a
better priority or QOS than datapath.

> In particular, we will also directly break the device.

It's a form of hardening against malicious devices.

>
> I think it is necessary to add a Virtio-Net parameter to allow users to define
> this timeout by themselves. Although I don't think this is a good way.

Very hard and unfriendly to the end users.

Thanks

>
> Thanks.
>
>
> > gives the scheduler a breather and lets the process respond to a
> > signal. If the device doesn't respond within the timeout, break the
> > device.
> >
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > Changes since V1:
> > - break the device when timeout
> > - get buffer manually since the virtio core check more_used() instead
> > ---
> >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> >  1 file changed, 16 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index efd9dd55828b..6a2ea64cfcb5 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> >       vi->rx_mode_work_enabled = false;
> >       spin_unlock_bh(&vi->rx_mode_lock);
> >
> > +     virtqueue_wake_up(vi->cvq);
> >       flush_work(&vi->rx_mode_work);
> >  }
> >
> > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> >       return !oom;
> >  }
> >
> > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > +{
> > +     virtqueue_wake_up(cvq);
> > +}
> > +
> >  static void skb_recv_done(struct virtqueue *rvq)
> >  {
> >       struct virtnet_info *vi = rvq->vdev->priv;
> > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> >       return err;
> >  }
> >
> > +static int virtnet_close(struct net_device *dev);
> > +
> >  /*
> >   * Send command via the control virtqueue and check status.  Commands
> >   * supported by the hypervisor, as indicated by feature bits, should
> > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> >       if (unlikely(!virtqueue_kick(vi->cvq)))
> >               return vi->ctrl->status == VIRTIO_NET_OK;
> >
> > -     /* Spin for a response, the kick causes an ioport write, trapping
> > -      * into the hypervisor, so the request should be handled immediately.
> > -      */
> > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > -            !virtqueue_is_broken(vi->cvq))
> > -             cpu_relax();
> > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > +             virtqueue_get_buf(vi->cvq, &tmp);
> > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > +     }
> >
> > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > +     virtio_break_device(vi->vdev);
> > +     return VIRTIO_NET_ERR;
> >  }
> >
> >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> >
> >       /* Parameters for control virtqueue, if any */
> >       if (vi->has_cvq) {
> > -             callbacks[total_vqs - 1] = NULL;
> > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> >               names[total_vqs - 1] = "control";
> >       }
> >
> > --
> > 2.25.1
> >
> > _______________________________________________
> > Virtualization mailing list
> > Virtualization@lists.linux-foundation.org
> > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-27  4:33       ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  4:33 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: mst, netdev, linux-kernel, virtualization, eperezma, edumazet,
	kuba, maxime.coquelin, pabeni, davem

On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > We used to busy wait on the cvq command; this tends to be
> > problematic since:
> >
> > 1) CPU could wait forever on a buggy/malicious device
> > 2) There's no way to terminate the process that triggers the cvq
> >    command
> >
> > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > timeout (1s) instead of busy polling for the cvq command forever. This
>
> I don't think that a fixed 1S is a good choice.

Well, it could be tweaked to be a little bit longer.

One way, as discussed, is to let the device advertise a timeout then
the driver can validate if it's valid and use that timeout. But it
needs extension to the spec.

> Some of the DPUs are very
> lazy for cvq handle.

Such design needs to be revisited, cvq (control path) should have a
better priority or QOS than datapath.

> In particular, we will also directly break the device.

It's kind of hardening for malicious devices.

>
> I think it is necessary to add a Virtio-Net parameter to allow users to define
> this timeout by themselves. Although I don't think this is a good way.

Very hard and unfriendly to the end users.

Thanks

>
> Thanks.
>
>
> > > gives the scheduler a breath and lets the process respond to
> > > a signal. If the device doesn't respond in the timeout, break the
> > device.
> >
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > Changes since V1:
> > - break the device when timeout
> > - get buffer manually since the virtio core check more_used() instead
> > ---
> >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> >  1 file changed, 16 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index efd9dd55828b..6a2ea64cfcb5 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> >       vi->rx_mode_work_enabled = false;
> >       spin_unlock_bh(&vi->rx_mode_lock);
> >
> > +     virtqueue_wake_up(vi->cvq);
> >       flush_work(&vi->rx_mode_work);
> >  }
> >
> > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> >       return !oom;
> >  }
> >
> > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > +{
> > +     virtqueue_wake_up(cvq);
> > +}
> > +
> >  static void skb_recv_done(struct virtqueue *rvq)
> >  {
> >       struct virtnet_info *vi = rvq->vdev->priv;
> > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> >       return err;
> >  }
> >
> > +static int virtnet_close(struct net_device *dev);
> > +
> >  /*
> >   * Send command via the control virtqueue and check status.  Commands
> >   * supported by the hypervisor, as indicated by feature bits, should
> > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> >       if (unlikely(!virtqueue_kick(vi->cvq)))
> >               return vi->ctrl->status == VIRTIO_NET_OK;
> >
> > -     /* Spin for a response, the kick causes an ioport write, trapping
> > -      * into the hypervisor, so the request should be handled immediately.
> > -      */
> > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > -            !virtqueue_is_broken(vi->cvq))
> > -             cpu_relax();
> > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > +             virtqueue_get_buf(vi->cvq, &tmp);
> > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > +     }
> >
> > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > +     virtio_break_device(vi->vdev);
> > +     return VIRTIO_NET_ERR;
> >  }
> >
> >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> >
> >       /* Parameters for control virtqueue, if any */
> >       if (vi->has_cvq) {
> > -             callbacks[total_vqs - 1] = NULL;
> > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> >               names[total_vqs - 1] = "control";
> >       }
> >
> > --
> > 2.25.1
> >
> > _______________________________________________
> > Virtualization mailing list
> > Virtualization@lists.linux-foundation.org
> > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-27  4:33       ` Jason Wang
@ 2022-12-27  6:58         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  6:58 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet, kuba,
	maxime.coquelin, pabeni, davem

On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > We used to busy wait on the cvq command; this tends to be
> > > problematic since:
> > >
> > > 1) CPU could wait forever on a buggy/malicious device
> > > 2) There's no way to terminate the process that triggers the cvq
> > >    command
> > >
> > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > timeout (1s) instead of busy polling for the cvq command forever. This
> >
> > I don't think that a fixed 1S is a good choice.
> 
> Well, it could be tweaked to be a little bit longer.
> 
> One way, as discussed, is to let the device advertise a timeout then
> the driver can validate if it's valid and use that timeout. But it
> needs extension to the spec.

Controlling timeout from device is a good idea, e.g. hardware devices
would benefit from a shorter timeout, hypervisor devices from a longer
timeout or no timeout.

> 
> > Some of the DPUs are very
> > lazy for cvq handle.
> 
> Such design needs to be revisited, cvq (control path) should have a
> better priority or QOS than datapath.

Spec says nothing about this, so driver can't assume this either.

> > In particular, we will also directly break the device.
> 
> It's kind of hardening for malicious devices.

ATM no amount of hardening can prevent a malicious hypervisor from
blocking the guest. Recovering when a hardware device is broken would be
nice but I think if we do bother then we should try harder to recover,
such as by driving device reset.


Also, does your patch break surprise removal? There's no callback
in this case ATM.

> >
> > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > this timeout by themselves. Although I don't think this is a good way.
> 
> Very hard and unfriendly to the end users.
> 
> Thanks
> 
> >
> > Thanks.
> >
> >
> > > gives the scheduler a breath and lets the process respond to
> > > a signal. If the device doesn't respond in the timeout, break the
> > > device.
> > >
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > > Changes since V1:
> > > - break the device when timeout
> > > - get buffer manually since the virtio core check more_used() instead
> > > ---
> > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > >       vi->rx_mode_work_enabled = false;
> > >       spin_unlock_bh(&vi->rx_mode_lock);
> > >
> > > +     virtqueue_wake_up(vi->cvq);
> > >       flush_work(&vi->rx_mode_work);
> > >  }
> > >
> > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > >       return !oom;
> > >  }
> > >
> > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > +{
> > > +     virtqueue_wake_up(cvq);
> > > +}
> > > +
> > >  static void skb_recv_done(struct virtqueue *rvq)
> > >  {
> > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > >       return err;
> > >  }
> > >
> > > +static int virtnet_close(struct net_device *dev);
> > > +
> > >  /*
> > >   * Send command via the control virtqueue and check status.  Commands
> > >   * supported by the hypervisor, as indicated by feature bits, should
> > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > >
> > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > -      * into the hypervisor, so the request should be handled immediately.
> > > -      */
> > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > -            !virtqueue_is_broken(vi->cvq))
> > > -             cpu_relax();
> > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > +     }
> > >
> > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > +     virtio_break_device(vi->vdev);
> > > +     return VIRTIO_NET_ERR;
> > >  }
> > >
> > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > >
> > >       /* Parameters for control virtqueue, if any */
> > >       if (vi->has_cvq) {
> > > -             callbacks[total_vqs - 1] = NULL;
> > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > >               names[total_vqs - 1] = "control";
> > >       }
> > >
> > > --
> > > 2.25.1
> > >
> > > _______________________________________________
> > > Virtualization mailing list
> > > Virtualization@lists.linux-foundation.org
> > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> >

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-27  6:58         ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  6:58 UTC (permalink / raw)
  To: Jason Wang
  Cc: Xuan Zhuo, netdev, linux-kernel, virtualization, eperezma,
	edumazet, maxime.coquelin, kuba, pabeni, davem

On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > We used to busy wait on the cvq command; this tends to be
> > > problematic since:
> > >
> > > 1) CPU could wait forever on a buggy/malicious device
> > > 2) There's no way to terminate the process that triggers the cvq
> > >    command
> > >
> > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > timeout (1s) instead of busy polling for the cvq command forever. This
> >
> > I don't think that a fixed 1S is a good choice.
> 
> Well, it could be tweaked to be a little bit longer.
> 
> One way, as discussed, is to let the device advertise a timeout then
> the driver can validate if it's valid and use that timeout. But it
> needs extension to the spec.

Controlling timeout from device is a good idea, e.g. hardware devices
would benefit from a shorter timeout, hypervisor devices from a longer
timeout or no timeout.

> 
> > Some of the DPUs are very
> > lazy for cvq handle.
> 
> Such design needs to be revisited, cvq (control path) should have a
> better priority or QOS than datapath.

Spec says nothing about this, so driver can't assume this either.

> > In particular, we will also directly break the device.
> 
> It's kind of hardening for malicious devices.

ATM no amount of hardening can prevent a malicious hypervisor from
blocking the guest. Recovering when a hardware device is broken would be
nice but I think if we do bother then we should try harder to recover,
such as by driving device reset.


Also, does your patch break surprise removal? There's no callback
in this case ATM.

> >
> > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > this timeout by themselves. Although I don't think this is a good way.
> 
> Very hard and unfriendly to the end users.
> 
> Thanks
> 
> >
> > Thanks.
> >
> >
> > > gives the scheduler a breath and lets the process respond to
> > > a signal. If the device doesn't respond in the timeout, break the
> > > device.
> > >
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > > Changes since V1:
> > > - break the device when timeout
> > > - get buffer manually since the virtio core check more_used() instead
> > > ---
> > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > >       vi->rx_mode_work_enabled = false;
> > >       spin_unlock_bh(&vi->rx_mode_lock);
> > >
> > > +     virtqueue_wake_up(vi->cvq);
> > >       flush_work(&vi->rx_mode_work);
> > >  }
> > >
> > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > >       return !oom;
> > >  }
> > >
> > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > +{
> > > +     virtqueue_wake_up(cvq);
> > > +}
> > > +
> > >  static void skb_recv_done(struct virtqueue *rvq)
> > >  {
> > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > >       return err;
> > >  }
> > >
> > > +static int virtnet_close(struct net_device *dev);
> > > +
> > >  /*
> > >   * Send command via the control virtqueue and check status.  Commands
> > >   * supported by the hypervisor, as indicated by feature bits, should
> > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > >
> > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > -      * into the hypervisor, so the request should be handled immediately.
> > > -      */
> > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > -            !virtqueue_is_broken(vi->cvq))
> > > -             cpu_relax();
> > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > +     }
> > >
> > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > +     virtio_break_device(vi->vdev);
> > > +     return VIRTIO_NET_ERR;
> > >  }
> > >
> > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > >
> > >       /* Parameters for control virtqueue, if any */
> > >       if (vi->has_cvq) {
> > > -             callbacks[total_vqs - 1] = NULL;
> > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > >               names[total_vqs - 1] = "control";
> > >       }
> > >
> > > --
> > > 2.25.1
> > >
> > > _______________________________________________
> > > Virtualization mailing list
> > > Virtualization@lists.linux-foundation.org
> > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> >


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-27  3:47       ` Jason Wang
@ 2022-12-27  7:19         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  7:19 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Tue, Dec 27, 2022 at 11:47:34AM +0800, Jason Wang wrote:
> On Tue, Dec 27, 2022 at 7:34 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, Dec 26, 2022 at 03:49:07PM +0800, Jason Wang wrote:
> > > This patch introduces a per virtqueue waitqueue to allow driver to
> > > sleep and wait for more used. Two new helpers are introduced to allow
> > > driver to sleep and wake up.
> > >
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > > Changes since V1:
> > > - check virtqueue_is_broken() as well
> > > - use more_used() instead of virtqueue_get_buf() to allow caller to
> > >   get buffers afterwards
> > > ---
> > >  drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
> > >  include/linux/virtio.h       |  3 +++
> > >  2 files changed, 32 insertions(+)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 5cfb2fa8abee..9c83eb945493 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -13,6 +13,7 @@
> > >  #include <linux/dma-mapping.h>
> > >  #include <linux/kmsan.h>
> > >  #include <linux/spinlock.h>
> > > +#include <linux/wait.h>
> > >  #include <xen/xen.h>
> > >
> > >  #ifdef DEBUG
> > > @@ -60,6 +61,7 @@
> > >                       "%s:"fmt, (_vq)->vq.name, ##args);      \
> > >               /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
> > >               WRITE_ONCE((_vq)->broken, true);                       \
> > > +             wake_up_interruptible(&(_vq)->wq);                     \
> > >       } while (0)
> > >  #define START_USE(vq)
> > >  #define END_USE(vq)
> > > @@ -203,6 +205,9 @@ struct vring_virtqueue {
> > >       /* DMA, allocation, and size information */
> > >       bool we_own_ring;
> > >
> > > +     /* Wait for buffer to be used */
> > > +     wait_queue_head_t wq;
> > > +
> > >  #ifdef DEBUG
> > >       /* They're supposed to lock for us. */
> > >       unsigned int in_use;
> > > @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
> > >       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> > >               vq->weak_barriers = false;
> > >
> > > +     init_waitqueue_head(&vq->wq);
> > > +
> > >       err = vring_alloc_state_extra_packed(&vring_packed);
> > >       if (err)
> > >               goto err_state_extra;
> > > @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
> > >       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> > >               vq->weak_barriers = false;
> > >
> > > +     init_waitqueue_head(&vq->wq);
> > > +
> > >       err = vring_alloc_state_extra_split(vring_split);
> > >       if (err) {
> > >               kfree(vq);
> > > @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
> > >  {
> > >       struct vring_virtqueue *vq = to_vvq(_vq);
> > >
> > > +     wake_up_interruptible(&vq->wq);
> > > +
> > >       if (vq->we_own_ring) {
> > >               if (vq->packed_ring) {
> > >                       vring_free_queue(vq->vq.vdev,
> > > @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > >  }
> > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > >
> > > +int virtqueue_wait_for_used(struct virtqueue *_vq)
> > > +{
> > > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > > +
> > > +     /* TODO: Tweak the timeout. */
> > > +     return wait_event_interruptible_timeout(vq->wq,
> > > +            virtqueue_is_broken(_vq) || more_used(vq), HZ);
> >
> > There's no good timeout. Let's not even go there, if device goes
> > bad it should set the need reset bit.
> 
> The problem is that we can't depend on the device. If it takes too
> long for the device to respond to cvq, there's a high possibility that
> the device is buggy or even malicious. We can have a higher timeout
> here and it should be still better than waiting forever (the cvq
> commands need to be serialized so it needs to hold a lock anyway
> (RTNL) ).
> 
> Thanks

With a TODO item like this I'd expect this to be an RFC.
Here's why:

Making driver more robust from device failures is a laudable goal but it's really
hard to be 100% foolproof here. E.g. device can just block pci reads and
it would be very hard to recover.  So I'm going to only merge patches
like this if they at least theoretically have very little chance
of breaking existing users.

And note that in most setups, CVQ is only used at startup and then left mostly alone.

Finally, note that lots of guests need virtio to do anything useful at all.
So just failing commands is not enough to recover - you need to try
harder maybe by attempting to reset device. Could be a question of
policy - might need to make this guest configurable.



> >
> >
> > > +}
> > > +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
> > > +
> > > +void virtqueue_wake_up(struct virtqueue *_vq)
> > > +{
> > > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > > +
> > > +     wake_up_interruptible(&vq->wq);
> > > +}
> > > +EXPORT_SYMBOL_GPL(virtqueue_wake_up);
> > > +
> > >  MODULE_LICENSE("GPL");
> > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > index dcab9c7e8784..2eb62c774895 100644
> > > --- a/include/linux/virtio.h
> > > +++ b/include/linux/virtio.h
> > > @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
> > >  void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
> > >                           void **ctx);
> > >
> > > +int virtqueue_wait_for_used(struct virtqueue *vq);
> > > +void virtqueue_wake_up(struct virtqueue *vq);
> > > +
> > >  void virtqueue_disable_cb(struct virtqueue *vq);
> > >
> > >  bool virtqueue_enable_cb(struct virtqueue *vq);
> > > --
> > > 2.25.1
> >

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-27  7:19         ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  7:19 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Tue, Dec 27, 2022 at 11:47:34AM +0800, Jason Wang wrote:
> On Tue, Dec 27, 2022 at 7:34 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, Dec 26, 2022 at 03:49:07PM +0800, Jason Wang wrote:
> > > This patch introduces a per virtqueue waitqueue to allow driver to
> > > sleep and wait for more used. Two new helpers are introduced to allow
> > > driver to sleep and wake up.
> > >
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > > Changes since V1:
> > > - check virtqueue_is_broken() as well
> > > - use more_used() instead of virtqueue_get_buf() to allow caller to
> > >   get buffers afterwards
> > > ---
> > >  drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
> > >  include/linux/virtio.h       |  3 +++
> > >  2 files changed, 32 insertions(+)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 5cfb2fa8abee..9c83eb945493 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -13,6 +13,7 @@
> > >  #include <linux/dma-mapping.h>
> > >  #include <linux/kmsan.h>
> > >  #include <linux/spinlock.h>
> > > +#include <linux/wait.h>
> > >  #include <xen/xen.h>
> > >
> > >  #ifdef DEBUG
> > > @@ -60,6 +61,7 @@
> > >                       "%s:"fmt, (_vq)->vq.name, ##args);      \
> > >               /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
> > >               WRITE_ONCE((_vq)->broken, true);                       \
> > > +             wake_up_interruptible(&(_vq)->wq);                     \
> > >       } while (0)
> > >  #define START_USE(vq)
> > >  #define END_USE(vq)
> > > @@ -203,6 +205,9 @@ struct vring_virtqueue {
> > >       /* DMA, allocation, and size information */
> > >       bool we_own_ring;
> > >
> > > +     /* Wait for buffer to be used */
> > > +     wait_queue_head_t wq;
> > > +
> > >  #ifdef DEBUG
> > >       /* They're supposed to lock for us. */
> > >       unsigned int in_use;
> > > @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
> > >       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> > >               vq->weak_barriers = false;
> > >
> > > +     init_waitqueue_head(&vq->wq);
> > > +
> > >       err = vring_alloc_state_extra_packed(&vring_packed);
> > >       if (err)
> > >               goto err_state_extra;
> > > @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
> > >       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> > >               vq->weak_barriers = false;
> > >
> > > +     init_waitqueue_head(&vq->wq);
> > > +
> > >       err = vring_alloc_state_extra_split(vring_split);
> > >       if (err) {
> > >               kfree(vq);
> > > @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
> > >  {
> > >       struct vring_virtqueue *vq = to_vvq(_vq);
> > >
> > > +     wake_up_interruptible(&vq->wq);
> > > +
> > >       if (vq->we_own_ring) {
> > >               if (vq->packed_ring) {
> > >                       vring_free_queue(vq->vq.vdev,
> > > @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > >  }
> > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > >
> > > +int virtqueue_wait_for_used(struct virtqueue *_vq)
> > > +{
> > > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > > +
> > > +     /* TODO: Tweak the timeout. */
> > > +     return wait_event_interruptible_timeout(vq->wq,
> > > +            virtqueue_is_broken(_vq) || more_used(vq), HZ);
> >
> > There's no good timeout. Let's not even go there, if device goes
> > bad it should set the need reset bit.
> 
> The problem is that we can't depend on the device. If it takes too
> long for the device to respond to cvq, there's a high possibility that
> the device is buggy or even malicious. We can have a higher timeout
> here and it should be still better than waiting forever (the cvq
> commands need to be serialized so it needs to hold a lock anyway
> (RTNL) ).
> 
> Thanks

With a TODO item like this I'd expect this to be an RFC.
Here's why:

Making driver more robust from device failures is a laudable goal but it's really
hard to be 100% foolproof here. E.g. device can just block pci reads and
it would be very hard to recover.  So I'm going to only merge patches
like this if they at least theoretically have very little chance
of breaking existing users.

And note that in most setups, CVQ is only used at startup and then left mostly alone.

Finally, note that lots of guests need virtio to do anything useful at all.
So just failing commands is not enough to recover - you need to try
harder maybe by attempting to reset device. Could be a question of
policy - might need to make this guest configurable.



> >
> >
> > > +}
> > > +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
> > > +
> > > +void virtqueue_wake_up(struct virtqueue *_vq)
> > > +{
> > > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > > +
> > > +     wake_up_interruptible(&vq->wq);
> > > +}
> > > +EXPORT_SYMBOL_GPL(virtqueue_wake_up);
> > > +
> > >  MODULE_LICENSE("GPL");
> > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > index dcab9c7e8784..2eb62c774895 100644
> > > --- a/include/linux/virtio.h
> > > +++ b/include/linux/virtio.h
> > > @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
> > >  void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
> > >                           void **ctx);
> > >
> > > +int virtqueue_wait_for_used(struct virtqueue *vq);
> > > +void virtqueue_wake_up(struct virtqueue *vq);
> > > +
> > >  void virtqueue_disable_cb(struct virtqueue *vq);
> > >
> > >  bool virtqueue_enable_cb(struct virtqueue *vq);
> > > --
> > > 2.25.1
> >


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 2/4] virtio_ring: switch to use BAD_RING()
  2022-12-27  3:51       ` Jason Wang
@ 2022-12-27  7:21         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  7:21 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Tue, Dec 27, 2022 at 11:51:02AM +0800, Jason Wang wrote:
> On Tue, Dec 27, 2022 at 7:36 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, Dec 26, 2022 at 03:49:06PM +0800, Jason Wang wrote:
> > > Switch to reuse BAD_RING() to allow common logic to be implemented in
> > > BAD_RING().
> > >
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > > Changes since V1:
> > > - switch to use BAD_RING in virtio_break_device()
> > > ---
> > >  drivers/virtio/virtio_ring.c | 8 ++++----
> > >  1 file changed, 4 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 2e7689bb933b..5cfb2fa8abee 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -58,7 +58,8 @@
> > >       do {                                                    \
> > >               dev_err(&_vq->vq.vdev->dev,                     \
> > >                       "%s:"fmt, (_vq)->vq.name, ##args);      \
> > > -             (_vq)->broken = true;                           \
> > > +             /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
> >
> > I don't think WRITE_ONCE/READ_ONCE pair as such. Can you point
> > me at documentation of such pairing?
> 
> Introduced by:
> 
> commit 60f0779862e4ab943810187752c462e85f5fa371
> Author: Parav Pandit <parav@nvidia.com>
> Date:   Wed Jul 21 17:26:45 2021 +0300
> 
>     virtio: Improve vq->broken access to avoid any compiler optimization
> 
> I think it might still apply here since virtqueue_is_broken() is still
> put into a loop inside wait_event().
> 
> Thanks

Oh I see. Maybe it's a response to some discussion we had at the time,
at this point I can no longer say what it meant.
But you are doing right not changing it here of course.

> >
> > > +             WRITE_ONCE((_vq)->broken, true);                       \
> > >       } while (0)
> > >  #define START_USE(vq)
> > >  #define END_USE(vq)
> > > @@ -2237,7 +2238,7 @@ bool virtqueue_notify(struct virtqueue *_vq)
> > >
> > >       /* Prod other side to tell it about changes. */
> > >       if (!vq->notify(_vq)) {
> > > -             vq->broken = true;
> > > +             BAD_RING(vq, "vq %d is broken\n", vq->vq.index);
> > >               return false;
> > >       }
> > >       return true;
> > > @@ -2786,8 +2787,7 @@ void virtio_break_device(struct virtio_device *dev)
> > >       list_for_each_entry(_vq, &dev->vqs, list) {
> > >               struct vring_virtqueue *vq = to_vvq(_vq);
> > >
> > > -             /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
> > > -             WRITE_ONCE(vq->broken, true);
> > > +             BAD_RING(vq, "Device break vq %d", _vq->index);
> > >       }
> > >       spin_unlock(&dev->vqs_list_lock);
> > >  }
> > > --
> > > 2.25.1
> >

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 2/4] virtio_ring: switch to use BAD_RING()
@ 2022-12-27  7:21         ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  7:21 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Tue, Dec 27, 2022 at 11:51:02AM +0800, Jason Wang wrote:
> On Tue, Dec 27, 2022 at 7:36 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, Dec 26, 2022 at 03:49:06PM +0800, Jason Wang wrote:
> > > Switch to reuse BAD_RING() to allow common logic to be implemented in
> > > BAD_RING().
> > >
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > > Changes since V1:
> > > - switch to use BAD_RING in virtio_break_device()
> > > ---
> > >  drivers/virtio/virtio_ring.c | 8 ++++----
> > >  1 file changed, 4 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 2e7689bb933b..5cfb2fa8abee 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -58,7 +58,8 @@
> > >       do {                                                    \
> > >               dev_err(&_vq->vq.vdev->dev,                     \
> > >                       "%s:"fmt, (_vq)->vq.name, ##args);      \
> > > -             (_vq)->broken = true;                           \
> > > +             /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
> >
> > I don't think WRITE_ONCE/READ_ONCE pair as such. Can you point
> > me at documentation of such pairing?
> 
> Introduced by:
> 
> commit 60f0779862e4ab943810187752c462e85f5fa371
> Author: Parav Pandit <parav@nvidia.com>
> Date:   Wed Jul 21 17:26:45 2021 +0300
> 
>     virtio: Improve vq->broken access to avoid any compiler optimization
> 
> I think it might still apply here since virtqueue_is_broken() is still
> put into a loop inside wait_event().
> 
> Thanks

Oh I see. Maybe it's a response to some discussion we had at the time,
at this point I can no longer say what it meant.
But you are doing right not changing it here of course.

> >
> > > +             WRITE_ONCE((_vq)->broken, true);                       \
> > >       } while (0)
> > >  #define START_USE(vq)
> > >  #define END_USE(vq)
> > > @@ -2237,7 +2238,7 @@ bool virtqueue_notify(struct virtqueue *_vq)
> > >
> > >       /* Prod other side to tell it about changes. */
> > >       if (!vq->notify(_vq)) {
> > > -             vq->broken = true;
> > > +             BAD_RING(vq, "vq %d is broken\n", vq->vq.index);
> > >               return false;
> > >       }
> > >       return true;
> > > @@ -2786,8 +2787,7 @@ void virtio_break_device(struct virtio_device *dev)
> > >       list_for_each_entry(_vq, &dev->vqs, list) {
> > >               struct vring_virtqueue *vq = to_vvq(_vq);
> > >
> > > -             /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
> > > -             WRITE_ONCE(vq->broken, true);
> > > +             BAD_RING(vq, "Device break vq %d", _vq->index);
> > >       }
> > >       spin_unlock(&dev->vqs_list_lock);
> > >  }
> > > --
> > > 2.25.1
> >


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-27  4:30       ` Jason Wang
@ 2022-12-27  7:33         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  7:33 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > But device is still going and will later use the buffers.
> >
> > Same for timeout really.
> 
> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> If we think the timeout is hard, we can start from the wait.
> 
> Thanks

If the goal is to avoid disrupting traffic while CVQ is in use,
that sounds more reasonable. E.g. someone is turning on promisc,
a spike in CPU usage might be unwelcome.

things we should be careful to address then:
1- debugging. Currently it's easy to see a warning if CPU is stuck
   in a loop for a while, and we also get a backtrace.
   E.g. with this - how do we know who has the RTNL?
   We need to integrate with kernel/watchdog.c for good results
   and to make sure policy is consistent.
2- overhead. In a very common scenario when device is in hypervisor,
   programming timers etc has a very high overhead, at bootup
   lots of CVQ commands are run and slowing boot down is not nice.
   let's poll for a bit before waiting?
3- surprise removal. need to wake up thread in some way. what about
   other cases of device breakage - is there a chance this
   introduces new bugs around that? at least enumerate them please.


-- 
MST

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-27  7:33         ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  7:33 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > But device is still going and will later use the buffers.
> >
> > Same for timeout really.
> 
> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> If we think the timeout is hard, we can start from the wait.
> 
> Thanks

If the goal is to avoid disrupting traffic while CVQ is in use,
that sounds more reasonable. E.g. someone is turning on promisc,
a spike in CPU usage might be unwelcome.

things we should be careful to address then:
1- debugging. Currently it's easy to see a warning if CPU is stuck
   in a loop for a while, and we also get a backtrace.
   E.g. with this - how do we know who has the RTNL?
   We need to integrate with kernel/watchdog.c for good results
   and to make sure policy is consistent.
2- overhead. In a very common scenario when device is in hypervisor,
   programming timers etc has a very high overhead, at bootup
   lots of CVQ commands are run and slowing boot down is not nice.
   let's poll for a bit before waiting?
3- surprise removal. need to wake up thread in some way. what about
   other cases of device breakage - is there a chance this
   introduces new bugs around that? at least enumerate them please.


-- 
MST


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/4] virtio-net: convert rx mode setting to use workqueue
  2022-12-26  7:49   ` Jason Wang
@ 2022-12-27  7:39     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  7:39 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Mon, Dec 26, 2022 at 03:49:05PM +0800, Jason Wang wrote:
> @@ -2227,9 +2267,21 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>  				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
>  		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
>  
> +	rtnl_unlock();
> +
>  	kfree(buf);
>  }
>  
> +static void virtnet_set_rx_mode(struct net_device *dev)
> +{
> +	struct virtnet_info *vi = netdev_priv(dev);
> +
> +	spin_lock(&vi->rx_mode_lock);
> +	if (vi->rx_mode_work_enabled)
> +		schedule_work(&vi->rx_mode_work);
> +	spin_unlock(&vi->rx_mode_lock);
> +}
> +
>  static int virtnet_vlan_rx_add_vid(struct net_device *dev,
>  				   __be16 proto, u16 vid)
>  {

Hmm so user tells us to e.g enable promisc. We report completion
but card is still dropping packets. I think this
has a chance to break some setups.

-- 
MST

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/4] virtio-net: convert rx mode setting to use workqueue
@ 2022-12-27  7:39     ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  7:39 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Mon, Dec 26, 2022 at 03:49:05PM +0800, Jason Wang wrote:
> @@ -2227,9 +2267,21 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>  				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
>  		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
>  
> +	rtnl_unlock();
> +
>  	kfree(buf);
>  }
>  
> +static void virtnet_set_rx_mode(struct net_device *dev)
> +{
> +	struct virtnet_info *vi = netdev_priv(dev);
> +
> +	spin_lock(&vi->rx_mode_lock);
> +	if (vi->rx_mode_work_enabled)
> +		schedule_work(&vi->rx_mode_work);
> +	spin_unlock(&vi->rx_mode_lock);
> +}
> +
>  static int virtnet_vlan_rx_add_vid(struct net_device *dev,
>  				   __be16 proto, u16 vid)
>  {

Hmm so user tells us to e.g enable promisc. We report completion
but card is still dropping packets. I think this
has a chance to break some setups.

-- 
MST


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/4] virtio-net: convert rx mode setting to use workqueue
  2022-12-27  7:39     ` Michael S. Tsirkin
@ 2022-12-27  9:06       ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  9:06 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma


在 2022/12/27 15:39, Michael S. Tsirkin 写道:
> On Mon, Dec 26, 2022 at 03:49:05PM +0800, Jason Wang wrote:
>> @@ -2227,9 +2267,21 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>>   				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
>>   		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
>>   
>> +	rtnl_unlock();
>> +
>>   	kfree(buf);
>>   }
>>   
>> +static void virtnet_set_rx_mode(struct net_device *dev)
>> +{
>> +	struct virtnet_info *vi = netdev_priv(dev);
>> +
>> +	spin_lock(&vi->rx_mode_lock);
>> +	if (vi->rx_mode_work_enabled)
>> +		schedule_work(&vi->rx_mode_work);
>> +	spin_unlock(&vi->rx_mode_lock);
>> +}
>> +
>>   static int virtnet_vlan_rx_add_vid(struct net_device *dev,
>>   				   __be16 proto, u16 vid)
>>   {
> Hmm so user tells us to e.g enable promisc. We report completion
> but card is still dropping packets. I think this
> has a chance to break some setups.


I think all those filters are best efforts, am I wrong?

Thanks


>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/4] virtio-net: convert rx mode setting to use workqueue
@ 2022-12-27  9:06       ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  9:06 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem


在 2022/12/27 15:39, Michael S. Tsirkin 写道:
> On Mon, Dec 26, 2022 at 03:49:05PM +0800, Jason Wang wrote:
>> @@ -2227,9 +2267,21 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>>   				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
>>   		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
>>   
>> +	rtnl_unlock();
>> +
>>   	kfree(buf);
>>   }
>>   
>> +static void virtnet_set_rx_mode(struct net_device *dev)
>> +{
>> +	struct virtnet_info *vi = netdev_priv(dev);
>> +
>> +	spin_lock(&vi->rx_mode_lock);
>> +	if (vi->rx_mode_work_enabled)
>> +		schedule_work(&vi->rx_mode_work);
>> +	spin_unlock(&vi->rx_mode_lock);
>> +}
>> +
>>   static int virtnet_vlan_rx_add_vid(struct net_device *dev,
>>   				   __be16 proto, u16 vid)
>>   {
> Hmm so user tells us to e.g enable promisc. We report completion
> but card is still dropping packets. I think this
> has a chance to break some setups.


I think all those filters are best efforts, am I wrong?

Thanks


>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-27  7:19         ` Michael S. Tsirkin
@ 2022-12-27  9:09           ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  9:09 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem


在 2022/12/27 15:19, Michael S. Tsirkin 写道:
> On Tue, Dec 27, 2022 at 11:47:34AM +0800, Jason Wang wrote:
>> On Tue, Dec 27, 2022 at 7:34 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>>> On Mon, Dec 26, 2022 at 03:49:07PM +0800, Jason Wang wrote:
>>>> This patch introduces a per virtqueue waitqueue to allow driver to
>>>> sleep and wait for more used. Two new helpers are introduced to allow
>>>> driver to sleep and wake up.
>>>>
>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>> ---
>>>> Changes since V1:
>>>> - check virtqueue_is_broken() as well
>>>> - use more_used() instead of virtqueue_get_buf() to allow caller to
>>>>    get buffers afterwards
>>>> ---
>>>>   drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
>>>>   include/linux/virtio.h       |  3 +++
>>>>   2 files changed, 32 insertions(+)
>>>>
>>>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>>>> index 5cfb2fa8abee..9c83eb945493 100644
>>>> --- a/drivers/virtio/virtio_ring.c
>>>> +++ b/drivers/virtio/virtio_ring.c
>>>> @@ -13,6 +13,7 @@
>>>>   #include <linux/dma-mapping.h>
>>>>   #include <linux/kmsan.h>
>>>>   #include <linux/spinlock.h>
>>>> +#include <linux/wait.h>
>>>>   #include <xen/xen.h>
>>>>
>>>>   #ifdef DEBUG
>>>> @@ -60,6 +61,7 @@
>>>>                        "%s:"fmt, (_vq)->vq.name, ##args);      \
>>>>                /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
>>>>                WRITE_ONCE((_vq)->broken, true);                       \
>>>> +             wake_up_interruptible(&(_vq)->wq);                     \
>>>>        } while (0)
>>>>   #define START_USE(vq)
>>>>   #define END_USE(vq)
>>>> @@ -203,6 +205,9 @@ struct vring_virtqueue {
>>>>        /* DMA, allocation, and size information */
>>>>        bool we_own_ring;
>>>>
>>>> +     /* Wait for buffer to be used */
>>>> +     wait_queue_head_t wq;
>>>> +
>>>>   #ifdef DEBUG
>>>>        /* They're supposed to lock for us. */
>>>>        unsigned int in_use;
>>>> @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
>>>>        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>>>>                vq->weak_barriers = false;
>>>>
>>>> +     init_waitqueue_head(&vq->wq);
>>>> +
>>>>        err = vring_alloc_state_extra_packed(&vring_packed);
>>>>        if (err)
>>>>                goto err_state_extra;
>>>> @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
>>>>        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>>>>                vq->weak_barriers = false;
>>>>
>>>> +     init_waitqueue_head(&vq->wq);
>>>> +
>>>>        err = vring_alloc_state_extra_split(vring_split);
>>>>        if (err) {
>>>>                kfree(vq);
>>>> @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
>>>>   {
>>>>        struct vring_virtqueue *vq = to_vvq(_vq);
>>>>
>>>> +     wake_up_interruptible(&vq->wq);
>>>> +
>>>>        if (vq->we_own_ring) {
>>>>                if (vq->packed_ring) {
>>>>                        vring_free_queue(vq->vq.vdev,
>>>> @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
>>>>   }
>>>>   EXPORT_SYMBOL_GPL(virtqueue_get_vring);
>>>>
>>>> +int virtqueue_wait_for_used(struct virtqueue *_vq)
>>>> +{
>>>> +     struct vring_virtqueue *vq = to_vvq(_vq);
>>>> +
>>>> +     /* TODO: Tweak the timeout. */
>>>> +     return wait_event_interruptible_timeout(vq->wq,
>>>> +            virtqueue_is_broken(_vq) || more_used(vq), HZ);
>>> There's no good timeout. Let's not even go there, if device goes
>>> bad it should set the need reset bit.
>> The problem is that we can't depend on the device. If it takes too
>> long for the device to respond to cvq, there's a high possibility that
>> the device is buggy or even malicious. We can have a higher timeout
>> here and it should be still better than waiting forever (the cvq
>> commands need to be serialized so it needs to hold a lock anyway
>> (RTNL) ).
>>
>> Thanks
> With a TODO item like this I'd expect this to be an RFC.
> Here's why:
>
> Making driver more robust from device failures is a laudable goal but it's really
> hard to be 100% foolproof here. E.g. device can just block pci reads and
> it would be very hard to recover.


Yes.


>    So I'm going to only merge patches
> like this if they at least theoretically have very little chance
> of breaking existing users.


AFAIK, this is not theoretical, consider:

1) DPU may implement virtio-net CVQ with codes running in CPU
2) VDUSE may want to support CVQ in the future


>
> And note that in most setups, CVQ is only used at startup and then left mostly alone.
>
> Finally, note that lots of guests need virtio to do anything useful at all.
> So just failing commands is not enough to recover - you need to try
> harder maybe by attempting to reset device.


This requires upper layer support, which does not seem to exist in the 
networking subsystem.


> Could be a question of
> policy - might need to make this guest configurable.


Yes.

Thanks


>
>
>
>>>
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
>>>> +
>>>> +void virtqueue_wake_up(struct virtqueue *_vq)
>>>> +{
>>>> +     struct vring_virtqueue *vq = to_vvq(_vq);
>>>> +
>>>> +     wake_up_interruptible(&vq->wq);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(virtqueue_wake_up);
>>>> +
>>>>   MODULE_LICENSE("GPL");
>>>> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
>>>> index dcab9c7e8784..2eb62c774895 100644
>>>> --- a/include/linux/virtio.h
>>>> +++ b/include/linux/virtio.h
>>>> @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
>>>>   void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
>>>>                            void **ctx);
>>>>
>>>> +int virtqueue_wait_for_used(struct virtqueue *vq);
>>>> +void virtqueue_wake_up(struct virtqueue *vq);
>>>> +
>>>>   void virtqueue_disable_cb(struct virtqueue *vq);
>>>>
>>>>   bool virtqueue_enable_cb(struct virtqueue *vq);
>>>> --
>>>> 2.25.1

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-27  9:09           ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  9:09 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma


在 2022/12/27 15:19, Michael S. Tsirkin 写道:
> On Tue, Dec 27, 2022 at 11:47:34AM +0800, Jason Wang wrote:
>> On Tue, Dec 27, 2022 at 7:34 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>>> On Mon, Dec 26, 2022 at 03:49:07PM +0800, Jason Wang wrote:
>>>> This patch introduces a per virtqueue waitqueue to allow driver to
>>>> sleep and wait for more used. Two new helpers are introduced to allow
>>>> driver to sleep and wake up.
>>>>
>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>> ---
>>>> Changes since V1:
>>>> - check virtqueue_is_broken() as well
>>>> - use more_used() instead of virtqueue_get_buf() to allow caller to
>>>>    get buffers afterwards
>>>> ---
>>>>   drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
>>>>   include/linux/virtio.h       |  3 +++
>>>>   2 files changed, 32 insertions(+)
>>>>
>>>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>>>> index 5cfb2fa8abee..9c83eb945493 100644
>>>> --- a/drivers/virtio/virtio_ring.c
>>>> +++ b/drivers/virtio/virtio_ring.c
>>>> @@ -13,6 +13,7 @@
>>>>   #include <linux/dma-mapping.h>
>>>>   #include <linux/kmsan.h>
>>>>   #include <linux/spinlock.h>
>>>> +#include <linux/wait.h>
>>>>   #include <xen/xen.h>
>>>>
>>>>   #ifdef DEBUG
>>>> @@ -60,6 +61,7 @@
>>>>                        "%s:"fmt, (_vq)->vq.name, ##args);      \
>>>>                /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
>>>>                WRITE_ONCE((_vq)->broken, true);                       \
>>>> +             wake_up_interruptible(&(_vq)->wq);                     \
>>>>        } while (0)
>>>>   #define START_USE(vq)
>>>>   #define END_USE(vq)
>>>> @@ -203,6 +205,9 @@ struct vring_virtqueue {
>>>>        /* DMA, allocation, and size information */
>>>>        bool we_own_ring;
>>>>
>>>> +     /* Wait for buffer to be used */
>>>> +     wait_queue_head_t wq;
>>>> +
>>>>   #ifdef DEBUG
>>>>        /* They're supposed to lock for us. */
>>>>        unsigned int in_use;
>>>> @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
>>>>        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>>>>                vq->weak_barriers = false;
>>>>
>>>> +     init_waitqueue_head(&vq->wq);
>>>> +
>>>>        err = vring_alloc_state_extra_packed(&vring_packed);
>>>>        if (err)
>>>>                goto err_state_extra;
>>>> @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
>>>>        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>>>>                vq->weak_barriers = false;
>>>>
>>>> +     init_waitqueue_head(&vq->wq);
>>>> +
>>>>        err = vring_alloc_state_extra_split(vring_split);
>>>>        if (err) {
>>>>                kfree(vq);
>>>> @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
>>>>   {
>>>>        struct vring_virtqueue *vq = to_vvq(_vq);
>>>>
>>>> +     wake_up_interruptible(&vq->wq);
>>>> +
>>>>        if (vq->we_own_ring) {
>>>>                if (vq->packed_ring) {
>>>>                        vring_free_queue(vq->vq.vdev,
>>>> @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
>>>>   }
>>>>   EXPORT_SYMBOL_GPL(virtqueue_get_vring);
>>>>
>>>> +int virtqueue_wait_for_used(struct virtqueue *_vq)
>>>> +{
>>>> +     struct vring_virtqueue *vq = to_vvq(_vq);
>>>> +
>>>> +     /* TODO: Tweak the timeout. */
>>>> +     return wait_event_interruptible_timeout(vq->wq,
>>>> +            virtqueue_is_broken(_vq) || more_used(vq), HZ);
>>> There's no good timeout. Let's not even go there, if device goes
>>> bad it should set the need reset bit.
>> The problem is that we can't depend on the device. If it takes too
>> long for the device to respond to cvq, there's a high possibility that
>> the device is buggy or even malicious. We can have a higher timeout
>> here and it should be still better than waiting forever (the cvq
>> commands need to be serialized so it needs to hold a lock anyway
>> (RTNL) ).
>>
>> Thanks
> With a TODO item like this I'd expect this to be an RFC.
> Here's why:
>
> Making driver more robust from device failures is a laudable goal but it's really
> hard to be 100% foolproof here. E.g. device can just block pci reads and
> it would be very hard to recover.


Yes.


>    So I'm going to only merge patches
> like this if they at least theoretically have very little chance
> of breaking existing users.


AFAIK, this is not theoretical, consider:

1) DPU may implement virtio-net CVQ with codes running in CPU
2) VDUSE may want to support CVQ in the future


>
> And note that in most setups, CVQ is only used at startup and then left mostly alone.
>
> Finally, note that lots of guests need virtio to do anything useful at all.
> So just failing commands is not enough to recover - you need to try
> harder maybe by attempting to reset device.


This requires upper layer support, which does not seem to exist in the 
networking subsystem.


> Could be a question of
> policy - might need to make this guest configurable.


Yes.

Thanks


>
>
>
>>>
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
>>>> +
>>>> +void virtqueue_wake_up(struct virtqueue *_vq)
>>>> +{
>>>> +     struct vring_virtqueue *vq = to_vvq(_vq);
>>>> +
>>>> +     wake_up_interruptible(&vq->wq);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(virtqueue_wake_up);
>>>> +
>>>>   MODULE_LICENSE("GPL");
>>>> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
>>>> index dcab9c7e8784..2eb62c774895 100644
>>>> --- a/include/linux/virtio.h
>>>> +++ b/include/linux/virtio.h
>>>> @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
>>>>   void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
>>>>                            void **ctx);
>>>>
>>>> +int virtqueue_wait_for_used(struct virtqueue *vq);
>>>> +void virtqueue_wake_up(struct virtqueue *vq);
>>>> +
>>>>   void virtqueue_disable_cb(struct virtqueue *vq);
>>>>
>>>>   bool virtqueue_enable_cb(struct virtqueue *vq);
>>>> --
>>>> 2.25.1


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-27  7:33         ` Michael S. Tsirkin
@ 2022-12-27  9:12           ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  9:12 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma


在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
>>> But device is still going and will later use the buffers.
>>>
>>> Same for timeout really.
>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
>> If we think the timeout is hard, we can start from the wait.
>>
>> Thanks
> If the goal is to avoid disrupting traffic while CVQ is in use,
> that sounds more reasonable. E.g. someone is turning on promisc,
> a spike in CPU usage might be unwelcome.


Yes, this would be more obvious if UP is used.


>
> things we should be careful to address then:
> 1- debugging. Currently it's easy to see a warning if CPU is stuck
>     in a loop for a while, and we also get a backtrace.
>     E.g. with this - how do we know who has the RTNL?
>     We need to integrate with kernel/watchdog.c for good results
>     and to make sure policy is consistent.


That's fine, will consider this.


> 2- overhead. In a very common scenario when device is in hypervisor,
>     programming timers etc has a very high overhead, at bootup
>     lots of CVQ commands are run and slowing boot down is not nice.
>     let's poll for a bit before waiting?


Then we go back to the question of choosing a good timeout for poll. And 
poll seems problematic in the case of UP, scheduler might not have the 
chance to run.


> 3- suprise removal. need to wake up thread in some way. what about
>     other cases of device breakage - is there a chance this
>     introduces new bugs around that? at least enumerate them please.


The current code did:

1) check for vq->broken
2) wakeup during BAD_RING()

So we won't end up with a never woke up process which should be fine.

Thanks


>
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-27  9:12           ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  9:12 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem


在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
>>> But device is still going and will later use the buffers.
>>>
>>> Same for timeout really.
>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
>> If we think the timeout is hard, we can start from the wait.
>>
>> Thanks
> If the goal is to avoid disrupting traffic while CVQ is in use,
> that sounds more reasonable. E.g. someone is turning on promisc,
> a spike in CPU usage might be unwelcome.


Yes, this would be more obvious if UP is used.


>
> things we should be careful to address then:
> 1- debugging. Currently it's easy to see a warning if CPU is stuck
>     in a loop for a while, and we also get a backtrace.
>     E.g. with this - how do we know who has the RTNL?
>     We need to integrate with kernel/watchdog.c for good results
>     and to make sure policy is consistent.


That's fine, will consider this.


> 2- overhead. In a very common scenario when device is in hypervisor,
>     programming timers etc has a very high overhead, at bootup
>     lots of CVQ commands are run and slowing boot down is not nice.
>     let's poll for a bit before waiting?


Then we go back to the question of choosing a good timeout for poll. And 
poll seems problematic in the case of UP, scheduler might not have the 
chance to run.


> 3- suprise removal. need to wake up thread in some way. what about
>     other cases of device breakage - is there a chance this
>     introduces new bugs around that? at least enumerate them please.


The current code did:

1) check for vq->broken
2) wakeup during BAD_RING()

So we won't end up with a never woke up process which should be fine.

Thanks


>
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-27  6:58         ` Michael S. Tsirkin
@ 2022-12-27  9:17           ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  9:17 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet, kuba,
	maxime.coquelin, pabeni, davem


在 2022/12/27 14:58, Michael S. Tsirkin 写道:
> On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
>> On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>>> On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
>>>> We used to busy wait on the cvq command; this tends to be
>>>> problematic since:
>>>>
>>>> 1) CPU could wait forever on a buggy/malicious device
>>>> 2) There's no way to terminate the process that triggers the cvq
>>>>     command
>>>>
>>>> So this patch switch to use virtqueue_wait_for_used() to sleep with a
>>>> timeout (1s) instead of busy polling for the cvq command forever. This
>>> I don't think that a fixed 1S is a good choice.
>> Well, it could be tweaked to be a little bit longer.
>>
>> One way, as discussed, is to let the device advertise a timeout then
>> the driver can validate if it's valid and use that timeout. But it
>> needs extension to the spec.
> Controlling timeout from device is a good idea, e.g. hardware devices
> would benefit from a shorter timeout, hypervisor devices from a longer
> timeout or no timeout.


Yes.


>
>>> Some of the DPUs are very
>>> lazy for cvq handle.
>> Such design needs to be revisited, cvq (control path) should have a
>> better priority or QOS than datapath.
> Spec says nothing about this, so driver can't assume this either.


Well, my understanding is that it's more than what spec can define or 
it's a kind of best practice.

The current code is one example, that is, driver may choose to busy poll 
which cause spike.


>
>>> In particular, we will also directly break the device.
>> It's kind of hardening for malicious devices.
> ATM no amount of hardening can prevent a malicious hypervisor from
> blocking the guest. Recovering when a hardware device is broken would be
> nice but I think if we do bother then we should try harder to recover,
> such as by driving device reset.


Probably, but as discussed in another thread, it needs co-operation in 
the upper layer (networking core).


>
>
> Also, does your patch break surprise removal? There's no callback
> in this case ATM.


I think not (see reply in another thread).

Thanks


>
>>> I think it is necessary to add a Virtio-Net parameter to allow users to define
>>> this timeout by themselves. Although I don't think this is a good way.
>> Very hard and unfriendly to the end users.
>>
>> Thanks
>>
>>> Thanks.
>>>
>>>
>>>> gives the scheduler a breath and lets the process respond to
>>>> a signal. If the device doesn't respond in the timeout, break the
>>>> device.
>>>>
>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>> ---
>>>> Changes since V1:
>>>> - break the device when timeout
>>>> - get buffer manually since the virtio core check more_used() instead
>>>> ---
>>>>   drivers/net/virtio_net.c | 24 ++++++++++++++++--------
>>>>   1 file changed, 16 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>> index efd9dd55828b..6a2ea64cfcb5 100644
>>>> --- a/drivers/net/virtio_net.c
>>>> +++ b/drivers/net/virtio_net.c
>>>> @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
>>>>        vi->rx_mode_work_enabled = false;
>>>>        spin_unlock_bh(&vi->rx_mode_lock);
>>>>
>>>> +     virtqueue_wake_up(vi->cvq);
>>>>        flush_work(&vi->rx_mode_work);
>>>>   }
>>>>
>>>> @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
>>>>        return !oom;
>>>>   }
>>>>
>>>> +static void virtnet_cvq_done(struct virtqueue *cvq)
>>>> +{
>>>> +     virtqueue_wake_up(cvq);
>>>> +}
>>>> +
>>>>   static void skb_recv_done(struct virtqueue *rvq)
>>>>   {
>>>>        struct virtnet_info *vi = rvq->vdev->priv;
>>>> @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
>>>>        return err;
>>>>   }
>>>>
>>>> +static int virtnet_close(struct net_device *dev);
>>>> +
>>>>   /*
>>>>    * Send command via the control virtqueue and check status.  Commands
>>>>    * supported by the hypervisor, as indicated by feature bits, should
>>>> @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>>>>        if (unlikely(!virtqueue_kick(vi->cvq)))
>>>>                return vi->ctrl->status == VIRTIO_NET_OK;
>>>>
>>>> -     /* Spin for a response, the kick causes an ioport write, trapping
>>>> -      * into the hypervisor, so the request should be handled immediately.
>>>> -      */
>>>> -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
>>>> -            !virtqueue_is_broken(vi->cvq))
>>>> -             cpu_relax();
>>>> +     if (virtqueue_wait_for_used(vi->cvq)) {
>>>> +             virtqueue_get_buf(vi->cvq, &tmp);
>>>> +             return vi->ctrl->status == VIRTIO_NET_OK;
>>>> +     }
>>>>
>>>> -     return vi->ctrl->status == VIRTIO_NET_OK;
>>>> +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
>>>> +     virtio_break_device(vi->vdev);
>>>> +     return VIRTIO_NET_ERR;
>>>>   }
>>>>
>>>>   static int virtnet_set_mac_address(struct net_device *dev, void *p)
>>>> @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
>>>>
>>>>        /* Parameters for control virtqueue, if any */
>>>>        if (vi->has_cvq) {
>>>> -             callbacks[total_vqs - 1] = NULL;
>>>> +             callbacks[total_vqs - 1] = virtnet_cvq_done;
>>>>                names[total_vqs - 1] = "control";
>>>>        }
>>>>
>>>> --
>>>> 2.25.1
>>>>
>>>> _______________________________________________
>>>> Virtualization mailing list
>>>> Virtualization@lists.linux-foundation.org
>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-27  9:17           ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-27  9:17 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Xuan Zhuo, netdev, linux-kernel, virtualization, eperezma,
	edumazet, maxime.coquelin, kuba, pabeni, davem


在 2022/12/27 14:58, Michael S. Tsirkin 写道:
> On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
>> On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>>> On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
>>>> We used to busy wait on the cvq command; this tends to be
>>>> problematic since:
>>>>
>>>> 1) CPU could wait forever on a buggy/malicious device
>>>> 2) There's no way to terminate the process that triggers the cvq
>>>>     command
>>>>
>>>> So this patch switch to use virtqueue_wait_for_used() to sleep with a
>>>> timeout (1s) instead of busy polling for the cvq command forever. This
>>> I don't think that a fixed 1S is a good choice.
>> Well, it could be tweaked to be a little bit longer.
>>
>> One way, as discussed, is to let the device advertise a timeout then
>> the driver can validate if it's valid and use that timeout. But it
>> needs extension to the spec.
> Controlling timeout from device is a good idea, e.g. hardware devices
> would benefit from a shorter timeout, hypervisor devices from a longer
> timeout or no timeout.


Yes.


>
>>> Some of the DPUs are very
>>> lazy for cvq handle.
>> Such design needs to be revisited, cvq (control path) should have a
>> better priority or QOS than datapath.
> Spec says nothing about this, so driver can't assume this either.


Well, my understanding is that it's more than what spec can define or 
it's a kind of best practice.

The current code is one example, that is, driver may choose to busy poll 
which cause spike.


>
>>> In particular, we will also directly break the device.
>> It's kind of hardening for malicious devices.
> ATM no amount of hardening can prevent a malicious hypervisor from
> blocking the guest. Recovering when a hardware device is broken would be
> nice but I think if we do bother then we should try harder to recover,
> such as by driving device reset.


Probably, but as discussed in another thread, it needs co-operation in 
the upper layer (networking core).


>
>
> Also, does your patch break surprise removal? There's no callback
> in this case ATM.


I think not (see reply in another thread).

Thanks


>
>>> I think it is necessary to add a Virtio-Net parameter to allow users to define
>>> this timeout by themselves. Although I don't think this is a good way.
>> Very hard and unfriendly to the end users.
>>
>> Thanks
>>
>>> Thanks.
>>>
>>>
>>>> gives the scheduler a breath and lets the process respond to
>>>> a signal. If the device doesn't respond in the timeout, break the
>>>> device.
>>>>
>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>> ---
>>>> Changes since V1:
>>>> - break the device when timeout
>>>> - get buffer manually since the virtio core check more_used() instead
>>>> ---
>>>>   drivers/net/virtio_net.c | 24 ++++++++++++++++--------
>>>>   1 file changed, 16 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>> index efd9dd55828b..6a2ea64cfcb5 100644
>>>> --- a/drivers/net/virtio_net.c
>>>> +++ b/drivers/net/virtio_net.c
>>>> @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
>>>>        vi->rx_mode_work_enabled = false;
>>>>        spin_unlock_bh(&vi->rx_mode_lock);
>>>>
>>>> +     virtqueue_wake_up(vi->cvq);
>>>>        flush_work(&vi->rx_mode_work);
>>>>   }
>>>>
>>>> @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
>>>>        return !oom;
>>>>   }
>>>>
>>>> +static void virtnet_cvq_done(struct virtqueue *cvq)
>>>> +{
>>>> +     virtqueue_wake_up(cvq);
>>>> +}
>>>> +
>>>>   static void skb_recv_done(struct virtqueue *rvq)
>>>>   {
>>>>        struct virtnet_info *vi = rvq->vdev->priv;
>>>> @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
>>>>        return err;
>>>>   }
>>>>
>>>> +static int virtnet_close(struct net_device *dev);
>>>> +
>>>>   /*
>>>>    * Send command via the control virtqueue and check status.  Commands
>>>>    * supported by the hypervisor, as indicated by feature bits, should
>>>> @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>>>>        if (unlikely(!virtqueue_kick(vi->cvq)))
>>>>                return vi->ctrl->status == VIRTIO_NET_OK;
>>>>
>>>> -     /* Spin for a response, the kick causes an ioport write, trapping
>>>> -      * into the hypervisor, so the request should be handled immediately.
>>>> -      */
>>>> -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
>>>> -            !virtqueue_is_broken(vi->cvq))
>>>> -             cpu_relax();
>>>> +     if (virtqueue_wait_for_used(vi->cvq)) {
>>>> +             virtqueue_get_buf(vi->cvq, &tmp);
>>>> +             return vi->ctrl->status == VIRTIO_NET_OK;
>>>> +     }
>>>>
>>>> -     return vi->ctrl->status == VIRTIO_NET_OK;
>>>> +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
>>>> +     virtio_break_device(vi->vdev);
>>>> +     return VIRTIO_NET_ERR;
>>>>   }
>>>>
>>>>   static int virtnet_set_mac_address(struct net_device *dev, void *p)
>>>> @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
>>>>
>>>>        /* Parameters for control virtqueue, if any */
>>>>        if (vi->has_cvq) {
>>>> -             callbacks[total_vqs - 1] = NULL;
>>>> +             callbacks[total_vqs - 1] = virtnet_cvq_done;
>>>>                names[total_vqs - 1] = "control";
>>>>        }
>>>>
>>>> --
>>>> 2.25.1
>>>>
>>>> _______________________________________________
>>>> Virtualization mailing list
>>>> Virtualization@lists.linux-foundation.org
>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-27  9:17           ` Jason Wang
@ 2022-12-27  9:31             ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  9:31 UTC (permalink / raw)
  To: Jason Wang
  Cc: Xuan Zhuo, netdev, linux-kernel, virtualization, eperezma,
	edumazet, maxime.coquelin, kuba, pabeni, davem

On Tue, Dec 27, 2022 at 05:17:20PM +0800, Jason Wang wrote:
> > > > In particular, we will also directly break the device.
> > > It's kind of hardening for malicious devices.
> > ATM no amount of hardening can prevent a malicious hypervisor from
> > blocking the guest. Recovering when a hardware device is broken would be
> > nice but I think if we do bother then we should try harder to recover,
> > such as by driving device reset.
> 
> 
> Probably, but as discussed in another thread, it needs co-operation in the
> upper layer (networking core).

To track all state? Yea, maybe. For sure it's doable just in virtio,
but if you can find 1-2 other drivers that do this internally
then factoring this out to net core will likely be accepted.

-- 
MST


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-27  9:31             ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  9:31 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet, kuba,
	maxime.coquelin, pabeni, davem

On Tue, Dec 27, 2022 at 05:17:20PM +0800, Jason Wang wrote:
> > > > In particular, we will also directly break the device.
> > > It's kind of hardening for malicious devices.
> > ATM no amount of hardening can prevent a malicious hypervisor from
> > blocking the guest. Recovering when a hardware device is broken would be
> > nice but I think if we do bother then we should try harder to recover,
> > such as by driving device reset.
> 
> 
> Probably, but as discussed in another thread, it needs co-operation in the
> upper layer (networking core).

To track all state? Yea, maybe. For sure it's doable just in virtio,
but if you can find 1-2 other drivers that do this internally
then factoring this out to net core will likely be accepted.

-- 
MST

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-27  9:12           ` Jason Wang
@ 2022-12-27  9:38             ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  9:38 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> 
> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > But device is still going and will later use the buffers.
> > > > 
> > > > Same for timeout really.
> > > Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > If we think the timeout is hard, we can start from the wait.
> > > 
> > > Thanks
> > If the goal is to avoid disrupting traffic while CVQ is in use,
> > that sounds more reasonable. E.g. someone is turning on promisc,
> > a spike in CPU usage might be unwelcome.
> 
> 
> Yes, this would be more obvious if UP is used.
> 
> 
> > 
> > things we should be careful to address then:
> > 1- debugging. Currently it's easy to see a warning if CPU is stuck
> >     in a loop for a while, and we also get a backtrace.
> >     E.g. with this - how do we know who has the RTNL?
> >     We need to integrate with kernel/watchdog.c for good results
> >     and to make sure policy is consistent.
> 
> 
> That's fine, will consider this.
> 
> 
> > 2- overhead. In a very common scenario when device is in hypervisor,
> >     programming timers etc has a very high overhead, at bootup
> >     lots of CVQ commands are run and slowing boot down is not nice.
> >     let's poll for a bit before waiting?
> 
> 
> Then we go back to the question of choosing a good timeout for poll. And
> poll seems problematic in the case of UP, scheduler might not have the
> chance to run.

Poll just a bit :) Seriously I don't know, but at least check once
after kick.

> 
> > 3- surprise removal. need to wake up thread in some way. what about
> >     other cases of device breakage - is there a chance this
> >     introduces new bugs around that? at least enumerate them please.
> 
> 
> The current code did:
> 
> 1) check for vq->broken
> 2) wakeup during BAD_RING()
> 
> So we won't end up with a never woke up process which should be fine.
> 
> Thanks


BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
idea - can cause crashes if kernel panics on error.

> 
> > 
> > 

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-27  9:38             ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-27  9:38 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> 
> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > But device is still going and will later use the buffers.
> > > > 
> > > > Same for timeout really.
> > > Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > If we think the timeout is hard, we can start from the wait.
> > > 
> > > Thanks
> > If the goal is to avoid disrupting traffic while CVQ is in use,
> > that sounds more reasonable. E.g. someone is turning on promisc,
> > a spike in CPU usage might be unwelcome.
> 
> 
> Yes, this would be more obvious is UP is used.
> 
> 
> > 
> > things we should be careful to address then:
> > 1- debugging. Currently it's easy to see a warning if CPU is stuck
> >     in a loop for a while, and we also get a backtrace.
> >     E.g. with this - how do we know who has the RTNL?
> >     We need to integrate with kernel/watchdog.c for good results
> >     and to make sure policy is consistent.
> 
> 
> That's fine, will consider this.
> 
> 
> > 2- overhead. In a very common scenario when device is in hypervisor,
> >     programming timers etc has a very high overhead, at bootup
> >     lots of CVQ commands are run and slowing boot down is not nice.
> >     let's poll for a bit before waiting?
> 
> 
> Then we go back to the question of choosing a good timeout for poll. And
> poll seems problematic in the case of UP, scheduler might not have the
> chance to run.

Poll just a bit :) Seriously I don't know, but at least check once
after kick.

> 
> > 3- surprise removal. need to wake up thread in some way. what about
> >     other cases of device breakage - is there a chance this
> >     introduces new bugs around that? at least enumerate them please.
> 
> 
> The current code did:
> 
> 1) check for vq->broken
> 2) wakeup during BAD_RING()
> 
> So we won't end up with a never woke up process which should be fine.
> 
> Thanks


BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
idea - can cause crashes if kernel panics on error.

> 
> > 
> > 


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-27  9:38             ` Michael S. Tsirkin
@ 2022-12-28  6:34               ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-28  6:34 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma


在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
>> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
>>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
>>>>> But device is still going and will later use the buffers.
>>>>>
>>>>> Same for timeout really.
>>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
>>>> If we think the timeout is hard, we can start from the wait.
>>>>
>>>> Thanks
>>> If the goal is to avoid disrupting traffic while CVQ is in use,
>>> that sounds more reasonable. E.g. someone is turning on promisc,
>>> a spike in CPU usage might be unwelcome.
>>
>> Yes, this would be more obvious is UP is used.
>>
>>
>>> things we should be careful to address then:
>>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
>>>      in a loop for a while, and we also get a backtrace.
>>>      E.g. with this - how do we know who has the RTNL?
>>>      We need to integrate with kernel/watchdog.c for good results
>>>      and to make sure policy is consistent.
>>
>> That's fine, will consider this.
>>
>>
>>> 2- overhead. In a very common scenario when device is in hypervisor,
>>>      programming timers etc has a very high overhead, at bootup
>>>      lots of CVQ commands are run and slowing boot down is not nice.
>>>      let's poll for a bit before waiting?
>>
>> Then we go back to the question of choosing a good timeout for poll. And
>> poll seems problematic in the case of UP, scheduler might not have the
>> chance to run.
> Poll just a bit :) Seriously I don't know, but at least check once
> after kick.


I think it is what the current code did where the condition will be 
checked before trying to sleep in the wait_event().


>
>>> 3- suprise removal. need to wake up thread in some way. what about
>>>      other cases of device breakage - is there a chance this
>>>      introduces new bugs around that? at least enumerate them please.
>>
>> The current code did:
>>
>> 1) check for vq->broken
>> 2) wakeup during BAD_RING()
>>
>> So we won't end up with a never woke up process which should be fine.
>>
>> Thanks
>
> BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> idea - can cause crashes if kernel panics on error.


Yes, it's better to use __virtqueue_break() instead.

But consider we will start from a wait first, I will limit the changes 
in virtio-net without bothering virtio core.

Thanks


>
>>>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-28  6:34               ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-28  6:34 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem


在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
>> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
>>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
>>>>> But device is still going and will later use the buffers.
>>>>>
>>>>> Same for timeout really.
>>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
>>>> If we think the timeout is hard, we can start from the wait.
>>>>
>>>> Thanks
>>> If the goal is to avoid disrupting traffic while CVQ is in use,
>>> that sounds more reasonable. E.g. someone is turning on promisc,
>>> a spike in CPU usage might be unwelcome.
>>
>> Yes, this would be more obvious is UP is used.
>>
>>
>>> things we should be careful to address then:
>>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
>>>      in a loop for a while, and we also get a backtrace.
>>>      E.g. with this - how do we know who has the RTNL?
>>>      We need to integrate with kernel/watchdog.c for good results
>>>      and to make sure policy is consistent.
>>
>> That's fine, will consider this.
>>
>>
>>> 2- overhead. In a very common scenario when device is in hypervisor,
>>>      programming timers etc has a very high overhead, at bootup
>>>      lots of CVQ commands are run and slowing boot down is not nice.
>>>      let's poll for a bit before waiting?
>>
>> Then we go back to the question of choosing a good timeout for poll. And
>> poll seems problematic in the case of UP, scheduler might not have the
>> chance to run.
> Poll just a bit :) Seriously I don't know, but at least check once
> after kick.


I think it is what the current code did where the condition will be 
checked before trying to sleep in the wait_event().


>
>>> 3- suprise removal. need to wake up thread in some way. what about
>>>      other cases of device breakage - is there a chance this
>>>      introduces new bugs around that? at least enumerate them please.
>>
>> The current code did:
>>
>> 1) check for vq->broken
>> 2) wakeup during BAD_RING()
>>
>> So we won't end up with a never woke up process which should be fine.
>>
>> Thanks
>
> BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> idea - can cause crashes if kernel panics on error.


Yes, it's better to use __virtqueue_break() instead.

But consider we will start from a wait first, I will limit the changes 
in virtio-net without bothering virtio core.

Thanks


>
>>>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-27  9:31             ` Michael S. Tsirkin
@ 2022-12-28  6:35               ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-28  6:35 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet, kuba,
	maxime.coquelin, pabeni, davem


在 2022/12/27 17:31, Michael S. Tsirkin 写道:
> On Tue, Dec 27, 2022 at 05:17:20PM +0800, Jason Wang wrote:
>>>>> In particular, we will also directly break the device.
>>>> It's kind of hardening for malicious devices.
>>> ATM no amount of hardening can prevent a malicious hypervisor from
>>> blocking the guest. Recovering when a hardware device is broken would be
>>> nice but I think if we do bother then we should try harder to recover,
>>> such as by driving device reset.
>>
>> Probably, but as discussed in another thread, it needs co-operation in the
>> upper layer (networking core).
> To track all state? Yea, maybe. For sure it's doable just in virtio,
> but if you can find 1-2 other drivers that do this internally
> then factoring this out to net core will likely be accepted.


One thing that might be useful is to reuse tx_timeout() but current 
virtio-net doesn't do more than a simple warning (other drivers may try 
to reset).

So I would leave it for future investigation.

Thanks


>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-28  6:35               ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-28  6:35 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Xuan Zhuo, netdev, linux-kernel, virtualization, eperezma,
	edumazet, maxime.coquelin, kuba, pabeni, davem


在 2022/12/27 17:31, Michael S. Tsirkin 写道:
> On Tue, Dec 27, 2022 at 05:17:20PM +0800, Jason Wang wrote:
>>>>> In particular, we will also directly break the device.
>>>> It's kind of hardening for malicious devices.
>>> ATM no amount of hardening can prevent a malicious hypervisor from
>>> blocking the guest. Recovering when a hardware device is broken would be
>>> nice but I think if we do bother then we should try harder to recover,
>>> such as by driving device reset.
>>
>> Probably, but as discussed in another thread, it needs co-operation in the
>> upper layer (networking core).
> To track all state? Yea, maybe. For sure it's doable just in virtio,
> but if you can find 1-2 other drivers that do this internally
> then factoring this out to net core will likely be accepted.


One thing that might be useful is to reuse tx_timeout() but current 
virtio-net doesn't do more than a simple warning (other drivers may try 
to reset).

So I would leave it for future investigation.

Thanks


>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-27  6:58         ` Michael S. Tsirkin
@ 2022-12-28  8:31           ` Xuan Zhuo
  -1 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-28  8:31 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem, Jason Wang

On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > We used to busy waiting on the cvq command this tends to be
> > > > problematic since:
> > > >
> > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > 2) There's no wait to terminate the process that triggers the cvq
> > > >    command
> > > >
> > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > >
> > > I don't think that a fixed 1S is a good choice.
> >
> > Well, it could be tweaked to be a little bit longer.
> >
> > One way, as discussed, is to let the device advertise a timeout then
> > the driver can validate if it's valid and use that timeout. But it
> > needs extension to the spec.
>
> Controlling timeout from device is a good idea, e.g. hardware devices
> would benefit from a shorter timeout, hypervisor devices from a longer
> timeout or no timeout.

Yes. That is good.

Before introducing such a feature, I would personally prefer to simply
"wait", rather than define a timeout.

Thanks.


>
> >
> > > Some of the DPUs are very
> > > lazy for cvq handle.
> >
> > Such design needs to be revisited, cvq (control path) should have a
> > better priority or QOS than datapath.
>
> Spec says nothing about this, so driver can't assume this either.
>
> > > In particular, we will also directly break the device.
> >
> > It's kind of hardening for malicious devices.
>
> ATM no amount of hardening can prevent a malicious hypervisor from
> blocking the guest. Recovering when a hardware device is broken would be
> nice but I think if we do bother then we should try harder to recover,
> such as by driving device reset.
>
>
> Also, does your patch break surprise removal? There's no callback
> in this case ATM.
>
> > >
> > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > this timeout by themselves. Although I don't think this is a good way.
> >
> > Very hard and unfriendly to the end users.
> >
> > Thanks
> >
> > >
> > > Thanks.
> > >
> > >
> > > > gives the scheduler a breath and can let the process can respond to
> > > > asignal. If the device doesn't respond in the timeout, break the
> > > > device.
> > > >
> > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > ---
> > > > Changes since V1:
> > > > - break the device when timeout
> > > > - get buffer manually since the virtio core check more_used() instead
> > > > ---
> > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > --- a/drivers/net/virtio_net.c
> > > > +++ b/drivers/net/virtio_net.c
> > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > >       vi->rx_mode_work_enabled = false;
> > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > >
> > > > +     virtqueue_wake_up(vi->cvq);
> > > >       flush_work(&vi->rx_mode_work);
> > > >  }
> > > >
> > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > >       return !oom;
> > > >  }
> > > >
> > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > +{
> > > > +     virtqueue_wake_up(cvq);
> > > > +}
> > > > +
> > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > >  {
> > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > >       return err;
> > > >  }
> > > >
> > > > +static int virtnet_close(struct net_device *dev);
> > > > +
> > > >  /*
> > > >   * Send command via the control virtqueue and check status.  Commands
> > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > >
> > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > -      */
> > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > -            !virtqueue_is_broken(vi->cvq))
> > > > -             cpu_relax();
> > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > +     }
> > > >
> > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > +     virtio_break_device(vi->vdev);
> > > > +     return VIRTIO_NET_ERR;
> > > >  }
> > > >
> > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > >
> > > >       /* Parameters for control virtqueue, if any */
> > > >       if (vi->has_cvq) {
> > > > -             callbacks[total_vqs - 1] = NULL;
> > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > >               names[total_vqs - 1] = "control";
> > > >       }
> > > >
> > > > --
> > > > 2.25.1
> > > >
> > > > _______________________________________________
> > > > Virtualization mailing list
> > > > Virtualization@lists.linux-foundation.org
> > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > >
>

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-28  8:31           ` Xuan Zhuo
  0 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-28  8:31 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet, kuba,
	maxime.coquelin, pabeni, davem

On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > We used to busy waiting on the cvq command this tends to be
> > > > problematic since:
> > > >
> > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > 2) There's no wait to terminate the process that triggers the cvq
> > > >    command
> > > >
> > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > >
> > > I don't think that a fixed 1S is a good choice.
> >
> > Well, it could be tweaked to be a little bit longer.
> >
> > One way, as discussed, is to let the device advertise a timeout then
> > the driver can validate if it's valid and use that timeout. But it
> > needs extension to the spec.
>
> Controlling timeout from device is a good idea, e.g. hardware devices
> would benefit from a shorter timeout, hypervisor devices from a longer
> timeout or no timeout.

Yes. That is good.

Before introducing such a feature, I would personally prefer to simply
"wait", rather than define a timeout.

Thanks.


>
> >
> > > Some of the DPUs are very
> > > lazy for cvq handle.
> >
> > Such design needs to be revisited, cvq (control path) should have a
> > better priority or QOS than datapath.
>
> Spec says nothing about this, so driver can't assume this either.
>
> > > In particular, we will also directly break the device.
> >
> > It's kind of hardening for malicious devices.
>
> ATM no amount of hardening can prevent a malicious hypervisor from
> blocking the guest. Recovering when a hardware device is broken would be
> nice but I think if we do bother then we should try harder to recover,
> such as by driving device reset.
>
>
> Also, does your patch break surprise removal? There's no callback
> in this case ATM.
>
> > >
> > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > this timeout by themselves. Although I don't think this is a good way.
> >
> > Very hard and unfriendly to the end users.
> >
> > Thanks
> >
> > >
> > > Thanks.
> > >
> > >
> > > > gives the scheduler a breath and can let the process can respond to
> > > > asignal. If the device doesn't respond in the timeout, break the
> > > > device.
> > > >
> > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > ---
> > > > Changes since V1:
> > > > - break the device when timeout
> > > > - get buffer manually since the virtio core check more_used() instead
> > > > ---
> > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > --- a/drivers/net/virtio_net.c
> > > > +++ b/drivers/net/virtio_net.c
> > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > >       vi->rx_mode_work_enabled = false;
> > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > >
> > > > +     virtqueue_wake_up(vi->cvq);
> > > >       flush_work(&vi->rx_mode_work);
> > > >  }
> > > >
> > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > >       return !oom;
> > > >  }
> > > >
> > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > +{
> > > > +     virtqueue_wake_up(cvq);
> > > > +}
> > > > +
> > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > >  {
> > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > >       return err;
> > > >  }
> > > >
> > > > +static int virtnet_close(struct net_device *dev);
> > > > +
> > > >  /*
> > > >   * Send command via the control virtqueue and check status.  Commands
> > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > >
> > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > -      */
> > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > -            !virtqueue_is_broken(vi->cvq))
> > > > -             cpu_relax();
> > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > +     }
> > > >
> > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > +     virtio_break_device(vi->vdev);
> > > > +     return VIRTIO_NET_ERR;
> > > >  }
> > > >
> > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > >
> > > >       /* Parameters for control virtqueue, if any */
> > > >       if (vi->has_cvq) {
> > > > -             callbacks[total_vqs - 1] = NULL;
> > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > >               names[total_vqs - 1] = "control";
> > > >       }
> > > >
> > > > --
> > > > 2.25.1
> > > >
> > > > _______________________________________________
> > > > Virtualization mailing list
> > > > Virtualization@lists.linux-foundation.org
> > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-27  4:33       ` Jason Wang
@ 2022-12-28  8:39         ` Xuan Zhuo
  -1 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-28  8:39 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem, mst

On Tue, 27 Dec 2022 12:33:53 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > We used to busy waiting on the cvq command this tends to be
> > > problematic since:
> > >
> > > 1) CPU could wait for ever on a buggy/malicous device
> > > 2) There's no wait to terminate the process that triggers the cvq
> > >    command
> > >
> > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > timeout (1s) instead of busy polling for the cvq command forever. This
> >
> > I don't think that a fixed 1S is a good choice.
>
> Well, it could be tweaked to be a little bit longer.
>
> One way, as discussed, is to let the device advertise a timeout then
> the driver can validate if it's valid and use that timeout. But it
> needs extension to the spec.
>
> > Some of the DPUs are very
> > lazy for cvq handle.
>
> Such design needs to be revisited, cvq (control path) should have a
> better priority or QOS than datapath.
>
> > In particular, we will also directly break the device.
>
> It's kind of hardening for malicious devices.

Judging a device to be malicious based on a timeout alone seems too
arbitrary to me.

Thanks.


>
> >
> > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > this timeout by themselves. Although I don't think this is a good way.
>
> Very hard and unfriendly to the end users.
>
> Thanks
>
> >
> > Thanks.
> >
> >
> > > gives the scheduler a breath and can let the process can respond to
> > > asignal. If the device doesn't respond in the timeout, break the
> > > device.
> > >
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > > Changes since V1:
> > > - break the device when timeout
> > > - get buffer manually since the virtio core check more_used() instead
> > > ---
> > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > >       vi->rx_mode_work_enabled = false;
> > >       spin_unlock_bh(&vi->rx_mode_lock);
> > >
> > > +     virtqueue_wake_up(vi->cvq);
> > >       flush_work(&vi->rx_mode_work);
> > >  }
> > >
> > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > >       return !oom;
> > >  }
> > >
> > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > +{
> > > +     virtqueue_wake_up(cvq);
> > > +}
> > > +
> > >  static void skb_recv_done(struct virtqueue *rvq)
> > >  {
> > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > >       return err;
> > >  }
> > >
> > > +static int virtnet_close(struct net_device *dev);
> > > +
> > >  /*
> > >   * Send command via the control virtqueue and check status.  Commands
> > >   * supported by the hypervisor, as indicated by feature bits, should
> > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > >
> > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > -      * into the hypervisor, so the request should be handled immediately.
> > > -      */
> > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > -            !virtqueue_is_broken(vi->cvq))
> > > -             cpu_relax();
> > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > +     }
> > >
> > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > +     virtio_break_device(vi->vdev);
> > > +     return VIRTIO_NET_ERR;
> > >  }
> > >
> > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > >
> > >       /* Parameters for control virtqueue, if any */
> > >       if (vi->has_cvq) {
> > > -             callbacks[total_vqs - 1] = NULL;
> > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > >               names[total_vqs - 1] = "control";
> > >       }
> > >
> > > --
> > > 2.25.1
> > >
> > > _______________________________________________
> > > Virtualization mailing list
> > > Virtualization@lists.linux-foundation.org
> > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> >
>

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-28  8:39         ` Xuan Zhuo
  0 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-28  8:39 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, netdev, linux-kernel, virtualization, eperezma, edumazet,
	kuba, maxime.coquelin, pabeni, davem

On Tue, 27 Dec 2022 12:33:53 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > We used to busy waiting on the cvq command this tends to be
> > > problematic since:
> > >
> > > 1) CPU could wait for ever on a buggy/malicous device
> > > 2) There's no wait to terminate the process that triggers the cvq
> > >    command
> > >
> > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > timeout (1s) instead of busy polling for the cvq command forever. This
> >
> > I don't think that a fixed 1S is a good choice.
>
> Well, it could be tweaked to be a little bit longer.
>
> One way, as discussed, is to let the device advertise a timeout then
> the driver can validate if it's valid and use that timeout. But it
> needs extension to the spec.
>
> > Some of the DPUs are very
> > lazy for cvq handle.
>
> Such design needs to be revisited, cvq (control path) should have a
> better priority or QOS than datapath.
>
> > In particular, we will also directly break the device.
>
> It's kind of hardening for malicious devices.

Judging a device to be malicious based on a timeout alone seems too
arbitrary to me.

Thanks.


>
> >
> > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > this timeout by themselves. Although I don't think this is a good way.
>
> Very hard and unfriendly to the end users.
>
> Thanks
>
> >
> > Thanks.
> >
> >
> > > gives the scheduler a breath and can let the process can respond to
> > > asignal. If the device doesn't respond in the timeout, break the
> > > device.
> > >
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > > Changes since V1:
> > > - break the device when timeout
> > > - get buffer manually since the virtio core check more_used() instead
> > > ---
> > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > >       vi->rx_mode_work_enabled = false;
> > >       spin_unlock_bh(&vi->rx_mode_lock);
> > >
> > > +     virtqueue_wake_up(vi->cvq);
> > >       flush_work(&vi->rx_mode_work);
> > >  }
> > >
> > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > >       return !oom;
> > >  }
> > >
> > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > +{
> > > +     virtqueue_wake_up(cvq);
> > > +}
> > > +
> > >  static void skb_recv_done(struct virtqueue *rvq)
> > >  {
> > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > >       return err;
> > >  }
> > >
> > > +static int virtnet_close(struct net_device *dev);
> > > +
> > >  /*
> > >   * Send command via the control virtqueue and check status.  Commands
> > >   * supported by the hypervisor, as indicated by feature bits, should
> > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > >
> > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > -      * into the hypervisor, so the request should be handled immediately.
> > > -      */
> > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > -            !virtqueue_is_broken(vi->cvq))
> > > -             cpu_relax();
> > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > +     }
> > >
> > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > +     virtio_break_device(vi->vdev);
> > > +     return VIRTIO_NET_ERR;
> > >  }
> > >
> > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > >
> > >       /* Parameters for control virtqueue, if any */
> > >       if (vi->has_cvq) {
> > > -             callbacks[total_vqs - 1] = NULL;
> > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > >               names[total_vqs - 1] = "control";
> > >       }
> > >
> > > --
> > > 2.25.1
> > >
> > > _______________________________________________
> > > Virtualization mailing list
> > > Virtualization@lists.linux-foundation.org
> > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-28  8:31           ` Xuan Zhuo
@ 2022-12-28 11:41             ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-28 11:41 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, kuba, maxime.coquelin, pabeni, davem

On Wed, Dec 28, 2022 at 4:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > We used to busy waiting on the cvq command this tends to be
> > > > > problematic since:
> > > > >
> > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > >    command
> > > > >
> > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > >
> > > > I don't think that a fixed 1S is a good choice.
> > >
> > > Well, it could be tweaked to be a little bit longer.
> > >
> > > One way, as discussed, is to let the device advertise a timeout then
> > > the driver can validate if it's valid and use that timeout. But it
> > > needs extension to the spec.
> >
> > Controlling timeout from device is a good idea, e.g. hardware devices
> > would benefit from a shorter timeout, hypervisor devices from a longer
> > timeout or no timeout.
>
> Yes. That is good.
>
> Before introducing this feature, I personally like to use "wait", rather than
> define a timeout.

Note that the driver still needs to validate what the device advertises
in order to avoid an infinite wait.

Thanks

>
> Thanks.
>
>
> >
> > >
> > > > Some of the DPUs are very
> > > > lazy for cvq handle.
> > >
> > > Such design needs to be revisited, cvq (control path) should have a
> > > better priority or QOS than datapath.
> >
> > Spec says nothing about this, so driver can't assume this either.
> >
> > > > In particular, we will also directly break the device.
> > >
> > > It's kind of hardening for malicious devices.
> >
> > ATM no amount of hardening can prevent a malicious hypervisor from
> > blocking the guest. Recovering when a hardware device is broken would be
> > nice but I think if we do bother then we should try harder to recover,
> > such as by driving device reset.
> >
> >
> > Also, does your patch break surprise removal? There's no callback
> > in this case ATM.
> >
> > > >
> > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > this timeout by themselves. Although I don't think this is a good way.
> > >
> > > Very hard and unfriendly to the end users.
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks.
> > > >
> > > >
> > > > > gives the scheduler a breath and can let the process can respond to
> > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > device.
> > > > >
> > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > ---
> > > > > Changes since V1:
> > > > > - break the device when timeout
> > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > ---
> > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > --- a/drivers/net/virtio_net.c
> > > > > +++ b/drivers/net/virtio_net.c
> > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > >       vi->rx_mode_work_enabled = false;
> > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > >
> > > > > +     virtqueue_wake_up(vi->cvq);
> > > > >       flush_work(&vi->rx_mode_work);
> > > > >  }
> > > > >
> > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > >       return !oom;
> > > > >  }
> > > > >
> > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > +{
> > > > > +     virtqueue_wake_up(cvq);
> > > > > +}
> > > > > +
> > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > >  {
> > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > >       return err;
> > > > >  }
> > > > >
> > > > > +static int virtnet_close(struct net_device *dev);
> > > > > +
> > > > >  /*
> > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > >
> > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > -      */
> > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > -             cpu_relax();
> > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > +     }
> > > > >
> > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > +     virtio_break_device(vi->vdev);
> > > > > +     return VIRTIO_NET_ERR;
> > > > >  }
> > > > >
> > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > >
> > > > >       /* Parameters for control virtqueue, if any */
> > > > >       if (vi->has_cvq) {
> > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > >               names[total_vqs - 1] = "control";
> > > > >       }
> > > > >
> > > > > --
> > > > > 2.25.1
> > > > >
> > > > > _______________________________________________
> > > > > Virtualization mailing list
> > > > > Virtualization@lists.linux-foundation.org
> > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > >
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-28 11:41             ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-28 11:41 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, maxime.coquelin, kuba, pabeni, davem

On Wed, Dec 28, 2022 at 4:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > We used to busy waiting on the cvq command this tends to be
> > > > > problematic since:
> > > > >
> > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > >    command
> > > > >
> > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > >
> > > > I don't think that a fixed 1S is a good choice.
> > >
> > > Well, it could be tweaked to be a little bit longer.
> > >
> > > One way, as discussed, is to let the device advertise a timeout then
> > > the driver can validate if it's valid and use that timeout. But it
> > > needs extension to the spec.
> >
> > Controlling timeout from device is a good idea, e.g. hardware devices
> > would benefit from a shorter timeout, hypervisor devices from a longer
> > timeout or no timeout.
>
> Yes. That is good.
>
> Before introducing this feature, I personally like to use "wait", rather than
> define a timeout.

Note that the driver still needs to validate what device advertises to
avoid infinite wait.

Thanks

>
> Thanks.
>
>
> >
> > >
> > > > Some of the DPUs are very
> > > > lazy for cvq handle.
> > >
> > > Such design needs to be revisited, cvq (control path) should have a
> > > better priority or QOS than datapath.
> >
> > Spec says nothing about this, so driver can't assume this either.
> >
> > > > In particular, we will also directly break the device.
> > >
> > > It's kind of hardening for malicious devices.
> >
> > ATM no amount of hardening can prevent a malicious hypervisor from
> > blocking the guest. Recovering when a hardware device is broken would be
> > nice but I think if we do bother then we should try harder to recover,
> > such as by driving device reset.
> >
> >
> > Also, does your patch break surprise removal? There's no callback
> > in this case ATM.
> >
> > > >
> > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > this timeout by themselves. Although I don't think this is a good way.
> > >
> > > Very hard and unfriendly to the end users.
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks.
> > > >
> > > >
> > > > > gives the scheduler a breath and can let the process can respond to
> > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > device.
> > > > >
> > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > ---
> > > > > Changes since V1:
> > > > > - break the device when timeout
> > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > ---
> > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > --- a/drivers/net/virtio_net.c
> > > > > +++ b/drivers/net/virtio_net.c
> > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > >       vi->rx_mode_work_enabled = false;
> > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > >
> > > > > +     virtqueue_wake_up(vi->cvq);
> > > > >       flush_work(&vi->rx_mode_work);
> > > > >  }
> > > > >
> > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > >       return !oom;
> > > > >  }
> > > > >
> > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > +{
> > > > > +     virtqueue_wake_up(cvq);
> > > > > +}
> > > > > +
> > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > >  {
> > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > >       return err;
> > > > >  }
> > > > >
> > > > > +static int virtnet_close(struct net_device *dev);
> > > > > +
> > > > >  /*
> > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > >
> > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > -      */
> > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > -             cpu_relax();
> > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > +     }
> > > > >
> > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > +     virtio_break_device(vi->vdev);
> > > > > +     return VIRTIO_NET_ERR;
> > > > >  }
> > > > >
> > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > >
> > > > >       /* Parameters for control virtqueue, if any */
> > > > >       if (vi->has_cvq) {
> > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > >               names[total_vqs - 1] = "control";
> > > > >       }
> > > > >
> > > > > --
> > > > > 2.25.1
> > > > >
> > > > > _______________________________________________
> > > > > Virtualization mailing list
> > > > > Virtualization@lists.linux-foundation.org
> > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > >
> >
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-28  8:39         ` Xuan Zhuo
@ 2022-12-28 11:43           ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-28 11:43 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem, mst

On Wed, Dec 28, 2022 at 4:40 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Tue, 27 Dec 2022 12:33:53 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > We used to busy waiting on the cvq command this tends to be
> > > > problematic since:
> > > >
> > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > 2) There's no wait to terminate the process that triggers the cvq
> > > >    command
> > > >
> > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > >
> > > I don't think that a fixed 1S is a good choice.
> >
> > Well, it could be tweaked to be a little bit longer.
> >
> > One way, as discussed, is to let the device advertise a timeout then
> > the driver can validate if it's valid and use that timeout. But it
> > needs extension to the spec.
> >
> > > Some of the DPUs are very
> > > lazy for cvq handle.
> >
> > Such design needs to be revisited, cvq (control path) should have a
> > better priority or QOS than datapath.
> >
> > > In particular, we will also directly break the device.
> >
> > It's kind of hardening for malicious devices.
>
> Just based on timeout, it is judged that it is a malicious device. I think it is
> too arbitrary.

Drivers have very little information to make the decision. So it's
really a balance.

We can start with a very long timeout like 10 minutes. Otherwise a
buggy/malicious device will block a lot of important things (reboot,
modprobe) even if the scheduler is still functional.

Thanks

>
> Thanks.
>
>
> >
> > >
> > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > this timeout by themselves. Although I don't think this is a good way.
> >
> > Very hard and unfriendly to the end users.
> >
> > Thanks
> >
> > >
> > > Thanks.
> > >
> > >
> > > > gives the scheduler a breath and can let the process can respond to
> > > > asignal. If the device doesn't respond in the timeout, break the
> > > > device.
> > > >
> > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > ---
> > > > Changes since V1:
> > > > - break the device when timeout
> > > > - get buffer manually since the virtio core check more_used() instead
> > > > ---
> > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > --- a/drivers/net/virtio_net.c
> > > > +++ b/drivers/net/virtio_net.c
> > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > >       vi->rx_mode_work_enabled = false;
> > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > >
> > > > +     virtqueue_wake_up(vi->cvq);
> > > >       flush_work(&vi->rx_mode_work);
> > > >  }
> > > >
> > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > >       return !oom;
> > > >  }
> > > >
> > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > +{
> > > > +     virtqueue_wake_up(cvq);
> > > > +}
> > > > +
> > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > >  {
> > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > >       return err;
> > > >  }
> > > >
> > > > +static int virtnet_close(struct net_device *dev);
> > > > +
> > > >  /*
> > > >   * Send command via the control virtqueue and check status.  Commands
> > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > >
> > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > -      */
> > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > -            !virtqueue_is_broken(vi->cvq))
> > > > -             cpu_relax();
> > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > +     }
> > > >
> > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > +     virtio_break_device(vi->vdev);
> > > > +     return VIRTIO_NET_ERR;
> > > >  }
> > > >
> > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > >
> > > >       /* Parameters for control virtqueue, if any */
> > > >       if (vi->has_cvq) {
> > > > -             callbacks[total_vqs - 1] = NULL;
> > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > >               names[total_vqs - 1] = "control";
> > > >       }
> > > >
> > > > --
> > > > 2.25.1
> > > >
> > > > _______________________________________________
> > > > Virtualization mailing list
> > > > Virtualization@lists.linux-foundation.org
> > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > >
> >
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-28 11:43           ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-28 11:43 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: mst, netdev, linux-kernel, virtualization, eperezma, edumazet,
	kuba, maxime.coquelin, pabeni, davem

On Wed, Dec 28, 2022 at 4:40 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Tue, 27 Dec 2022 12:33:53 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > We used to busy waiting on the cvq command this tends to be
> > > > problematic since:
> > > >
> > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > 2) There's no wait to terminate the process that triggers the cvq
> > > >    command
> > > >
> > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > >
> > > I don't think that a fixed 1S is a good choice.
> >
> > Well, it could be tweaked to be a little bit longer.
> >
> > One way, as discussed, is to let the device advertise a timeout then
> > the driver can validate if it's valid and use that timeout. But it
> > needs extension to the spec.
> >
> > > Some of the DPUs are very
> > > lazy for cvq handle.
> >
> > Such design needs to be revisited, cvq (control path) should have a
> > better priority or QOS than datapath.
> >
> > > In particular, we will also directly break the device.
> >
> > It's kind of hardening for malicious devices.
>
> Just based on timeout, it is judged that it is a malicious device. I think it is
> too arbitrary.

Drivers have very little information to make the decision. So it's
really a balance.

We can start with a very long timeout like 10 minutes. Otherwise a
buggy/malicious device will block a lot of important things (reboot,
modprobe) even if the scheduler is still functional.

Thanks

>
> Thanks.
>
>
> >
> > >
> > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > this timeout by themselves. Although I don't think this is a good way.
> >
> > Very hard and unfriendly to the end users.
> >
> > Thanks
> >
> > >
> > > Thanks.
> > >
> > >
> > > > gives the scheduler a breath and can let the process can respond to
> > > > asignal. If the device doesn't respond in the timeout, break the
> > > > device.
> > > >
> > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > ---
> > > > Changes since V1:
> > > > - break the device when timeout
> > > > - get buffer manually since the virtio core check more_used() instead
> > > > ---
> > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > --- a/drivers/net/virtio_net.c
> > > > +++ b/drivers/net/virtio_net.c
> > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > >       vi->rx_mode_work_enabled = false;
> > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > >
> > > > +     virtqueue_wake_up(vi->cvq);
> > > >       flush_work(&vi->rx_mode_work);
> > > >  }
> > > >
> > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > >       return !oom;
> > > >  }
> > > >
> > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > +{
> > > > +     virtqueue_wake_up(cvq);
> > > > +}
> > > > +
> > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > >  {
> > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > >       return err;
> > > >  }
> > > >
> > > > +static int virtnet_close(struct net_device *dev);
> > > > +
> > > >  /*
> > > >   * Send command via the control virtqueue and check status.  Commands
> > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > >
> > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > -      */
> > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > -            !virtqueue_is_broken(vi->cvq))
> > > > -             cpu_relax();
> > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > +     }
> > > >
> > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > +     virtio_break_device(vi->vdev);
> > > > +     return VIRTIO_NET_ERR;
> > > >  }
> > > >
> > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > >
> > > >       /* Parameters for control virtqueue, if any */
> > > >       if (vi->has_cvq) {
> > > > -             callbacks[total_vqs - 1] = NULL;
> > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > >               names[total_vqs - 1] = "control";
> > > >       }
> > > >
> > > > --
> > > > 2.25.1
> > > >
> > > > _______________________________________________
> > > > Virtualization mailing list
> > > > Virtualization@lists.linux-foundation.org
> > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > >
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-28  6:34               ` Jason Wang
@ 2022-12-28 11:53                 ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-28 11:53 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> >>>>> But device is still going and will later use the buffers.
> >>>>>
> >>>>> Same for timeout really.
> >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> >>>> If we think the timeout is hard, we can start from the wait.
> >>>>
> >>>> Thanks
> >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> >>> that sounds more reasonable. E.g. someone is turning on promisc,
> >>> a spike in CPU usage might be unwelcome.
> >>
> >> Yes, this would be more obvious is UP is used.
> >>
> >>
> >>> things we should be careful to address then:
> >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> >>>      in a loop for a while, and we also get a backtrace.
> >>>      E.g. with this - how do we know who has the RTNL?
> >>>      We need to integrate with kernel/watchdog.c for good results
> >>>      and to make sure policy is consistent.
> >>
> >> That's fine, will consider this.

So after some investigation, it seems watchdog.c doesn't help. The
only exported helper is touch_softlockup_watchdog(), which tries to
avoid triggering the lockup warning for known slow paths.

And before the patch, we ended up with a real infinite loop, which could
be caught by the RCU stall detector; that is not the case for the sleep.
What we can probably do is emit a periodic netdev_err().

Thanks

> >>
> >>
> >>> 2- overhead. In a very common scenario when device is in hypervisor,
> >>>      programming timers etc has a very high overhead, at bootup
> >>>      lots of CVQ commands are run and slowing boot down is not nice.
> >>>      let's poll for a bit before waiting?
> >>
> >> Then we go back to the question of choosing a good timeout for poll. And
> >> poll seems problematic in the case of UP, scheduler might not have the
> >> chance to run.
> > Poll just a bit :) Seriously I don't know, but at least check once
> > after kick.
>
>
> I think it is what the current code did where the condition will be
> check before trying to sleep in the wait_event().
>
>
> >
> >>> 3- suprise removal. need to wake up thread in some way. what about
> >>>      other cases of device breakage - is there a chance this
> >>>      introduces new bugs around that? at least enumerate them please.
> >>
> >> The current code did:
> >>
> >> 1) check for vq->broken
> >> 2) wakeup during BAD_RING()
> >>
> >> So we won't end up with a never woke up process which should be fine.
> >>
> >> Thanks
> >
> > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > idea - can cause crashes if kernel panics on error.
>
>
> Yes, it's better to use __virtqueue_break() instead.
>
> But consider we will start from a wait first, I will limit the changes
> in virtio-net without bothering virtio core.
>
> Thanks
>
>
> >
> >>>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-28 11:53                 ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-28 11:53 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> >>>>> But device is still going and will later use the buffers.
> >>>>>
> >>>>> Same for timeout really.
> >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> >>>> If we think the timeout is hard, we can start from the wait.
> >>>>
> >>>> Thanks
> >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> >>> that sounds more reasonable. E.g. someone is turning on promisc,
> >>> a spike in CPU usage might be unwelcome.
> >>
> >> Yes, this would be more obvious is UP is used.
> >>
> >>
> >>> things we should be careful to address then:
> >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> >>>      in a loop for a while, and we also get a backtrace.
> >>>      E.g. with this - how do we know who has the RTNL?
> >>>      We need to integrate with kernel/watchdog.c for good results
> >>>      and to make sure policy is consistent.
> >>
> >> That's fine, will consider this.

So after some investigation, it seems watchdog.c doesn't help. The
only exported helper is touch_softlockup_watchdog(), which tries to
avoid triggering the lockup warning for known slow paths.

And before the patch, we ended up with a real infinite loop, which could
be caught by the RCU stall detector; that is not the case for the sleep.
What we can probably do is emit a periodic netdev_err().

Thanks

> >>
> >>
> >>> 2- overhead. In a very common scenario when device is in hypervisor,
> >>>      programming timers etc has a very high overhead, at bootup
> >>>      lots of CVQ commands are run and slowing boot down is not nice.
> >>>      let's poll for a bit before waiting?
> >>
> >> Then we go back to the question of choosing a good timeout for poll. And
> >> poll seems problematic in the case of UP, scheduler might not have the
> >> chance to run.
> > Poll just a bit :) Seriously I don't know, but at least check once
> > after kick.
>
>
> I think it is what the current code did where the condition will be
> check before trying to sleep in the wait_event().
>
>
> >
> >>> 3- suprise removal. need to wake up thread in some way. what about
> >>>      other cases of device breakage - is there a chance this
> >>>      introduces new bugs around that? at least enumerate them please.
> >>
> >> The current code did:
> >>
> >> 1) check for vq->broken
> >> 2) wakeup during BAD_RING()
> >>
> >> So we won't end up with a never woke up process which should be fine.
> >>
> >> Thanks
> >
> > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > idea - can cause crashes if kernel panics on error.
>
>
> Yes, it's better to use __virtqueue_break() instead.
>
> But consider we will start from a wait first, I will limit the changes
> in virtio-net without bothering virtio core.
>
> Thanks
>
>
> >
> >>>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-28 11:43           ` Jason Wang
@ 2022-12-29  2:01             ` Xuan Zhuo
  -1 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-29  2:01 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem, mst

On Wed, 28 Dec 2022 19:43:56 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Wed, Dec 28, 2022 at 4:40 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Tue, 27 Dec 2022 12:33:53 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > We used to busy waiting on the cvq command this tends to be
> > > > > problematic since:
> > > > >
> > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > >    command
> > > > >
> > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > >
> > > > I don't think that a fixed 1S is a good choice.
> > >
> > > Well, it could be tweaked to be a little bit longer.
> > >
> > > One way, as discussed, is to let the device advertise a timeout then
> > > the driver can validate if it's valid and use that timeout. But it
> > > needs extension to the spec.
> > >
> > > > Some of the DPUs are very
> > > > lazy for cvq handle.
> > >
> > > Such design needs to be revisited, cvq (control path) should have a
> > > better priority or QOS than datapath.
> > >
> > > > In particular, we will also directly break the device.
> > >
> > > It's kind of hardening for malicious devices.
> >
> > Just based on timeout, it is judged that it is a malicious device. I think it is
> > too arbitrary.
>
> Drivers have very little information to make the decision. So it's
> really a balance.
>
> We can start with a very long timeout like 10 minutes. Otherwise a
> buggy/malicious device will block a lot of important things (reboot,
> modprobe) even if the scheduler is still functional.

Relatively speaking, starting from a 1min+ timeout, I think it is safe.

Thanks.



>
> Thanks
>
> >
> > Thanks.
> >
> >
> > >
> > > >
> > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > this timeout by themselves. Although I don't think this is a good way.
> > >
> > > Very hard and unfriendly to the end users.
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks.
> > > >
> > > >
> > > > > gives the scheduler a breath and can let the process can respond to
> > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > device.
> > > > >
> > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > ---
> > > > > Changes since V1:
> > > > > - break the device when timeout
> > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > ---
> > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > --- a/drivers/net/virtio_net.c
> > > > > +++ b/drivers/net/virtio_net.c
> > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > >       vi->rx_mode_work_enabled = false;
> > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > >
> > > > > +     virtqueue_wake_up(vi->cvq);
> > > > >       flush_work(&vi->rx_mode_work);
> > > > >  }
> > > > >
> > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > >       return !oom;
> > > > >  }
> > > > >
> > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > +{
> > > > > +     virtqueue_wake_up(cvq);
> > > > > +}
> > > > > +
> > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > >  {
> > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > >       return err;
> > > > >  }
> > > > >
> > > > > +static int virtnet_close(struct net_device *dev);
> > > > > +
> > > > >  /*
> > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > >
> > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > -      */
> > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > -             cpu_relax();
> > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > +     }
> > > > >
> > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > +     virtio_break_device(vi->vdev);
> > > > > +     return VIRTIO_NET_ERR;
> > > > >  }
> > > > >
> > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > >
> > > > >       /* Parameters for control virtqueue, if any */
> > > > >       if (vi->has_cvq) {
> > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > >               names[total_vqs - 1] = "control";
> > > > >       }
> > > > >
> > > > > --
> > > > > 2.25.1
> > > > >
> > > > > _______________________________________________
> > > > > Virtualization mailing list
> > > > > Virtualization@lists.linux-foundation.org
> > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > >
> > >
> >
>

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-29  2:01             ` Xuan Zhuo
  0 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-29  2:01 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, netdev, linux-kernel, virtualization, eperezma, edumazet,
	kuba, maxime.coquelin, pabeni, davem

On Wed, 28 Dec 2022 19:43:56 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Wed, Dec 28, 2022 at 4:40 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Tue, 27 Dec 2022 12:33:53 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > We used to busy waiting on the cvq command this tends to be
> > > > > problematic since:
> > > > >
> > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > >    command
> > > > >
> > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > >
> > > > I don't think that a fixed 1S is a good choice.
> > >
> > > Well, it could be tweaked to be a little bit longer.
> > >
> > > One way, as discussed, is to let the device advertise a timeout then
> > > the driver can validate if it's valid and use that timeout. But it
> > > needs extension to the spec.
> > >
> > > > Some of the DPUs are very
> > > > lazy for cvq handle.
> > >
> > > Such design needs to be revisited, cvq (control path) should have a
> > > better priority or QOS than datapath.
> > >
> > > > In particular, we will also directly break the device.
> > >
> > > It's kind of hardening for malicious devices.
> >
> > Just based on timeout, it is judged that it is a malicious device. I think it is
> > too arbitrary.
>
> Drivers have very little information to make the decision. So it's
> really a balance.
>
> We can start with a very long timeout like 10 minutes. Otherwise a
> buggy/malicious device will block a lot of important things (reboot,
> modprobe) even if the scheduler is still functional.

Relatively speaking, I think starting from a 1min+ timeout would be safe.

Thanks.



>
> Thanks
>
> >
> > Thanks.
> >
> >
> > >
> > > >
> > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > this timeout by themselves. Although I don't think this is a good way.
> > >
> > > Very hard and unfriendly to the end users.
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks.
> > > >
> > > >
> > > > > gives the scheduler a breath and can let the process can respond to
> > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > device.
> > > > >
> > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > ---
> > > > > Changes since V1:
> > > > > - break the device when timeout
> > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > ---
> > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > --- a/drivers/net/virtio_net.c
> > > > > +++ b/drivers/net/virtio_net.c
> > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > >       vi->rx_mode_work_enabled = false;
> > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > >
> > > > > +     virtqueue_wake_up(vi->cvq);
> > > > >       flush_work(&vi->rx_mode_work);
> > > > >  }
> > > > >
> > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > >       return !oom;
> > > > >  }
> > > > >
> > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > +{
> > > > > +     virtqueue_wake_up(cvq);
> > > > > +}
> > > > > +
> > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > >  {
> > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > >       return err;
> > > > >  }
> > > > >
> > > > > +static int virtnet_close(struct net_device *dev);
> > > > > +
> > > > >  /*
> > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > >
> > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > -      */
> > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > -             cpu_relax();
> > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > +     }
> > > > >
> > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > +     virtio_break_device(vi->vdev);
> > > > > +     return VIRTIO_NET_ERR;
> > > > >  }
> > > > >
> > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > >
> > > > >       /* Parameters for control virtqueue, if any */
> > > > >       if (vi->has_cvq) {
> > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > >               names[total_vqs - 1] = "control";
> > > > >       }
> > > > >
> > > > > --
> > > > > 2.25.1
> > > > >
> > > > > _______________________________________________
> > > > > Virtualization mailing list
> > > > > Virtualization@lists.linux-foundation.org
> > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > >
> > >
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-28 11:41             ` Jason Wang
@ 2022-12-29  2:09               ` Xuan Zhuo
  -1 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-29  2:09 UTC (permalink / raw)
  To: Jason Wang
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, maxime.coquelin, kuba, pabeni, davem

On Wed, 28 Dec 2022 19:41:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Wed, Dec 28, 2022 at 4:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > We used to busy waiting on the cvq command this tends to be
> > > > > > problematic since:
> > > > > >
> > > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > > >    command
> > > > > >
> > > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > > >
> > > > > I don't think that a fixed 1S is a good choice.
> > > >
> > > > Well, it could be tweaked to be a little bit longer.
> > > >
> > > > One way, as discussed, is to let the device advertise a timeout then
> > > > the driver can validate if it's valid and use that timeout. But it
> > > > needs extension to the spec.
> > >
> > > Controlling timeout from device is a good idea, e.g. hardware devices
> > > would benefit from a shorter timeout, hypervisor devices from a longer
> > > timeout or no timeout.
> >
> > Yes. That is good.
> >
> > Before introducing this feature, I personally like to use "wait", rather than
> > define a timeout.
>
> Note that the driver still needs to validate what device advertises to
> avoid infinite wait.

Sorry, I didn't understand what you meant.

Thanks.

>
> Thanks
>
> >
> > Thanks.
> >
> >
> > >
> > > >
> > > > > Some of the DPUs are very
> > > > > lazy for cvq handle.
> > > >
> > > > Such design needs to be revisited, cvq (control path) should have a
> > > > better priority or QOS than datapath.
> > >
> > > Spec says nothing about this, so driver can't assume this either.
> > >
> > > > > In particular, we will also directly break the device.
> > > >
> > > > It's kind of hardening for malicious devices.
> > >
> > > ATM no amount of hardening can prevent a malicious hypervisor from
> > > blocking the guest. Recovering when a hardware device is broken would be
> > > nice but I think if we do bother then we should try harder to recover,
> > > such as by driving device reset.
> > >
> > >
> > > Also, does your patch break surprise removal? There's no callback
> > > in this case ATM.
> > >
> > > > >
> > > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > > this timeout by themselves. Although I don't think this is a good way.
> > > >
> > > > Very hard and unfriendly to the end users.
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > Thanks.
> > > > >
> > > > >
> > > > > > gives the scheduler a breath and can let the process can respond to
> > > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > > device.
> > > > > >
> > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > ---
> > > > > > Changes since V1:
> > > > > > - break the device when timeout
> > > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > > ---
> > > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > > --- a/drivers/net/virtio_net.c
> > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > > >       vi->rx_mode_work_enabled = false;
> > > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > > >
> > > > > > +     virtqueue_wake_up(vi->cvq);
> > > > > >       flush_work(&vi->rx_mode_work);
> > > > > >  }
> > > > > >
> > > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > > >       return !oom;
> > > > > >  }
> > > > > >
> > > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > > +{
> > > > > > +     virtqueue_wake_up(cvq);
> > > > > > +}
> > > > > > +
> > > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > > >  {
> > > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > > >       return err;
> > > > > >  }
> > > > > >
> > > > > > +static int virtnet_close(struct net_device *dev);
> > > > > > +
> > > > > >  /*
> > > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > >
> > > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > > -      */
> > > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > > -             cpu_relax();
> > > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > +     }
> > > > > >
> > > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > > +     virtio_break_device(vi->vdev);
> > > > > > +     return VIRTIO_NET_ERR;
> > > > > >  }
> > > > > >
> > > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > > >
> > > > > >       /* Parameters for control virtqueue, if any */
> > > > > >       if (vi->has_cvq) {
> > > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > > >               names[total_vqs - 1] = "control";
> > > > > >       }
> > > > > >
> > > > > > --
> > > > > > 2.25.1
> > > > > >
> > > > > > _______________________________________________
> > > > > > Virtualization mailing list
> > > > > > Virtualization@lists.linux-foundation.org
> > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > >
> > >
> >
>

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-29  2:09               ` Xuan Zhuo
  0 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-29  2:09 UTC (permalink / raw)
  To: Jason Wang
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, kuba, maxime.coquelin, pabeni, davem

On Wed, 28 Dec 2022 19:41:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Wed, Dec 28, 2022 at 4:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > We used to busy waiting on the cvq command this tends to be
> > > > > > problematic since:
> > > > > >
> > > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > > >    command
> > > > > >
> > > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > > >
> > > > > I don't think that a fixed 1S is a good choice.
> > > >
> > > > Well, it could be tweaked to be a little bit longer.
> > > >
> > > > One way, as discussed, is to let the device advertise a timeout then
> > > > the driver can validate if it's valid and use that timeout. But it
> > > > needs extension to the spec.
> > >
> > > Controlling timeout from device is a good idea, e.g. hardware devices
> > > would benefit from a shorter timeout, hypervisor devices from a longer
> > > timeout or no timeout.
> >
> > Yes. That is good.
> >
> > Before introducing this feature, I personally like to use "wait", rather than
> > define a timeout.
>
> Note that the driver still needs to validate what device advertises to
> avoid infinite wait.

Sorry, I didn't understand what you meant.

Thanks.

>
> Thanks
>
> >
> > Thanks.
> >
> >
> > >
> > > >
> > > > > Some of the DPUs are very
> > > > > lazy for cvq handle.
> > > >
> > > > Such design needs to be revisited, cvq (control path) should have a
> > > > better priority or QOS than datapath.
> > >
> > > Spec says nothing about this, so driver can't assume this either.
> > >
> > > > > In particular, we will also directly break the device.
> > > >
> > > > It's kind of hardening for malicious devices.
> > >
> > > ATM no amount of hardening can prevent a malicious hypervisor from
> > > blocking the guest. Recovering when a hardware device is broken would be
> > > nice but I think if we do bother then we should try harder to recover,
> > > such as by driving device reset.
> > >
> > >
> > > Also, does your patch break surprise removal? There's no callback
> > > in this case ATM.
> > >
> > > > >
> > > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > > this timeout by themselves. Although I don't think this is a good way.
> > > >
> > > > Very hard and unfriendly to the end users.
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > Thanks.
> > > > >
> > > > >
> > > > > > gives the scheduler a breath and can let the process can respond to
> > > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > > device.
> > > > > >
> > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > ---
> > > > > > Changes since V1:
> > > > > > - break the device when timeout
> > > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > > ---
> > > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > > --- a/drivers/net/virtio_net.c
> > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > > >       vi->rx_mode_work_enabled = false;
> > > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > > >
> > > > > > +     virtqueue_wake_up(vi->cvq);
> > > > > >       flush_work(&vi->rx_mode_work);
> > > > > >  }
> > > > > >
> > > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > > >       return !oom;
> > > > > >  }
> > > > > >
> > > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > > +{
> > > > > > +     virtqueue_wake_up(cvq);
> > > > > > +}
> > > > > > +
> > > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > > >  {
> > > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > > >       return err;
> > > > > >  }
> > > > > >
> > > > > > +static int virtnet_close(struct net_device *dev);
> > > > > > +
> > > > > >  /*
> > > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > >
> > > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > > -      */
> > > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > > -             cpu_relax();
> > > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > +     }
> > > > > >
> > > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > > +     virtio_break_device(vi->vdev);
> > > > > > +     return VIRTIO_NET_ERR;
> > > > > >  }
> > > > > >
> > > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > > >
> > > > > >       /* Parameters for control virtqueue, if any */
> > > > > >       if (vi->has_cvq) {
> > > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > > >               names[total_vqs - 1] = "control";
> > > > > >       }
> > > > > >
> > > > > > --
> > > > > > 2.25.1
> > > > > >
> > > > > > _______________________________________________
> > > > > > Virtualization mailing list
> > > > > > Virtualization@lists.linux-foundation.org
> > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > >
> > >
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-29  2:09               ` Xuan Zhuo
@ 2022-12-29  3:22                 ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-29  3:22 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, kuba, maxime.coquelin, pabeni, davem

On Thu, Dec 29, 2022 at 10:10 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Wed, 28 Dec 2022 19:41:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Wed, Dec 28, 2022 at 4:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > > > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > We used to busy waiting on the cvq command this tends to be
> > > > > > > problematic since:
> > > > > > >
> > > > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > > > >    command
> > > > > > >
> > > > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > > > >
> > > > > > I don't think that a fixed 1S is a good choice.
> > > > >
> > > > > Well, it could be tweaked to be a little bit longer.
> > > > >
> > > > > One way, as discussed, is to let the device advertise a timeout then
> > > > > the driver can validate if it's valid and use that timeout. But it
> > > > > needs extension to the spec.
> > > >
> > > > Controlling timeout from device is a good idea, e.g. hardware devices
> > > > would benefit from a shorter timeout, hypervisor devices from a longer
> > > > timeout or no timeout.
> > >
> > > Yes. That is good.
> > >
> > > Before introducing this feature, I personally like to use "wait", rather than
> > > define a timeout.
> >
> > Note that the driver still needs to validate what device advertises to
> > avoid infinite wait.
>
> Sorry, I didn't understand what you mean.

I meant the interface needs to be carefully designed to

1) avoid the device advertising an infinite (or very long) timeout
2) have the driver enforce its own maximum timeout regardless of what the device advertises

Thanks

>
> Thanks.
>
> >
> > Thanks
> >
> > >
> > > Thanks.
> > >
> > >
> > > >
> > > > >
> > > > > > Some of the DPUs are very
> > > > > > lazy for cvq handle.
> > > > >
> > > > > Such design needs to be revisited, cvq (control path) should have a
> > > > > better priority or QOS than datapath.
> > > >
> > > > Spec says nothing about this, so driver can't assume this either.
> > > >
> > > > > > In particular, we will also directly break the device.
> > > > >
> > > > > It's kind of hardening for malicious devices.
> > > >
> > > > ATM no amount of hardening can prevent a malicious hypervisor from
> > > > blocking the guest. Recovering when a hardware device is broken would be
> > > > nice but I think if we do bother then we should try harder to recover,
> > > > such as by driving device reset.
> > > >
> > > >
> > > > Also, does your patch break surprise removal? There's no callback
> > > > in this case ATM.
> > > >
> > > > > >
> > > > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > > > this timeout by themselves. Although I don't think this is a good way.
> > > > >
> > > > > Very hard and unfriendly to the end users.
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > Thanks.
> > > > > >
> > > > > >
> > > > > > > gives the scheduler a breath and can let the process can respond to
> > > > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > > > device.
> > > > > > >
> > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > ---
> > > > > > > Changes since V1:
> > > > > > > - break the device when timeout
> > > > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > > > ---
> > > > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > > > >       vi->rx_mode_work_enabled = false;
> > > > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > > > >
> > > > > > > +     virtqueue_wake_up(vi->cvq);
> > > > > > >       flush_work(&vi->rx_mode_work);
> > > > > > >  }
> > > > > > >
> > > > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > > > >       return !oom;
> > > > > > >  }
> > > > > > >
> > > > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > > > +{
> > > > > > > +     virtqueue_wake_up(cvq);
> > > > > > > +}
> > > > > > > +
> > > > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > > > >  {
> > > > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > > > >       return err;
> > > > > > >  }
> > > > > > >
> > > > > > > +static int virtnet_close(struct net_device *dev);
> > > > > > > +
> > > > > > >  /*
> > > > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > >
> > > > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > > > -      */
> > > > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > > > -             cpu_relax();
> > > > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > +     }
> > > > > > >
> > > > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > > > +     virtio_break_device(vi->vdev);
> > > > > > > +     return VIRTIO_NET_ERR;
> > > > > > >  }
> > > > > > >
> > > > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > > > >
> > > > > > >       /* Parameters for control virtqueue, if any */
> > > > > > >       if (vi->has_cvq) {
> > > > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > > > >               names[total_vqs - 1] = "control";
> > > > > > >       }
> > > > > > >
> > > > > > > --
> > > > > > > 2.25.1
> > > > > > >
> > > > > > > _______________________________________________
> > > > > > > Virtualization mailing list
> > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > >
> > > >
> > >
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-29  3:22                 ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-29  3:22 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, maxime.coquelin, kuba, pabeni, davem

On Thu, Dec 29, 2022 at 10:10 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Wed, 28 Dec 2022 19:41:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Wed, Dec 28, 2022 at 4:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > > > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > We used to busy waiting on the cvq command this tends to be
> > > > > > > problematic since:
> > > > > > >
> > > > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > > > >    command
> > > > > > >
> > > > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > > > >
> > > > > > I don't think that a fixed 1S is a good choice.
> > > > >
> > > > > Well, it could be tweaked to be a little bit longer.
> > > > >
> > > > > One way, as discussed, is to let the device advertise a timeout then
> > > > > the driver can validate if it's valid and use that timeout. But it
> > > > > needs extension to the spec.
> > > >
> > > > Controlling timeout from device is a good idea, e.g. hardware devices
> > > > would benefit from a shorter timeout, hypervisor devices from a longer
> > > > timeout or no timeout.
> > >
> > > Yes. That is good.
> > >
> > > Before introducing this feature, I personally like to use "wait", rather than
> > > define a timeout.
> >
> > Note that the driver still needs to validate what device advertises to
> > avoid infinite wait.
>
> Sorry, I didn't understand what you mean.

I meant the interface needs to be carefully designed to

1) avoid the device advertising an infinite (or very long) timeout
2) let the driver have its own max timeout regardless of what the device advertises

Thanks

>
> Thanks.
>
> >
> > Thanks
> >
> > >
> > > Thanks.
> > >
> > >
> > > >
> > > > >
> > > > > > Some of the DPUs are very
> > > > > > lazy for cvq handle.
> > > > >
> > > > > Such design needs to be revisited, cvq (control path) should have a
> > > > > better priority or QOS than datapath.
> > > >
> > > > Spec says nothing about this, so driver can't assume this either.
> > > >
> > > > > > In particular, we will also directly break the device.
> > > > >
> > > > > It's kind of hardening for malicious devices.
> > > >
> > > > ATM no amount of hardening can prevent a malicious hypervisor from
> > > > blocking the guest. Recovering when a hardware device is broken would be
> > > > nice but I think if we do bother then we should try harder to recover,
> > > > such as by driving device reset.
> > > >
> > > >
> > > > Also, does your patch break surprise removal? There's no callback
> > > > in this case ATM.
> > > >
> > > > > >
> > > > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > > > this timeout by themselves. Although I don't think this is a good way.
> > > > >
> > > > > Very hard and unfriendly to the end users.
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > Thanks.
> > > > > >
> > > > > >
> > > > > > > gives the scheduler a breath and can let the process can respond to
> > > > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > > > device.
> > > > > > >
> > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > ---
> > > > > > > Changes since V1:
> > > > > > > - break the device when timeout
> > > > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > > > ---
> > > > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > > > >       vi->rx_mode_work_enabled = false;
> > > > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > > > >
> > > > > > > +     virtqueue_wake_up(vi->cvq);
> > > > > > >       flush_work(&vi->rx_mode_work);
> > > > > > >  }
> > > > > > >
> > > > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > > > >       return !oom;
> > > > > > >  }
> > > > > > >
> > > > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > > > +{
> > > > > > > +     virtqueue_wake_up(cvq);
> > > > > > > +}
> > > > > > > +
> > > > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > > > >  {
> > > > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > > > >       return err;
> > > > > > >  }
> > > > > > >
> > > > > > > +static int virtnet_close(struct net_device *dev);
> > > > > > > +
> > > > > > >  /*
> > > > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > >
> > > > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > > > -      */
> > > > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > > > -             cpu_relax();
> > > > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > +     }
> > > > > > >
> > > > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > > > +     virtio_break_device(vi->vdev);
> > > > > > > +     return VIRTIO_NET_ERR;
> > > > > > >  }
> > > > > > >
> > > > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > > > >
> > > > > > >       /* Parameters for control virtqueue, if any */
> > > > > > >       if (vi->has_cvq) {
> > > > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > > > >               names[total_vqs - 1] = "control";
> > > > > > >       }
> > > > > > >
> > > > > > > --
> > > > > > > 2.25.1
> > > > > > >
> > > > > > > _______________________________________________
> > > > > > > Virtualization mailing list
> > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > >
> > > >
> > >
> >
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-29  3:22                 ` Jason Wang
@ 2022-12-29  3:41                   ` Xuan Zhuo
  -1 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-29  3:41 UTC (permalink / raw)
  To: Jason Wang
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, maxime.coquelin, kuba, pabeni, davem

On Thu, 29 Dec 2022 11:22:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Thu, Dec 29, 2022 at 10:10 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Wed, 28 Dec 2022 19:41:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Wed, Dec 28, 2022 at 4:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > > > > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > >
> > > > > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > We used to busy waiting on the cvq command this tends to be
> > > > > > > > problematic since:
> > > > > > > >
> > > > > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > > > > >    command
> > > > > > > >
> > > > > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > > > > >
> > > > > > > I don't think that a fixed 1S is a good choice.
> > > > > >
> > > > > > Well, it could be tweaked to be a little bit longer.
> > > > > >
> > > > > > One way, as discussed, is to let the device advertise a timeout then
> > > > > > the driver can validate if it's valid and use that timeout. But it
> > > > > > needs extension to the spec.
> > > > >
> > > > > Controlling timeout from device is a good idea, e.g. hardware devices
> > > > > would benefit from a shorter timeout, hypervisor devices from a longer
> > > > > timeout or no timeout.
> > > >
> > > > Yes. That is good.
> > > >
> > > > Before introducing this feature, I personally like to use "wait", rather than
> > > > define a timeout.
> > >
> > > Note that the driver still needs to validate what device advertises to
> > > avoid infinite wait.
> >
> > Sorry, I didn't understand what you mean.
>
> I meant the interface needs to carefully designed to
>
> 1) avoid device to advertise a infinite (or very long) timeout
> 2) driver need to have its own max timeout regardless what device advertises


I see.

As far as I know, different operations will take different amounts of time.
For example, the queues are initialized one by one when performing the
VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command. If the number of queues is large, then
this time will be very long.

So we should set different timeouts for different commands.

Thanks.

>
> Thanks
>
> >
> > Thanks.
> >
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks.
> > > >
> > > >
> > > > >
> > > > > >
> > > > > > > Some of the DPUs are very
> > > > > > > lazy for cvq handle.
> > > > > >
> > > > > > Such design needs to be revisited, cvq (control path) should have a
> > > > > > better priority or QOS than datapath.
> > > > >
> > > > > Spec says nothing about this, so driver can't assume this either.
> > > > >
> > > > > > > In particular, we will also directly break the device.
> > > > > >
> > > > > > It's kind of hardening for malicious devices.
> > > > >
> > > > > ATM no amount of hardening can prevent a malicious hypervisor from
> > > > > blocking the guest. Recovering when a hardware device is broken would be
> > > > > nice but I think if we do bother then we should try harder to recover,
> > > > > such as by driving device reset.
> > > > >
> > > > >
> > > > > Also, does your patch break surprise removal? There's no callback
> > > > > in this case ATM.
> > > > >
> > > > > > >
> > > > > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > > > > this timeout by themselves. Although I don't think this is a good way.
> > > > > >
> > > > > > Very hard and unfriendly to the end users.
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > > >
> > > > > > > Thanks.
> > > > > > >
> > > > > > >
> > > > > > > > gives the scheduler a breath and can let the process can respond to
> > > > > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > > > > device.
> > > > > > > >
> > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > ---
> > > > > > > > Changes since V1:
> > > > > > > > - break the device when timeout
> > > > > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > > > > ---
> > > > > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > > > > >       vi->rx_mode_work_enabled = false;
> > > > > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > > > > >
> > > > > > > > +     virtqueue_wake_up(vi->cvq);
> > > > > > > >       flush_work(&vi->rx_mode_work);
> > > > > > > >  }
> > > > > > > >
> > > > > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > > > > >       return !oom;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > > > > +{
> > > > > > > > +     virtqueue_wake_up(cvq);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > > > > >  {
> > > > > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > > > > >       return err;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static int virtnet_close(struct net_device *dev);
> > > > > > > > +
> > > > > > > >  /*
> > > > > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > >
> > > > > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > > > > -      */
> > > > > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > > > > -             cpu_relax();
> > > > > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > +     }
> > > > > > > >
> > > > > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > > > > +     virtio_break_device(vi->vdev);
> > > > > > > > +     return VIRTIO_NET_ERR;
> > > > > > > >  }
> > > > > > > >
> > > > > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > > > > >
> > > > > > > >       /* Parameters for control virtqueue, if any */
> > > > > > > >       if (vi->has_cvq) {
> > > > > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > > > > >               names[total_vqs - 1] = "control";
> > > > > > > >       }
> > > > > > > >
> > > > > > > > --
> > > > > > > > 2.25.1
> > > > > > > >
> > > > > > > > _______________________________________________
> > > > > > > > Virtualization mailing list
> > > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > >
> > > > >
> > > >
> > >
> >
>

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-29  3:41                   ` Xuan Zhuo
  0 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-29  3:41 UTC (permalink / raw)
  To: Jason Wang
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, kuba, maxime.coquelin, pabeni, davem

On Thu, 29 Dec 2022 11:22:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Thu, Dec 29, 2022 at 10:10 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Wed, 28 Dec 2022 19:41:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Wed, Dec 28, 2022 at 4:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > > > > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > >
> > > > > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > We used to busy waiting on the cvq command this tends to be
> > > > > > > > problematic since:
> > > > > > > >
> > > > > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > > > > >    command
> > > > > > > >
> > > > > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > > > > >
> > > > > > > I don't think that a fixed 1S is a good choice.
> > > > > >
> > > > > > Well, it could be tweaked to be a little bit longer.
> > > > > >
> > > > > > One way, as discussed, is to let the device advertise a timeout then
> > > > > > the driver can validate if it's valid and use that timeout. But it
> > > > > > needs extension to the spec.
> > > > >
> > > > > Controlling timeout from device is a good idea, e.g. hardware devices
> > > > > would benefit from a shorter timeout, hypervisor devices from a longer
> > > > > timeout or no timeout.
> > > >
> > > > Yes. That is good.
> > > >
> > > > Before introducing this feature, I personally like to use "wait", rather than
> > > > define a timeout.
> > >
> > > Note that the driver still needs to validate what device advertises to
> > > avoid infinite wait.
> >
> > Sorry, I didn't understand what you mean.
>
> I meant the interface needs to carefully designed to
>
> 1) avoid device to advertise a infinite (or very long) timeout
> 2) driver need to have its own max timeout regardless what device advertises


I see.

As far as I know, different operations will take different time.
For example, the queues are initialized one by one when performing
VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET commands. If the number of queues is large, then
this time will be very long.

So we should set different timeouts for different commands.

Thanks.

>
> Thanks
>
> >
> > Thanks.
> >
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks.
> > > >
> > > >
> > > > >
> > > > > >
> > > > > > > Some of the DPUs are very
> > > > > > > lazy for cvq handle.
> > > > > >
> > > > > > Such design needs to be revisited, cvq (control path) should have a
> > > > > > better priority or QOS than datapath.
> > > > >
> > > > > Spec says nothing about this, so driver can't assume this either.
> > > > >
> > > > > > > In particular, we will also directly break the device.
> > > > > >
> > > > > > It's kind of hardening for malicious devices.
> > > > >
> > > > > ATM no amount of hardening can prevent a malicious hypervisor from
> > > > > blocking the guest. Recovering when a hardware device is broken would be
> > > > > nice but I think if we do bother then we should try harder to recover,
> > > > > such as by driving device reset.
> > > > >
> > > > >
> > > > > Also, does your patch break surprise removal? There's no callback
> > > > > in this case ATM.
> > > > >
> > > > > > >
> > > > > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > > > > this timeout by themselves. Although I don't think this is a good way.
> > > > > >
> > > > > > Very hard and unfriendly to the end users.
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > > >
> > > > > > > Thanks.
> > > > > > >
> > > > > > >
> > > > > > > > gives the scheduler a breath and can let the process can respond to
> > > > > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > > > > device.
> > > > > > > >
> > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > ---
> > > > > > > > Changes since V1:
> > > > > > > > - break the device when timeout
> > > > > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > > > > ---
> > > > > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > > > > >       vi->rx_mode_work_enabled = false;
> > > > > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > > > > >
> > > > > > > > +     virtqueue_wake_up(vi->cvq);
> > > > > > > >       flush_work(&vi->rx_mode_work);
> > > > > > > >  }
> > > > > > > >
> > > > > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > > > > >       return !oom;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > > > > +{
> > > > > > > > +     virtqueue_wake_up(cvq);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > > > > >  {
> > > > > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > > > > >       return err;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static int virtnet_close(struct net_device *dev);
> > > > > > > > +
> > > > > > > >  /*
> > > > > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > >
> > > > > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > > > > -      */
> > > > > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > > > > -             cpu_relax();
> > > > > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > +     }
> > > > > > > >
> > > > > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > > > > +     virtio_break_device(vi->vdev);
> > > > > > > > +     return VIRTIO_NET_ERR;
> > > > > > > >  }
> > > > > > > >
> > > > > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > > > > >
> > > > > > > >       /* Parameters for control virtqueue, if any */
> > > > > > > >       if (vi->has_cvq) {
> > > > > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > > > > >               names[total_vqs - 1] = "control";
> > > > > > > >       }
> > > > > > > >
> > > > > > > > --
> > > > > > > > 2.25.1
> > > > > > > >
> > > > > > > > _______________________________________________
> > > > > > > > Virtualization mailing list
> > > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > >
> > > > >
> > > >
> > >
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-29  3:41                   ` Xuan Zhuo
@ 2022-12-29  4:08                     ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-29  4:08 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, kuba, maxime.coquelin, pabeni, davem

On Thu, Dec 29, 2022 at 11:49 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Thu, 29 Dec 2022 11:22:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Thu, Dec 29, 2022 at 10:10 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Wed, 28 Dec 2022 19:41:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > On Wed, Dec 28, 2022 at 4:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > > > > > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > >
> > > > > > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > We used to busy waiting on the cvq command this tends to be
> > > > > > > > > problematic since:
> > > > > > > > >
> > > > > > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > > > > > >    command
> > > > > > > > >
> > > > > > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > > > > > >
> > > > > > > > I don't think that a fixed 1S is a good choice.
> > > > > > >
> > > > > > > Well, it could be tweaked to be a little bit longer.
> > > > > > >
> > > > > > > One way, as discussed, is to let the device advertise a timeout then
> > > > > > > the driver can validate if it's valid and use that timeout. But it
> > > > > > > needs extension to the spec.
> > > > > >
> > > > > > Controlling timeout from device is a good idea, e.g. hardware devices
> > > > > > would benefit from a shorter timeout, hypervisor devices from a longer
> > > > > > timeout or no timeout.
> > > > >
> > > > > Yes. That is good.
> > > > >
> > > > > Before introducing this feature, I personally like to use "wait", rather than
> > > > > define a timeout.
> > > >
> > > > Note that the driver still needs to validate what device advertises to
> > > > avoid infinite wait.
> > >
> > > Sorry, I didn't understand what you mean.
> >
> > I meant the interface needs to carefully designed to
> >
> > 1) avoid device to advertise a infinite (or very long) timeout
> > 2) driver need to have its own max timeout regardless what device advertises
>
>
> I see.
>
> As far as I know, different operations will take different time.
> For example, the queues are initialized one by one when performing
> VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET commands. If the number of queues is large, then
> this time will be very long.

I see. This is the case even for the software backends.

>
> So we should set different timeouts for different commands.

Probably, but it would result in a very complex interface; the device
can just choose to advertise the maximum timeout of all the commands
in this case. As discussed, I think we can start with a very long timeout.
Is 1 minute sufficient in this case?

Thanks

>
> Thanks.
>
> >
> > Thanks
> >
> > >
> > > Thanks.
> > >
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > Thanks.
> > > > >
> > > > >
> > > > > >
> > > > > > >
> > > > > > > > Some of the DPUs are very
> > > > > > > > lazy for cvq handle.
> > > > > > >
> > > > > > > Such design needs to be revisited, cvq (control path) should have a
> > > > > > > better priority or QOS than datapath.
> > > > > >
> > > > > > Spec says nothing about this, so driver can't assume this either.
> > > > > >
> > > > > > > > In particular, we will also directly break the device.
> > > > > > >
> > > > > > > It's kind of hardening for malicious devices.
> > > > > >
> > > > > > ATM no amount of hardening can prevent a malicious hypervisor from
> > > > > > blocking the guest. Recovering when a hardware device is broken would be
> > > > > > nice but I think if we do bother then we should try harder to recover,
> > > > > > such as by driving device reset.
> > > > > >
> > > > > >
> > > > > > Also, does your patch break surprise removal? There's no callback
> > > > > > in this case ATM.
> > > > > >
> > > > > > > >
> > > > > > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > > > > > this timeout by themselves. Although I don't think this is a good way.
> > > > > > >
> > > > > > > Very hard and unfriendly to the end users.
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > > >
> > > > > > > > Thanks.
> > > > > > > >
> > > > > > > >
> > > > > > > > > gives the scheduler a breath and can let the process can respond to
> > > > > > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > > > > > device.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > ---
> > > > > > > > > Changes since V1:
> > > > > > > > > - break the device when timeout
> > > > > > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > > > > > ---
> > > > > > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > > > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > > > > > >       vi->rx_mode_work_enabled = false;
> > > > > > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > > > > > >
> > > > > > > > > +     virtqueue_wake_up(vi->cvq);
> > > > > > > > >       flush_work(&vi->rx_mode_work);
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > > > > > >       return !oom;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > > > > > +{
> > > > > > > > > +     virtqueue_wake_up(cvq);
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > > > > > >  {
> > > > > > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > > > > > >       return err;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > +static int virtnet_close(struct net_device *dev);
> > > > > > > > > +
> > > > > > > > >  /*
> > > > > > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > > > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > > > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > >
> > > > > > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > > > > > -      */
> > > > > > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > > > > > -             cpu_relax();
> > > > > > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > +     }
> > > > > > > > >
> > > > > > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > > > > > +     virtio_break_device(vi->vdev);
> > > > > > > > > +     return VIRTIO_NET_ERR;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > > > > > >
> > > > > > > > >       /* Parameters for control virtqueue, if any */
> > > > > > > > >       if (vi->has_cvq) {
> > > > > > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > > > > > >               names[total_vqs - 1] = "control";
> > > > > > > > >       }
> > > > > > > > >
> > > > > > > > > --
> > > > > > > > > 2.25.1
> > > > > > > > >
> > > > > > > > > _______________________________________________
> > > > > > > > > Virtualization mailing list
> > > > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > > >
> > > > > >
> > > > >
> > > >
> > >
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-29  4:08                     ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-29  4:08 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, maxime.coquelin, kuba, pabeni, davem

On Thu, Dec 29, 2022 at 11:49 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Thu, 29 Dec 2022 11:22:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Thu, Dec 29, 2022 at 10:10 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Wed, 28 Dec 2022 19:41:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > On Wed, Dec 28, 2022 at 4:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > > > > > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > >
> > > > > > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > We used to busy waiting on the cvq command this tends to be
> > > > > > > > > problematic since:
> > > > > > > > >
> > > > > > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > > > > > >    command
> > > > > > > > >
> > > > > > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > > > > > >
> > > > > > > > I don't think that a fixed 1S is a good choice.
> > > > > > >
> > > > > > > Well, it could be tweaked to be a little bit longer.
> > > > > > >
> > > > > > > One way, as discussed, is to let the device advertise a timeout then
> > > > > > > the driver can validate if it's valid and use that timeout. But it
> > > > > > > needs extension to the spec.
> > > > > >
> > > > > > Controlling timeout from device is a good idea, e.g. hardware devices
> > > > > > would benefit from a shorter timeout, hypervisor devices from a longer
> > > > > > timeout or no timeout.
> > > > >
> > > > > Yes. That is good.
> > > > >
> > > > > Before introducing this feature, I personally like to use "wait", rather than
> > > > > define a timeout.
> > > >
> > > > Note that the driver still needs to validate what device advertises to
> > > > avoid infinite wait.
> > >
> > > Sorry, I didn't understand what you mean.
> >
> > I meant the interface needs to carefully designed to
> >
> > 1) avoid device to advertise a infinite (or very long) timeout
> > 2) driver need to have its own max timeout regardless what device advertises
>
>
> I see.
>
> As far as I know, different operations will take different time.
> For example, the queues are initialized one by one when performing
> VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET commands. If the number of queues is large, then
> this time will be very long.

I see. This is the case even for the software backends.

>
> So we should set different timeouts for different commands.

Probably, but it would result in a very complex interface; the device
can just choose to advertise the maximum timeout of all the commands
in this case. As discussed, I think we can start with a very long timeout.
Is 1 minute sufficient in this case?

Thanks

>
> Thanks.
>
> >
> > Thanks
> >
> > >
> > > Thanks.
> > >
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > Thanks.
> > > > >
> > > > >
> > > > > >
> > > > > > >
> > > > > > > > Some of the DPUs are very
> > > > > > > > lazy for cvq handle.
> > > > > > >
> > > > > > > Such design needs to be revisited, cvq (control path) should have a
> > > > > > > better priority or QOS than datapath.
> > > > > >
> > > > > > Spec says nothing about this, so driver can't assume this either.
> > > > > >
> > > > > > > > In particular, we will also directly break the device.
> > > > > > >
> > > > > > > It's kind of hardening for malicious devices.
> > > > > >
> > > > > > ATM no amount of hardening can prevent a malicious hypervisor from
> > > > > > blocking the guest. Recovering when a hardware device is broken would be
> > > > > > nice but I think if we do bother then we should try harder to recover,
> > > > > > such as by driving device reset.
> > > > > >
> > > > > >
> > > > > > Also, does your patch break surprise removal? There's no callback
> > > > > > in this case ATM.
> > > > > >
> > > > > > > >
> > > > > > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > > > > > this timeout by themselves. Although I don't think this is a good way.
> > > > > > >
> > > > > > > Very hard and unfriendly to the end users.
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > > >
> > > > > > > > Thanks.
> > > > > > > >
> > > > > > > >
> > > > > > > > > gives the scheduler a breath and can let the process can respond to
> > > > > > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > > > > > device.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > ---
> > > > > > > > > Changes since V1:
> > > > > > > > > - break the device when timeout
> > > > > > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > > > > > ---
> > > > > > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > > > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > > > > > >       vi->rx_mode_work_enabled = false;
> > > > > > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > > > > > >
> > > > > > > > > +     virtqueue_wake_up(vi->cvq);
> > > > > > > > >       flush_work(&vi->rx_mode_work);
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > > > > > >       return !oom;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > > > > > +{
> > > > > > > > > +     virtqueue_wake_up(cvq);
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > > > > > >  {
> > > > > > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > > > > > >       return err;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > +static int virtnet_close(struct net_device *dev);
> > > > > > > > > +
> > > > > > > > >  /*
> > > > > > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > > > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > > > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > >
> > > > > > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > > > > > -      */
> > > > > > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > > > > > -             cpu_relax();
> > > > > > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > +     }
> > > > > > > > >
> > > > > > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > > > > > +     virtio_break_device(vi->vdev);
> > > > > > > > > +     return VIRTIO_NET_ERR;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > > > > > >
> > > > > > > > >       /* Parameters for control virtqueue, if any */
> > > > > > > > >       if (vi->has_cvq) {
> > > > > > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > > > > > >               names[total_vqs - 1] = "control";
> > > > > > > > >       }
> > > > > > > > >
> > > > > > > > > --
> > > > > > > > > 2.25.1
> > > > > > > > >
> > > > > > > > > _______________________________________________
> > > > > > > > > Virtualization mailing list
> > > > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > > >
> > > > > >
> > > > >
> > > >
> > >
> >
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
  2022-12-29  4:08                     ` Jason Wang
@ 2022-12-29  6:13                       ` Xuan Zhuo
  -1 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-29  6:13 UTC (permalink / raw)
  To: Jason Wang
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, maxime.coquelin, kuba, pabeni, davem

On Thu, 29 Dec 2022 12:08:23 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Thu, Dec 29, 2022 at 11:49 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Thu, 29 Dec 2022 11:22:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Thu, Dec 29, 2022 at 10:10 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Wed, 28 Dec 2022 19:41:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Wed, Dec 28, 2022 at 4:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > > On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > > > > > > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > >
> > > > > > > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > We used to busy waiting on the cvq command this tends to be
> > > > > > > > > > problematic since:
> > > > > > > > > >
> > > > > > > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > > > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > > > > > > >    command
> > > > > > > > > >
> > > > > > > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > > > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > > > > > > >
> > > > > > > > > I don't think that a fixed 1S is a good choice.
> > > > > > > >
> > > > > > > > Well, it could be tweaked to be a little bit longer.
> > > > > > > >
> > > > > > > > One way, as discussed, is to let the device advertise a timeout then
> > > > > > > > the driver can validate if it's valid and use that timeout. But it
> > > > > > > > needs extension to the spec.
> > > > > > >
> > > > > > > Controlling timeout from device is a good idea, e.g. hardware devices
> > > > > > > would benefit from a shorter timeout, hypervisor devices from a longer
> > > > > > > timeout or no timeout.
> > > > > >
> > > > > > Yes. That is good.
> > > > > >
> > > > > > Before introducing this feature, I personally like to use "wait", rather than
> > > > > > define a timeout.
> > > > >
> > > > > Note that the driver still needs to validate what device advertises to
> > > > > avoid infinite wait.
> > > >
> > > > Sorry, I didn't understand what you mean.
> > >
> > > I meant the interface needs to carefully designed to
> > >
> > > 1) avoid device to advertise a infinite (or very long) timeout
> > > 2) driver need to have its own max timeout regardless what device advertises
> >
> >
> > I see.
> >
> > As far as I know, different operations will take different time.
> > For example, the queues are initialized one by one when performing
> > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET commands. If the number of queues is large, then
> > this time will be very long.
>
> I see. This is the case even for the software backends.
>
> >
> > So we should set different timeouts for different commands.
>
> Probably, but it would result in a very complex interface; the device
> can just choose to advertise the maximum timeout of all the commands
> in this case. As discussed, I think we can start with a very long timeout.
> Is 1 minute sufficient in this case?


For now, 1 minute is safe.

Thanks.

>
> Thanks
>
> >
> > Thanks.
> >
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks.
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > Thanks.
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > > Some of the DPUs are very
> > > > > > > > > lazy for cvq handle.
> > > > > > > >
> > > > > > > > Such design needs to be revisited, cvq (control path) should have a
> > > > > > > > better priority or QOS than datapath.
> > > > > > >
> > > > > > > Spec says nothing about this, so driver can't assume this either.
> > > > > > >
> > > > > > > > > In particular, we will also directly break the device.
> > > > > > > >
> > > > > > > > It's kind of hardening for malicious devices.
> > > > > > >
> > > > > > > ATM no amount of hardening can prevent a malicious hypervisor from
> > > > > > > blocking the guest. Recovering when a hardware device is broken would be
> > > > > > > nice but I think if we do bother then we should try harder to recover,
> > > > > > > such as by driving device reset.
> > > > > > >
> > > > > > >
> > > > > > > Also, does your patch break surprise removal? There's no callback
> > > > > > > in this case ATM.
> > > > > > >
> > > > > > > > >
> > > > > > > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > > > > > > this timeout by themselves. Although I don't think this is a good way.
> > > > > > > >
> > > > > > > > Very hard and unfriendly to the end users.
> > > > > > > >
> > > > > > > > Thanks
> > > > > > > >
> > > > > > > > >
> > > > > > > > > Thanks.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > gives the scheduler a breath and can let the process can respond to
> > > > > > > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > > > > > > device.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > > ---
> > > > > > > > > > Changes since V1:
> > > > > > > > > > - break the device when timeout
> > > > > > > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > > > > > > ---
> > > > > > > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > > > > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > > > > > > >       vi->rx_mode_work_enabled = false;
> > > > > > > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > > > > > > >
> > > > > > > > > > +     virtqueue_wake_up(vi->cvq);
> > > > > > > > > >       flush_work(&vi->rx_mode_work);
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > > > > > > >       return !oom;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > > > > > > +{
> > > > > > > > > > +     virtqueue_wake_up(cvq);
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > > > > > > >  {
> > > > > > > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > > > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > > > > > > >       return err;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > +static int virtnet_close(struct net_device *dev);
> > > > > > > > > > +
> > > > > > > > > >  /*
> > > > > > > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > > > > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > > > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > > > > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > >
> > > > > > > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > > > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > > > > > > -      */
> > > > > > > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > > > > > > -             cpu_relax();
> > > > > > > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > > > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > > > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > > +     }
> > > > > > > > > >
> > > > > > > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > > > > > > +     virtio_break_device(vi->vdev);
> > > > > > > > > > +     return VIRTIO_NET_ERR;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > > > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > > > > > > >
> > > > > > > > > >       /* Parameters for control virtqueue, if any */
> > > > > > > > > >       if (vi->has_cvq) {
> > > > > > > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > > > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > > > > > > >               names[total_vqs - 1] = "control";
> > > > > > > > > >       }
> > > > > > > > > >
> > > > > > > > > > --
> > > > > > > > > > 2.25.1
> > > > > > > > > >
> > > > > > > > > > _______________________________________________
> > > > > > > > > > Virtualization mailing list
> > > > > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > > > >
> > > > > > >
> > > > > >
> > > > >
> > > >
> > >
> >
>

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
@ 2022-12-29  6:13                       ` Xuan Zhuo
  0 siblings, 0 replies; 104+ messages in thread
From: Xuan Zhuo @ 2022-12-29  6:13 UTC (permalink / raw)
  To: Jason Wang
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, kuba, maxime.coquelin, pabeni, davem

On Thu, 29 Dec 2022 12:08:23 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Thu, Dec 29, 2022 at 11:49 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Thu, 29 Dec 2022 11:22:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Thu, Dec 29, 2022 at 10:10 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Wed, 28 Dec 2022 19:41:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Wed, Dec 28, 2022 at 4:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Tue, 27 Dec 2022 01:58:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > > On Tue, Dec 27, 2022 at 12:33:53PM +0800, Jason Wang wrote:
> > > > > > > > On Tue, Dec 27, 2022 at 10:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > >
> > > > > > > > > On Mon, 26 Dec 2022 15:49:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > We used to busy waiting on the cvq command this tends to be
> > > > > > > > > > problematic since:
> > > > > > > > > >
> > > > > > > > > > 1) CPU could wait for ever on a buggy/malicous device
> > > > > > > > > > 2) There's no wait to terminate the process that triggers the cvq
> > > > > > > > > >    command
> > > > > > > > > >
> > > > > > > > > > So this patch switch to use virtqueue_wait_for_used() to sleep with a
> > > > > > > > > > timeout (1s) instead of busy polling for the cvq command forever. This
> > > > > > > > >
> > > > > > > > > I don't think that a fixed 1S is a good choice.
> > > > > > > >
> > > > > > > > Well, it could be tweaked to be a little bit longer.
> > > > > > > >
> > > > > > > > One way, as discussed, is to let the device advertise a timeout then
> > > > > > > > the driver can validate if it's valid and use that timeout. But it
> > > > > > > > needs extension to the spec.
> > > > > > >
> > > > > > > Controlling timeout from device is a good idea, e.g. hardware devices
> > > > > > > would benefit from a shorter timeout, hypervisor devices from a longer
> > > > > > > timeout or no timeout.
> > > > > >
> > > > > > Yes. That is good.
> > > > > >
> > > > > > Before introducing this feature, I personally like to use "wait", rather than
> > > > > > define a timeout.
> > > > >
> > > > > Note that the driver still needs to validate what device advertises to
> > > > > avoid infinite wait.
> > > >
> > > > Sorry, I didn't understand what you mean.
> > >
> > > I meant the interface needs to carefully designed to
> > >
> > > 1) avoid device to advertise a infinite (or very long) timeout
> > > 2) driver need to have its own max timeout regardless what device advertises
> >
> >
> > I see.
> >
> > As far as I know, different operations will take different time.
> > For example, the queues are initialized one by one when performing
> > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET commands. If the number of queues is large, then
> > this time will be very long.
>
> I see. This is the case even for the software backends.
>
> >
> > So we should set different timeouts for different commands.
>
> Probably, but it would result in a very complex interface; the device
> can just choose to advertise the maximum timeout of all the commands
> in this case. As discussed, I think we can start with a very long timeout.
> Is 1 minute sufficient in this case?


For now, 1 minute is safe.

Thanks.

>
> Thanks
>
> >
> > Thanks.
> >
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks.
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > Thanks.
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > > Some of the DPUs are very
> > > > > > > > > lazy for cvq handle.
> > > > > > > >
> > > > > > > > Such design needs to be revisited, cvq (control path) should have a
> > > > > > > > better priority or QOS than datapath.
> > > > > > >
> > > > > > > Spec says nothing about this, so driver can't assume this either.
> > > > > > >
> > > > > > > > > In particular, we will also directly break the device.
> > > > > > > >
> > > > > > > > It's kind of hardening for malicious devices.
> > > > > > >
> > > > > > > ATM no amount of hardening can prevent a malicious hypervisor from
> > > > > > > blocking the guest. Recovering when a hardware device is broken would be
> > > > > > > nice but I think if we do bother then we should try harder to recover,
> > > > > > > such as by driving device reset.
> > > > > > >
> > > > > > >
> > > > > > > Also, does your patch break surprise removal? There's no callback
> > > > > > > in this case ATM.
> > > > > > >
> > > > > > > > >
> > > > > > > > > I think it is necessary to add a Virtio-Net parameter to allow users to define
> > > > > > > > > this timeout by themselves. Although I don't think this is a good way.
> > > > > > > >
> > > > > > > > Very hard and unfriendly to the end users.
> > > > > > > >
> > > > > > > > Thanks
> > > > > > > >
> > > > > > > > >
> > > > > > > > > Thanks.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > gives the scheduler a breath and can let the process can respond to
> > > > > > > > > > asignal. If the device doesn't respond in the timeout, break the
> > > > > > > > > > device.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > > ---
> > > > > > > > > > Changes since V1:
> > > > > > > > > > - break the device when timeout
> > > > > > > > > > - get buffer manually since the virtio core check more_used() instead
> > > > > > > > > > ---
> > > > > > > > > >  drivers/net/virtio_net.c | 24 ++++++++++++++++--------
> > > > > > > > > >  1 file changed, 16 insertions(+), 8 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > > > index efd9dd55828b..6a2ea64cfcb5 100644
> > > > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > > > @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > > > > > > >       vi->rx_mode_work_enabled = false;
> > > > > > > > > >       spin_unlock_bh(&vi->rx_mode_lock);
> > > > > > > > > >
> > > > > > > > > > +     virtqueue_wake_up(vi->cvq);
> > > > > > > > > >       flush_work(&vi->rx_mode_work);
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
> > > > > > > > > >       return !oom;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > +static void virtnet_cvq_done(struct virtqueue *cvq)
> > > > > > > > > > +{
> > > > > > > > > > +     virtqueue_wake_up(cvq);
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > >  static void skb_recv_done(struct virtqueue *rvq)
> > > > > > > > > >  {
> > > > > > > > > >       struct virtnet_info *vi = rvq->vdev->priv;
> > > > > > > > > > @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > > > > > > >       return err;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > +static int virtnet_close(struct net_device *dev);
> > > > > > > > > > +
> > > > > > > > > >  /*
> > > > > > > > > >   * Send command via the control virtqueue and check status.  Commands
> > > > > > > > > >   * supported by the hypervisor, as indicated by feature bits, should
> > > > > > > > > > @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > > > >       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > > > > > > >               return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > >
> > > > > > > > > > -     /* Spin for a response, the kick causes an ioport write, trapping
> > > > > > > > > > -      * into the hypervisor, so the request should be handled immediately.
> > > > > > > > > > -      */
> > > > > > > > > > -     while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > > > -            !virtqueue_is_broken(vi->cvq))
> > > > > > > > > > -             cpu_relax();
> > > > > > > > > > +     if (virtqueue_wait_for_used(vi->cvq)) {
> > > > > > > > > > +             virtqueue_get_buf(vi->cvq, &tmp);
> > > > > > > > > > +             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > > +     }
> > > > > > > > > >
> > > > > > > > > > -     return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > > +     netdev_err(vi->dev, "CVQ command timeout, break the virtio device.");
> > > > > > > > > > +     virtio_break_device(vi->vdev);
> > > > > > > > > > +     return VIRTIO_NET_ERR;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > > > > > > > > @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > > > > > > > > >
> > > > > > > > > >       /* Parameters for control virtqueue, if any */
> > > > > > > > > >       if (vi->has_cvq) {
> > > > > > > > > > -             callbacks[total_vqs - 1] = NULL;
> > > > > > > > > > +             callbacks[total_vqs - 1] = virtnet_cvq_done;
> > > > > > > > > >               names[total_vqs - 1] = "control";
> > > > > > > > > >       }
> > > > > > > > > >
> > > > > > > > > > --
> > > > > > > > > > 2.25.1
> > > > > > > > > >
> > > > > > > > > > _______________________________________________
> > > > > > > > > > Virtualization mailing list
> > > > > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > > > >
> > > > > > >
> > > > > >
> > > > >
> > > >
> > >
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-28 11:53                 ` Jason Wang
@ 2022-12-29  7:07                   ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-29  7:07 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> >
> >
> > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > >>>>> But device is still going and will later use the buffers.
> > >>>>>
> > >>>>> Same for timeout really.
> > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > >>>> If we think the timeout is hard, we can start from the wait.
> > >>>>
> > >>>> Thanks
> > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > >>> a spike in CPU usage might be unwelcome.
> > >>
> > >> Yes, this would be more obvious is UP is used.
> > >>
> > >>
> > >>> things we should be careful to address then:
> > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > >>>      in a loop for a while, and we also get a backtrace.
> > >>>      E.g. with this - how do we know who has the RTNL?
> > >>>      We need to integrate with kernel/watchdog.c for good results
> > >>>      and to make sure policy is consistent.
> > >>
> > >> That's fine, will consider this.
> 
> So after some investigation, it seems watchdog.c doesn't help. The
> only exported helper is touch_softlockup_watchdog(), which tries to avoid
> triggering the lockup warning for known slow paths.

I never said you can just use the existing exported APIs. You'll have to
write new ones :)

> And before the patch, we end up with a real infinite loop, which could
> be caught by the RCU stall detector; that is not the case with the sleep.
> What we can do is probably emit a periodic netdev_err().
> 
> Thanks

Only with a bad device.

> > >>
> > >>
> > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > >>>      programming timers etc has a very high overhead, at bootup
> > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > >>>      let's poll for a bit before waiting?
> > >>
> > >> Then we go back to the question of choosing a good timeout for poll. And
> > >> poll seems problematic in the case of UP, scheduler might not have the
> > >> chance to run.
> > > Poll just a bit :) Seriously I don't know, but at least check once
> > > after kick.
> >
> >
> > I think it is what the current code did where the condition will be
> > check before trying to sleep in the wait_event().
> >
> >
> > >
> > >>> 3- suprise removal. need to wake up thread in some way. what about
> > >>>      other cases of device breakage - is there a chance this
> > >>>      introduces new bugs around that? at least enumerate them please.
> > >>
> > >> The current code did:
> > >>
> > >> 1) check for vq->broken
> > >> 2) wakeup during BAD_RING()
> > >>
> > >> So we won't end up with a never woke up process which should be fine.
> > >>
> > >> Thanks
> > >
> > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > idea - can cause crashes if kernel panics on error.
> >
> >
> > Yes, it's better to use __virtqueue_break() instead.
> >
> > But consider we will start from a wait first, I will limit the changes
> > in virtio-net without bothering virtio core.
> >
> > Thanks
> >
> >
> > >
> > >>>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-29  7:07                   ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-29  7:07 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> >
> >
> > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > >>>>> But device is still going and will later use the buffers.
> > >>>>>
> > >>>>> Same for timeout really.
> > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > >>>> If we think the timeout is hard, we can start from the wait.
> > >>>>
> > >>>> Thanks
> > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > >>> a spike in CPU usage might be unwelcome.
> > >>
> > >> Yes, this would be more obvious if UP is used.
> > >>
> > >>
> > >>> things we should be careful to address then:
> > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > >>>      in a loop for a while, and we also get a backtrace.
> > >>>      E.g. with this - how do we know who has the RTNL?
> > >>>      We need to integrate with kernel/watchdog.c for good results
> > >>>      and to make sure policy is consistent.
> > >>
> > >> That's fine, will consider this.
> 
> So after some investigation, it seems the watchdog.c doesn't help. The
> only exported helper is touch_softlockup_watchdog(), which tries to avoid
> triggering the lockup warning for the known slow path.

I never said you can just use existing exporting APIs. You'll have to
write new ones :)

> And before the patch, we end up with a real infinite loop which could
> be caught by RCU stall detector which is not the case of the sleep.
> What we can do is probably do a periodic netdev_err().
> 
> Thanks

Only with a bad device.

> > >>
> > >>
> > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > >>>      programming timers etc has a very high overhead, at bootup
> > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > >>>      let's poll for a bit before waiting?
> > >>
> > >> Then we go back to the question of choosing a good timeout for poll. And
> > >> poll seems problematic in the case of UP, scheduler might not have the
> > >> chance to run.
> > > Poll just a bit :) Seriously I don't know, but at least check once
> > > after kick.
> >
> >
> > I think it is what the current code did where the condition will be
> > check before trying to sleep in the wait_event().
> >
> >
> > >
> > >>> 3- suprise removal. need to wake up thread in some way. what about
> > >>>      other cases of device breakage - is there a chance this
> > >>>      introduces new bugs around that? at least enumerate them please.
> > >>
> > >> The current code did:
> > >>
> > >> 1) check for vq->broken
> > >> 2) wakeup during BAD_RING()
> > >>
> > >> So we won't end up with a never woke up process which should be fine.
> > >>
> > >> Thanks
> > >
> > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > idea - can cause crashes if kernel panics on error.
> >
> >
> > Yes, it's better to use __virtqueue_break() instead.
> >
> > But consider we will start from a wait first, I will limit the changes
> > in virtio-net without bothering virtio core.
> >
> > Thanks
> >
> >
> > >
> > >>>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-29  7:07                   ` Michael S. Tsirkin
@ 2022-12-29  8:04                     ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-29  8:04 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > >
> > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > >>>>> But device is still going and will later use the buffers.
> > > >>>>>
> > > >>>>> Same for timeout really.
> > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > >>>> If we think the timeout is hard, we can start from the wait.
> > > >>>>
> > > >>>> Thanks
> > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > >>> a spike in CPU usage might be unwelcome.
> > > >>
> > > >> Yes, this would be more obvious is UP is used.
> > > >>
> > > >>
> > > >>> things we should be careful to address then:
> > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > >>>      in a loop for a while, and we also get a backtrace.
> > > >>>      E.g. with this - how do we know who has the RTNL?
> > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > >>>      and to make sure policy is consistent.
> > > >>
> > > >> That's fine, will consider this.
> >
> > So after some investigation, it seems the watchdog.c doesn't help. The
> > only export helper is touch_softlockup_watchdog() which tries to avoid
> > triggering the lockups warning for the known slow path.
>
> I never said you can just use existing exporting APIs. You'll have to
> write new ones :)

Ok, I thought you wanted to trigger similar warnings as a watchdog.

Btw, I wonder what kind of logic you want here. If we switch to using
sleep, there won't be soft lockup anymore. A simple wait + timeout +
warning seems sufficient?

Thanks

>
> > And before the patch, we end up with a real infinite loop which could
> > be caught by RCU stall detector which is not the case of the sleep.
> > What we can do is probably do a periodic netdev_err().
> >
> > Thanks
>
> Only with a bad device.
>
> > > >>
> > > >>
> > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > >>>      programming timers etc has a very high overhead, at bootup
> > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > >>>      let's poll for a bit before waiting?
> > > >>
> > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > >> chance to run.
> > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > after kick.
> > >
> > >
> > > I think it is what the current code did where the condition will be
> > > check before trying to sleep in the wait_event().
> > >
> > >
> > > >
> > > >>> 3- suprise removal. need to wake up thread in some way. what about
> > > >>>      other cases of device breakage - is there a chance this
> > > >>>      introduces new bugs around that? at least enumerate them please.
> > > >>
> > > >> The current code did:
> > > >>
> > > >> 1) check for vq->broken
> > > >> 2) wakeup during BAD_RING()
> > > >>
> > > >> So we won't end up with a never woke up process which should be fine.
> > > >>
> > > >> Thanks
> > > >
> > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > idea - can cause crashes if kernel panics on error.
> > >
> > >
> > > Yes, it's better to use __virtqueue_break() instead.
> > >
> > > But consider we will start from a wait first, I will limit the changes
> > > in virtio-net without bothering virtio core.
> > >
> > > Thanks
> > >
> > >
> > > >
> > > >>>
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-29  8:04                     ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-29  8:04 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > >
> > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > >>>>> But device is still going and will later use the buffers.
> > > >>>>>
> > > >>>>> Same for timeout really.
> > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > >>>> If we think the timeout is hard, we can start from the wait.
> > > >>>>
> > > >>>> Thanks
> > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > >>> a spike in CPU usage might be unwelcome.
> > > >>
> > > >> Yes, this would be more obvious is UP is used.
> > > >>
> > > >>
> > > >>> things we should be careful to address then:
> > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > >>>      in a loop for a while, and we also get a backtrace.
> > > >>>      E.g. with this - how do we know who has the RTNL?
> > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > >>>      and to make sure policy is consistent.
> > > >>
> > > >> That's fine, will consider this.
> >
> > So after some investigation, it seems the watchdog.c doesn't help. The
> > only export helper is touch_softlockup_watchdog() which tries to avoid
> > triggering the lockups warning for the known slow path.
>
> I never said you can just use existing exporting APIs. You'll have to
> write new ones :)

Ok, I thought you wanted to trigger similar warnings as a watchdog.

Btw, I wonder what kind of logic you want here. If we switch to using
sleep, there won't be soft lockup anymore. A simple wait + timeout +
warning seems sufficient?

Thanks

>
> > And before the patch, we end up with a real infinite loop which could
> > be caught by RCU stall detector which is not the case of the sleep.
> > What we can do is probably do a periodic netdev_err().
> >
> > Thanks
>
> Only with a bad device.
>
> > > >>
> > > >>
> > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > >>>      programming timers etc has a very high overhead, at bootup
> > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > >>>      let's poll for a bit before waiting?
> > > >>
> > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > >> chance to run.
> > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > after kick.
> > >
> > >
> > > I think it is what the current code did where the condition will be
> > > check before trying to sleep in the wait_event().
> > >
> > >
> > > >
> > > >>> 3- suprise removal. need to wake up thread in some way. what about
> > > >>>      other cases of device breakage - is there a chance this
> > > >>>      introduces new bugs around that? at least enumerate them please.
> > > >>
> > > >> The current code did:
> > > >>
> > > >> 1) check for vq->broken
> > > >> 2) wakeup during BAD_RING()
> > > >>
> > > >> So we won't end up with a never woke up process which should be fine.
> > > >>
> > > >> Thanks
> > > >
> > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > idea - can cause crashes if kernel panics on error.
> > >
> > >
> > > Yes, it's better to use __virtqueue_break() instead.
> > >
> > > But consider we will start from a wait first, I will limit the changes
> > > in virtio-net without bothering virtio core.
> > >
> > > Thanks
> > >
> > >
> > > >
> > > >>>
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-29  8:04                     ` Jason Wang
@ 2022-12-29  8:10                       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-29  8:10 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > >
> > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > >>>>> But device is still going and will later use the buffers.
> > > > >>>>>
> > > > >>>>> Same for timeout really.
> > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > >>>>
> > > > >>>> Thanks
> > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > >>> a spike in CPU usage might be unwelcome.
> > > > >>
> > > > >> Yes, this would be more obvious is UP is used.
> > > > >>
> > > > >>
> > > > >>> things we should be careful to address then:
> > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > >>>      and to make sure policy is consistent.
> > > > >>
> > > > >> That's fine, will consider this.
> > >
> > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > triggering the lockups warning for the known slow path.
> >
> > I never said you can just use existing exporting APIs. You'll have to
> > write new ones :)
> 
> Ok, I thought you wanted to trigger similar warnings as a watchdog.
> 
> Btw, I wonder what kind of logic you want here. If we switch to using
> sleep, there won't be soft lockup anymore. A simple wait + timeout +
> warning seems sufficient?
> 
> Thanks

I'd like to avoid the need to teach users new APIs. So the watchdog setup should apply
to this driver. The warning can be different.


> >
> > > And before the patch, we end up with a real infinite loop which could
> > > be caught by RCU stall detector which is not the case of the sleep.
> > > What we can do is probably do a periodic netdev_err().
> > >
> > > Thanks
> >
> > Only with a bad device.
> >
> > > > >>
> > > > >>
> > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > >>>      let's poll for a bit before waiting?
> > > > >>
> > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > >> chance to run.
> > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > after kick.
> > > >
> > > >
> > > > I think it is what the current code did where the condition will be
> > > > check before trying to sleep in the wait_event().
> > > >
> > > >
> > > > >
> > > > >>> 3- suprise removal. need to wake up thread in some way. what about
> > > > >>>      other cases of device breakage - is there a chance this
> > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > >>
> > > > >> The current code did:
> > > > >>
> > > > >> 1) check for vq->broken
> > > > >> 2) wakeup during BAD_RING()
> > > > >>
> > > > >> So we won't end up with a never woke up process which should be fine.
> > > > >>
> > > > >> Thanks
> > > > >
> > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > idea - can cause crashes if kernel panics on error.
> > > >
> > > >
> > > > Yes, it's better to use __virtqueue_break() instead.
> > > >
> > > > But consider we will start from a wait first, I will limit the changes
> > > > in virtio-net without bothering virtio core.
> > > >
> > > > Thanks
> > > >
> > > >
> > > > >
> > > > >>>
> >

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-29  8:10                       ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2022-12-29  8:10 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > >
> > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > >>>>> But device is still going and will later use the buffers.
> > > > >>>>>
> > > > >>>>> Same for timeout really.
> > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > >>>>
> > > > >>>> Thanks
> > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > >>> a spike in CPU usage might be unwelcome.
> > > > >>
> > > > >> Yes, this would be more obvious is UP is used.
> > > > >>
> > > > >>
> > > > >>> things we should be careful to address then:
> > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > >>>      and to make sure policy is consistent.
> > > > >>
> > > > >> That's fine, will consider this.
> > >
> > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > triggering the lockups warning for the known slow path.
> >
> > I never said you can just use existing exporting APIs. You'll have to
> > write new ones :)
> 
> Ok, I thought you wanted to trigger similar warnings as a watchdog.
> 
> Btw, I wonder what kind of logic you want here. If we switch to using
> sleep, there won't be soft lockup anymore. A simple wait + timeout +
> warning seems sufficient?
> 
> Thanks

I'd like to avoid the need to teach users new APIs. So the watchdog setup should apply
to this driver. The warning can be different.


> >
> > > And before the patch, we end up with a real infinite loop which could
> > > be caught by RCU stall detector which is not the case of the sleep.
> > > What we can do is probably do a periodic netdev_err().
> > >
> > > Thanks
> >
> > Only with a bad device.
> >
> > > > >>
> > > > >>
> > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > >>>      let's poll for a bit before waiting?
> > > > >>
> > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > >> chance to run.
> > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > after kick.
> > > >
> > > >
> > > > I think it is what the current code did where the condition will be
> > > > check before trying to sleep in the wait_event().
> > > >
> > > >
> > > > >
> > > > >>> 3- suprise removal. need to wake up thread in some way. what about
> > > > >>>      other cases of device breakage - is there a chance this
> > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > >>
> > > > >> The current code did:
> > > > >>
> > > > >> 1) check for vq->broken
> > > > >> 2) wakeup during BAD_RING()
> > > > >>
> > > > >> So we won't end up with a never woke up process which should be fine.
> > > > >>
> > > > >> Thanks
> > > > >
> > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > idea - can cause crashes if kernel panics on error.
> > > >
> > > >
> > > > Yes, it's better to use __virtqueue_break() instead.
> > > >
> > > > But consider we will start from a wait first, I will limit the changes
> > > > in virtio-net without bothering virtio core.
> > > >
> > > > Thanks
> > > >
> > > >
> > > > >
> > > > >>>
> >


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/4] virtio-net: convert rx mode setting to use workqueue
  2022-12-27  9:06       ` Jason Wang
  (?)
@ 2022-12-30  2:51       ` Jakub Kicinski
  2022-12-30  3:40           ` Jason Wang
  -1 siblings, 1 reply; 104+ messages in thread
From: Jakub Kicinski @ 2022-12-30  2:51 UTC (permalink / raw)
  To: Jason Wang
  Cc: Michael S. Tsirkin, davem, edumazet, pabeni, virtualization,
	netdev, linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Tue, 27 Dec 2022 17:06:10 +0800 Jason Wang wrote:
> > Hmm so user tells us to e.g enable promisc. We report completion
> > but card is still dropping packets. I think this
> > has a chance to break some setups.  
> 
> I think all those filters are best efforts, am I wrong?

Are the flags protected by the addr lock which needs BH, tho?

Taking netif_addr_lock_bh() to look at dev->flags seems a bit 
surprising to me.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/4] virtio-net: convert rx mode setting to use workqueue
  2022-12-30  2:51       ` Jakub Kicinski
@ 2022-12-30  3:40           ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-30  3:40 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Michael S. Tsirkin, netdev, linux-kernel, virtualization,
	eperezma, edumazet, maxime.coquelin, pabeni, davem

On Fri, Dec 30, 2022 at 10:51 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Tue, 27 Dec 2022 17:06:10 +0800 Jason Wang wrote:
> > > Hmm so user tells us to e.g enable promisc. We report completion
> > > but card is still dropping packets. I think this
> > > has a chance to break some setups.
> >
> > I think all those filters are best efforts, am I wrong?
>
> Are the flags protected by the addr lock which needs BH, tho?
>
> Taking netif_addr_lock_bh() to look at dev->flags seems a bit
> surprising to me.
>

Yes, RTNL should be sufficient here. Will fix it.

Thanks

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/4] virtio-net: convert rx mode setting to use workqueue
@ 2022-12-30  3:40           ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-30  3:40 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Michael S. Tsirkin, davem, edumazet, pabeni, virtualization,
	netdev, linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Fri, Dec 30, 2022 at 10:51 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Tue, 27 Dec 2022 17:06:10 +0800 Jason Wang wrote:
> > > Hmm so user tells us to e.g enable promisc. We report completion
> > > but card is still dropping packets. I think this
> > > has a chance to break some setups.
> >
> > I think all those filters are best efforts, am I wrong?
>
> Are the flags protected by the addr lock which needs BH, tho?
>
> Taking netif_addr_lock_bh() to look at dev->flags seems a bit
> surprising to me.
>

Yes, RTNL should be sufficient here. Will fix it.

Thanks


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-29  8:10                       ` Michael S. Tsirkin
@ 2022-12-30  3:43                         ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-30  3:43 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > >
> > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > >>>>> But device is still going and will later use the buffers.
> > > > > >>>>>
> > > > > >>>>> Same for timeout really.
> > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > >>>>
> > > > > >>>> Thanks
> > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > >>
> > > > > >> Yes, this would be more obvious is UP is used.
> > > > > >>
> > > > > >>
> > > > > >>> things we should be careful to address then:
> > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > >>>      and to make sure policy is consistent.
> > > > > >>
> > > > > >> That's fine, will consider this.
> > > >
> > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > triggering the lockups warning for the known slow path.
> > >
> > > I never said you can just use existing exporting APIs. You'll have to
> > > write new ones :)
> >
> > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> >
> > Btw, I wonder what kind of logic you want here. If we switch to using
> > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > warning seems sufficient?
> >
> > Thanks
>
> I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> to this driver. The warning can be different.

Right, so it looks to me the only possible setup is the
watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
second (as softlockup did).

And I think it would still make sense to fail; we can start with a
very long timeout like 1 minute and break the device. Does this make
sense?

Thanks

>
>
> > >
> > > > And before the patch, we end up with a real infinite loop which could
> > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > What we can do is probably do a periodic netdev_err().
> > > >
> > > > Thanks
> > >
> > > Only with a bad device.
> > >
> > > > > >>
> > > > > >>
> > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > >>>      let's poll for a bit before waiting?
> > > > > >>
> > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > >> chance to run.
> > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > after kick.
> > > > >
> > > > >
> > > > > I think it is what the current code did where the condition will be
> > > > > check before trying to sleep in the wait_event().
> > > > >
> > > > >
> > > > > >
> > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > >>>      other cases of device breakage - is there a chance this
> > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > >>
> > > > > >> The current code did:
> > > > > >>
> > > > > >> 1) check for vq->broken
> > > > > >> 2) wakeup during BAD_RING()
> > > > > >>
> > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > >>
> > > > > >> Thanks
> > > > > >
> > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > idea - can cause crashes if kernel panics on error.
> > > > >
> > > > >
> > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > >
> > > > > But consider we will start from a wait first, I will limit the changes
> > > > > in virtio-net without bothering virtio core.
> > > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > > >
> > > > > >>>
> > >
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2022-12-30  3:43                         ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2022-12-30  3:43 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > >
> > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > >>>>> But device is still going and will later use the buffers.
> > > > > >>>>>
> > > > > >>>>> Same for timeout really.
> > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > >>>>
> > > > > >>>> Thanks
> > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > >>
> > > > > >> Yes, this would be more obvious if UP is used.
> > > > > >>
> > > > > >>
> > > > > >>> things we should be careful to address then:
> > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > >>>      and to make sure policy is consistent.
> > > > > >>
> > > > > >> That's fine, will consider this.
> > > >
> > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > triggering the lockups warning for the known slow path.
> > >
> > > I never said you can just use existing exporting APIs. You'll have to
> > > write new ones :)
> >
> > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> >
> > Btw, I wonder what kind of logic you want here. If we switch to using
> > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > warning seems sufficient?
> >
> > Thanks
>
> I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> to this driver. The warning can be different.

Right, so it looks to me the only possible setup is the
watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
seconds (as softlockup did).

And I think it would still make sense to fail, we can start with a
very long timeout like 1 minute and break the device. Does this make
sense?

Thanks

>
>
> > >
> > > > And before the patch, we end up with a real infinite loop which could
> > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > What we can do is probably do a periodic netdev_err().
> > > >
> > > > Thanks
> > >
> > > Only with a bad device.
> > >
> > > > > >>
> > > > > >>
> > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > >>>      let's poll for a bit before waiting?
> > > > > >>
> > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > >> chance to run.
> > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > after kick.
> > > > >
> > > > >
> > > > > I think it is what the current code did where the condition will be
> > > > > checked before trying to sleep in the wait_event().
> > > > >
> > > > >
> > > > > >
> > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > >>>      other cases of device breakage - is there a chance this
> > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > >>
> > > > > >> The current code did:
> > > > > >>
> > > > > >> 1) check for vq->broken
> > > > > >> 2) wakeup during BAD_RING()
> > > > > >>
> > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > >>
> > > > > >> Thanks
> > > > > >
> > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > idea - can cause crashes if kernel panics on error.
> > > > >
> > > > >
> > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > >
> > > > > But consider we will start from a wait first, I will limit the changes
> > > > > in virtio-net without bothering virtio core.
> > > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > > >
> > > > > >>>
> > >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2022-12-30  3:43                         ` Jason Wang
@ 2023-01-27 10:35                           ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2023-01-27 10:35 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > >
> > > > > >
> > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > >>>>>
> > > > > > >>>>> Same for timeout really.
> > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > >>>>
> > > > > > >>>> Thanks
> > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > >>
> > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > >>
> > > > > > >>
> > > > > > >>> things we should be careful to address then:
> > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > >>>      and to make sure policy is consistent.
> > > > > > >>
> > > > > > >> That's fine, will consider this.
> > > > >
> > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > triggering the lockups warning for the known slow path.
> > > >
> > > > I never said you can just use existing exporting APIs. You'll have to
> > > > write new ones :)
> > >
> > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > >
> > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > warning seems sufficient?
> > >
> > > Thanks
> >
> > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > to this driver. The warning can be different.
> 
> Right, so it looks to me the only possible setup is the
> watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> seconds (as softlockup did).
> 
> And I think it would still make sense to fail, we can start with a
> very long timeout like 1 minute and break the device. Does this make
> sense?
> 
> Thanks

I'd say we need to make this manageable then. Can't we do it normally
e.g. react to an interrupt to return to userspace?



> >
> >
> > > >
> > > > > And before the patch, we end up with a real infinite loop which could
> > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > What we can do is probably do a periodic netdev_err().
> > > > >
> > > > > Thanks
> > > >
> > > > Only with a bad device.
> > > >
> > > > > > >>
> > > > > > >>
> > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > >>>      let's poll for a bit before waiting?
> > > > > > >>
> > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > >> chance to run.
> > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > after kick.
> > > > > >
> > > > > >
> > > > > > I think it is what the current code did where the condition will be
> > > > > > checked before trying to sleep in the wait_event().
> > > > > >
> > > > > >
> > > > > > >
> > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > >>
> > > > > > >> The current code did:
> > > > > > >>
> > > > > > >> 1) check for vq->broken
> > > > > > >> 2) wakeup during BAD_RING()
> > > > > > >>
> > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > >>
> > > > > > >> Thanks
> > > > > > >
> > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > >
> > > > > >
> > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > >
> > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > in virtio-net without bothering virtio core.
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > >
> > > > > > >
> > > > > > >>>
> > > >
> >


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2023-01-27 10:35                           ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2023-01-27 10:35 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > >
> > > > > >
> > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > >>>>>
> > > > > > >>>>> Same for timeout really.
> > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > >>>>
> > > > > > >>>> Thanks
> > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > >>
> > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > >>
> > > > > > >>
> > > > > > >>> things we should be careful to address then:
> > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > >>>      and to make sure policy is consistent.
> > > > > > >>
> > > > > > >> That's fine, will consider this.
> > > > >
> > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > triggering the lockups warning for the known slow path.
> > > >
> > > > I never said you can just use existing exporting APIs. You'll have to
> > > > write new ones :)
> > >
> > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > >
> > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > warning seems sufficient?
> > >
> > > Thanks
> >
> > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > to this driver. The warning can be different.
> 
> Right, so it looks to me the only possible setup is the
> watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > seconds (as softlockup did).
> 
> And I think it would still make sense to fail, we can start with a
> > very long timeout like 1 minute and break the device. Does this make
> sense?
> 
> Thanks

I'd say we need to make this manageable then. Can't we do it normally
e.g. react to an interrupt to return to userspace?



> >
> >
> > > >
> > > > > And before the patch, we end up with a real infinite loop which could
> > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > What we can do is probably do a periodic netdev_err().
> > > > >
> > > > > Thanks
> > > >
> > > > Only with a bad device.
> > > >
> > > > > > >>
> > > > > > >>
> > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > >>>      let's poll for a bit before waiting?
> > > > > > >>
> > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > >> chance to run.
> > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > after kick.
> > > > > >
> > > > > >
> > > > > > I think it is what the current code did where the condition will be
> > > > > > checked before trying to sleep in the wait_event().
> > > > > >
> > > > > >
> > > > > > >
> > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > >>
> > > > > > >> The current code did:
> > > > > > >>
> > > > > > >> 1) check for vq->broken
> > > > > > >> 2) wakeup during BAD_RING()
> > > > > > >>
> > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > >>
> > > > > > >> Thanks
> > > > > > >
> > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > >
> > > > > >
> > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > >
> > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > in virtio-net without bothering virtio core.
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > >
> > > > > > >
> > > > > > >>>
> > > >
> >

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2023-01-27 10:35                           ` Michael S. Tsirkin
@ 2023-01-29  5:48                             ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2023-01-29  5:48 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > >
> > > > > > >
> > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > >>>>>
> > > > > > > >>>>> Same for timeout really.
> > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > >>>>
> > > > > > > >>>> Thanks
> > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > >>
> > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > >>
> > > > > > > >>
> > > > > > > >>> things we should be careful to address then:
> > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > >>
> > > > > > > >> That's fine, will consider this.
> > > > > >
> > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > triggering the lockups warning for the known slow path.
> > > > >
> > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > write new ones :)
> > > >
> > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > >
> > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > warning seems sufficient?
> > > >
> > > > Thanks
> > >
> > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > to this driver. The warning can be different.
> >
> > Right, so it looks to me the only possible setup is the
> > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > seconds (as softlockup did).
> >
> > And I think it would still make sense to fail, we can start with a
> > very long timeout like 1 minute and break the device. Does this make
> > sense?
> >
> > Thanks
>
> I'd say we need to make this manageable then.

Did you mean something like sysfs or module parameters?

> Can't we do it normally
> e.g. react to an interrupt to return to userspace?

I didn't get the meaning of this. Sorry.

Thanks

>
>
>
> > >
> > >
> > > > >
> > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > What we can do is probably do a periodic netdev_err().
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > Only with a bad device.
> > > > >
> > > > > > > >>
> > > > > > > >>
> > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > >>
> > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > >> chance to run.
> > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > after kick.
> > > > > > >
> > > > > > >
> > > > > > > I think it is what the current code did where the condition will be
> > > > > > > checked before trying to sleep in the wait_event().
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > >>
> > > > > > > >> The current code did:
> > > > > > > >>
> > > > > > > >> 1) check for vq->broken
> > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > >>
> > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > >>
> > > > > > > >> Thanks
> > > > > > > >
> > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > >
> > > > > > >
> > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > >
> > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > in virtio-net without bothering virtio core.
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > >>>
> > > > >
> > >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2023-01-29  5:48                             ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2023-01-29  5:48 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > >
> > > > > > >
> > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > >>>>>
> > > > > > > >>>>> Same for timeout really.
> > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > >>>>
> > > > > > > >>>> Thanks
> > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > >>
> > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > >>
> > > > > > > >>
> > > > > > > >>> things we should be careful to address then:
> > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > >>
> > > > > > > >> That's fine, will consider this.
> > > > > >
> > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > triggering the lockups warning for the known slow path.
> > > > >
> > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > write new ones :)
> > > >
> > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > >
> > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > warning seems sufficient?
> > > >
> > > > Thanks
> > >
> > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > to this driver. The warning can be different.
> >
> > Right, so it looks to me the only possible setup is the
> > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > seconds (as softlockup did).
> >
> > And I think it would still make sense to fail, we can start with a
> > very long timeout like 1 minute and break the device. Does this make
> > sense?
> >
> > Thanks
>
> I'd say we need to make this manageable then.

Did you mean something like sysfs or module parameters?

> Can't we do it normally
> e.g. react to an interrupt to return to userspace?

I didn't get the meaning of this. Sorry.

Thanks

>
>
>
> > >
> > >
> > > > >
> > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > What we can do is probably do a periodic netdev_err().
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > Only with a bad device.
> > > > >
> > > > > > > >>
> > > > > > > >>
> > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > >>
> > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > >> chance to run.
> > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > after kick.
> > > > > > >
> > > > > > >
> > > > > > > I think it is what the current code did where the condition will be
> > > > > > > checked before trying to sleep in the wait_event().
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > >>
> > > > > > > >> The current code did:
> > > > > > > >>
> > > > > > > >> 1) check for vq->broken
> > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > >>
> > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > >>
> > > > > > > >> Thanks
> > > > > > > >
> > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > >
> > > > > > >
> > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > >
> > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > in virtio-net without bothering virtio core.
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > >>>
> > > > >
> > >
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2023-01-29  5:48                             ` Jason Wang
@ 2023-01-29  7:30                               ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2023-01-29  7:30 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > >
> > > > > > > >
> > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > >>>>>
> > > > > > > > >>>>> Same for timeout really.
> > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > >>>>
> > > > > > > > >>>> Thanks
> > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > >>
> > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > >>
> > > > > > > > >>
> > > > > > > > >>> things we should be careful to address then:
> > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > >>
> > > > > > > > >> That's fine, will consider this.
> > > > > > >
> > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > only exported helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > triggering the lockups warning for the known slow path.
> > > > > >
> > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > write new ones :)
> > > > >
> > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > >
> > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > warning seems sufficient?
> > > > >
> > > > > Thanks
> > > >
> > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > to this driver. The warning can be different.
> > >
> > > Right, so it looks to me the only possible setup is the
> > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > second (as softlockup did).
> > >
> > > And I think it would still make sense to fail, we can start with a
> > > very long timeout like 1 minute and break the device. Does this make
> > > sense?
> > >
> > > Thanks
> >
> > I'd say we need to make this manageable then.
> 
> Did you mean something like sysfs or module parameters?

No I'd say pass it with an ioctl.

> > Can't we do it normally
> > e.g. react to an interrupt to return to userspace?
> 
> I didn't get the meaning of this. Sorry.
> 
> Thanks

Standard way to handle things that can timeout and where userspace
did not supply the time is to block until an interrupt
then return EINTR. Userspace controls the timeout by
using e.g. alarm(2).


> >
> >
> >
> > > >
> > > >
> > > > > >
> > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > Only with a bad device.
> > > > > >
> > > > > > > > >>
> > > > > > > > >>
> > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > >>
> > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > >> chance to run.
> > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > after kick.
> > > > > > > >
> > > > > > > >
> > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > checked before trying to sleep in the wait_event().
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > >>
> > > > > > > > >> The current code did:
> > > > > > > > >>
> > > > > > > > >> 1) check for vq->broken
> > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > >>
> > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > >>
> > > > > > > > >> Thanks
> > > > > > > > >
> > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > >
> > > > > > > >
> > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > >
> > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > >
> > > > > > > > Thanks
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > >>>
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2023-01-29  7:30                               ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2023-01-29  7:30 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > >
> > > > > > > >
> > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > >>>>>
> > > > > > > > >>>>> Same for timeout really.
> > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > >>>>
> > > > > > > > >>>> Thanks
> > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > >>
> > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > >>
> > > > > > > > >>
> > > > > > > > >>> things we should be careful to address then:
> > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > >>
> > > > > > > > >> That's fine, will consider this.
> > > > > > >
> > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > only exported helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > triggering the lockups warning for the known slow path.
> > > > > >
> > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > write new ones :)
> > > > >
> > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > >
> > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > warning seems sufficient?
> > > > >
> > > > > Thanks
> > > >
> > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > to this driver. The warning can be different.
> > >
> > > Right, so it looks to me the only possible setup is the
> > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > second (as softlockup did).
> > >
> > > And I think it would still make sense to fail, we can start with a
> > > > very long timeout like 1 minute and break the device. Does this make
> > > sense?
> > >
> > > Thanks
> >
> > I'd say we need to make this manageable then.
> 
> Did you mean something like sysfs or module parameters?

No I'd say pass it with an ioctl.

> > Can't we do it normally
> > e.g. react to an interrupt to return to userspace?
> 
> I didn't get the meaning of this. Sorry.
> 
> Thanks

Standard way to handle things that can timeout and where userspace
did not supply the time is to block until an interrupt
then return EINTR. Userspace controls the timeout by
using e.g. alarm(2).


> >
> >
> >
> > > >
> > > >
> > > > > >
> > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > Only with a bad device.
> > > > > >
> > > > > > > > >>
> > > > > > > > >>
> > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > >>
> > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > >> chance to run.
> > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > after kick.
> > > > > > > >
> > > > > > > >
> > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > checked before trying to sleep in the wait_event().
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > >>
> > > > > > > > >> The current code did:
> > > > > > > > >>
> > > > > > > > >> 1) check for vq->broken
> > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > >>
> > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > >>
> > > > > > > > >> Thanks
> > > > > > > > >
> > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > >
> > > > > > > >
> > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > >
> > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > >
> > > > > > > > Thanks
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > >>>
> > > > > >
> > > >
> >

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2023-01-29  7:30                               ` Michael S. Tsirkin
@ 2023-01-30  2:53                                 ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2023-01-30  2:53 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Sun, Jan 29, 2023 at 3:30 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> > On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > >
> > > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > > >>>>>
> > > > > > > > > >>>>> Same for timeout really.
> > > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > > >>>>
> > > > > > > > > >>>> Thanks
> > > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > > >>
> > > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > > >>
> > > > > > > > > >>
> > > > > > > > > >>> things we should be careful to address then:
> > > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > > >>
> > > > > > > > > >> That's fine, will consider this.
> > > > > > > >
> > > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > only exported helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > > triggering the lockups warning for the known slow path.
> > > > > > >
> > > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > > write new ones :)
> > > > > >
> > > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > > >
> > > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > > warning seems sufficient?
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > > to this driver. The warning can be different.
> > > >
> > > > Right, so it looks to me the only possible setup is the
> > > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > > second (as softlockup did).
> > > >
> > > > And I think it would still make sense to fail, we can start with a
> > > > > very long timeout like 1 minute and break the device. Does this make
> > > > sense?
> > > >
> > > > Thanks
> > >
> > > I'd say we need to make this manageable then.
> >
> > Did you mean something like sysfs or module parameters?
>
> No I'd say pass it with an ioctl.
>
> > > Can't we do it normally
> > > e.g. react to an interrupt to return to userspace?
> >
> > I didn't get the meaning of this. Sorry.
> >
> > Thanks
>
> Standard way to handle things that can timeout and where userspace
> did not supply the time is to block until an interrupt
> then return EINTR.

Well this seems to be a huge change, ioctl(2) doesn't say it can
return EINTR now.

Actually, a driver timeout is used by other drivers when using
controlq/adminq (e.g. i40e). Starting from a sane value (e.g. 1 minute
to avoid false negatives) seems to be a good first step.

> Userspace controls the timeout by
> using e.g. alarm(2).

Not used in iproute2 after a git grep.

Thanks

>
>
> > >
> > >
> > >
> > > > >
> > > > >
> > > > > > >
> > > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > > >
> > > > > > > > Thanks
> > > > > > >
> > > > > > > Only with a bad device.
> > > > > > >
> > > > > > > > > >>
> > > > > > > > > >>
> > > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > > >>
> > > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > > >> chance to run.
> > > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > > after kick.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > > checked before trying to sleep in the wait_event().
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > > >>
> > > > > > > > > >> The current code did:
> > > > > > > > > >>
> > > > > > > > > >> 1) check for vq->broken
> > > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > > >>
> > > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > > >>
> > > > > > > > > >> Thanks
> > > > > > > > > >
> > > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > > >
> > > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > >>>
> > > > > > >
> > > > >
> > >
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2023-01-30  2:53                                 ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2023-01-30  2:53 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Sun, Jan 29, 2023 at 3:30 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> > On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > >
> > > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > > >>>>>
> > > > > > > > > >>>>> Same for timeout really.
> > > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > > >>>>
> > > > > > > > > >>>> Thanks
> > > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > > >>
> > > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > > >>
> > > > > > > > > >>
> > > > > > > > > >>> things we should be careful to address then:
> > > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > > >>
> > > > > > > > > >> That's fine, will consider this.
> > > > > > > >
> > > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > only exported helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > > triggering the lockups warning for the known slow path.
> > > > > > >
> > > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > > write new ones :)
> > > > > >
> > > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > > >
> > > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > > warning seems sufficient?
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > > to this driver. The warning can be different.
> > > >
> > > > Right, so it looks to me the only possible setup is the
> > > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > > second (as softlockup did).
> > > >
> > > > And I think it would still make sense to fail, we can start with a
> > > > > very long timeout like 1 minute and break the device. Does this make
> > > > sense?
> > > >
> > > > Thanks
> > >
> > > I'd say we need to make this manageable then.
> >
> > Did you mean something like sysfs or module parameters?
>
> No I'd say pass it with an ioctl.
>
> > > Can't we do it normally
> > > e.g. react to an interrupt to return to userspace?
> >
> > I didn't get the meaning of this. Sorry.
> >
> > Thanks
>
> Standard way to handle things that can timeout and where userspace
> did not supply the time is to block until an interrupt
> then return EINTR.

Well this seems to be a huge change, ioctl(2) doesn't say it can
return EINTR now.

Actually, a driver timeout is used by other drivers when using
controlq/adminq (e.g. i40e). Starting from a sane value (e.g. 1 minute
to avoid false negatives) seems to be a good first step.

> Userspace controls the timeout by
> using e.g. alarm(2).

Not used in iproute2 after a git grep.

Thanks

>
>
> > >
> > >
> > >
> > > > >
> > > > >
> > > > > > >
> > > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > > >
> > > > > > > > Thanks
> > > > > > >
> > > > > > > Only with a bad device.
> > > > > > >
> > > > > > > > > >>
> > > > > > > > > >>
> > > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > > >>
> > > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > > >> chance to run.
> > > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > > after kick.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > > checked before trying to sleep in the wait_event().
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > > >>
> > > > > > > > > >> The current code did:
> > > > > > > > > >>
> > > > > > > > > >> 1) check for vq->broken
> > > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > > >>
> > > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > > >>
> > > > > > > > > >> Thanks
> > > > > > > > > >
> > > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > > >
> > > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > >>>
> > > > > > >
> > > > >
> > >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
       [not found]   ` <20230129073713.5236-1-hdanton@sina.com>
@ 2023-01-30  3:58     ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2023-01-30  3:58 UTC (permalink / raw)
  To: Hillf Danton; +Cc: mst, netdev, linux-kernel, eperezma

On Sun, Jan 29, 2023 at 3:37 PM Hillf Danton <hdanton@sina.com> wrote:
>
> On Mon, 26 Dec 2022 15:49:07 +0800 Jason Wang <jasowang@redhat.com>
> > @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
> >  {
> >       struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > +     wake_up_interruptible(&vq->wq);
> > +
> >       if (vq->we_own_ring) {
> >               if (vq->packed_ring) {
> >                       vring_free_queue(vq->vq.vdev,
> > @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> >  }
> >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> >
> > +int virtqueue_wait_for_used(struct virtqueue *_vq)
> > +{
> > +     struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +     /* TODO: Tweak the timeout. */
> > +     return wait_event_interruptible_timeout(vq->wq,
> > +            virtqueue_is_broken(_vq) || more_used(vq), HZ);
> > +}
> > +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
>
>         waker           waiter
>         ---             ---
>         vring_del_virtqueue
>           vring_free(_vq);
>             wakeup
>           kfree(vq);
>                         get on CPU a tick later
>                         uaf ?
>

Exactly, this wakeup in vring_free() is not needed. It's up to the
driver to do the proper wake-up, to avoid a race during subsystem
unregistration.

Thanks


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2023-01-30  2:53                                 ` Jason Wang
@ 2023-01-30  5:43                                   ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2023-01-30  5:43 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Mon, Jan 30, 2023 at 10:53:54AM +0800, Jason Wang wrote:
> On Sun, Jan 29, 2023 at 3:30 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> > > On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > > > >>>>>
> > > > > > > > > > >>>>> Same for timeout really.
> > > > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > > > >>>>
> > > > > > > > > > >>>> Thanks
> > > > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > > > >>
> > > > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > > > >>
> > > > > > > > > > >>
> > > > > > > > > > >>> things we should be careful to address then:
> > > > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > > > >>
> > > > > > > > > > >> That's fine, will consider this.
> > > > > > > > >
> > > > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > > > triggering the lockups warning for the known slow path.
> > > > > > > >
> > > > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > > > write new ones :)
> > > > > > >
> > > > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > > > >
> > > > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > > > warning seems sufficient?
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > > > to this driver. The warning can be different.
> > > > >
> > > > > Right, so it looks to me the only possible setup is the
> > > > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > > > second (as softlockup did).
> > > > >
> > > > > And I think it would still make sense to fail, we can start with a
> > > > > very long timeout like 1 minutes and break the device. Does this make
> > > > > sense?
> > > > >
> > > > > Thanks
> > > >
> > > > I'd say we need to make this manageable then.
> > >
> > > Did you mean something like sysfs or module parameters?
> >
> > No I'd say pass it with an ioctl.
> >
> > > > Can't we do it normally
> > > > e.g. react to an interrupt to return to userspace?
> > >
> > > I didn't get the meaning of this. Sorry.
> > >
> > > Thanks
> >
> > Standard way to handle things that can timeout and where userspace
> > did not supply the time is to block until an interrupt
> > then return EINTR.
> 
> Well this seems to be a huge change, ioctl(2) doesn't say it can
> return EINTR now.

the one on fedora 37 does not but it says:
       No single standard.  Arguments, returns, and semantics of ioctl() vary according to the device driver in question (the call  is
       used as a catch-all for operations that don't cleanly fit the UNIX stream I/O model).

so it depends on the device e.g. for a streams device it does:
https://pubs.opengroup.org/onlinepubs/9699919799/functions/ioctl.html
has EINTR.



> Actually, a driver timeout is used by other drivers when using
> controlq/adminq (e.g i40e). Starting from a sane value (e.g 1 minutes
> to avoid false negatives) seems to be a good first step.

Well because it's specific hardware so timeout matches what it can
promise.  virtio spec does not give guarantees.  One issue is with
software implementations. At the moment I can set a breakpoint in qemu
or a vhost-user backend and nothing bad happens — it just continues.


> > Userspace controls the timeout by
> > using e.g. alarm(2).
> 
> Not used in iproute2 after a git grep.
> 
> Thanks

No need for iproute2 to do it; the user can just do it from a shell, or simply press CTRL-C.

> >
> >
> > > >
> > > >
> > > >
> > > > > >
> > > > > >
> > > > > > > >
> > > > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > >
> > > > > > > > Only with a bad device.
> > > > > > > >
> > > > > > > > > > >>
> > > > > > > > > > >>
> > > > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > > > >>
> > > > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > > > >> chance to run.
> > > > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > > > after kick.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > > > check before trying to sleep in the wait_event().
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > > > >>
> > > > > > > > > > >> The current code did:
> > > > > > > > > > >>
> > > > > > > > > > >> 1) check for vq->broken
> > > > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > > > >>
> > > > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > > > >>
> > > > > > > > > > >> Thanks
> > > > > > > > > > >
> > > > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > > > >
> > > > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > >>>
> > > > > > > >
> > > > > >
> > > >
> >

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2023-01-30  5:43                                   ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2023-01-30  5:43 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Mon, Jan 30, 2023 at 10:53:54AM +0800, Jason Wang wrote:
> On Sun, Jan 29, 2023 at 3:30 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> > > On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > > > >>>>>
> > > > > > > > > > >>>>> Same for timeout really.
> > > > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > > > >>>>
> > > > > > > > > > >>>> Thanks
> > > > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > > > >>
> > > > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > > > >>
> > > > > > > > > > >>
> > > > > > > > > > >>> things we should be careful to address then:
> > > > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > > > >>
> > > > > > > > > > >> That's fine, will consider this.
> > > > > > > > >
> > > > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > > > triggering the lockups warning for the known slow path.
> > > > > > > >
> > > > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > > > write new ones :)
> > > > > > >
> > > > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > > > >
> > > > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > > > warning seems sufficient?
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > > > to this driver. The warning can be different.
> > > > >
> > > > > Right, so it looks to me the only possible setup is the
> > > > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > > > second (as softlockup did).
> > > > >
> > > > > And I think it would still make sense to fail, we can start with a
> > > > > very long timeout like 1 minutes and break the device. Does this make
> > > > > sense?
> > > > >
> > > > > Thanks
> > > >
> > > > I'd say we need to make this manageable then.
> > >
> > > Did you mean something like sysfs or module parameters?
> >
> > No I'd say pass it with an ioctl.
> >
> > > > Can't we do it normally
> > > > e.g. react to an interrupt to return to userspace?
> > >
> > > I didn't get the meaning of this. Sorry.
> > >
> > > Thanks
> >
> > Standard way to handle things that can timeout and where userspace
> > did not supply the time is to block until an interrupt
> > then return EINTR.
> 
> Well this seems to be a huge change, ioctl(2) doesn't say it can
> return EINTR now.

the one on fedora 37 does not but it says:
       No single standard.  Arguments, returns, and semantics of ioctl() vary according to the device driver in question (the call  is
       used as a catch-all for operations that don't cleanly fit the UNIX stream I/O model).

so it depends on the device e.g. for a streams device it does:
https://pubs.opengroup.org/onlinepubs/9699919799/functions/ioctl.html
has EINTR.



> Actually, a driver timeout is used by other drivers when using
> controlq/adminq (e.g i40e). Starting from a sane value (e.g 1 minutes
> to avoid false negatives) seems to be a good first step.

Well because it's specific hardware so timeout matches what it can
promise.  virtio spec does not give guarantees.  One issue is with
software implementations. At the moment I can set a breakpoint in qemu
or a vhost-user backend and nothing bad happens — it just continues.


> > Userspace controls the timeout by
> > using e.g. alarm(2).
> 
> Not used in iproute2 after a git grep.
> 
> Thanks

No need for iproute2 to do it; the user can just do it from a shell, or simply press CTRL-C.

> >
> >
> > > >
> > > >
> > > >
> > > > > >
> > > > > >
> > > > > > > >
> > > > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > >
> > > > > > > > Only with a bad device.
> > > > > > > >
> > > > > > > > > > >>
> > > > > > > > > > >>
> > > > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > > > >>
> > > > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > > > >> chance to run.
> > > > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > > > after kick.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > > > check before trying to sleep in the wait_event().
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > > > >>
> > > > > > > > > > >> The current code did:
> > > > > > > > > > >>
> > > > > > > > > > >> 1) check for vq->broken
> > > > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > > > >>
> > > > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > > > >>
> > > > > > > > > > >> Thanks
> > > > > > > > > > >
> > > > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > > > >
> > > > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > >>>
> > > > > > > >
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2023-01-30  5:43                                   ` Michael S. Tsirkin
@ 2023-01-30  7:44                                     ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2023-01-30  7:44 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Mon, Jan 30, 2023 at 1:43 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Jan 30, 2023 at 10:53:54AM +0800, Jason Wang wrote:
> > On Sun, Jan 29, 2023 at 3:30 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> > > > On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > > > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > >
> > > > > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > > > > >>>>>
> > > > > > > > > > > >>>>> Same for timeout really.
> > > > > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > > > > >>>>
> > > > > > > > > > > >>>> Thanks
> > > > > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > > > > >>
> > > > > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > > > > >>
> > > > > > > > > > > >>
> > > > > > > > > > > >>> things we should be careful to address then:
> > > > > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > > > > >>
> > > > > > > > > > > >> That's fine, will consider this.
> > > > > > > > > >
> > > > > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > > > > triggering the lockups warning for the known slow path.
> > > > > > > > >
> > > > > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > > > > write new ones :)
> > > > > > > >
> > > > > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > > > > >
> > > > > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > > > > warning seems sufficient?
> > > > > > > >
> > > > > > > > Thanks
> > > > > > >
> > > > > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > > > > to this driver. The warning can be different.
> > > > > >
> > > > > > Right, so it looks to me the only possible setup is the
> > > > > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > > > > second (as softlockup did).
> > > > > >
> > > > > > And I think it would still make sense to fail, we can start with a
> > > > > > very long timeout like 1 minutes and break the device. Does this make
> > > > > > sense?
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > I'd say we need to make this manageable then.
> > > >
> > > > Did you mean something like sysfs or module parameters?
> > >
> > > No I'd say pass it with an ioctl.
> > >
> > > > > Can't we do it normally
> > > > > e.g. react to an interrupt to return to userspace?
> > > >
> > > > I didn't get the meaning of this. Sorry.
> > > >
> > > > Thanks
> > >
> > > Standard way to handle things that can timeout and where userspace
> > > did not supply the time is to block until an interrupt
> > > then return EINTR.
> >
> > Well this seems to be a huge change, ioctl(2) doesn't say it can
> > return EINTR now.
>
> the one on fedora 37 does not but it says:
>        No single standard.  Arguments, returns, and semantics of ioctl() vary according to the device driver in question (the call  is
>        used as a catch-all for operations that don't cleanly fit the UNIX stream I/O model).
>
> so it depends on the device e.g. for a streams device it does:
> https://pubs.opengroup.org/onlinepubs/9699919799/functions/ioctl.html
> has EINTR.

Ok, I saw signal(7) also mention about EINTR for ioctl(2):

"""
       If  a  blocked call to one of the following interfaces is
interrupted by a signal handler, then the call is automatically
restarted after the signal handler re‐
       turns if the SA_RESTART flag was used; otherwise the call fails
with the error EINTR:

       * read(2), readv(2), write(2), writev(2), and ioctl(2) calls on
"slow" devices.  A "slow" device is one where the I/O call may block
for an indefinite time, for
         example,  a  terminal,  pipe, or socket.  If an I/O call on a
slow device has already transferred some data by the time it is
interrupted by a signal handler,
         then the call will return a success status (normally, the
number of bytes transferred).  Note that a (local) disk is not a slow
device according to this defi‐
         nition; I/O operations on disk devices are not interrupted by signals.
"""

>
>
>
> > Actually, a driver timeout is used by other drivers when using
> > controlq/adminq (e.g i40e). Starting from a sane value (e.g 1 minutes
> > to avoid false negatives) seems to be a good first step.
>
> Well because it's specific hardware so timeout matches what it can
> promise.  virtio spec does not give guarantees.  One issue is with
> software implementations. At the moment I can set a breakpoint in qemu
> or a vhost-user backend and nothing bad happens — it just continues.

Yes, but it should be no different from using kgdb to debug i40e drivers.

>
>
> > > Userspace controls the timeout by
> > > using e.g. alarm(2).
> >
> > Not used in iproute2 after a git grep.
> >
> > Thanks
>
> No need for iproute2 to do it user can just do it from shell. Or user can just press CTRL-C.

Yes, but iproute2 needs to deal with EINTR; that is the challenging
part. If we simply return an error, the next cvq command might get
confused.

Thanks

>
> > >
> > >
> > > > >
> > > > >
> > > > >
> > > > > > >
> > > > > > >
> > > > > > > > >
> > > > > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > > Only with a bad device.
> > > > > > > > >
> > > > > > > > > > > >>
> > > > > > > > > > > >>
> > > > > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > > > > >>
> > > > > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > > > > >> chance to run.
> > > > > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > > > > after kick.
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > > > > check before trying to sleep in the wait_event().
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > > > > >>
> > > > > > > > > > > >> The current code did:
> > > > > > > > > > > >>
> > > > > > > > > > > >> 1) check for vq->broken
> > > > > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > > > > >>
> > > > > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > > > > >>
> > > > > > > > > > > >> Thanks
> > > > > > > > > > > >
> > > > > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > > > > >
> > > > > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > > > > >
> > > > > > > > > > > Thanks
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > >>>
> > > > > > > > >
> > > > > > >
> > > > >
> > >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2023-01-30  7:44                                     ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2023-01-30  7:44 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Mon, Jan 30, 2023 at 1:43 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Jan 30, 2023 at 10:53:54AM +0800, Jason Wang wrote:
> > On Sun, Jan 29, 2023 at 3:30 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> > > > On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > > > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > >
> > > > > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > > > > >>>>>
> > > > > > > > > > > >>>>> Same for timeout really.
> > > > > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > > > > >>>>
> > > > > > > > > > > >>>> Thanks
> > > > > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > > > > >>
> > > > > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > > > > >>
> > > > > > > > > > > >>
> > > > > > > > > > > >>> things we should be careful to address then:
> > > > > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > > > > >>
> > > > > > > > > > > >> That's fine, will consider this.
> > > > > > > > > >
> > > > > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > > > > triggering the lockups warning for the known slow path.
> > > > > > > > >
> > > > > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > > > > write new ones :)
> > > > > > > >
> > > > > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > > > > >
> > > > > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > > > > warning seems sufficient?
> > > > > > > >
> > > > > > > > Thanks
> > > > > > >
> > > > > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > > > > to this driver. The warning can be different.
> > > > > >
> > > > > > Right, so it looks to me the only possible setup is the
> > > > > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > > > > second (as softlockup did).
> > > > > >
> > > > > > And I think it would still make sense to fail, we can start with a
> > > > > > very long timeout like 1 minute and break the device. Does this make
> > > > > > sense?
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > I'd say we need to make this manageable then.
> > > >
> > > > Did you mean something like sysfs or module parameters?
> > >
> > > No I'd say pass it with an ioctl.
> > >
> > > > > Can't we do it normally
> > > > > e.g. react to an interrupt to return to userspace?
> > > >
> > > > I didn't get the meaning of this. Sorry.
> > > >
> > > > Thanks
> > >
> > > Standard way to handle things that can timeout and where userspace
> > > did not supply the time is to block until an interrupt
> > > then return EINTR.
> >
> > Well this seems to be a huge change, ioctl(2) doesn't say it can
> > return EINTR now.
>
> the one on fedora 37 does not but it says:
>        No single standard.  Arguments, returns, and semantics of ioctl() vary according to the device driver in question (the call  is
>        used as a catch-all for operations that don't cleanly fit the UNIX stream I/O model).
>
> so it depends on the device e.g. for a streams device it does:
> https://pubs.opengroup.org/onlinepubs/9699919799/functions/ioctl.html
> has EINTR.

Ok, I saw signal(7) also mention about EINTR for ioctl(2):

"""
       If  a  blocked call to one of the following interfaces is
interrupted by a signal handler, then the call is automatically
restarted after the signal handler re‐
       turns if the SA_RESTART flag was used; otherwise the call fails
with the error EINTR:

       * read(2), readv(2), write(2), writev(2), and ioctl(2) calls on
"slow" devices.  A "slow" device is one where the I/O call may block
for an indefinite time, for
         example,  a  terminal,  pipe, or socket.  If an I/O call on a
slow device has already transferred some data by the time it is
interrupted by a signal handler,
         then the call will return a success status (normally, the
number of bytes transferred).  Note that a (local) disk is not a slow
device according to this defi‐
         nition; I/O operations on disk devices are not interrupted by signals.
"""

>
>
>
> > Actually, a driver timeout is used by other drivers when using
> > controlq/adminq (e.g i40e). Starting from a sane value (e.g 1 minute
> > to avoid false negatives) seems to be a good first step.
>
> Well because it's specific hardware so timeout matches what it can
> promise.  virtio spec does not give guarantees.  One issue is with
> software implementations. At the moment I can set a breakpoint in qemu
> or vhost user backend and nothing bad happens, it just continues.

Yes but it should be no difference from using a kgdb to debug i40e drivers.

>
>
> > > Userspace controls the timeout by
> > > using e.g. alarm(2).
> >
> > Not used in iproute2 after a git grep.
> >
> > Thanks
>
> No need for iproute2 to do it user can just do it from shell. Or user can just press CTRL-C.

Yes, but iproute2 needs to deal with EINTR, that is the challenge
part, if we simply return an error, the next cvq command might get
confused.

Thanks

>
> > >
> > >
> > > > >
> > > > >
> > > > >
> > > > > > >
> > > > > > >
> > > > > > > > >
> > > > > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > > Only with a bad device.
> > > > > > > > >
> > > > > > > > > > > >>
> > > > > > > > > > > >>
> > > > > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > > > > >>
> > > > > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > > > > >> chance to run.
> > > > > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > > > > after kick.
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > > > > check before trying to sleep in the wait_event().
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > > > > >>
> > > > > > > > > > > >> The current code did:
> > > > > > > > > > > >>
> > > > > > > > > > > >> 1) check for vq->broken
> > > > > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > > > > >>
> > > > > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > > > > >>
> > > > > > > > > > > >> Thanks
> > > > > > > > > > > >
> > > > > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > > > > >
> > > > > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > > > > >
> > > > > > > > > > > Thanks
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > >>>
> > > > > > > > >
> > > > > > >
> > > > >
> > >
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2023-01-30  7:44                                     ` Jason Wang
@ 2023-01-30 11:18                                       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2023-01-30 11:18 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Mon, Jan 30, 2023 at 03:44:24PM +0800, Jason Wang wrote:
> On Mon, Jan 30, 2023 at 1:43 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, Jan 30, 2023 at 10:53:54AM +0800, Jason Wang wrote:
> > > On Sun, Jan 29, 2023 at 3:30 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> > > > > On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > > > > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > > > > > >>>>>
> > > > > > > > > > > > >>>>> Same for timeout really.
> > > > > > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > > > > > >>>>
> > > > > > > > > > > > >>>> Thanks
> > > > > > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > > > > > >>
> > > > > > > > > > > > >>
> > > > > > > > > > > > >>> things we should be careful to address then:
> > > > > > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> That's fine, will consider this.
> > > > > > > > > > >
> > > > > > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > > > > > triggering the lockups warning for the known slow path.
> > > > > > > > > >
> > > > > > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > > > > > write new ones :)
> > > > > > > > >
> > > > > > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > > > > > >
> > > > > > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > > > > > warning seems sufficient?
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > >
> > > > > > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > > > > > to this driver. The warning can be different.
> > > > > > >
> > > > > > > Right, so it looks to me the only possible setup is the
> > > > > > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > > > > > second (as softlockup did).
> > > > > > >
> > > > > > > And I think it would still make sense to fail, we can start with a
> > > > > > > very long timeout like 1 minute and break the device. Does this make
> > > > > > > sense?
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > I'd say we need to make this manageable then.
> > > > >
> > > > > Did you mean something like sysfs or module parameters?
> > > >
> > > > No I'd say pass it with an ioctl.
> > > >
> > > > > > Can't we do it normally
> > > > > > e.g. react to an interrupt to return to userspace?
> > > > >
> > > > > I didn't get the meaning of this. Sorry.
> > > > >
> > > > > Thanks
> > > >
> > > > Standard way to handle things that can timeout and where userspace
> > > > did not supply the time is to block until an interrupt
> > > > then return EINTR.
> > >
> > > Well this seems to be a huge change, ioctl(2) doesn't say it can
> > > return EINTR now.
> >
> > the one on fedora 37 does not but it says:
> >        No single standard.  Arguments, returns, and semantics of ioctl() vary according to the device driver in question (the call  is
> >        used as a catch-all for operations that don't cleanly fit the UNIX stream I/O model).
> >
> > so it depends on the device e.g. for a streams device it does:
> > https://pubs.opengroup.org/onlinepubs/9699919799/functions/ioctl.html
> > has EINTR.
> 
> Ok, I saw signal(7) also mention about EINTR for ioctl(2):
> 
> """
>        If  a  blocked call to one of the following interfaces is
> interrupted by a signal handler, then the call is automatically
> restarted after the signal handler re‐
>        turns if the SA_RESTART flag was used; otherwise the call fails
> with the error EINTR:
> 
>        * read(2), readv(2), write(2), writev(2), and ioctl(2) calls on
> "slow" devices.  A "slow" device is one where the I/O call may block
> for an indefinite time, for
>          example,  a  terminal,  pipe, or socket.  If an I/O call on a
> slow device has already transferred some data by the time it is
> interrupted by a signal handler,
>          then the call will return a success status (normally, the
> number of bytes transferred).  Note that a (local) disk is not a slow
> device according to this defi‐
>          nition; I/O operations on disk devices are not interrupted by signals.
> """


And note that if you interrupt then you don't know whether ioctl
changed device state or not generally.
> >
> >
> >
> > > Actually, a driver timeout is used by other drivers when using
> > > controlq/adminq (e.g i40e). Starting from a sane value (e.g 1 minute
> > > to avoid false negatives) seems to be a good first step.
> >
> > Well because it's specific hardware so timeout matches what it can
> > promise.  virtio spec does not give guarantees.  One issue is with
> > software implementations. At the moment I can set a breakpoint in qemu
> > or vhost user backend and nothing bad happens, it just continues.
> 
> Yes but it should be no difference from using a kgdb to debug i40e drivers.

Except one of the reasons people prefer programming in userspace is
because debugging is so much less painful. Someone using kgdb
knows what driver is doing and can work around that.

> >
> >
> > > > Userspace controls the timeout by
> > > > using e.g. alarm(2).
> > >
> > > Not used in iproute2 after a git grep.
> > >
> > > Thanks
> >
> > No need for iproute2 to do it user can just do it from shell. Or user can just press CTRL-C.
> 
> Yes, but iproute2 needs to deal with EINTR, that is the challenge
> part, if we simply return an error, the next cvq command might get
> confused.
> 
> Thanks

You mean this:
	start command
	interrupt
	start next command

?

next command is confused?
I think if you try a new command before the previous
one finishes, it's ok to just return EBUSY.

> >
> > > >
> > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > > > > > >
> > > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > > Only with a bad device.
> > > > > > > > > >
> > > > > > > > > > > > >>
> > > > > > > > > > > > >>
> > > > > > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > > > > > >> chance to run.
> > > > > > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > > > > > after kick.
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > > > > > check before trying to sleep in the wait_event().
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> The current code did:
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> 1) check for vq->broken
> > > > > > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> Thanks
> > > > > > > > > > > > >
> > > > > > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > > > > > >
> > > > > > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > > > > > >
> > > > > > > > > > > > Thanks
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > >>>
> > > > > > > > > >
> > > > > > > >
> > > > > >
> > > >
> >

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2023-01-30 11:18                                       ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2023-01-30 11:18 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Mon, Jan 30, 2023 at 03:44:24PM +0800, Jason Wang wrote:
> On Mon, Jan 30, 2023 at 1:43 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, Jan 30, 2023 at 10:53:54AM +0800, Jason Wang wrote:
> > > On Sun, Jan 29, 2023 at 3:30 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> > > > > On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > > > > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > > > > > >>>>>
> > > > > > > > > > > > >>>>> Same for timeout really.
> > > > > > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > > > > > >>>>
> > > > > > > > > > > > >>>> Thanks
> > > > > > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > > > > > >>
> > > > > > > > > > > > >>
> > > > > > > > > > > > >>> things we should be careful to address then:
> > > > > > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> That's fine, will consider this.
> > > > > > > > > > >
> > > > > > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > > > > > triggering the lockups warning for the known slow path.
> > > > > > > > > >
> > > > > > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > > > > > write new ones :)
> > > > > > > > >
> > > > > > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > > > > > >
> > > > > > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > > > > > warning seems sufficient?
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > >
> > > > > > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > > > > > to this driver. The warning can be different.
> > > > > > >
> > > > > > > Right, so it looks to me the only possible setup is the
> > > > > > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > > > > > second (as softlockup did).
> > > > > > >
> > > > > > > And I think it would still make sense to fail, we can start with a
> > > > > > > very long timeout like 1 minute and break the device. Does this make
> > > > > > > sense?
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > I'd say we need to make this manageable then.
> > > > >
> > > > > Did you mean something like sysfs or module parameters?
> > > >
> > > > No I'd say pass it with an ioctl.
> > > >
> > > > > > Can't we do it normally
> > > > > > e.g. react to an interrupt to return to userspace?
> > > > >
> > > > > I didn't get the meaning of this. Sorry.
> > > > >
> > > > > Thanks
> > > >
> > > > Standard way to handle things that can timeout and where userspace
> > > > did not supply the time is to block until an interrupt
> > > > then return EINTR.
> > >
> > > Well this seems to be a huge change, ioctl(2) doesn't say it can
> > > return EINTR now.
> >
> > the one on fedora 37 does not but it says:
> >        No single standard.  Arguments, returns, and semantics of ioctl() vary according to the device driver in question (the call  is
> >        used as a catch-all for operations that don't cleanly fit the UNIX stream I/O model).
> >
> > so it depends on the device e.g. for a streams device it does:
> > https://pubs.opengroup.org/onlinepubs/9699919799/functions/ioctl.html
> > has EINTR.
> 
> Ok, I saw signal(7) also mention about EINTR for ioctl(2):
> 
> """
>        If  a  blocked call to one of the following interfaces is
> interrupted by a signal handler, then the call is automatically
> restarted after the signal handler re‐
>        turns if the SA_RESTART flag was used; otherwise the call fails
> with the error EINTR:
> 
>        * read(2), readv(2), write(2), writev(2), and ioctl(2) calls on
> "slow" devices.  A "slow" device is one where the I/O call may block
> for an indefinite time, for
>          example,  a  terminal,  pipe, or socket.  If an I/O call on a
> slow device has already transferred some data by the time it is
> interrupted by a signal handler,
>          then the call will return a success status (normally, the
> number of bytes transferred).  Note that a (local) disk is not a slow
> device according to this defi‐
>          nition; I/O operations on disk devices are not interrupted by signals.
> """


And note that if you interrupt then you don't know whether ioctl
changed device state or not generally.
> >
> >
> >
> > > Actually, a driver timeout is used by other drivers when using
> > > controlq/adminq (e.g i40e). Starting from a sane value (e.g 1 minute
> > > to avoid false negatives) seems to be a good first step.
> >
> > Well because it's specific hardware so timeout matches what it can
> > promise.  virtio spec does not give guarantees.  One issue is with
> > software implementations. At the moment I can set a breakpoint in qemu
> > > or vhost user backend and nothing bad happens, it just continues.
> 
> Yes but it should be no difference from using a kgdb to debug i40e drivers.

Except one of the reasons people prefer programming in userspace is
because debugging is so much less painful. Someone using kgdb
knows what driver is doing and can work around that.

> >
> >
> > > > Userspace controls the timeout by
> > > > using e.g. alarm(2).
> > >
> > > Not used in iproute2 after a git grep.
> > >
> > > Thanks
> >
> > No need for iproute2 to do it user can just do it from shell. Or user can just press CTRL-C.
> 
> Yes, but iproute2 needs to deal with EINTR, that is the challenge
> part, if we simply return an error, the next cvq command might get
> confused.
> 
> Thanks

You mean this:
	start command
	interrupt
	start next command

?

next command is confused?
I think if you try a new command before the previous
one finishes, it's ok to just return EBUSY.

> >
> > > >
> > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > > > > > >
> > > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > > Only with a bad device.
> > > > > > > > > >
> > > > > > > > > > > > >>
> > > > > > > > > > > > >>
> > > > > > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > > > > > >> chance to run.
> > > > > > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > > > > > after kick.
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > > > > > check before trying to sleep in the wait_event().
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > >>> 3- suprise removal. need to wake up thread in some way. what about
> > > > > > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> The current code did:
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> 1) check for vq->broken
> > > > > > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > > > > > >>
> > > > > > > > > > > > >> Thanks
> > > > > > > > > > > > >
> > > > > > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > > > > > >
> > > > > > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > > > > > >
> > > > > > > > > > > > Thanks
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > >>>
> > > > > > > > > >
> > > > > > > >
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2023-01-30 11:18                                       ` Michael S. Tsirkin
@ 2023-01-31  3:24                                         ` Jason Wang
  -1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2023-01-31  3:24 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Mon, Jan 30, 2023 at 7:18 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Jan 30, 2023 at 03:44:24PM +0800, Jason Wang wrote:
> > On Mon, Jan 30, 2023 at 1:43 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Mon, Jan 30, 2023 at 10:53:54AM +0800, Jason Wang wrote:
> > > > On Sun, Jan 29, 2023 at 3:30 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> > > > > > On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > >
> > > > > > > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > > > > > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > > > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > > > > > > >>>>>
> > > > > > > > > > > > > >>>>> Same for timeout really.
> > > > > > > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > > > > > > >>>>
> > > > > > > > > > > > > >>>> Thanks
> > > > > > > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >>> things we should be careful to address then:
> > > > > > > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> That's fine, will consider this.
> > > > > > > > > > > >
> > > > > > > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > > > > > > triggering the lockups warning for the known slow path.
> > > > > > > > > > >
> > > > > > > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > > > > > > write new ones :)
> > > > > > > > > >
> > > > > > > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > > > > > > >
> > > > > > > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > > > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > > > > > > warning seems sufficient?
> > > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > > > > > > to this driver. The warning can be different.
> > > > > > > >
> > > > > > > > Right, so it looks to me the only possible setup is the
> > > > > > > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > > > > > > second (as softlockup did).
> > > > > > > >
> > > > > > > > And I think it would still make sense to fail, we can start with a
> > > > > > > > very long timeout like 1 minute and break the device. Does this make
> > > > > > > > sense?
> > > > > > > >
> > > > > > > > Thanks
> > > > > > >
> > > > > > > I'd say we need to make this manageable then.
> > > > > >
> > > > > > Did you mean something like sysfs or module parameters?
> > > > >
> > > > > No I'd say pass it with an ioctl.
> > > > >
> > > > > > > Can't we do it normally
> > > > > > > e.g. react to an interrupt to return to userspace?
> > > > > >
> > > > > > I didn't get the meaning of this. Sorry.
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > Standard way to handle things that can timeout and where userspace
> > > > > did not supply the time is to block until an interrupt
> > > > > then return EINTR.
> > > >
> > > > Well this seems to be a huge change, ioctl(2) doesn't say it can
> > > > return EINTR now.
> > >
> > > the one on fedora 37 does not but it says:
> > >        No single standard.  Arguments, returns, and semantics of ioctl() vary according to the device driver in question (the call  is
> > >        used as a catch-all for operations that don't cleanly fit the UNIX stream I/O model).
> > >
> > > so it depends on the device e.g. for a streams device it does:
> > > https://pubs.opengroup.org/onlinepubs/9699919799/functions/ioctl.html
> > > has EINTR.
> >
> > Ok, I saw signal(7) also mention about EINTR for ioctl(2):
> >
> > """
> >        If  a  blocked call to one of the following interfaces is
> > interrupted by a signal handler, then the call is automatically
> > restarted after the signal handler re‐
> >        turns if the SA_RESTART flag was used; otherwise the call fails
> > with the error EINTR:
> >
> >        * read(2), readv(2), write(2), writev(2), and ioctl(2) calls on
> > "slow" devices.  A "slow" device is one where the I/O call may block
> > for an indefinite time, for
> >          example,  a  terminal,  pipe, or socket.  If an I/O call on a
> > slow device has already transferred some data by the time it is
> > interrupted by a signal handler,
> >          then the call will return a success status (normally, the
> > number of bytes transferred).  Note that a (local) disk is not a slow
> > device according to this defi‐
> >          nition; I/O operations on disk devices are not interrupted by signals.
> > """
>
>
> And note that if you interrupt then you don't know whether ioctl
> changed device state or not generally.

Yes.

> > >
> > >
> > >
> > > > Actually, a driver timeout is used by other drivers when using
> > > > controlq/adminq (e.g i40e). Starting from a sane value (e.g 1 minute
> > > > to avoid false negatives) seems to be a good first step.
> > >
> > > Well because it's specific hardware so timeout matches what it can
> > > promise.  virtio spec does not give guarantees.  One issue is with
> > > software implementations. At the moment I can set a breakpoint in qemu
> > > or vhost user backend and nothing bad happens, it just continues.
> >
> > Yes but it should be no difference from using a kgdb to debug i40e drivers.
>
> Except one of the reasons people prefer programming in userspace is
> because debugging is so much less painful. Someone using kgdb
> knows what driver is doing and can work around that.

Ok.

>
> > >
> > >
> > > > > Userspace controls the timeout by
> > > > > using e.g. alarm(2).
> > > >
> > > > Not used in iproute2 after a git grep.
> > > >
> > > > Thanks
> > >
> > > No need for iproute2 to do it user can just do it from shell. Or user can just press CTRL-C.
> >
> > Yes, but iproute2 needs to deal with EINTR, that is the challenge
> > part, if we simply return an error, the next cvq command might get
> > confused.
> >
> > Thanks
>
> You mean this:
>         start command
>         interrupt
>         start next command
>
> ?
>
> next command is confused?
> I think if you try a new command until previous
> one finished it's ok to just return EBUSY.

That would be fine.

And we go back to somehow the idea here:

https://lore.kernel.org/all/CACGkMEvQwhOhgGW6F22+3vmR4AW90qYXF+ZO6BQZguUF2xt2SA@mail.gmail.com/T/#m2da63932eae775d7d05d93d44c2f1d115ffbcefe

Will try to do that in the next version.

Thanks

>
> > >
> > > > >
> > > > >
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > > > > > > >
> > > > > > > > > > > > Thanks
> > > > > > > > > > >
> > > > > > > > > > > Only with a bad device.
> > > > > > > > > > >
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > > > > > > >> chance to run.
> > > > > > > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > > > > > > after kick.
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > > > > > > checked before trying to sleep in the wait_event().
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> The current code did:
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> 1) check for vq->broken
> > > > > > > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> Thanks
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > > > > > > >
> > > > > > > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Thanks
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >>>
> > > > > > > > > > >
> > > > > > > > >
> > > > > > >
> > > > >
> > >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2023-01-31  3:24                                         ` Jason Wang
  0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2023-01-31  3:24 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Mon, Jan 30, 2023 at 7:18 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Jan 30, 2023 at 03:44:24PM +0800, Jason Wang wrote:
> > On Mon, Jan 30, 2023 at 1:43 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Mon, Jan 30, 2023 at 10:53:54AM +0800, Jason Wang wrote:
> > > > On Sun, Jan 29, 2023 at 3:30 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> > > > > > On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > >
> > > > > > > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > > > > > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > > > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > > > > > > >>>>>
> > > > > > > > > > > > > >>>>> Same for timeout really.
> > > > > > > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > > > > > > >>>>
> > > > > > > > > > > > > >>>> Thanks
> > > > > > > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >>> things we should be careful to address then:
> > > > > > > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> That's fine, will consider this.
> > > > > > > > > > > >
> > > > > > > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > > > > > > triggering the lockups warning for the known slow path.
> > > > > > > > > > >
> > > > > > > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > > > > > > write new ones :)
> > > > > > > > > >
> > > > > > > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > > > > > > >
> > > > > > > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > > > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > > > > > > warning seems sufficient?
> > > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > > > > > > to this driver. The warning can be different.
> > > > > > > >
> > > > > > > > Right, so it looks to me the only possible setup is the
> > > > > > > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > > > > > > second (as softlockup did).
> > > > > > > >
> > > > > > > > And I think it would still make sense to fail, we can start with a
> > > > > > > > very long timeout like 1 minute and break the device. Does this make
> > > > > > > > sense?
> > > > > > > >
> > > > > > > > Thanks
> > > > > > >
> > > > > > > I'd say we need to make this manageable then.
> > > > > >
> > > > > > Did you mean something like sysfs or module parameters?
> > > > >
> > > > > No I'd say pass it with an ioctl.
> > > > >
> > > > > > > Can't we do it normally
> > > > > > > e.g. react to an interrupt to return to userspace?
> > > > > >
> > > > > > I didn't get the meaning of this. Sorry.
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > Standard way to handle things that can timeout and where userspace
> > > > > did not supply the time is to block until an interrupt
> > > > > then return EINTR.
> > > >
> > > > Well this seems to be a huge change, ioctl(2) doesn't say it can
> > > > return EINTR now.
> > >
> > > the one on fedora 37 does not but it says:
> > >        No single standard.  Arguments, returns, and semantics of ioctl() vary according to the device driver in question (the call  is
> > >        used as a catch-all for operations that don't cleanly fit the UNIX stream I/O model).
> > >
> > > so it depends on the device e.g. for a streams device it does:
> > > https://pubs.opengroup.org/onlinepubs/9699919799/functions/ioctl.html
> > > has EINTR.
> >
> > Ok, I saw signal(7) also mention about EINTR for ioctl(2):
> >
> > """
> >        If  a  blocked call to one of the following interfaces is
> > interrupted by a signal handler, then the call is automatically
> > restarted after the signal handler re‐
> >        turns if the SA_RESTART flag was used; otherwise the call fails
> > with the error EINTR:
> >
> >        * read(2), readv(2), write(2), writev(2), and ioctl(2) calls on
> > "slow" devices.  A "slow" device is one where the I/O call may block
> > for an indefinite time, for
> >          example,  a  terminal,  pipe, or socket.  If an I/O call on a
> > slow device has already transferred some data by the time it is
> > interrupted by a signal handler,
> >          then the call will return a success status (normally, the
> > number of bytes transferred).  Note that a (local) disk is not a slow
> > device according to this defi‐
> >          nition; I/O operations on disk devices are not interrupted by signals.
> > """
>
>
> And note that if you interrupt then you don't know whether ioctl
> changed device state or not generally.

Yes.

> > >
> > >
> > >
> > > > Actually, a driver timeout is used by other drivers when using
> > > > controlq/adminq (e.g i40e). Starting from a sane value (e.g 1 minute
> > > > to avoid false negatives) seems to be a good first step.
> > >
> > > Well because it's specific hardware so timeout matches what it can
> > > promise.  virtio spec does not give guarantees.  One issue is with
> > > software implementations. At the moment I can set a breakpoint in qemu
> > > or vhost user backend and nothing bad happens, it just continues.
> >
> > Yes but it should be no difference from using a kgdb to debug i40e drivers.
>
> Except one of the reasons people prefer programming in userspace is
> because debugging is so much less painful. Someone using kgdb
> knows what driver is doing and can work around that.

Ok.

>
> > >
> > >
> > > > > Userspace controls the timeout by
> > > > > using e.g. alarm(2).
> > > >
> > > > Not used in iproute2 after a git grep.
> > > >
> > > > Thanks
> > >
> > > No need for iproute2 to do it user can just do it from shell. Or user can just press CTRL-C.
> >
> > Yes, but iproute2 needs to deal with EINTR, that is the challenge
> > part, if we simply return an error, the next cvq command might get
> > confused.
> >
> > Thanks
>
> You mean this:
>         start command
>         interrupt
>         start next command
>
> ?
>
> next command is confused?
> I think if you try a new command until previous
> one finished it's ok to just return EBUSY.

That would be fine.

And we go back to somehow the idea here:

https://lore.kernel.org/all/CACGkMEvQwhOhgGW6F22+3vmR4AW90qYXF+ZO6BQZguUF2xt2SA@mail.gmail.com/T/#m2da63932eae775d7d05d93d44c2f1d115ffbcefe

Will try to do that in the next version.

Thanks

>
> > >
> > > > >
> > > > >
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > > > > > > >
> > > > > > > > > > > > Thanks
> > > > > > > > > > >
> > > > > > > > > > > Only with a bad device.
> > > > > > > > > > >
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > > > > > > >> chance to run.
> > > > > > > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > > > > > > after kick.
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > > > > > > checked before trying to sleep in the wait_event().
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> The current code did:
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> 1) check for vq->broken
> > > > > > > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > > > > > > >>
> > > > > > > > > > > > > >> Thanks
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > > > > > > >
> > > > > > > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Thanks
> > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >>>
> > > > > > > > > > >
> > > > > > > > >
> > > > > > >
> > > > >
> > >
>


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
  2023-01-31  3:24                                         ` Jason Wang
@ 2023-01-31  7:32                                           ` Michael S. Tsirkin
  -1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2023-01-31  7:32 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, virtualization, eperezma, edumazet,
	maxime.coquelin, kuba, pabeni, davem

On Tue, Jan 31, 2023 at 11:24:52AM +0800, Jason Wang wrote:
> On Mon, Jan 30, 2023 at 7:18 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, Jan 30, 2023 at 03:44:24PM +0800, Jason Wang wrote:
> > > On Mon, Jan 30, 2023 at 1:43 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Mon, Jan 30, 2023 at 10:53:54AM +0800, Jason Wang wrote:
> > > > > On Sun, Jan 29, 2023 at 3:30 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> > > > > > > On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > > > > > > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > > > > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > > > > > > > >>>>>
> > > > > > > > > > > > > > >>>>> Same for timeout really.
> > > > > > > > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > > > > > > > >>>>
> > > > > > > > > > > > > > >>>> Thanks
> > > > > > > > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >>> things we should be careful to address then:
> > > > > > > > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> That's fine, will consider this.
> > > > > > > > > > > > >
> > > > > > > > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > > > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > > > > > > > triggering the lockups warning for the known slow path.
> > > > > > > > > > > >
> > > > > > > > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > > > > > > > write new ones :)
> > > > > > > > > > >
> > > > > > > > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > > > > > > > >
> > > > > > > > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > > > > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > > > > > > > warning seems sufficient?
> > > > > > > > > > >
> > > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > > > > > > > to this driver. The warning can be different.
> > > > > > > > >
> > > > > > > > > Right, so it looks to me the only possible setup is the
> > > > > > > > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > > > > > > > second (as softlockup did).
> > > > > > > > >
> > > > > > > > > And I think it would still make sense to fail, we can start with a
> > > > > > > > > very long timeout like 1 minute and break the device. Does this make
> > > > > > > > > sense?
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > >
> > > > > > > > I'd say we need to make this manageable then.
> > > > > > >
> > > > > > > Did you mean something like sysfs or module parameters?
> > > > > >
> > > > > > No I'd say pass it with an ioctl.
> > > > > >
> > > > > > > > Can't we do it normally
> > > > > > > > e.g. react to an interrupt to return to userspace?
> > > > > > >
> > > > > > > I didn't get the meaning of this. Sorry.
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > Standard way to handle things that can timeout and where userspace
> > > > > > did not supply the time is to block until an interrupt
> > > > > > then return EINTR.
> > > > >
> > > > > Well this seems to be a huge change, ioctl(2) doesn't say it can
> > > > > return EINTR now.
> > > >
> > > > the one on fedora 37 does not but it says:
> > > >        No single standard.  Arguments, returns, and semantics of ioctl() vary according to the device driver in question (the call  is
> > > >        used as a catch-all for operations that don't cleanly fit the UNIX stream I/O model).
> > > >
> > > > so it depends on the device e.g. for a streams device it does:
> > > > https://pubs.opengroup.org/onlinepubs/9699919799/functions/ioctl.html
> > > > has EINTR.
> > >
> > > Ok, I saw signal(7) also mention about EINTR for ioctl(2):
> > >
> > > """
> > >        If  a  blocked call to one of the following interfaces is
> > > interrupted by a signal handler, then the call is automatically
> > > restarted after the signal handler re‐
> > >        turns if the SA_RESTART flag was used; otherwise the call fails
> > > with the error EINTR:
> > >
> > >        * read(2), readv(2), write(2), writev(2), and ioctl(2) calls on
> > > "slow" devices.  A "slow" device is one where the I/O call may block
> > > for an indefinite time, for
> > >          example,  a  terminal,  pipe, or socket.  If an I/O call on a
> > > slow device has already transferred some data by the time it is
> > > interrupted by a signal handler,
> > >          then the call will return a success status (normally, the
> > > number of bytes transferred).  Note that a (local) disk is not a slow
> > > device according to this defi‐
> > >          nition; I/O operations on disk devices are not interrupted by signals.
> > > """
> >
> >
> > And note that if you interrupt then you don't know whether ioctl
> > changed device state or not generally.
> 
> Yes.
> 
> > > >
> > > >
> > > >
> > > > > Actually, a driver timeout is used by other drivers when using
> > > > > controlq/adminq (e.g i40e). Starting from a sane value (e.g 1 minute
> > > > > to avoid false negatives) seems to be a good first step.
> > > >
> > > > Well because it's specific hardware so timeout matches what it can
> > > > promise.  virtio spec does not give guarantees.  One issue is with
> > > > software implementations. At the moment I can set a breakpoint in qemu
> > > > or vhost user backend and nothing bad happens, it just continues.
> > >
> > > Yes but it should be no difference from using a kgdb to debug i40e drivers.
> >
> > Except one of the reasons people prefer programming in userspace is
> > because debugging is so much less painful. Someone using kgdb
> > knows what driver is doing and can work around that.
> 
> Ok.
> 
> >
> > > >
> > > >
> > > > > > Userspace controls the timeout by
> > > > > > using e.g. alarm(2).
> > > > >
> > > > > Not used in iproute2 after a git grep.
> > > > >
> > > > > Thanks
> > > >
> > > > No need for iproute2 to do it user can just do it from shell. Or user can just press CTRL-C.
> > >
> > > Yes, but iproute2 needs to deal with EINTR, that is the challenge
> > > part, if we simply return an error, the next cvq command might get
> > > confused.
> > >
> > > Thanks
> >
> > You mean this:
> >         start command
> >         interrupt
> >         start next command
> >
> > ?
> >
> > next command is confused?
> > I think if you try a new command until previous
> > one finished it's ok to just return EBUSY.
> 
> That would be fine.
> 
> And we go back to somehow the idea here:
> 
> https://lore.kernel.org/all/CACGkMEvQwhOhgGW6F22+3vmR4AW90qYXF+ZO6BQZguUF2xt2SA@mail.gmail.com/T/#m2da63932eae775d7d05d93d44c2f1d115ffbcefe
> 
> Will try to do that in the next version.
> 
> Thanks

Where you wrote:
	We can put the process into interruptible sleep, then it should be fine?

	(FYI, some transport specific methods may sleep e.g ccw).

indeed.


> >
> > > >
> > > > > >
> > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > > > > > > > >
> > > > > > > > > > > > > Thanks
> > > > > > > > > > > >
> > > > > > > > > > > > Only with a bad device.
> > > > > > > > > > > >
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > > > > > > > >> chance to run.
> > > > > > > > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > > > > > > > after kick.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > > > > > > > checked before trying to sleep in the wait_event().
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> The current code did:
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> 1) check for vq->broken
> > > > > > > > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> Thanks
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Thanks
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > >>>
> > > > > > > > > > > >
> > > > > > > > > >
> > > > > > > >
> > > > > >
> > > >
> >

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
@ 2023-01-31  7:32                                           ` Michael S. Tsirkin
  0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2023-01-31  7:32 UTC (permalink / raw)
  To: Jason Wang
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, maxime.coquelin, alvaro.karsz, eperezma

On Tue, Jan 31, 2023 at 11:24:52AM +0800, Jason Wang wrote:
> On Mon, Jan 30, 2023 at 7:18 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, Jan 30, 2023 at 03:44:24PM +0800, Jason Wang wrote:
> > > On Mon, Jan 30, 2023 at 1:43 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Mon, Jan 30, 2023 at 10:53:54AM +0800, Jason Wang wrote:
> > > > > On Sun, Jan 29, 2023 at 3:30 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Sun, Jan 29, 2023 at 01:48:49PM +0800, Jason Wang wrote:
> > > > > > > On Fri, Jan 27, 2023 at 6:35 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Fri, Dec 30, 2022 at 11:43:08AM +0800, Jason Wang wrote:
> > > > > > > > > On Thu, Dec 29, 2022 at 4:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > > On Thu, Dec 29, 2022 at 04:04:13PM +0800, Jason Wang wrote:
> > > > > > > > > > > On Thu, Dec 29, 2022 at 3:07 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > On Wed, Dec 28, 2022 at 07:53:08PM +0800, Jason Wang wrote:
> > > > > > > > > > > > > On Wed, Dec 28, 2022 at 2:34 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > 在 2022/12/27 17:38, Michael S. Tsirkin 写道:
> > > > > > > > > > > > > > > On Tue, Dec 27, 2022 at 05:12:58PM +0800, Jason Wang wrote:
> > > > > > > > > > > > > > >> 在 2022/12/27 15:33, Michael S. Tsirkin 写道:
> > > > > > > > > > > > > > >>> On Tue, Dec 27, 2022 at 12:30:35PM +0800, Jason Wang wrote:
> > > > > > > > > > > > > > >>>>> But device is still going and will later use the buffers.
> > > > > > > > > > > > > > >>>>>
> > > > > > > > > > > > > > >>>>> Same for timeout really.
> > > > > > > > > > > > > > >>>> Avoiding infinite wait/poll is one of the goals, another is to sleep.
> > > > > > > > > > > > > > >>>> If we think the timeout is hard, we can start from the wait.
> > > > > > > > > > > > > > >>>>
> > > > > > > > > > > > > > >>>> Thanks
> > > > > > > > > > > > > > >>> If the goal is to avoid disrupting traffic while CVQ is in use,
> > > > > > > > > > > > > > >>> that sounds more reasonable. E.g. someone is turning on promisc,
> > > > > > > > > > > > > > >>> a spike in CPU usage might be unwelcome.
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> Yes, this would be more obvious if UP is used.
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >>> things we should be careful to address then:
> > > > > > > > > > > > > > >>> 1- debugging. Currently it's easy to see a warning if CPU is stuck
> > > > > > > > > > > > > > >>>      in a loop for a while, and we also get a backtrace.
> > > > > > > > > > > > > > >>>      E.g. with this - how do we know who has the RTNL?
> > > > > > > > > > > > > > >>>      We need to integrate with kernel/watchdog.c for good results
> > > > > > > > > > > > > > >>>      and to make sure policy is consistent.
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> That's fine, will consider this.
> > > > > > > > > > > > >
> > > > > > > > > > > > > So after some investigation, it seems the watchdog.c doesn't help. The
> > > > > > > > > > > > > only export helper is touch_softlockup_watchdog() which tries to avoid
> > > > > > > > > > > > > triggering the lockups warning for the known slow path.
> > > > > > > > > > > >
> > > > > > > > > > > > I never said you can just use existing exporting APIs. You'll have to
> > > > > > > > > > > > write new ones :)
> > > > > > > > > > >
> > > > > > > > > > > Ok, I thought you wanted to trigger similar warnings as a watchdog.
> > > > > > > > > > >
> > > > > > > > > > > Btw, I wonder what kind of logic you want here. If we switch to using
> > > > > > > > > > > sleep, there won't be soft lockup anymore. A simple wait + timeout +
> > > > > > > > > > > warning seems sufficient?
> > > > > > > > > > >
> > > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > > I'd like to avoid need to teach users new APIs. So watchdog setup to apply
> > > > > > > > > > to this driver. The warning can be different.
> > > > > > > > >
> > > > > > > > > Right, so it looks to me the only possible setup is the
> > > > > > > > > watchdog_thres. I plan to trigger the warning every watchdog_thres * 2
> > > > > > > > > second (as softlockup did).
> > > > > > > > >
> > > > > > > > > And I think it would still make sense to fail, we can start with a
> > > > > > > > > very long timeout like 1 minute and break the device. Does this make
> > > > > > > > > sense?
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > >
> > > > > > > > I'd say we need to make this manageable then.
> > > > > > >
> > > > > > > Did you mean something like sysfs or module parameters?
> > > > > >
> > > > > > No I'd say pass it with an ioctl.
> > > > > >
> > > > > > > > Can't we do it normally
> > > > > > > > e.g. react to an interrupt to return to userspace?
> > > > > > >
> > > > > > > I didn't get the meaning of this. Sorry.
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > Standard way to handle things that can timeout and where userspace
> > > > > > did not supply the time is to block until an interrupt
> > > > > > then return EINTR.
> > > > >
> > > > > Well this seems to be a huge change, ioctl(2) doesn't say it can
> > > > > return EINTR now.
> > > >
> > > > the one on fedora 37 does not but it says:
> > > >        No single standard.  Arguments, returns, and semantics of ioctl() vary according to the device driver in question (the call  is
> > > >        used as a catch-all for operations that don't cleanly fit the UNIX stream I/O model).
> > > >
> > > > so it depends on the device e.g. for a streams device it does:
> > > > https://pubs.opengroup.org/onlinepubs/9699919799/functions/ioctl.html
> > > > has EINTR.
> > >
> > > Ok, I saw signal(7) also mention about EINTR for ioctl(2):
> > >
> > > """
> > >        If  a  blocked call to one of the following interfaces is
> > > interrupted by a signal handler, then the call is automatically
> > > restarted after the signal handler re‐
> > >        turns if the SA_RESTART flag was used; otherwise the call fails
> > > with the error EINTR:
> > >
> > >        * read(2), readv(2), write(2), writev(2), and ioctl(2) calls on
> > > "slow" devices.  A "slow" device is one where the I/O call may block
> > > for an indefinite time, for
> > >          example,  a  terminal,  pipe, or socket.  If an I/O call on a
> > > slow device has already transferred some data by the time it is
> > > interrupted by a signal handler,
> > >          then the call will return a success status (normally, the
> > > number of bytes transferred).  Note that a (local) disk is not a slow
> > > device according to this defi‐
> > >          nition; I/O operations on disk devices are not interrupted by signals.
> > > """
> >
> >
> > And note that if you interrupt then you don't know whether ioctl
> > changed device state or not generally.
> 
> Yes.
> 
> > > >
> > > >
> > > >
> > > > > Actually, a driver timeout is used by other drivers when using
> > > > > controlq/adminq (e.g i40e). Starting from a sane value (e.g 1 minute
> > > > > to avoid false negatives) seems to be a good first step.
> > > >
> > > > Well because it's specific hardware so timeout matches what it can
> > > > promise.  virtio spec does not give guarantees.  One issue is with
> > > > software implementations. At the moment I can set a breakpoint in qemu
> > > > or vhost user backend and nothing bad happens, it just continues.
> > >
> > > Yes but it should be no difference from using a kgdb to debug i40e drivers.
> >
> > Except one of the reasons people prefer programming in userspace is
> > because debugging is so much less painful. Someone using kgdb
> > knows what driver is doing and can work around that.
> 
> Ok.
> 
> >
> > > >
> > > >
> > > > > > Userspace controls the timeout by
> > > > > > using e.g. alarm(2).
> > > > >
> > > > > Not used in iproute2 after a git grep.
> > > > >
> > > > > Thanks
> > > >
> > > > No need for iproute2 to do it user can just do it from shell. Or user can just press CTRL-C.
> > >
> > > Yes, but iproute2 needs to deal with EINTR, that is the challenge
> > > part, if we simply return an error, the next cvq command might get
> > > confused.
> > >
> > > Thanks
> >
> > You mean this:
> >         start command
> >         interrupt
> >         start next command
> >
> > ?
> >
> > next command is confused?
> > I think if you try a new command until previous
> > one finished it's ok to just return EBUSY.
> 
> That would be fine.
> 
> And we go back to somehow the idea here:
> 
> https://lore.kernel.org/all/CACGkMEvQwhOhgGW6F22+3vmR4AW90qYXF+ZO6BQZguUF2xt2SA@mail.gmail.com/T/#m2da63932eae775d7d05d93d44c2f1d115ffbcefe
> 
> Will try to do that in the next version.
> 
> Thanks

Where you wrote:
	We can put the process into interruptible sleep, then it should be fine?

	(FYI, some transport specific methods may sleep e.g ccw).

indeed.


> >
> > > >
> > > > > >
> > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > And before the patch, we end up with a real infinite loop which could
> > > > > > > > > > > > > be caught by RCU stall detector which is not the case of the sleep.
> > > > > > > > > > > > > What we can do is probably do a periodic netdev_err().
> > > > > > > > > > > > >
> > > > > > > > > > > > > Thanks
> > > > > > > > > > > >
> > > > > > > > > > > > Only with a bad device.
> > > > > > > > > > > >
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >>> 2- overhead. In a very common scenario when device is in hypervisor,
> > > > > > > > > > > > > > >>>      programming timers etc has a very high overhead, at bootup
> > > > > > > > > > > > > > >>>      lots of CVQ commands are run and slowing boot down is not nice.
> > > > > > > > > > > > > > >>>      let's poll for a bit before waiting?
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> Then we go back to the question of choosing a good timeout for poll. And
> > > > > > > > > > > > > > >> poll seems problematic in the case of UP, scheduler might not have the
> > > > > > > > > > > > > > >> chance to run.
> > > > > > > > > > > > > > > Poll just a bit :) Seriously I don't know, but at least check once
> > > > > > > > > > > > > > > after kick.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > I think it is what the current code did where the condition will be
> > > > > > > > > > > > > > checked before trying to sleep in the wait_event().
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > >>> 3- surprise removal. need to wake up thread in some way. what about
> > > > > > > > > > > > > > >>>      other cases of device breakage - is there a chance this
> > > > > > > > > > > > > > >>>      introduces new bugs around that? at least enumerate them please.
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> The current code did:
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> 1) check for vq->broken
> > > > > > > > > > > > > > >> 2) wakeup during BAD_RING()
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> So we won't end up with a never woke up process which should be fine.
> > > > > > > > > > > > > > >>
> > > > > > > > > > > > > > >> Thanks
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > BTW BAD_RING on removal will trigger dev_err. Not sure that is a good
> > > > > > > > > > > > > > > idea - can cause crashes if kernel panics on error.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Yes, it's better to use __virtqueue_break() instead.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > But consider we will start from a wait first, I will limit the changes
> > > > > > > > > > > > > > in virtio-net without bothering virtio core.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Thanks
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > >>>
> > > > > > > > > > > >
> > > > > > > > > >
> > > > > > > >
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 104+ messages in thread

end of thread, other threads:[~2023-01-31  7:33 UTC | newest]

Thread overview: 104+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-26  7:49 [PATCH 0/4] virtio-net: don't busy poll for cvq command Jason Wang
2022-12-26  7:49 ` Jason Wang
2022-12-26  7:49 ` [PATCH 1/4] virtio-net: convert rx mode setting to use workqueue Jason Wang
2022-12-26  7:49   ` Jason Wang
2022-12-27  7:39   ` Michael S. Tsirkin
2022-12-27  7:39     ` Michael S. Tsirkin
2022-12-27  9:06     ` Jason Wang
2022-12-27  9:06       ` Jason Wang
2022-12-30  2:51       ` Jakub Kicinski
2022-12-30  3:40         ` Jason Wang
2022-12-30  3:40           ` Jason Wang
2022-12-26  7:49 ` [PATCH 2/4] virtio_ring: switch to use BAD_RING() Jason Wang
2022-12-26  7:49   ` Jason Wang
2022-12-26 23:36   ` Michael S. Tsirkin
2022-12-26 23:36     ` Michael S. Tsirkin
2022-12-27  3:51     ` Jason Wang
2022-12-27  3:51       ` Jason Wang
2022-12-27  7:21       ` Michael S. Tsirkin
2022-12-27  7:21         ` Michael S. Tsirkin
2022-12-26  7:49 ` [PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue Jason Wang
2022-12-26  7:49   ` Jason Wang
2022-12-26 23:34   ` Michael S. Tsirkin
2022-12-26 23:34     ` Michael S. Tsirkin
2022-12-27  3:47     ` Jason Wang
2022-12-27  3:47       ` Jason Wang
2022-12-27  7:19       ` Michael S. Tsirkin
2022-12-27  7:19         ` Michael S. Tsirkin
2022-12-27  9:09         ` Jason Wang
2022-12-27  9:09           ` Jason Wang
2022-12-26 23:38   ` Michael S. Tsirkin
2022-12-26 23:38     ` Michael S. Tsirkin
2022-12-27  4:30     ` Jason Wang
2022-12-27  4:30       ` Jason Wang
2022-12-27  7:33       ` Michael S. Tsirkin
2022-12-27  7:33         ` Michael S. Tsirkin
2022-12-27  9:12         ` Jason Wang
2022-12-27  9:12           ` Jason Wang
2022-12-27  9:38           ` Michael S. Tsirkin
2022-12-27  9:38             ` Michael S. Tsirkin
2022-12-28  6:34             ` Jason Wang
2022-12-28  6:34               ` Jason Wang
2022-12-28 11:53               ` Jason Wang
2022-12-28 11:53                 ` Jason Wang
2022-12-29  7:07                 ` Michael S. Tsirkin
2022-12-29  7:07                   ` Michael S. Tsirkin
2022-12-29  8:04                   ` Jason Wang
2022-12-29  8:04                     ` Jason Wang
2022-12-29  8:10                     ` Michael S. Tsirkin
2022-12-29  8:10                       ` Michael S. Tsirkin
2022-12-30  3:43                       ` Jason Wang
2022-12-30  3:43                         ` Jason Wang
2023-01-27 10:35                         ` Michael S. Tsirkin
2023-01-27 10:35                           ` Michael S. Tsirkin
2023-01-29  5:48                           ` Jason Wang
2023-01-29  5:48                             ` Jason Wang
2023-01-29  7:30                             ` Michael S. Tsirkin
2023-01-29  7:30                               ` Michael S. Tsirkin
2023-01-30  2:53                               ` Jason Wang
2023-01-30  2:53                                 ` Jason Wang
2023-01-30  5:43                                 ` Michael S. Tsirkin
2023-01-30  5:43                                   ` Michael S. Tsirkin
2023-01-30  7:44                                   ` Jason Wang
2023-01-30  7:44                                     ` Jason Wang
2023-01-30 11:18                                     ` Michael S. Tsirkin
2023-01-30 11:18                                       ` Michael S. Tsirkin
2023-01-31  3:24                                       ` Jason Wang
2023-01-31  3:24                                         ` Jason Wang
2023-01-31  7:32                                         ` Michael S. Tsirkin
2023-01-31  7:32                                           ` Michael S. Tsirkin
     [not found]   ` <20230129073713.5236-1-hdanton@sina.com>
2023-01-30  3:58     ` Jason Wang
2022-12-26  7:49 ` [PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command Jason Wang
2022-12-26  7:49   ` Jason Wang
2022-12-27  2:19   ` Xuan Zhuo
2022-12-27  2:19     ` Xuan Zhuo
2022-12-27  4:33     ` Jason Wang
2022-12-27  4:33       ` Jason Wang
2022-12-27  6:58       ` Michael S. Tsirkin
2022-12-27  6:58         ` Michael S. Tsirkin
2022-12-27  9:17         ` Jason Wang
2022-12-27  9:17           ` Jason Wang
2022-12-27  9:31           ` Michael S. Tsirkin
2022-12-27  9:31             ` Michael S. Tsirkin
2022-12-28  6:35             ` Jason Wang
2022-12-28  6:35               ` Jason Wang
2022-12-28  8:31         ` Xuan Zhuo
2022-12-28  8:31           ` Xuan Zhuo
2022-12-28 11:41           ` Jason Wang
2022-12-28 11:41             ` Jason Wang
2022-12-29  2:09             ` Xuan Zhuo
2022-12-29  2:09               ` Xuan Zhuo
2022-12-29  3:22               ` Jason Wang
2022-12-29  3:22                 ` Jason Wang
2022-12-29  3:41                 ` Xuan Zhuo
2022-12-29  3:41                   ` Xuan Zhuo
2022-12-29  4:08                   ` Jason Wang
2022-12-29  4:08                     ` Jason Wang
2022-12-29  6:13                     ` Xuan Zhuo
2022-12-29  6:13                       ` Xuan Zhuo
2022-12-28  8:39       ` Xuan Zhuo
2022-12-28  8:39         ` Xuan Zhuo
2022-12-28 11:43         ` Jason Wang
2022-12-28 11:43           ` Jason Wang
2022-12-29  2:01           ` Xuan Zhuo
2022-12-29  2:01             ` Xuan Zhuo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.