[Qemu-devel] [RFC] virtio-rng: add a watchdog

* [Qemu-devel] [RFC] virtio-rng: add a watchdog
@ 2019-06-11 17:20 Laurent Vivier
  2019-06-12  7:03 ` Amit Shah
  0 siblings, 1 reply; 5+ messages in thread
From: Laurent Vivier @ 2019-06-11 17:20 UTC (permalink / raw)
  To: qemu-devel; +Cc: Laurent Vivier, Michael S. Tsirkin, Amit Shah

The virtio-rng linux driver can be stuck in virtio_read() on a
wait_for_completion_killable() call if the virtio-rng device in QEMU
doesn't provide data.

It's a problem, because virtio_read() is called from rng_get_data() with
reading_mutex() held.  The same mutex is taken by add_early_randomness()
and hwrng_fillfn() and this brings to a hang during the boot sequence if
the virtio-rng driver is builtin.
Moreover, another lock is taken (rng_mutex) when the hwrng driver
wants to switch the RNG device or the user tries to unplug the virtio-rng
PCI card, and this can hang too because the virtio-rng driver is only able
to release the card if the virtio-rng device sends back the virtqueue element.

  # echo -n virtio_rng.1 > /sys/class/misc/hw_random/rng_current
  [  240.165234] INFO: task kworker/u2:1:34 blocked for more than 120 seconds.
  [  240.165961] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  [  240.166708] kworker/u2:1    D ffffffffb86b85a8     0    34      2 0x00000000
  [  240.166714] Workqueue: kacpi_hotplug acpi_hotplug_work_fn
  [  240.166716]  ffffa0e8f3c0b890 0000000000000046 ffffa0e8f3c00000 ffffa0e8f3c0bfd8
  [  240.166717]  ffffa0e8f3c0bfd8 ffffa0e8f3c0bfd8 ffffa0e8f3c00000 ffffffffb86b85a0
  [  240.166719]  ffffffffb86b85a4 ffffa0e8f3c00000 00000000ffffffff ffffffffb86b85a8
  [  240.166720] Call Trace:
  [  240.166725]  [<ffffffffb82a61c9>] schedule_preempt_disabled+0x29/0x70
  [  240.166727]  [<ffffffffb82a40f7>] __mutex_lock_slowpath+0xc7/0x1d0
  [  240.166728]  [<ffffffffb82a350f>] mutex_lock+0x1f/0x2f
  [  240.166730]  [<ffffffffb8022b52>] hwrng_register+0x32/0x1d0
  [  240.166733]  [<ffffffffc07fa149>] virtrng_scan+0x19/0x30 [virtio_rng]
  [  240.166744]  [<ffffffffc03108db>] virtio_dev_probe+0x1eb/0x290 [virtio]
  [  240.166746]  [<ffffffffb803d6e5>] driver_probe_device+0x145/0x3c0
  ...

In some case, the QEMU RNG backend is not able to provide data, and
the virtio-rng device is not aware of that:
- with rng-random using /dev/random and no entropy is available,
- with rng-egd started with a socket in "server,nowait" mode and
  no daemon connected,
- with rng-egd and an egd daemon that is not providing enough data,
- ...

To release the locks regularly, this patch adds a watchdog in QEMU
virtio-rng device that sends back to the guest the virtqueue buffer
with a 0 byte payload. This case is expected and correctly managed by
the hwrng core.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 hw/virtio/virtio-rng.c         | 23 +++++++++++++++++++++++
 include/hw/virtio/virtio-rng.h |  1 +
 2 files changed, 24 insertions(+)

diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
index 30493a258622..173ecd370c0e 100644
--- a/hw/virtio/virtio-rng.c
+++ b/hw/virtio/virtio-rng.c
@@ -19,6 +19,8 @@
 #include "qom/object_interfaces.h"
 #include "trace.h"
 
+#define VIRTIO_RNG_WATCHDOG_MS 500
+
 static bool is_guest_ready(VirtIORNG *vrng)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(vrng);
@@ -38,6 +40,21 @@ static size_t get_request_size(VirtQueue *vq, unsigned quota)
     return in;
 }
 
+static void watchdog(void *opaque)
+{
+    VirtIORNG *vrng = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(vrng);
+    VirtQueueElement *elem;
+
+    /* wake up driver */
+    elem = virtqueue_pop(vrng->vq, sizeof(VirtQueueElement));
+    if (!elem) {
+        return;
+    }
+    virtqueue_push(vrng->vq, elem, 0);
+    virtio_notify(vdev, vrng->vq);
+}
+
 static void virtio_rng_process(VirtIORNG *vrng);
 
 /* Send data from a char device over to the guest */
@@ -98,6 +115,9 @@ static void virtio_rng_process(VirtIORNG *vrng)
         return;
     }
 
+    timer_mod(vrng->watchdog_timer,
+              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + VIRTIO_RNG_WATCHDOG_MS);
+
     if (vrng->activate_timer) {
         timer_mod(vrng->rate_limit_timer,
                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + vrng->conf.period_ms);
@@ -222,6 +242,7 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp)
 
     vrng->vq = virtio_add_queue(vdev, 8, handle_input);
     vrng->quota_remaining = vrng->conf.max_bytes;
+    vrng->watchdog_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, watchdog, vrng);
     vrng->rate_limit_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                                check_rate_limit, vrng);
     vrng->activate_timer = true;
@@ -236,6 +257,8 @@ static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp)
     VirtIORNG *vrng = VIRTIO_RNG(dev);
 
     qemu_del_vm_change_state_handler(vrng->vmstate);
+    timer_del(vrng->watchdog_timer);
+    timer_free(vrng->watchdog_timer);
     timer_del(vrng->rate_limit_timer);
     timer_free(vrng->rate_limit_timer);
     virtio_cleanup(vdev);
diff --git a/include/hw/virtio/virtio-rng.h b/include/hw/virtio/virtio-rng.h
index 922dce7caccf..05d6b0e7d881 100644
--- a/include/hw/virtio/virtio-rng.h
+++ b/include/hw/virtio/virtio-rng.h
@@ -42,6 +42,7 @@ typedef struct VirtIORNG {
     /* We purposefully don't migrate this state.  The quota will reset on the
      * destination as a result.  Rate limiting is host state, not guest state.
      */
+    QEMUTimer *watchdog_timer;
     QEMUTimer *rate_limit_timer;
     int64_t quota_remaining;
     bool activate_timer;
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 5+ messages in thread