kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] virtio-net: More configurability and bh handling for tx
@ 2010-08-27 22:36 Alex Williamson
  2010-08-27 22:37 ` [PATCH 1/5] virtio-net: Make tx_timer timeout configurable Alex Williamson
                   ` (6 more replies)
  0 siblings, 7 replies; 23+ messages in thread
From: Alex Williamson @ 2010-08-27 22:36 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm, anthony, jes.sorensen, alex.williamson

Add the ability to configure the tx_timer timeout and add a bottom
half tx handler that typically shows a nice perf boost over the
time based approach.  See last patch for perf details.  Make this
the new default when the iothread is enabled.  Thanks,

Alex

---

Alex Williamson (5):
      virtio-net: Switch default to new bottom half TX handler for iothread
      virtio-net: Introduce a new bottom half packet TX
      virtio-net: Rename tx_timer_active to tx_waiting
      virtio-net: Limit number of packets sent per TX flush
      virtio-net: Make tx_timer timeout configurable


 hw/s390-virtio-bus.c |    6 ++
 hw/s390-virtio-bus.h |    4 ++
 hw/syborg_virtio.c   |   10 +++-
 hw/virtio-net.c      |  129 ++++++++++++++++++++++++++++++++++++++++----------
 hw/virtio-net.h      |    2 -
 hw/virtio-pci.c      |   10 +++-
 hw/virtio.h          |    9 +++
 7 files changed, 137 insertions(+), 33 deletions(-)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 1/5] virtio-net: Make tx_timer timeout configurable
  2010-08-27 22:36 [PATCH 0/5] virtio-net: More configurability and bh handling for tx Alex Williamson
@ 2010-08-27 22:37 ` Alex Williamson
  2010-08-31 18:00   ` Chris Wright
  2010-08-27 22:37 ` [PATCH 2/5] virtio-net: Limit number of packets sent per TX flush Alex Williamson
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 23+ messages in thread
From: Alex Williamson @ 2010-08-27 22:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm, anthony, jes.sorensen, alex.williamson

The tx_timer used for TX mitigation in virtio-net has a hard coded
timeout.  Make an option for this to be configurable using a
txtimer=<ns> device config option.  Note that we reserve a value of
"1" to simply mean use the default, we'll later use the value "0"
to disable the timer.  Everything else is the timeout in ns.  We
limit the timeout to a uint32_t, because anything over a 4s timeout
probably doens't make any kind of practical sense.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/s390-virtio-bus.c |    3 ++-
 hw/s390-virtio-bus.h |    2 ++
 hw/syborg_virtio.c   |    5 ++++-
 hw/virtio-net.c      |   17 ++++++++++++++---
 hw/virtio-net.h      |    2 --
 hw/virtio-pci.c      |    5 ++++-
 hw/virtio.h          |    3 ++-
 7 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index fe6884d..4da0b40 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -110,7 +110,7 @@ static int s390_virtio_net_init(VirtIOS390Device *dev)
 {
     VirtIODevice *vdev;
 
-    vdev = virtio_net_init((DeviceState *)dev, &dev->nic);
+    vdev = virtio_net_init((DeviceState *)dev, &dev->nic, dev->txtimer);
     if (!vdev) {
         return -1;
     }
@@ -327,6 +327,7 @@ static VirtIOS390DeviceInfo s390_virtio_net = {
     .qdev.size = sizeof(VirtIOS390Device),
     .qdev.props = (Property[]) {
         DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
+        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer, 1),
         DEFINE_PROP_END_OF_LIST(),
     },
 };
diff --git a/hw/s390-virtio-bus.h b/hw/s390-virtio-bus.h
index 333fea8..922daf2 100644
--- a/hw/s390-virtio-bus.h
+++ b/hw/s390-virtio-bus.h
@@ -43,6 +43,8 @@ typedef struct VirtIOS390Device {
     uint32_t host_features;
     /* Max. number of ports we can have for a the virtio-serial device */
     uint32_t max_virtserial_ports;
+    /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
+    uint32_t txtimer;
 } VirtIOS390Device;
 
 typedef struct VirtIOS390Bus {
diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index abf0370..c8d731a 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -68,6 +68,8 @@ typedef struct {
     uint32_t id;
     NICConf nic;
     uint32_t host_features;
+    /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
+    uint32_t txtimer;
 } SyborgVirtIOProxy;
 
 static uint32_t syborg_virtio_readl(void *opaque, target_phys_addr_t offset)
@@ -284,7 +286,7 @@ static int syborg_virtio_net_init(SysBusDevice *dev)
     VirtIODevice *vdev;
     SyborgVirtIOProxy *proxy = FROM_SYSBUS(SyborgVirtIOProxy, dev);
 
-    vdev = virtio_net_init(&dev->qdev, &proxy->nic);
+    vdev = virtio_net_init(&dev->qdev, &proxy->nic, proxy->txtimer);
     return syborg_virtio_init(proxy, vdev);
 }
 
@@ -295,6 +297,7 @@ static SysBusDeviceInfo syborg_virtio_net_info = {
     .qdev.props = (Property[]) {
         DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
         DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
+        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer, 1),
         DEFINE_PROP_END_OF_LIST(),
     }
 };
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 075f72d..9ef29f0 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -36,6 +36,7 @@ typedef struct VirtIONet
     VirtQueue *ctrl_vq;
     NICState *nic;
     QEMUTimer *tx_timer;
+    uint32_t tx_timeout;
     int tx_timer_active;
     uint32_t has_vnet_hdr;
     uint8_t has_ufo;
@@ -702,7 +703,7 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
         virtio_net_flush_tx(n, vq);
     } else {
         qemu_mod_timer(n->tx_timer,
-                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
+                       qemu_get_clock(vm_clock) + n->tx_timeout);
         n->tx_timer_active = 1;
         virtio_queue_set_notification(vq, 0);
     }
@@ -842,7 +843,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
 
     if (n->tx_timer_active) {
         qemu_mod_timer(n->tx_timer,
-                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
+                       qemu_get_clock(vm_clock) + n->tx_timeout);
     }
     return 0;
 }
@@ -903,7 +904,8 @@ static void virtio_net_vmstate_change(void *opaque, int running, int reason)
     virtio_net_set_status(&n->vdev, n->vdev.status & status);
 }
 
-VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
+VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
+                              uint32_t txtimer)
 {
     VirtIONet *n;
 
@@ -931,6 +933,15 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
 
     n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
     n->tx_timer_active = 0;
+    if (txtimer) {
+        if (txtimer == 1) {
+            /* For convenience, 1 = "on" = predefined default, anything else
+             * specifies and actual timeout value */
+            n->tx_timeout = 150000; /* 150 us */
+        } else {
+            n->tx_timeout = txtimer;
+        }
+    }
     n->mergeable_rx_bufs = 0;
     n->promisc = 1; /* for compatibility */
 
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index 235f1a9..ae21c35 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -47,8 +47,6 @@
 
 #define VIRTIO_NET_S_LINK_UP    1       /* Link is up */
 
-#define TX_TIMER_INTERVAL 150000 /* 150 us */
-
 /* Maximum packet size we can receive from tap device: header + 64k */
 #define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10))
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 6e8f88a..e3b9897 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -107,6 +107,8 @@ typedef struct {
 #endif
     /* Max. number of ports we can have for a the virtio-serial device */
     uint32_t max_virtserial_ports;
+    /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
+    uint32_t txtimer;
 } VirtIOPCIProxy;
 
 /* virtio device */
@@ -613,7 +615,7 @@ static int virtio_net_init_pci(PCIDevice *pci_dev)
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
     VirtIODevice *vdev;
 
-    vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic);
+    vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic, proxy->txtimer);
 
     vdev->nvectors = proxy->nvectors;
     virtio_init_pci(proxy, vdev,
@@ -690,6 +692,7 @@ static PCIDeviceInfo virtio_info[] = {
             DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
             DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
             DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
+            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer, 1),
             DEFINE_PROP_END_OF_LIST(),
         },
         .qdev.reset = virtio_pci_reset,
diff --git a/hw/virtio.h b/hw/virtio.h
index 5836ab6..77d05e0 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -185,7 +185,8 @@ void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
 
 /* Base devices.  */
 VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
-VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf);
+VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
+                              uint32_t txtimer);
 VirtIODevice *virtio_serial_init(DeviceState *dev, uint32_t max_nr_ports);
 VirtIODevice *virtio_balloon_init(DeviceState *dev);
 #ifdef CONFIG_LINUX


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 2/5] virtio-net: Limit number of packets sent per TX flush
  2010-08-27 22:36 [PATCH 0/5] virtio-net: More configurability and bh handling for tx Alex Williamson
  2010-08-27 22:37 ` [PATCH 1/5] virtio-net: Make tx_timer timeout configurable Alex Williamson
@ 2010-08-27 22:37 ` Alex Williamson
  2010-08-31 20:23   ` Michael S. Tsirkin
  2010-08-27 22:37 ` [PATCH 3/5] virtio-net: Rename tx_timer_active to tx_waiting Alex Williamson
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 23+ messages in thread
From: Alex Williamson @ 2010-08-27 22:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm, anthony, jes.sorensen, alex.williamson

If virtio_net_flush_tx is called with notification disabled, we can
race with the guest, processing packets at the same rate as they
get produced.  The trouble is that this means we have no guaranteed
exit condition from the function and can spend minutes in there.
Currently flush_tx is only called with notification on, which seems
to limit us to one pass through the queue per call.  An upcoming
patch changes this.

One pass through the queue (256) seems to be a good default value
for this, balancing latency with throughput.  We use a signed int
for txburst because 2^31 packets in a burst would take many, many
minutes to process and it allows us to easily return a negative
value value from virtio_net_flush_tx() to indicate a back-off
or error condition.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/s390-virtio-bus.c |    4 +++-
 hw/s390-virtio-bus.h |    2 ++
 hw/syborg_virtio.c   |    6 +++++-
 hw/virtio-net.c      |   23 ++++++++++++++++-------
 hw/virtio-pci.c      |    6 +++++-
 hw/virtio.h          |    2 +-
 6 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index 4da0b40..1483362 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -110,7 +110,8 @@ static int s390_virtio_net_init(VirtIOS390Device *dev)
 {
     VirtIODevice *vdev;
 
-    vdev = virtio_net_init((DeviceState *)dev, &dev->nic, dev->txtimer);
+    vdev = virtio_net_init((DeviceState *)dev, &dev->nic,
+                           dev->txtimer, dev->txburst);
     if (!vdev) {
         return -1;
     }
@@ -328,6 +329,7 @@ static VirtIOS390DeviceInfo s390_virtio_net = {
     .qdev.props = (Property[]) {
         DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
         DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer, 1),
+        DEFINE_PROP_INT32("txburst", VirtIOS390Device, txburst, 256),
         DEFINE_PROP_END_OF_LIST(),
     },
 };
diff --git a/hw/s390-virtio-bus.h b/hw/s390-virtio-bus.h
index 922daf2..808aea0 100644
--- a/hw/s390-virtio-bus.h
+++ b/hw/s390-virtio-bus.h
@@ -45,6 +45,8 @@ typedef struct VirtIOS390Device {
     uint32_t max_virtserial_ports;
     /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
     uint32_t txtimer;
+    /* Max tx packets for virtio-net to burst at a time */
+    int32_t txburst;
 } VirtIOS390Device;
 
 typedef struct VirtIOS390Bus {
diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index c8d731a..7b76972 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -70,6 +70,8 @@ typedef struct {
     uint32_t host_features;
     /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
     uint32_t txtimer;
+    /* Max tx packets for virtio-net to burst at a time */
+    int32_t txburst;
 } SyborgVirtIOProxy;
 
 static uint32_t syborg_virtio_readl(void *opaque, target_phys_addr_t offset)
@@ -286,7 +288,8 @@ static int syborg_virtio_net_init(SysBusDevice *dev)
     VirtIODevice *vdev;
     SyborgVirtIOProxy *proxy = FROM_SYSBUS(SyborgVirtIOProxy, dev);
 
-    vdev = virtio_net_init(&dev->qdev, &proxy->nic, proxy->txtimer);
+    vdev = virtio_net_init(&dev->qdev, &proxy->nic,
+                           proxy->txtimer, proxy->txburst);
     return syborg_virtio_init(proxy, vdev);
 }
 
@@ -298,6 +301,7 @@ static SysBusDeviceInfo syborg_virtio_net_info = {
         DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
         DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
         DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer, 1),
+        DEFINE_PROP_INT32("txburst", SyborgVirtIOProxy, txburst, 256),
         DEFINE_PROP_END_OF_LIST(),
     }
 };
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 9ef29f0..ac4aa8f 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -37,6 +37,7 @@ typedef struct VirtIONet
     NICState *nic;
     QEMUTimer *tx_timer;
     uint32_t tx_timeout;
+    int32_t tx_burst;
     int tx_timer_active;
     uint32_t has_vnet_hdr;
     uint8_t has_ufo;
@@ -620,7 +621,7 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_
     return size;
 }
 
-static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
+static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
 
 static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
 {
@@ -636,16 +637,18 @@ static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
 }
 
 /* TX */
-static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
+static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
 {
     VirtQueueElement elem;
+    int32_t num_packets = 0;
 
-    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
-        return;
+    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        return num_packets;
+    }
 
     if (n->async_tx.elem.out_num) {
         virtio_queue_set_notification(n->tx_vq, 0);
-        return;
+        return num_packets;
     }
 
     while (virtqueue_pop(vq, &elem)) {
@@ -682,14 +685,19 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
             virtio_queue_set_notification(n->tx_vq, 0);
             n->async_tx.elem = elem;
             n->async_tx.len  = len;
-            return;
+            return -EBUSY;
         }
 
         len += ret;
 
         virtqueue_push(vq, &elem, len);
         virtio_notify(&n->vdev, vq);
+
+        if (++num_packets >= n->tx_burst) {
+            break;
+        }
     }
+    return num_packets;
 }
 
 static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
@@ -905,7 +913,7 @@ static void virtio_net_vmstate_change(void *opaque, int running, int reason)
 }
 
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
-                              uint32_t txtimer)
+                              uint32_t txtimer, int32_t txburst)
 {
     VirtIONet *n;
 
@@ -942,6 +950,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
             n->tx_timeout = txtimer;
         }
     }
+    n->tx_burst = txburst;
     n->mergeable_rx_bufs = 0;
     n->promisc = 1; /* for compatibility */
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index e3b9897..e025c09 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -109,6 +109,8 @@ typedef struct {
     uint32_t max_virtserial_ports;
     /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
     uint32_t txtimer;
+    /* Max tx packets for virtio-net to burst at a time */
+    int32_t txburst;
 } VirtIOPCIProxy;
 
 /* virtio device */
@@ -615,7 +617,8 @@ static int virtio_net_init_pci(PCIDevice *pci_dev)
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
     VirtIODevice *vdev;
 
-    vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic, proxy->txtimer);
+    vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic,
+                           proxy->txtimer, proxy->txburst);
 
     vdev->nvectors = proxy->nvectors;
     virtio_init_pci(proxy, vdev,
@@ -693,6 +696,7 @@ static PCIDeviceInfo virtio_info[] = {
             DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
             DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
             DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer, 1),
+            DEFINE_PROP_INT32("txburst", VirtIOPCIProxy, txburst, 256),
             DEFINE_PROP_END_OF_LIST(),
         },
         .qdev.reset = virtio_pci_reset,
diff --git a/hw/virtio.h b/hw/virtio.h
index 77d05e0..4051889 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -186,7 +186,7 @@ void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
 /* Base devices.  */
 VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
-                              uint32_t txtimer);
+                              uint32_t txtimer, int32_t txburst);
 VirtIODevice *virtio_serial_init(DeviceState *dev, uint32_t max_nr_ports);
 VirtIODevice *virtio_balloon_init(DeviceState *dev);
 #ifdef CONFIG_LINUX


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 3/5] virtio-net: Rename tx_timer_active to tx_waiting
  2010-08-27 22:36 [PATCH 0/5] virtio-net: More configurability and bh handling for tx Alex Williamson
  2010-08-27 22:37 ` [PATCH 1/5] virtio-net: Make tx_timer timeout configurable Alex Williamson
  2010-08-27 22:37 ` [PATCH 2/5] virtio-net: Limit number of packets sent per TX flush Alex Williamson
@ 2010-08-27 22:37 ` Alex Williamson
  2010-08-27 22:37 ` [PATCH 4/5] virtio-net: Introduce a new bottom half packet TX Alex Williamson
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 23+ messages in thread
From: Alex Williamson @ 2010-08-27 22:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm, anthony, jes.sorensen, alex.williamson

De-couple this from the timer since we might want to use
different backends to send the packet.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/virtio-net.c |   18 +++++++++---------
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index ac4aa8f..8b652f2 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -38,7 +38,7 @@ typedef struct VirtIONet
     QEMUTimer *tx_timer;
     uint32_t tx_timeout;
     int32_t tx_burst;
-    int tx_timer_active;
+    int tx_waiting;
     uint32_t has_vnet_hdr;
     uint8_t has_ufo;
     struct {
@@ -704,15 +704,15 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIONet *n = to_virtio_net(vdev);
 
-    if (n->tx_timer_active) {
+    if (n->tx_waiting) {
         virtio_queue_set_notification(vq, 1);
         qemu_del_timer(n->tx_timer);
-        n->tx_timer_active = 0;
+        n->tx_waiting = 0;
         virtio_net_flush_tx(n, vq);
     } else {
         qemu_mod_timer(n->tx_timer,
                        qemu_get_clock(vm_clock) + n->tx_timeout);
-        n->tx_timer_active = 1;
+        n->tx_waiting = 1;
         virtio_queue_set_notification(vq, 0);
     }
 }
@@ -721,7 +721,7 @@ static void virtio_net_tx_timer(void *opaque)
 {
     VirtIONet *n = opaque;
 
-    n->tx_timer_active = 0;
+    n->tx_waiting = 0;
 
     /* Just in case the driver is not ready on more */
     if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
@@ -744,7 +744,7 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
     virtio_save(&n->vdev, f);
 
     qemu_put_buffer(f, n->mac, ETH_ALEN);
-    qemu_put_be32(f, n->tx_timer_active);
+    qemu_put_be32(f, n->tx_waiting);
     qemu_put_be32(f, n->mergeable_rx_bufs);
     qemu_put_be16(f, n->status);
     qemu_put_byte(f, n->promisc);
@@ -773,7 +773,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
     virtio_load(&n->vdev, f);
 
     qemu_get_buffer(f, n->mac, ETH_ALEN);
-    n->tx_timer_active = qemu_get_be32(f);
+    n->tx_waiting = qemu_get_be32(f);
     n->mergeable_rx_bufs = qemu_get_be32(f);
 
     if (version_id >= 3)
@@ -849,7 +849,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
     }
     n->mac_table.first_multi = i;
 
-    if (n->tx_timer_active) {
+    if (n->tx_waiting) {
         qemu_mod_timer(n->tx_timer,
                        qemu_get_clock(vm_clock) + n->tx_timeout);
     }
@@ -940,7 +940,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
     qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
 
     n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
-    n->tx_timer_active = 0;
+    n->tx_waiting = 0;
     if (txtimer) {
         if (txtimer == 1) {
             /* For convenience, 1 = "on" = predefined default, anything else


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 4/5] virtio-net: Introduce a new bottom half packet TX
  2010-08-27 22:36 [PATCH 0/5] virtio-net: More configurability and bh handling for tx Alex Williamson
                   ` (2 preceding siblings ...)
  2010-08-27 22:37 ` [PATCH 3/5] virtio-net: Rename tx_timer_active to tx_waiting Alex Williamson
@ 2010-08-27 22:37 ` Alex Williamson
  2010-08-31 20:14   ` Michael S. Tsirkin
  2010-08-27 22:37 ` [PATCH 5/5] virtio-net: Switch default to new bottom half TX handler for iothread Alex Williamson
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 23+ messages in thread
From: Alex Williamson @ 2010-08-27 22:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm, anthony, jes.sorensen, alex.williamson

Based on a patch from Mark McLoughlin, this patch introduces a new
bottom half packet transmitter that avoids the latency imposed by
the tx_timer approach.  Rather than scheduling a timer when a TX
packet comes in, schedule a bottom half to be run from the iothread.
The bottom half handler first attempts to flush the queue with
notification disabled (this is where we could race with a guest
without txburst).  If we flush a full burst, reschedule immediately.
If we send short of a full burst, try to re-enable notification.
To avoid a race with TXs that may have occurred, we must then
flush again.  If we find some packets to send, the guest it probably
active, so we can reschedule again.

tx_timer and tx_bh are mutually exclusive, so we can re-use the
tx_waiting flag to indicate one or the other needs to be setup.
This allows us to seamlessly migrate between timer and bh TX
handling.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/virtio-net.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 68 insertions(+), 13 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 8b652f2..3288c77 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -36,6 +36,7 @@ typedef struct VirtIONet
     VirtQueue *ctrl_vq;
     NICState *nic;
     QEMUTimer *tx_timer;
+    QEMUBH *tx_bh;
     uint32_t tx_timeout;
     int32_t tx_burst;
     int tx_waiting;
@@ -704,16 +705,25 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIONet *n = to_virtio_net(vdev);
 
-    if (n->tx_waiting) {
-        virtio_queue_set_notification(vq, 1);
-        qemu_del_timer(n->tx_timer);
-        n->tx_waiting = 0;
-        virtio_net_flush_tx(n, vq);
+    if (n->tx_timer) {
+        if (n->tx_waiting) {
+            virtio_queue_set_notification(vq, 1);
+            qemu_del_timer(n->tx_timer);
+            n->tx_waiting = 0;
+            virtio_net_flush_tx(n, vq);
+        } else {
+            qemu_mod_timer(n->tx_timer,
+                           qemu_get_clock(vm_clock) + n->tx_timeout);
+            n->tx_waiting = 1;
+            virtio_queue_set_notification(vq, 0);
+        }
     } else {
-        qemu_mod_timer(n->tx_timer,
-                       qemu_get_clock(vm_clock) + n->tx_timeout);
+        if (unlikely(n->tx_waiting)) {
+            return;
+        }
+        virtio_queue_set_notification(n->tx_vq, 0);
+        qemu_bh_schedule(n->tx_bh);
         n->tx_waiting = 1;
-        virtio_queue_set_notification(vq, 0);
     }
 }
 
@@ -731,6 +741,41 @@ static void virtio_net_tx_timer(void *opaque)
     virtio_net_flush_tx(n, n->tx_vq);
 }
 
+static void virtio_net_tx_bh(void *opaque)
+{
+    VirtIONet *n = opaque;
+    int32_t ret;
+
+    n->tx_waiting = 0;
+
+    /* Just in case the driver is not ready on more */
+    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
+        return;
+
+    ret = virtio_net_flush_tx(n, n->tx_vq);
+    if (ret == -EBUSY) {
+        return; /* Notification re-enable handled by tx_complete */
+    }
+
+    /* If we flush a full burst of packets, assume there are
+     * more coming and immediately reschedule */
+    if (ret >= n->tx_burst) {
+        qemu_bh_schedule(n->tx_bh);
+        n->tx_waiting = 1;
+        return;
+    }
+
+    /* If less than a full burst, re-enable notification and flush
+     * anything that may have come in while we weren't looking.  If
+     * we find something, assume the guest is still active and reschedule */
+    virtio_queue_set_notification(n->tx_vq, 1);
+    if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
+        virtio_queue_set_notification(n->tx_vq, 0);
+        qemu_bh_schedule(n->tx_bh);
+        n->tx_waiting = 1;
+    }
+}
+
 static void virtio_net_save(QEMUFile *f, void *opaque)
 {
     VirtIONet *n = opaque;
@@ -850,8 +895,12 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
     n->mac_table.first_multi = i;
 
     if (n->tx_waiting) {
-        qemu_mod_timer(n->tx_timer,
-                       qemu_get_clock(vm_clock) + n->tx_timeout);
+        if (n->tx_timer) {
+            qemu_mod_timer(n->tx_timer,
+                           qemu_get_clock(vm_clock) + n->tx_timeout);
+        } else {
+            qemu_bh_schedule(n->tx_bh);
+        }
     }
     return 0;
 }
@@ -939,9 +988,9 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
 
     qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
 
-    n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
     n->tx_waiting = 0;
     if (txtimer) {
+        n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
         if (txtimer == 1) {
             /* For convenience, 1 = "on" = predefined default, anything else
              * specifies and actual timeout value */
@@ -949,6 +998,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
         } else {
             n->tx_timeout = txtimer;
         }
+    } else {
+        n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
     }
     n->tx_burst = txburst;
     n->mergeable_rx_bufs = 0;
@@ -982,8 +1033,12 @@ void virtio_net_exit(VirtIODevice *vdev)
     qemu_free(n->mac_table.macs);
     qemu_free(n->vlans);
 
-    qemu_del_timer(n->tx_timer);
-    qemu_free_timer(n->tx_timer);
+    if (n->tx_timer) {
+        qemu_del_timer(n->tx_timer);
+        qemu_free_timer(n->tx_timer);
+    } else {
+        qemu_bh_delete(n->tx_bh);
+    }
 
     virtio_cleanup(&n->vdev);
     qemu_del_vlan_client(&n->nic->nc);


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 5/5] virtio-net: Switch default to new bottom half TX handler for iothread
  2010-08-27 22:36 [PATCH 0/5] virtio-net: More configurability and bh handling for tx Alex Williamson
                   ` (3 preceding siblings ...)
  2010-08-27 22:37 ` [PATCH 4/5] virtio-net: Introduce a new bottom half packet TX Alex Williamson
@ 2010-08-27 22:37 ` Alex Williamson
  2010-08-31 20:25   ` Michael S. Tsirkin
  2010-08-31 20:20 ` [PATCH 0/5] virtio-net: More configurability and bh handling for tx Michael S. Tsirkin
  2010-08-31 20:27 ` Michael S. Tsirkin
  6 siblings, 1 reply; 23+ messages in thread
From: Alex Williamson @ 2010-08-27 22:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm, anthony, jes.sorensen, alex.williamson

The bottom half handler shows big improvements over the timer
with few downsides, default to it when the iothread is enabled.

Using the following tests, with the guest and host connected
via tap+bridge:

guest> netperf -t TCP_STREAM -H $HOST
host> netperf -t TCP_STREAM -H $GUEST
guest> netperf -t UDP_STREAM -H $HOST
host> netperf -t UDP_STREAM -H $GUEST
guest> netperf -t TCP_RR -H $HOST

Results: base throughput, exits/throughput ->
                   patched throughput, exits/throughput

--enable-io-thread
TCP guest->host 2737.77, 47.82  -> 6767.09, 29.15 = 247%, 61%
TCP host->guest 2231.33, 74.00  -> 4125.80, 67.61 = 185%, 91%
UDP guest->host 6281.68, 14.66  -> 12569.27, 1.98 = 200%, 14%
UDP host->guest 275.91,  289.22 -> 264.80, 293.53 = 96%, 101%
interations/s   1949.65, 82.97  -> 7417.56, 84.31 = 380%, 102%

No --enable-io-thread
TCP guest->host 3041.57, 55.11 -> 1038.93, 517.57 = 34%, 939%
TCP host->guest 2416.03, 76.67 -> 5655.92, 55.52  = 234%, 72%
UDP guest->host 12255.82, 6.11 -> 7775.87, 31.32  = 63%, 513%
UDP host->guest 587.92, 245.95 -> 611.88, 239.92  = 104%, 98%
interations/s   1975.59, 83.21 -> 8935.50, 88.18  = 452%, 106%

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/s390-virtio-bus.c |    3 ++-
 hw/syborg_virtio.c   |    3 ++-
 hw/virtio-pci.c      |    3 ++-
 hw/virtio.h          |    6 ++++++
 4 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index 1483362..985f99a 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -328,7 +328,8 @@ static VirtIOS390DeviceInfo s390_virtio_net = {
     .qdev.size = sizeof(VirtIOS390Device),
     .qdev.props = (Property[]) {
         DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
-        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer, 1),
+        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer,
+                           TXTIMER_DEFAULT),
         DEFINE_PROP_INT32("txburst", VirtIOS390Device, txburst, 256),
         DEFINE_PROP_END_OF_LIST(),
     },
diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index 7b76972..ee5746d 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -300,7 +300,8 @@ static SysBusDeviceInfo syborg_virtio_net_info = {
     .qdev.props = (Property[]) {
         DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
         DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
-        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer, 1),
+        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer,
+                           TXTIMER_DEFAULT),
         DEFINE_PROP_INT32("txburst", SyborgVirtIOProxy, txburst, 256),
         DEFINE_PROP_END_OF_LIST(),
     }
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index e025c09..9740f57 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -695,7 +695,8 @@ static PCIDeviceInfo virtio_info[] = {
             DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
             DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
             DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
-            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer, 1),
+            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer,
+                               TXTIMER_DEFAULT),
             DEFINE_PROP_INT32("txburst", VirtIOPCIProxy, txburst, 256),
             DEFINE_PROP_END_OF_LIST(),
         },
diff --git a/hw/virtio.h b/hw/virtio.h
index 4051889..a1a17a2 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -183,6 +183,12 @@ void virtio_update_irq(VirtIODevice *vdev);
 void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
                         void *opaque);
 
+#ifdef CONFIG_IOTHREAD
+ #define TXTIMER_DEFAULT 0
+#else
+ #define TXTIMER_DEFAULT 1
+#endif
+
 /* Base devices.  */
 VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/5] virtio-net: Make tx_timer timeout configurable
  2010-08-27 22:37 ` [PATCH 1/5] virtio-net: Make tx_timer timeout configurable Alex Williamson
@ 2010-08-31 18:00   ` Chris Wright
  2010-08-31 18:07     ` [Qemu-devel] " Alex Williamson
  0 siblings, 1 reply; 23+ messages in thread
From: Chris Wright @ 2010-08-31 18:00 UTC (permalink / raw)
  To: Alex Williamson; +Cc: jes.sorensen, qemu-devel, kvm

* Alex Williamson (alex.williamson@redhat.com) wrote:
> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> index 075f72d..9ef29f0 100644
> --- a/hw/virtio-net.c
> +++ b/hw/virtio-net.c
> @@ -36,6 +36,7 @@ typedef struct VirtIONet
>      VirtQueue *ctrl_vq;
>      NICState *nic;
>      QEMUTimer *tx_timer;
> +    uint32_t tx_timeout;
>      int tx_timer_active;
>      uint32_t has_vnet_hdr;
>      uint8_t has_ufo;
> @@ -702,7 +703,7 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
>          virtio_net_flush_tx(n, vq);
>      } else {
>          qemu_mod_timer(n->tx_timer,
> -                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
> +                       qemu_get_clock(vm_clock) + n->tx_timeout);
>          n->tx_timer_active = 1;
>          virtio_queue_set_notification(vq, 0);
>      }
> @@ -842,7 +843,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
>  
>      if (n->tx_timer_active) {
>          qemu_mod_timer(n->tx_timer,
> -                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
> +                       qemu_get_clock(vm_clock) + n->tx_timeout);

I think I'm missing where this is stored?  Looks like migration
would revert a changed tx_timeout back to 150us.

thanks,
-chris

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] virtio-net: Make tx_timer timeout configurable
  2010-08-31 18:00   ` Chris Wright
@ 2010-08-31 18:07     ` Alex Williamson
  2010-08-31 19:29       ` Chris Wright
  0 siblings, 1 reply; 23+ messages in thread
From: Alex Williamson @ 2010-08-31 18:07 UTC (permalink / raw)
  To: Chris Wright; +Cc: qemu-devel, jes.sorensen, kvm

On Tue, 2010-08-31 at 11:00 -0700, Chris Wright wrote:
> * Alex Williamson (alex.williamson@redhat.com) wrote:
> > diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> > index 075f72d..9ef29f0 100644
> > --- a/hw/virtio-net.c
> > +++ b/hw/virtio-net.c
> > @@ -36,6 +36,7 @@ typedef struct VirtIONet
> >      VirtQueue *ctrl_vq;
> >      NICState *nic;
> >      QEMUTimer *tx_timer;
> > +    uint32_t tx_timeout;
> >      int tx_timer_active;
> >      uint32_t has_vnet_hdr;
> >      uint8_t has_ufo;
> > @@ -702,7 +703,7 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
> >          virtio_net_flush_tx(n, vq);
> >      } else {
> >          qemu_mod_timer(n->tx_timer,
> > -                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
> > +                       qemu_get_clock(vm_clock) + n->tx_timeout);
> >          n->tx_timer_active = 1;
> >          virtio_queue_set_notification(vq, 0);
> >      }
> > @@ -842,7 +843,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
> >  
> >      if (n->tx_timer_active) {
> >          qemu_mod_timer(n->tx_timer,
> > -                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
> > +                       qemu_get_clock(vm_clock) + n->tx_timeout);
> 
> I think I'm missing where this is stored?  Looks like migration
> would revert a changed tx_timeout back to 150us.

It's not stored, it can be instantiated on the migration target any way
you please and we can migrate between different values or even different
TX mitigation strategies.  If a non-default value is used on the source
and you want to maintain the same behavior, the target needs to be
started the same way.

Alex


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] virtio-net: Make tx_timer timeout configurable
  2010-08-31 18:07     ` [Qemu-devel] " Alex Williamson
@ 2010-08-31 19:29       ` Chris Wright
  0 siblings, 0 replies; 23+ messages in thread
From: Chris Wright @ 2010-08-31 19:29 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Chris Wright, jes.sorensen, qemu-devel, kvm

* Alex Williamson (alex.williamson@redhat.com) wrote:
> On Tue, 2010-08-31 at 11:00 -0700, Chris Wright wrote:
> > * Alex Williamson (alex.williamson@redhat.com) wrote:
> > > diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> > > index 075f72d..9ef29f0 100644
> > > --- a/hw/virtio-net.c
> > > +++ b/hw/virtio-net.c
> > > @@ -36,6 +36,7 @@ typedef struct VirtIONet
> > >      VirtQueue *ctrl_vq;
> > >      NICState *nic;
> > >      QEMUTimer *tx_timer;
> > > +    uint32_t tx_timeout;
> > >      int tx_timer_active;
> > >      uint32_t has_vnet_hdr;
> > >      uint8_t has_ufo;
> > > @@ -702,7 +703,7 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
> > >          virtio_net_flush_tx(n, vq);
> > >      } else {
> > >          qemu_mod_timer(n->tx_timer,
> > > -                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
> > > +                       qemu_get_clock(vm_clock) + n->tx_timeout);
> > >          n->tx_timer_active = 1;
> > >          virtio_queue_set_notification(vq, 0);
> > >      }
> > > @@ -842,7 +843,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
> > >  
> > >      if (n->tx_timer_active) {
> > >          qemu_mod_timer(n->tx_timer,
> > > -                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
> > > +                       qemu_get_clock(vm_clock) + n->tx_timeout);
> > 
> > I think I'm missing where this is stored?  Looks like migration
> > would revert a changed tx_timeout back to 150us.
> 
> It's not stored, it can be instantiated on the migration target any way
> you please and we can migrate between different values or even different
> TX mitigation strategies.  If a non-default value is used on the source
> and you want to maintain the same behavior, the target needs to be
> started the same way.

heh, IOW, I did miss how it's stored...on cmdline ;)

thanks,
-chris

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/5] virtio-net: Introduce a new bottom half packet TX
  2010-08-27 22:37 ` [PATCH 4/5] virtio-net: Introduce a new bottom half packet TX Alex Williamson
@ 2010-08-31 20:14   ` Michael S. Tsirkin
  2010-08-31 20:33     ` Alex Williamson
  0 siblings, 1 reply; 23+ messages in thread
From: Michael S. Tsirkin @ 2010-08-31 20:14 UTC (permalink / raw)
  To: Alex Williamson; +Cc: qemu-devel, kvm, anthony, jes.sorensen

On Fri, Aug 27, 2010 at 04:37:36PM -0600, Alex Williamson wrote:
> Based on a patch from Mark McLoughlin, this patch introduces a new
> bottom half packet transmitter that avoids the latency imposed by
> the tx_timer approach.  Rather than scheduling a timer when a TX
> packet comes in, schedule a bottom half to be run from the iothread.
> The bottom half handler first attempts to flush the queue with
> notification disabled (this is where we could race with a guest
> without txburst).  If we flush a full burst, reschedule immediately.
> If we send short of a full burst, try to re-enable notification.
> To avoid a race with TXs that may have occurred, we must then
> flush again.  If we find some packets to send, the guest it probably
> active, so we can reschedule again.
> 
> tx_timer and tx_bh are mutually exclusive, so we can re-use the
> tx_waiting flag to indicate one or the other needs to be setup.
> This allows us to seamlessly migrate between timer and bh TX
> handling.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
> 
>  hw/virtio-net.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++---------
>  1 files changed, 68 insertions(+), 13 deletions(-)
> 
> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> index 8b652f2..3288c77 100644
> --- a/hw/virtio-net.c
> +++ b/hw/virtio-net.c
> @@ -36,6 +36,7 @@ typedef struct VirtIONet
>      VirtQueue *ctrl_vq;
>      NICState *nic;
>      QEMUTimer *tx_timer;
> +    QEMUBH *tx_bh;
>      uint32_t tx_timeout;
>      int32_t tx_burst;
>      int tx_waiting;
> @@ -704,16 +705,25 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VirtIONet *n = to_virtio_net(vdev);
>  
> -    if (n->tx_waiting) {
> -        virtio_queue_set_notification(vq, 1);
> -        qemu_del_timer(n->tx_timer);
> -        n->tx_waiting = 0;
> -        virtio_net_flush_tx(n, vq);
> +    if (n->tx_timer) {
> +        if (n->tx_waiting) {
> +            virtio_queue_set_notification(vq, 1);
> +            qemu_del_timer(n->tx_timer);
> +            n->tx_waiting = 0;
> +            virtio_net_flush_tx(n, vq);
> +        } else {
> +            qemu_mod_timer(n->tx_timer,
> +                           qemu_get_clock(vm_clock) + n->tx_timeout);
> +            n->tx_waiting = 1;
> +            virtio_queue_set_notification(vq, 0);
> +        }
>      } else {
> -        qemu_mod_timer(n->tx_timer,
> -                       qemu_get_clock(vm_clock) + n->tx_timeout);
> +        if (unlikely(n->tx_waiting)) {
> +            return;
> +        }
> +        virtio_queue_set_notification(n->tx_vq, 0);
> +        qemu_bh_schedule(n->tx_bh);
>          n->tx_waiting = 1;
> -        virtio_queue_set_notification(vq, 0);
>      }
>  }
>  
> @@ -731,6 +741,41 @@ static void virtio_net_tx_timer(void *opaque)
>      virtio_net_flush_tx(n, n->tx_vq);
>  }
>  
> +static void virtio_net_tx_bh(void *opaque)
> +{
> +    VirtIONet *n = opaque;
> +    int32_t ret;
> +
> +    n->tx_waiting = 0;
> +
> +    /* Just in case the driver is not ready on more */
> +    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
> +        return;
> +
> +    ret = virtio_net_flush_tx(n, n->tx_vq);
> +    if (ret == -EBUSY) {
> +        return; /* Notification re-enable handled by tx_complete */
> +    }
> +
> +    /* If we flush a full burst of packets, assume there are
> +     * more coming and immediately reschedule */
> +    if (ret >= n->tx_burst) {
> +        qemu_bh_schedule(n->tx_bh);
> +        n->tx_waiting = 1;
> +        return;
> +    }
> +
> +    /* If less than a full burst, re-enable notification and flush
> +     * anything that may have come in while we weren't looking.  If
> +     * we find something, assume the guest is still active and reschedule */
> +    virtio_queue_set_notification(n->tx_vq, 1);
> +    if (virtio_net_flush_tx(n, n->tx_vq) > 0) {

Shouldn't this be virtio_net_flush_tx(n, n->tx_vq) >= n->tx_burst?
If we get less than tx_burst, the ring is empty now so no need to
reschedule.
Right?

> +        virtio_queue_set_notification(n->tx_vq, 0);
> +        qemu_bh_schedule(n->tx_bh);
> +        n->tx_waiting = 1;
> +    }
> +}
> +
>  static void virtio_net_save(QEMUFile *f, void *opaque)
>  {
>      VirtIONet *n = opaque;
> @@ -850,8 +895,12 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
>      n->mac_table.first_multi = i;
>  
>      if (n->tx_waiting) {
> -        qemu_mod_timer(n->tx_timer,
> -                       qemu_get_clock(vm_clock) + n->tx_timeout);
> +        if (n->tx_timer) {
> +            qemu_mod_timer(n->tx_timer,
> +                           qemu_get_clock(vm_clock) + n->tx_timeout);
> +        } else {
> +            qemu_bh_schedule(n->tx_bh);
> +        }
>      }
>      return 0;
>  }
> @@ -939,9 +988,9 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>  
>      qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
>  
> -    n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
>      n->tx_waiting = 0;
>      if (txtimer) {
> +        n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
>          if (txtimer == 1) {
>              /* For convenience, 1 = "on" = predefined default, anything else
>               * specifies and actual timeout value */
> @@ -949,6 +998,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>          } else {
>              n->tx_timeout = txtimer;
>          }
> +    } else {
> +        n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
>      }
>      n->tx_burst = txburst;
>      n->mergeable_rx_bufs = 0;
> @@ -982,8 +1033,12 @@ void virtio_net_exit(VirtIODevice *vdev)
>      qemu_free(n->mac_table.macs);
>      qemu_free(n->vlans);
>  
> -    qemu_del_timer(n->tx_timer);
> -    qemu_free_timer(n->tx_timer);
> +    if (n->tx_timer) {
> +        qemu_del_timer(n->tx_timer);
> +        qemu_free_timer(n->tx_timer);
> +    } else {
> +        qemu_bh_delete(n->tx_bh);
> +    }
>  
>      virtio_cleanup(&n->vdev);
>      qemu_del_vlan_client(&n->nic->nc);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] virtio-net: More configurability and bh handling for tx
  2010-08-27 22:36 [PATCH 0/5] virtio-net: More configurability and bh handling for tx Alex Williamson
                   ` (4 preceding siblings ...)
  2010-08-27 22:37 ` [PATCH 5/5] virtio-net: Switch default to new bottom half TX handler for iothread Alex Williamson
@ 2010-08-31 20:20 ` Michael S. Tsirkin
  2010-08-31 20:27 ` Michael S. Tsirkin
  6 siblings, 0 replies; 23+ messages in thread
From: Michael S. Tsirkin @ 2010-08-31 20:20 UTC (permalink / raw)
  To: Alex Williamson; +Cc: qemu-devel, kvm, anthony, jes.sorensen

On Fri, Aug 27, 2010 at 04:36:59PM -0600, Alex Williamson wrote:
> Add the ability to configure the tx_timer timeout and add a bottom
> half tx handler that typically shows a nice perf boost over the
> time based approach.  See last patch for perf details.  Make this
> the new default when the iothread is enabled.  Thanks,
> 
> Alex

Looks good overall, but do we really want to make these parameters qdev properties?
It really looks like exposing an implementation detail that we'll have hard
time supporting in the future and users will have hard time
understanding. OTOH developers should have no trouble tweaking
a define on top of the file.



> ---
> 
> Alex Williamson (5):
>       virtio-net: Switch default to new bottom half TX handler for iothread
>       virtio-net: Introduce a new bottom half packet TX
>       virtio-net: Rename tx_timer_active to tx_waiting
>       virtio-net: Limit number of packets sent per TX flush
>       virtio-net: Make tx_timer timeout configurable
> 
> 
>  hw/s390-virtio-bus.c |    6 ++
>  hw/s390-virtio-bus.h |    4 ++
>  hw/syborg_virtio.c   |   10 +++-
>  hw/virtio-net.c      |  129 ++++++++++++++++++++++++++++++++++++++++----------
>  hw/virtio-net.h      |    2 -
>  hw/virtio-pci.c      |   10 +++-
>  hw/virtio.h          |    9 +++
>  7 files changed, 137 insertions(+), 33 deletions(-)
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/5] virtio-net: Limit number of packets sent per TX flush
  2010-08-27 22:37 ` [PATCH 2/5] virtio-net: Limit number of packets sent per TX flush Alex Williamson
@ 2010-08-31 20:23   ` Michael S. Tsirkin
  0 siblings, 0 replies; 23+ messages in thread
From: Michael S. Tsirkin @ 2010-08-31 20:23 UTC (permalink / raw)
  To: Alex Williamson; +Cc: qemu-devel, kvm, anthony, jes.sorensen

On Fri, Aug 27, 2010 at 04:37:18PM -0600, Alex Williamson wrote:
> If virtio_net_flush_tx is called with notification disabled, we can
> race with the guest, processing packets at the same rate as they
> get produced.  The trouble is that this means we have no guaranteed
> exit condition from the function and can spend minutes in there.
> Currently flush_tx is only called with notification on, which seems
> to limit us to one pass through the queue per call.  An upcoming
> patch changes this.
> 
> One pass through the queue (256) seems to be a good default value
> for this, balancing latency with throughput.  We use a signed int
> for txburst because 2^31 packets in a burst would take many, many
> minutes to process and it allows us to easily return a negative
> value value from virtio_net_flush_tx() to indicate a back-off
> or error condition.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

It might be better not to make this configurable, and simply set
txburst = vq->num in code in a single place, than the magic
256 constant in multiuple places.
Alternatively, let's put a macro in a .h file.

It might also be a good idea to put the above motivation in comments in the code.


> ---
> 
>  hw/s390-virtio-bus.c |    4 +++-
>  hw/s390-virtio-bus.h |    2 ++
>  hw/syborg_virtio.c   |    6 +++++-
>  hw/virtio-net.c      |   23 ++++++++++++++++-------
>  hw/virtio-pci.c      |    6 +++++-
>  hw/virtio.h          |    2 +-
>  6 files changed, 32 insertions(+), 11 deletions(-)
> 
> diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
> index 4da0b40..1483362 100644
> --- a/hw/s390-virtio-bus.c
> +++ b/hw/s390-virtio-bus.c
> @@ -110,7 +110,8 @@ static int s390_virtio_net_init(VirtIOS390Device *dev)
>  {
>      VirtIODevice *vdev;
>  
> -    vdev = virtio_net_init((DeviceState *)dev, &dev->nic, dev->txtimer);
> +    vdev = virtio_net_init((DeviceState *)dev, &dev->nic,
> +                           dev->txtimer, dev->txburst);
>      if (!vdev) {
>          return -1;
>      }
> @@ -328,6 +329,7 @@ static VirtIOS390DeviceInfo s390_virtio_net = {
>      .qdev.props = (Property[]) {
>          DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
>          DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer, 1),
> +        DEFINE_PROP_INT32("txburst", VirtIOS390Device, txburst, 256),
>          DEFINE_PROP_END_OF_LIST(),
>      },
>  };
> diff --git a/hw/s390-virtio-bus.h b/hw/s390-virtio-bus.h
> index 922daf2..808aea0 100644
> --- a/hw/s390-virtio-bus.h
> +++ b/hw/s390-virtio-bus.h
> @@ -45,6 +45,8 @@ typedef struct VirtIOS390Device {
>      uint32_t max_virtserial_ports;
>      /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
>      uint32_t txtimer;
> +    /* Max tx packets for virtio-net to burst at a time */
> +    int32_t txburst;
>  } VirtIOS390Device;
>  
>  typedef struct VirtIOS390Bus {
> diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
> index c8d731a..7b76972 100644
> --- a/hw/syborg_virtio.c
> +++ b/hw/syborg_virtio.c
> @@ -70,6 +70,8 @@ typedef struct {
>      uint32_t host_features;
>      /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
>      uint32_t txtimer;
> +    /* Max tx packets for virtio-net to burst at a time */
> +    int32_t txburst;
>  } SyborgVirtIOProxy;
>  
>  static uint32_t syborg_virtio_readl(void *opaque, target_phys_addr_t offset)
> @@ -286,7 +288,8 @@ static int syborg_virtio_net_init(SysBusDevice *dev)
>      VirtIODevice *vdev;
>      SyborgVirtIOProxy *proxy = FROM_SYSBUS(SyborgVirtIOProxy, dev);
>  
> -    vdev = virtio_net_init(&dev->qdev, &proxy->nic, proxy->txtimer);
> +    vdev = virtio_net_init(&dev->qdev, &proxy->nic,
> +                           proxy->txtimer, proxy->txburst);
>      return syborg_virtio_init(proxy, vdev);
>  }
>  
> @@ -298,6 +301,7 @@ static SysBusDeviceInfo syborg_virtio_net_info = {
>          DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
>          DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
>          DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer, 1),
> +        DEFINE_PROP_INT32("txburst", SyborgVirtIOProxy, txburst, 256),
>          DEFINE_PROP_END_OF_LIST(),
>      }
>  };
> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> index 9ef29f0..ac4aa8f 100644
> --- a/hw/virtio-net.c
> +++ b/hw/virtio-net.c
> @@ -37,6 +37,7 @@ typedef struct VirtIONet
>      NICState *nic;
>      QEMUTimer *tx_timer;
>      uint32_t tx_timeout;
> +    int32_t tx_burst;
>      int tx_timer_active;
>      uint32_t has_vnet_hdr;
>      uint8_t has_ufo;
> @@ -620,7 +621,7 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_
>      return size;
>  }
>  
> -static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
> +static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
>  
>  static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
>  {
> @@ -636,16 +637,18 @@ static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
>  }
>  
>  /* TX */
> -static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
> +static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
>  {
>      VirtQueueElement elem;
> +    int32_t num_packets = 0;
>  
> -    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
> -        return;
> +    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
> +        return num_packets;
> +    }
>  
>      if (n->async_tx.elem.out_num) {
>          virtio_queue_set_notification(n->tx_vq, 0);
> -        return;
> +        return num_packets;
>      }
>  
>      while (virtqueue_pop(vq, &elem)) {
> @@ -682,14 +685,19 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
>              virtio_queue_set_notification(n->tx_vq, 0);
>              n->async_tx.elem = elem;
>              n->async_tx.len  = len;
> -            return;
> +            return -EBUSY;
>          }
>  
>          len += ret;
>  
>          virtqueue_push(vq, &elem, len);
>          virtio_notify(&n->vdev, vq);
> +
> +        if (++num_packets >= n->tx_burst) {
> +            break;
> +        }
>      }
> +    return num_packets;
>  }
>  
>  static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
> @@ -905,7 +913,7 @@ static void virtio_net_vmstate_change(void *opaque, int running, int reason)
>  }
>  
>  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> -                              uint32_t txtimer)
> +                              uint32_t txtimer, int32_t txburst)
>  {
>      VirtIONet *n;
>  
> @@ -942,6 +950,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>              n->tx_timeout = txtimer;
>          }
>      }
> +    n->tx_burst = txburst;
>      n->mergeable_rx_bufs = 0;
>      n->promisc = 1; /* for compatibility */
>  
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index e3b9897..e025c09 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -109,6 +109,8 @@ typedef struct {
>      uint32_t max_virtserial_ports;
>      /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
>      uint32_t txtimer;
> +    /* Max tx packets for virtio-net to burst at a time */
> +    int32_t txburst;
>  } VirtIOPCIProxy;
>  
>  /* virtio device */
> @@ -615,7 +617,8 @@ static int virtio_net_init_pci(PCIDevice *pci_dev)
>      VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
>      VirtIODevice *vdev;
>  
> -    vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic, proxy->txtimer);
> +    vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic,
> +                           proxy->txtimer, proxy->txburst);
>  
>      vdev->nvectors = proxy->nvectors;
>      virtio_init_pci(proxy, vdev,
> @@ -693,6 +696,7 @@ static PCIDeviceInfo virtio_info[] = {
>              DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
>              DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
>              DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer, 1),
> +            DEFINE_PROP_INT32("txburst", VirtIOPCIProxy, txburst, 256),
>              DEFINE_PROP_END_OF_LIST(),
>          },
>          .qdev.reset = virtio_pci_reset,
> diff --git a/hw/virtio.h b/hw/virtio.h
> index 77d05e0..4051889 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -186,7 +186,7 @@ void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
>  /* Base devices.  */
>  VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
>  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> -                              uint32_t txtimer);
> +                              uint32_t txtimer, int32_t txburst);
>  VirtIODevice *virtio_serial_init(DeviceState *dev, uint32_t max_nr_ports);
>  VirtIODevice *virtio_balloon_init(DeviceState *dev);
>  #ifdef CONFIG_LINUX
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 5/5] virtio-net: Switch default to new bottom half TX handler for iothread
  2010-08-27 22:37 ` [PATCH 5/5] virtio-net: Switch default to new bottom half TX handler for iothread Alex Williamson
@ 2010-08-31 20:25   ` Michael S. Tsirkin
  2010-08-31 22:32     ` Alex Williamson
  0 siblings, 1 reply; 23+ messages in thread
From: Michael S. Tsirkin @ 2010-08-31 20:25 UTC (permalink / raw)
  To: Alex Williamson; +Cc: qemu-devel, kvm, anthony, jes.sorensen

On Fri, Aug 27, 2010 at 04:37:45PM -0600, Alex Williamson wrote:
> The bottom half handler shows big improvements over the timer
> with few downsides, default to it when the iothread is enabled.
> 
> Using the following tests, with the guest and host connected
> via tap+bridge:
> 
> guest> netperf -t TCP_STREAM -H $HOST
> host> netperf -t TCP_STREAM -H $GUEST
> guest> netperf -t UDP_STREAM -H $HOST
> host> netperf -t UDP_STREAM -H $GUEST
> guest> netperf -t TCP_RR -H $HOST
> 
> Results: base throughput, exits/throughput ->
>                    patched throughput, exits/throughput
> 
> --enable-io-thread
> TCP guest->host 2737.77, 47.82  -> 6767.09, 29.15 = 247%, 61%
> TCP host->guest 2231.33, 74.00  -> 4125.80, 67.61 = 185%, 91%
> UDP guest->host 6281.68, 14.66  -> 12569.27, 1.98 = 200%, 14%
> UDP host->guest 275.91,  289.22 -> 264.80, 293.53 = 96%, 101%
> interations/s   1949.65, 82.97  -> 7417.56, 84.31 = 380%, 102%
> 
> No --enable-io-thread
> TCP guest->host 3041.57, 55.11 -> 1038.93, 517.57 = 34%, 939%
> TCP host->guest 2416.03, 76.67 -> 5655.92, 55.52  = 234%, 72%
> UDP guest->host 12255.82, 6.11 -> 7775.87, 31.32  = 63%, 513%
> UDP host->guest 587.92, 245.95 -> 611.88, 239.92  = 104%, 98%
> interations/s   1975.59, 83.21 -> 8935.50, 88.18  = 452%, 106%
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

parameter having different settings based on config
options might surprise some users. I don't think
we really need a parameter here ...

> ---
> 
>  hw/s390-virtio-bus.c |    3 ++-
>  hw/syborg_virtio.c   |    3 ++-
>  hw/virtio-pci.c      |    3 ++-
>  hw/virtio.h          |    6 ++++++
>  4 files changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
> index 1483362..985f99a 100644
> --- a/hw/s390-virtio-bus.c
> +++ b/hw/s390-virtio-bus.c
> @@ -328,7 +328,8 @@ static VirtIOS390DeviceInfo s390_virtio_net = {
>      .qdev.size = sizeof(VirtIOS390Device),
>      .qdev.props = (Property[]) {
>          DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
> -        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer, 1),
> +        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer,
> +                           TXTIMER_DEFAULT),
>          DEFINE_PROP_INT32("txburst", VirtIOS390Device, txburst, 256),
>          DEFINE_PROP_END_OF_LIST(),
>      },
> diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
> index 7b76972..ee5746d 100644
> --- a/hw/syborg_virtio.c
> +++ b/hw/syborg_virtio.c
> @@ -300,7 +300,8 @@ static SysBusDeviceInfo syborg_virtio_net_info = {
>      .qdev.props = (Property[]) {
>          DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
>          DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
> -        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer, 1),
> +        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer,
> +                           TXTIMER_DEFAULT),
>          DEFINE_PROP_INT32("txburst", SyborgVirtIOProxy, txburst, 256),
>          DEFINE_PROP_END_OF_LIST(),
>      }
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index e025c09..9740f57 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -695,7 +695,8 @@ static PCIDeviceInfo virtio_info[] = {
>              DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
>              DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
>              DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
> -            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer, 1),
> +            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer,
> +                               TXTIMER_DEFAULT),
>              DEFINE_PROP_INT32("txburst", VirtIOPCIProxy, txburst, 256),
>              DEFINE_PROP_END_OF_LIST(),
>          },
> diff --git a/hw/virtio.h b/hw/virtio.h
> index 4051889..a1a17a2 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -183,6 +183,12 @@ void virtio_update_irq(VirtIODevice *vdev);
>  void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
>                          void *opaque);
>  
> +#ifdef CONFIG_IOTHREAD
> + #define TXTIMER_DEFAULT 0
> +#else
> + #define TXTIMER_DEFAULT 1
> +#endif
> +

Add a comment explaning that this is just a performance optimization?

>  /* Base devices.  */
>  VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
>  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] virtio-net: More configurability and bh handling for tx
  2010-08-27 22:36 [PATCH 0/5] virtio-net: More configurability and bh handling for tx Alex Williamson
                   ` (5 preceding siblings ...)
  2010-08-31 20:20 ` [PATCH 0/5] virtio-net: More configurability and bh handling for tx Michael S. Tsirkin
@ 2010-08-31 20:27 ` Michael S. Tsirkin
  2010-08-31 21:33   ` Anthony Liguori
  6 siblings, 1 reply; 23+ messages in thread
From: Michael S. Tsirkin @ 2010-08-31 20:27 UTC (permalink / raw)
  To: Alex Williamson; +Cc: qemu-devel, kvm, anthony, jes.sorensen, kraxel

On Fri, Aug 27, 2010 at 04:36:59PM -0600, Alex Williamson wrote:
> Add the ability to configure the tx_timer timeout and add a bottom
> half tx handler that typically shows a nice perf boost over the
> time based approach.  See last patch for perf details.  Make this
> the new default when the iothread is enabled.  Thanks,
> 
> Alex

As a further thought, maybe it would help to have a
separate namespace for unsupported, developer-only parameters.
Thoughts?

> ---
> 
> Alex Williamson (5):
>       virtio-net: Switch default to new bottom half TX handler for iothread
>       virtio-net: Introduce a new bottom half packet TX
>       virtio-net: Rename tx_timer_active to tx_waiting
>       virtio-net: Limit number of packets sent per TX flush
>       virtio-net: Make tx_timer timeout configurable
> 
> 
>  hw/s390-virtio-bus.c |    6 ++
>  hw/s390-virtio-bus.h |    4 ++
>  hw/syborg_virtio.c   |   10 +++-
>  hw/virtio-net.c      |  129 ++++++++++++++++++++++++++++++++++++++++----------
>  hw/virtio-net.h      |    2 -
>  hw/virtio-pci.c      |   10 +++-
>  hw/virtio.h          |    9 +++
>  7 files changed, 137 insertions(+), 33 deletions(-)
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/5] virtio-net: Introduce a new bottom half packet TX
  2010-08-31 20:14   ` Michael S. Tsirkin
@ 2010-08-31 20:33     ` Alex Williamson
  2010-09-01  9:47       ` Michael S. Tsirkin
  0 siblings, 1 reply; 23+ messages in thread
From: Alex Williamson @ 2010-08-31 20:33 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: qemu-devel, kvm, anthony, jes.sorensen

On Tue, 2010-08-31 at 23:14 +0300, Michael S. Tsirkin wrote:
> On Fri, Aug 27, 2010 at 04:37:36PM -0600, Alex Williamson wrote:
> > Based on a patch from Mark McLoughlin, this patch introduces a new
> > bottom half packet transmitter that avoids the latency imposed by
> > the tx_timer approach.  Rather than scheduling a timer when a TX
> > packet comes in, schedule a bottom half to be run from the iothread.
> > The bottom half handler first attempts to flush the queue with
> > notification disabled (this is where we could race with a guest
> > without txburst).  If we flush a full burst, reschedule immediately.
> > If we send short of a full burst, try to re-enable notification.
> > To avoid a race with TXs that may have occurred, we must then
> > flush again.  If we find some packets to send, the guest it probably
> > active, so we can reschedule again.
> > 
> > tx_timer and tx_bh are mutually exclusive, so we can re-use the
> > tx_waiting flag to indicate one or the other needs to be setup.
> > This allows us to seamlessly migrate between timer and bh TX
> > handling.
> > 
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > ---
> > 
> >  hw/virtio-net.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++---------
> >  1 files changed, 68 insertions(+), 13 deletions(-)
> > 
> > diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> > index 8b652f2..3288c77 100644
> > --- a/hw/virtio-net.c
> > +++ b/hw/virtio-net.c
> > @@ -36,6 +36,7 @@ typedef struct VirtIONet
> >      VirtQueue *ctrl_vq;
> >      NICState *nic;
> >      QEMUTimer *tx_timer;
> > +    QEMUBH *tx_bh;
> >      uint32_t tx_timeout;
> >      int32_t tx_burst;
> >      int tx_waiting;
> > @@ -704,16 +705,25 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
> >  {
> >      VirtIONet *n = to_virtio_net(vdev);
> >  
> > -    if (n->tx_waiting) {
> > -        virtio_queue_set_notification(vq, 1);
> > -        qemu_del_timer(n->tx_timer);
> > -        n->tx_waiting = 0;
> > -        virtio_net_flush_tx(n, vq);
> > +    if (n->tx_timer) {
> > +        if (n->tx_waiting) {
> > +            virtio_queue_set_notification(vq, 1);
> > +            qemu_del_timer(n->tx_timer);
> > +            n->tx_waiting = 0;
> > +            virtio_net_flush_tx(n, vq);
> > +        } else {
> > +            qemu_mod_timer(n->tx_timer,
> > +                           qemu_get_clock(vm_clock) + n->tx_timeout);
> > +            n->tx_waiting = 1;
> > +            virtio_queue_set_notification(vq, 0);
> > +        }
> >      } else {
> > -        qemu_mod_timer(n->tx_timer,
> > -                       qemu_get_clock(vm_clock) + n->tx_timeout);
> > +        if (unlikely(n->tx_waiting)) {
> > +            return;
> > +        }
> > +        virtio_queue_set_notification(n->tx_vq, 0);
> > +        qemu_bh_schedule(n->tx_bh);
> >          n->tx_waiting = 1;
> > -        virtio_queue_set_notification(vq, 0);
> >      }
> >  }
> >  
> > @@ -731,6 +741,41 @@ static void virtio_net_tx_timer(void *opaque)
> >      virtio_net_flush_tx(n, n->tx_vq);
> >  }
> >  
> > +static void virtio_net_tx_bh(void *opaque)
> > +{
> > +    VirtIONet *n = opaque;
> > +    int32_t ret;
> > +
> > +    n->tx_waiting = 0;
> > +
> > +    /* Just in case the driver is not ready on more */
> > +    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
> > +        return;
> > +
> > +    ret = virtio_net_flush_tx(n, n->tx_vq);
> > +    if (ret == -EBUSY) {
> > +        return; /* Notification re-enable handled by tx_complete */
> > +    }
> > +
> > +    /* If we flush a full burst of packets, assume there are
> > +     * more coming and immediately reschedule */
> > +    if (ret >= n->tx_burst) {
> > +        qemu_bh_schedule(n->tx_bh);
> > +        n->tx_waiting = 1;
> > +        return;
> > +    }
> > +
> > +    /* If less than a full burst, re-enable notification and flush
> > +     * anything that may have come in while we weren't looking.  If
> > +     * we find something, assume the guest is still active and reschedule */
> > +    virtio_queue_set_notification(n->tx_vq, 1);
> > +    if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
> 
> Shouldn't this be virtio_net_flush_tx(n, n->tx_vq) >= n->tx_burst?
> If we get less than tx_burst, the ring is empty now so no need to
> reschedule.
> Right?

I suppose it depends on how aggressive we want to be.  If the guest put
something on the queue between the first flush and this one, then it
might be actively transmitting, and if we want to optimize latency, we
anticipate that it might continue to transmit and re-schedule.  This is
taken straight from markmc's rhel5 patch.  I wouldn't argue that it's
wrong to not reschedule here, but it's clearly less aggressive.  Thanks,

Alex

> > +        virtio_queue_set_notification(n->tx_vq, 0);
> > +        qemu_bh_schedule(n->tx_bh);
> > +        n->tx_waiting = 1;
> > +    }
> > +}
> > +
> >  static void virtio_net_save(QEMUFile *f, void *opaque)
> >  {
> >      VirtIONet *n = opaque;
> > @@ -850,8 +895,12 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
> >      n->mac_table.first_multi = i;
> >  
> >      if (n->tx_waiting) {
> > -        qemu_mod_timer(n->tx_timer,
> > -                       qemu_get_clock(vm_clock) + n->tx_timeout);
> > +        if (n->tx_timer) {
> > +            qemu_mod_timer(n->tx_timer,
> > +                           qemu_get_clock(vm_clock) + n->tx_timeout);
> > +        } else {
> > +            qemu_bh_schedule(n->tx_bh);
> > +        }
> >      }
> >      return 0;
> >  }
> > @@ -939,9 +988,9 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> >  
> >      qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
> >  
> > -    n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
> >      n->tx_waiting = 0;
> >      if (txtimer) {
> > +        n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
> >          if (txtimer == 1) {
> >              /* For convenience, 1 = "on" = predefined default, anything else
> >               * specifies and actual timeout value */
> > @@ -949,6 +998,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> >          } else {
> >              n->tx_timeout = txtimer;
> >          }
> > +    } else {
> > +        n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
> >      }
> >      n->tx_burst = txburst;
> >      n->mergeable_rx_bufs = 0;
> > @@ -982,8 +1033,12 @@ void virtio_net_exit(VirtIODevice *vdev)
> >      qemu_free(n->mac_table.macs);
> >      qemu_free(n->vlans);
> >  
> > -    qemu_del_timer(n->tx_timer);
> > -    qemu_free_timer(n->tx_timer);
> > +    if (n->tx_timer) {
> > +        qemu_del_timer(n->tx_timer);
> > +        qemu_free_timer(n->tx_timer);
> > +    } else {
> > +        qemu_bh_delete(n->tx_bh);
> > +    }
> >  
> >      virtio_cleanup(&n->vdev);
> >      qemu_del_vlan_client(&n->nic->nc);
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html




^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] virtio-net: More configurability and bh handling for tx
  2010-08-31 20:27 ` Michael S. Tsirkin
@ 2010-08-31 21:33   ` Anthony Liguori
  2010-08-31 22:26     ` Alex Williamson
  0 siblings, 1 reply; 23+ messages in thread
From: Anthony Liguori @ 2010-08-31 21:33 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Alex Williamson, qemu-devel, kvm, jes.sorensen, kraxel

On 08/31/2010 03:27 PM, Michael S. Tsirkin wrote:
> On Fri, Aug 27, 2010 at 04:36:59PM -0600, Alex Williamson wrote:
>    
>> Add the ability to configure the tx_timer timeout and add a bottom
>> half tx handler that typically shows a nice perf boost over the
>> time based approach.  See last patch for perf details.  Make this
>> the new default when the iothread is enabled.  Thanks,
>>
>> Alex
>>      
> As a further thought, maybe it would help to have a
> separate namespace for unsupported, developer-only parameters.
> Thoughts?
>    

We already have an undocumented one, just prefix with 'x-' to indicate 
that it's experimental.

Regards,

Anthony Liguori

>    
>> ---
>>
>> Alex Williamson (5):
>>        virtio-net: Switch default to new bottom half TX handler for iothread
>>        virtio-net: Introduce a new bottom half packet TX
>>        virtio-net: Rename tx_timer_active to tx_waiting
>>        virtio-net: Limit number of packets sent per TX flush
>>        virtio-net: Make tx_timer timeout configurable
>>
>>
>>   hw/s390-virtio-bus.c |    6 ++
>>   hw/s390-virtio-bus.h |    4 ++
>>   hw/syborg_virtio.c   |   10 +++-
>>   hw/virtio-net.c      |  129 ++++++++++++++++++++++++++++++++++++++++----------
>>   hw/virtio-net.h      |    2 -
>>   hw/virtio-pci.c      |   10 +++-
>>   hw/virtio.h          |    9 +++
>>   7 files changed, 137 insertions(+), 33 deletions(-)
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>      


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] virtio-net: More configurability and bh handling for tx
  2010-08-31 21:33   ` Anthony Liguori
@ 2010-08-31 22:26     ` Alex Williamson
  2010-09-01 10:40       ` Michael S. Tsirkin
  0 siblings, 1 reply; 23+ messages in thread
From: Alex Williamson @ 2010-08-31 22:26 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Michael S. Tsirkin, qemu-devel, kvm, jes.sorensen, kraxel

On Tue, 2010-08-31 at 16:33 -0500, Anthony Liguori wrote:
> On 08/31/2010 03:27 PM, Michael S. Tsirkin wrote:
> > On Fri, Aug 27, 2010 at 04:36:59PM -0600, Alex Williamson wrote:
> >    
> >> Add the ability to configure the tx_timer timeout and add a bottom
> >> half tx handler that typically shows a nice perf boost over the
> >> time based approach.  See last patch for perf details.  Make this
> >> the new default when the iothread is enabled.  Thanks,
> >>
> >> Alex
> >>      
> > As a further thought, maybe it would help to have a
> > separate namespace for unsupported, developer-only parameters.
> > Thoughts?
> >    
> 
> We already have an undocumented one, just prefix with 'x-' to indicate 
> that it's experimental.

Any objection then to x-txburst and x-txtimer then?  Easy enough to
rename.  Thanks,

Alex


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 5/5] virtio-net: Switch default to new bottom half TX handler for iothread
  2010-08-31 20:25   ` Michael S. Tsirkin
@ 2010-08-31 22:32     ` Alex Williamson
  2010-08-31 22:46       ` Anthony Liguori
                         ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Alex Williamson @ 2010-08-31 22:32 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: qemu-devel, kvm, anthony, jes.sorensen

On Tue, 2010-08-31 at 23:25 +0300, Michael S. Tsirkin wrote:
> On Fri, Aug 27, 2010 at 04:37:45PM -0600, Alex Williamson wrote:
> > The bottom half handler shows big improvements over the timer
> > with few downsides, default to it when the iothread is enabled.
> > 
> > Using the following tests, with the guest and host connected
> > via tap+bridge:
> > 
> > guest> netperf -t TCP_STREAM -H $HOST
> > host> netperf -t TCP_STREAM -H $GUEST
> > guest> netperf -t UDP_STREAM -H $HOST
> > host> netperf -t UDP_STREAM -H $GUEST
> > guest> netperf -t TCP_RR -H $HOST
> > 
> > Results: base throughput, exits/throughput ->
> >                    patched throughput, exits/throughput
> > 
> > --enable-io-thread
> > TCP guest->host 2737.77, 47.82  -> 6767.09, 29.15 = 247%, 61%
> > TCP host->guest 2231.33, 74.00  -> 4125.80, 67.61 = 185%, 91%
> > UDP guest->host 6281.68, 14.66  -> 12569.27, 1.98 = 200%, 14%
> > UDP host->guest 275.91,  289.22 -> 264.80, 293.53 = 96%, 101%
> > interations/s   1949.65, 82.97  -> 7417.56, 84.31 = 380%, 102%
> > 
> > No --enable-io-thread
> > TCP guest->host 3041.57, 55.11 -> 1038.93, 517.57 = 34%, 939%
> > TCP host->guest 2416.03, 76.67 -> 5655.92, 55.52  = 234%, 72%
> > UDP guest->host 12255.82, 6.11 -> 7775.87, 31.32  = 63%, 513%
> > UDP host->guest 587.92, 245.95 -> 611.88, 239.92  = 104%, 98%
> > interations/s   1975.59, 83.21 -> 8935.50, 88.18  = 452%, 106%
> > 
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> 
> parameter having different settings based on config
> options might surprise some users. I don't think
> we really need a parameter here ...

I'm not a bit fan of this either, but I'd also prefer not to introduce a
regression for a performance difference we know about in advance.  It
gets even more complicated when we factor in qemu-kvm, as it doesn't
build with iothread enabled, but seems to get and even better boost in
performance across the board thanks largely to the kvm-irqchip.  Should
we instead make this a configure option?  --enable-virtio-net-txbh?
Thanks,

Alex

> > ---
> > 
> >  hw/s390-virtio-bus.c |    3 ++-
> >  hw/syborg_virtio.c   |    3 ++-
> >  hw/virtio-pci.c      |    3 ++-
> >  hw/virtio.h          |    6 ++++++
> >  4 files changed, 12 insertions(+), 3 deletions(-)
> > 
> > diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
> > index 1483362..985f99a 100644
> > --- a/hw/s390-virtio-bus.c
> > +++ b/hw/s390-virtio-bus.c
> > @@ -328,7 +328,8 @@ static VirtIOS390DeviceInfo s390_virtio_net = {
> >      .qdev.size = sizeof(VirtIOS390Device),
> >      .qdev.props = (Property[]) {
> >          DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
> > -        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer, 1),
> > +        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer,
> > +                           TXTIMER_DEFAULT),
> >          DEFINE_PROP_INT32("txburst", VirtIOS390Device, txburst, 256),
> >          DEFINE_PROP_END_OF_LIST(),
> >      },
> > diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
> > index 7b76972..ee5746d 100644
> > --- a/hw/syborg_virtio.c
> > +++ b/hw/syborg_virtio.c
> > @@ -300,7 +300,8 @@ static SysBusDeviceInfo syborg_virtio_net_info = {
> >      .qdev.props = (Property[]) {
> >          DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
> >          DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
> > -        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer, 1),
> > +        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer,
> > +                           TXTIMER_DEFAULT),
> >          DEFINE_PROP_INT32("txburst", SyborgVirtIOProxy, txburst, 256),
> >          DEFINE_PROP_END_OF_LIST(),
> >      }
> > diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> > index e025c09..9740f57 100644
> > --- a/hw/virtio-pci.c
> > +++ b/hw/virtio-pci.c
> > @@ -695,7 +695,8 @@ static PCIDeviceInfo virtio_info[] = {
> >              DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
> >              DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
> >              DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
> > -            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer, 1),
> > +            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer,
> > +                               TXTIMER_DEFAULT),
> >              DEFINE_PROP_INT32("txburst", VirtIOPCIProxy, txburst, 256),
> >              DEFINE_PROP_END_OF_LIST(),
> >          },
> > diff --git a/hw/virtio.h b/hw/virtio.h
> > index 4051889..a1a17a2 100644
> > --- a/hw/virtio.h
> > +++ b/hw/virtio.h
> > @@ -183,6 +183,12 @@ void virtio_update_irq(VirtIODevice *vdev);
> >  void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
> >                          void *opaque);
> >  
> > +#ifdef CONFIG_IOTHREAD
> > + #define TXTIMER_DEFAULT 0
> > +#else
> > + #define TXTIMER_DEFAULT 1
> > +#endif
> > +
> 
> Add a comment explaning that this is just a performance optimization?
> 
> >  /* Base devices.  */
> >  VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
> >  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html




^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 5/5] virtio-net: Switch default to new bottom half TX handler for iothread
  2010-08-31 22:32     ` Alex Williamson
@ 2010-08-31 22:46       ` Anthony Liguori
  2010-09-01  6:21       ` Stefan Hajnoczi
  2010-09-01  8:47       ` Michael S. Tsirkin
  2 siblings, 0 replies; 23+ messages in thread
From: Anthony Liguori @ 2010-08-31 22:46 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Michael S. Tsirkin, qemu-devel, kvm, jes.sorensen

On 08/31/2010 05:32 PM, Alex Williamson wrote:
> On Tue, 2010-08-31 at 23:25 +0300, Michael S. Tsirkin wrote:
>    
>> On Fri, Aug 27, 2010 at 04:37:45PM -0600, Alex Williamson wrote:
>>      
>>> The bottom half handler shows big improvements over the timer
>>> with few downsides, default to it when the iothread is enabled.
>>>
>>> Using the following tests, with the guest and host connected
>>> via tap+bridge:
>>>
>>> guest>  netperf -t TCP_STREAM -H $HOST
>>> host>  netperf -t TCP_STREAM -H $GUEST
>>> guest>  netperf -t UDP_STREAM -H $HOST
>>> host>  netperf -t UDP_STREAM -H $GUEST
>>> guest>  netperf -t TCP_RR -H $HOST
>>>
>>> Results: base throughput, exits/throughput ->
>>>                     patched throughput, exits/throughput
>>>
>>> --enable-io-thread
>>> TCP guest->host 2737.77, 47.82  ->  6767.09, 29.15 = 247%, 61%
>>> TCP host->guest 2231.33, 74.00  ->  4125.80, 67.61 = 185%, 91%
>>> UDP guest->host 6281.68, 14.66  ->  12569.27, 1.98 = 200%, 14%
>>> UDP host->guest 275.91,  289.22 ->  264.80, 293.53 = 96%, 101%
>>> interations/s   1949.65, 82.97  ->  7417.56, 84.31 = 380%, 102%
>>>
>>> No --enable-io-thread
>>> TCP guest->host 3041.57, 55.11 ->  1038.93, 517.57 = 34%, 939%
>>> TCP host->guest 2416.03, 76.67 ->  5655.92, 55.52  = 234%, 72%
>>> UDP guest->host 12255.82, 6.11 ->  7775.87, 31.32  = 63%, 513%
>>> UDP host->guest 587.92, 245.95 ->  611.88, 239.92  = 104%, 98%
>>> interations/s   1975.59, 83.21 ->  8935.50, 88.18  = 452%, 106%
>>>
>>> Signed-off-by: Alex Williamson<alex.williamson@redhat.com>
>>>        
>> parameter having different settings based on config
>> options might surprise some users. I don't think
>> we really need a parameter here ...
>>      
> I'm not a bit fan of this either, but I'd also prefer not to introduce a
> regression for a performance difference we know about in advance.  It
> gets even more complicated when we factor in qemu-kvm, as it doesn't
> build with iothread enabled, but seems to get and even better boost in
> performance across the board thanks largely to the kvm-irqchip.  Should
> we instead make this a configure option?  --enable-virtio-net-txbh?
>    

No, at this stage, we should ignore no --enable-io-thread with -enable-kvm.

Regards,

Anthony Liguori

> Thanks,
>
> Alex
>
>    
>>> ---
>>>
>>>   hw/s390-virtio-bus.c |    3 ++-
>>>   hw/syborg_virtio.c   |    3 ++-
>>>   hw/virtio-pci.c      |    3 ++-
>>>   hw/virtio.h          |    6 ++++++
>>>   4 files changed, 12 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
>>> index 1483362..985f99a 100644
>>> --- a/hw/s390-virtio-bus.c
>>> +++ b/hw/s390-virtio-bus.c
>>> @@ -328,7 +328,8 @@ static VirtIOS390DeviceInfo s390_virtio_net = {
>>>       .qdev.size = sizeof(VirtIOS390Device),
>>>       .qdev.props = (Property[]) {
>>>           DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
>>> -        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer, 1),
>>> +        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer,
>>> +                           TXTIMER_DEFAULT),
>>>           DEFINE_PROP_INT32("txburst", VirtIOS390Device, txburst, 256),
>>>           DEFINE_PROP_END_OF_LIST(),
>>>       },
>>> diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
>>> index 7b76972..ee5746d 100644
>>> --- a/hw/syborg_virtio.c
>>> +++ b/hw/syborg_virtio.c
>>> @@ -300,7 +300,8 @@ static SysBusDeviceInfo syborg_virtio_net_info = {
>>>       .qdev.props = (Property[]) {
>>>           DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
>>>           DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
>>> -        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer, 1),
>>> +        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer,
>>> +                           TXTIMER_DEFAULT),
>>>           DEFINE_PROP_INT32("txburst", SyborgVirtIOProxy, txburst, 256),
>>>           DEFINE_PROP_END_OF_LIST(),
>>>       }
>>> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
>>> index e025c09..9740f57 100644
>>> --- a/hw/virtio-pci.c
>>> +++ b/hw/virtio-pci.c
>>> @@ -695,7 +695,8 @@ static PCIDeviceInfo virtio_info[] = {
>>>               DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
>>>               DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
>>>               DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
>>> -            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer, 1),
>>> +            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer,
>>> +                               TXTIMER_DEFAULT),
>>>               DEFINE_PROP_INT32("txburst", VirtIOPCIProxy, txburst, 256),
>>>               DEFINE_PROP_END_OF_LIST(),
>>>           },
>>> diff --git a/hw/virtio.h b/hw/virtio.h
>>> index 4051889..a1a17a2 100644
>>> --- a/hw/virtio.h
>>> +++ b/hw/virtio.h
>>> @@ -183,6 +183,12 @@ void virtio_update_irq(VirtIODevice *vdev);
>>>   void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
>>>                           void *opaque);
>>>
>>> +#ifdef CONFIG_IOTHREAD
>>> + #define TXTIMER_DEFAULT 0
>>> +#else
>>> + #define TXTIMER_DEFAULT 1
>>> +#endif
>>> +
>>>        
>> Add a comment explaning that this is just a performance optimization?
>>
>>      
>>>   /* Base devices.  */
>>>   VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
>>>   VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>        
>
>
>    


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 5/5] virtio-net: Switch default to new bottom half TX handler for iothread
  2010-08-31 22:32     ` Alex Williamson
  2010-08-31 22:46       ` Anthony Liguori
@ 2010-09-01  6:21       ` Stefan Hajnoczi
  2010-09-01  8:47       ` Michael S. Tsirkin
  2 siblings, 0 replies; 23+ messages in thread
From: Stefan Hajnoczi @ 2010-09-01  6:21 UTC (permalink / raw)
  To: Alex Williamson
  Cc: Michael S. Tsirkin, qemu-devel, kvm, anthony, jes.sorensen

On Tue, Aug 31, 2010 at 11:32 PM, Alex Williamson
<alex.williamson@redhat.com> wrote:
> On Tue, 2010-08-31 at 23:25 +0300, Michael S. Tsirkin wrote:
>> On Fri, Aug 27, 2010 at 04:37:45PM -0600, Alex Williamson wrote:
>> > The bottom half handler shows big improvements over the timer
>> > with few downsides, default to it when the iothread is enabled.
>> >
>> > Using the following tests, with the guest and host connected
>> > via tap+bridge:
>> >
>> > guest> netperf -t TCP_STREAM -H $HOST
>> > host> netperf -t TCP_STREAM -H $GUEST
>> > guest> netperf -t UDP_STREAM -H $HOST
>> > host> netperf -t UDP_STREAM -H $GUEST
>> > guest> netperf -t TCP_RR -H $HOST
>> >
>> > Results: base throughput, exits/throughput ->
>> >                    patched throughput, exits/throughput
>> >
>> > --enable-io-thread
>> > TCP guest->host 2737.77, 47.82  -> 6767.09, 29.15 = 247%, 61%
>> > TCP host->guest 2231.33, 74.00  -> 4125.80, 67.61 = 185%, 91%
>> > UDP guest->host 6281.68, 14.66  -> 12569.27, 1.98 = 200%, 14%
>> > UDP host->guest 275.91,  289.22 -> 264.80, 293.53 = 96%, 101%
>> > interations/s   1949.65, 82.97  -> 7417.56, 84.31 = 380%, 102%
>> >
>> > No --enable-io-thread
>> > TCP guest->host 3041.57, 55.11 -> 1038.93, 517.57 = 34%, 939%
>> > TCP host->guest 2416.03, 76.67 -> 5655.92, 55.52  = 234%, 72%
>> > UDP guest->host 12255.82, 6.11 -> 7775.87, 31.32  = 63%, 513%
>> > UDP host->guest 587.92, 245.95 -> 611.88, 239.92  = 104%, 98%
>> > interations/s   1975.59, 83.21 -> 8935.50, 88.18  = 452%, 106%
>> >
>> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
>>
>> parameter having different settings based on config
>> options might surprise some users. I don't think
>> we really need a parameter here ...
>
> I'm not a bit fan of this either, but I'd also prefer not to introduce a
> regression for a performance difference we know about in advance.  It
> gets even more complicated when we factor in qemu-kvm, as it doesn't
> build with iothread enabled, but seems to get and even better boost in
> performance across the board thanks largely to the kvm-irqchip.  Should
> we instead make this a configure option?  --enable-virtio-net-txbh?
> Thanks,
>
> Alex

qemu-kvm uses its own iothread implementation by default.  It doesn't
need --enable-io-thread because it already uses a similar model.

Stefan

>
>> > ---
>> >
>> >  hw/s390-virtio-bus.c |    3 ++-
>> >  hw/syborg_virtio.c   |    3 ++-
>> >  hw/virtio-pci.c      |    3 ++-
>> >  hw/virtio.h          |    6 ++++++
>> >  4 files changed, 12 insertions(+), 3 deletions(-)
>> >
>> > diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
>> > index 1483362..985f99a 100644
>> > --- a/hw/s390-virtio-bus.c
>> > +++ b/hw/s390-virtio-bus.c
>> > @@ -328,7 +328,8 @@ static VirtIOS390DeviceInfo s390_virtio_net = {
>> >      .qdev.size = sizeof(VirtIOS390Device),
>> >      .qdev.props = (Property[]) {
>> >          DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
>> > -        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer, 1),
>> > +        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer,
>> > +                           TXTIMER_DEFAULT),
>> >          DEFINE_PROP_INT32("txburst", VirtIOS390Device, txburst, 256),
>> >          DEFINE_PROP_END_OF_LIST(),
>> >      },
>> > diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
>> > index 7b76972..ee5746d 100644
>> > --- a/hw/syborg_virtio.c
>> > +++ b/hw/syborg_virtio.c
>> > @@ -300,7 +300,8 @@ static SysBusDeviceInfo syborg_virtio_net_info = {
>> >      .qdev.props = (Property[]) {
>> >          DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
>> >          DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
>> > -        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer, 1),
>> > +        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer,
>> > +                           TXTIMER_DEFAULT),
>> >          DEFINE_PROP_INT32("txburst", SyborgVirtIOProxy, txburst, 256),
>> >          DEFINE_PROP_END_OF_LIST(),
>> >      }
>> > diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
>> > index e025c09..9740f57 100644
>> > --- a/hw/virtio-pci.c
>> > +++ b/hw/virtio-pci.c
>> > @@ -695,7 +695,8 @@ static PCIDeviceInfo virtio_info[] = {
>> >              DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
>> >              DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
>> >              DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
>> > -            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer, 1),
>> > +            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer,
>> > +                               TXTIMER_DEFAULT),
>> >              DEFINE_PROP_INT32("txburst", VirtIOPCIProxy, txburst, 256),
>> >              DEFINE_PROP_END_OF_LIST(),
>> >          },
>> > diff --git a/hw/virtio.h b/hw/virtio.h
>> > index 4051889..a1a17a2 100644
>> > --- a/hw/virtio.h
>> > +++ b/hw/virtio.h
>> > @@ -183,6 +183,12 @@ void virtio_update_irq(VirtIODevice *vdev);
>> >  void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
>> >                          void *opaque);
>> >
>> > +#ifdef CONFIG_IOTHREAD
>> > + #define TXTIMER_DEFAULT 0
>> > +#else
>> > + #define TXTIMER_DEFAULT 1
>> > +#endif
>> > +
>>
>> Add a comment explaning that this is just a performance optimization?
>>
>> >  /* Base devices.  */
>> >  VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
>> >  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>> >
>> > --
>> > To unsubscribe from this list: send the line "unsubscribe kvm" in
>> > the body of a message to majordomo@vger.kernel.org
>> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 5/5] virtio-net: Switch default to new bottom half TX handler for iothread
  2010-08-31 22:32     ` Alex Williamson
  2010-08-31 22:46       ` Anthony Liguori
  2010-09-01  6:21       ` Stefan Hajnoczi
@ 2010-09-01  8:47       ` Michael S. Tsirkin
  2 siblings, 0 replies; 23+ messages in thread
From: Michael S. Tsirkin @ 2010-09-01  8:47 UTC (permalink / raw)
  To: Alex Williamson; +Cc: qemu-devel, kvm, anthony, jes.sorensen

On Tue, Aug 31, 2010 at 04:32:54PM -0600, Alex Williamson wrote:
> On Tue, 2010-08-31 at 23:25 +0300, Michael S. Tsirkin wrote:
> > On Fri, Aug 27, 2010 at 04:37:45PM -0600, Alex Williamson wrote:
> > > The bottom half handler shows big improvements over the timer
> > > with few downsides, default to it when the iothread is enabled.
> > > 
> > > Using the following tests, with the guest and host connected
> > > via tap+bridge:
> > > 
> > > guest> netperf -t TCP_STREAM -H $HOST
> > > host> netperf -t TCP_STREAM -H $GUEST
> > > guest> netperf -t UDP_STREAM -H $HOST
> > > host> netperf -t UDP_STREAM -H $GUEST
> > > guest> netperf -t TCP_RR -H $HOST
> > > 
> > > Results: base throughput, exits/throughput ->
> > >                    patched throughput, exits/throughput
> > > 
> > > --enable-io-thread
> > > TCP guest->host 2737.77, 47.82  -> 6767.09, 29.15 = 247%, 61%
> > > TCP host->guest 2231.33, 74.00  -> 4125.80, 67.61 = 185%, 91%
> > > UDP guest->host 6281.68, 14.66  -> 12569.27, 1.98 = 200%, 14%
> > > UDP host->guest 275.91,  289.22 -> 264.80, 293.53 = 96%, 101%
> > > interations/s   1949.65, 82.97  -> 7417.56, 84.31 = 380%, 102%
> > > 
> > > No --enable-io-thread
> > > TCP guest->host 3041.57, 55.11 -> 1038.93, 517.57 = 34%, 939%
> > > TCP host->guest 2416.03, 76.67 -> 5655.92, 55.52  = 234%, 72%
> > > UDP guest->host 12255.82, 6.11 -> 7775.87, 31.32  = 63%, 513%
> > > UDP host->guest 587.92, 245.95 -> 611.88, 239.92  = 104%, 98%
> > > interations/s   1975.59, 83.21 -> 8935.50, 88.18  = 452%, 106%
> > > 
> > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > 
> > parameter having different settings based on config
> > options might surprise some users. I don't think
> > we really need a parameter here ...
> 
> I'm not a bit fan of this either, but I'd also prefer not to introduce a
> regression for a performance difference we know about in advance.

I agree. The comment was mainly about a parameter. If we make it a hidden
parameter (mainbe with x- prefix), this won't be an issue.

> It gets even more complicated when we factor in qemu-kvm, as it doesn't
> build with iothread enabled, but seems to get and even better boost in
> performance across the board thanks largely to the kvm-irqchip.  Should
> we instead make this a configure option?  --enable-virtio-net-txbh?

That'll work too.

> Thanks,
> 
> Alex
> 
> > > ---
> > > 
> > >  hw/s390-virtio-bus.c |    3 ++-
> > >  hw/syborg_virtio.c   |    3 ++-
> > >  hw/virtio-pci.c      |    3 ++-
> > >  hw/virtio.h          |    6 ++++++
> > >  4 files changed, 12 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
> > > index 1483362..985f99a 100644
> > > --- a/hw/s390-virtio-bus.c
> > > +++ b/hw/s390-virtio-bus.c
> > > @@ -328,7 +328,8 @@ static VirtIOS390DeviceInfo s390_virtio_net = {
> > >      .qdev.size = sizeof(VirtIOS390Device),
> > >      .qdev.props = (Property[]) {
> > >          DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
> > > -        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer, 1),
> > > +        DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer,
> > > +                           TXTIMER_DEFAULT),
> > >          DEFINE_PROP_INT32("txburst", VirtIOS390Device, txburst, 256),
> > >          DEFINE_PROP_END_OF_LIST(),
> > >      },
> > > diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
> > > index 7b76972..ee5746d 100644
> > > --- a/hw/syborg_virtio.c
> > > +++ b/hw/syborg_virtio.c
> > > @@ -300,7 +300,8 @@ static SysBusDeviceInfo syborg_virtio_net_info = {
> > >      .qdev.props = (Property[]) {
> > >          DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
> > >          DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
> > > -        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer, 1),
> > > +        DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer,
> > > +                           TXTIMER_DEFAULT),
> > >          DEFINE_PROP_INT32("txburst", SyborgVirtIOProxy, txburst, 256),
> > >          DEFINE_PROP_END_OF_LIST(),
> > >      }
> > > diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> > > index e025c09..9740f57 100644
> > > --- a/hw/virtio-pci.c
> > > +++ b/hw/virtio-pci.c
> > > @@ -695,7 +695,8 @@ static PCIDeviceInfo virtio_info[] = {
> > >              DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
> > >              DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
> > >              DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
> > > -            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer, 1),
> > > +            DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer,
> > > +                               TXTIMER_DEFAULT),
> > >              DEFINE_PROP_INT32("txburst", VirtIOPCIProxy, txburst, 256),
> > >              DEFINE_PROP_END_OF_LIST(),
> > >          },
> > > diff --git a/hw/virtio.h b/hw/virtio.h
> > > index 4051889..a1a17a2 100644
> > > --- a/hw/virtio.h
> > > +++ b/hw/virtio.h
> > > @@ -183,6 +183,12 @@ void virtio_update_irq(VirtIODevice *vdev);
> > >  void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
> > >                          void *opaque);
> > >  
> > > +#ifdef CONFIG_IOTHREAD
> > > + #define TXTIMER_DEFAULT 0
> > > +#else
> > > + #define TXTIMER_DEFAULT 1
> > > +#endif
> > > +
> > 
> > Add a comment explaning that this is just a performance optimization?
> > 
> > >  /* Base devices.  */
> > >  VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
> > >  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> > > 
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/5] virtio-net: Introduce a new bottom half packet TX
  2010-08-31 20:33     ` Alex Williamson
@ 2010-09-01  9:47       ` Michael S. Tsirkin
  0 siblings, 0 replies; 23+ messages in thread
From: Michael S. Tsirkin @ 2010-09-01  9:47 UTC (permalink / raw)
  To: Alex Williamson; +Cc: qemu-devel, kvm, anthony, jes.sorensen

On Tue, Aug 31, 2010 at 02:33:46PM -0600, Alex Williamson wrote:
> On Tue, 2010-08-31 at 23:14 +0300, Michael S. Tsirkin wrote:
> > On Fri, Aug 27, 2010 at 04:37:36PM -0600, Alex Williamson wrote:
> > > Based on a patch from Mark McLoughlin, this patch introduces a new
> > > bottom half packet transmitter that avoids the latency imposed by
> > > the tx_timer approach.  Rather than scheduling a timer when a TX
> > > packet comes in, schedule a bottom half to be run from the iothread.
> > > The bottom half handler first attempts to flush the queue with
> > > notification disabled (this is where we could race with a guest
> > > without txburst).  If we flush a full burst, reschedule immediately.
> > > If we send short of a full burst, try to re-enable notification.
> > > To avoid a race with TXs that may have occurred, we must then
> > > flush again.  If we find some packets to send, the guest it probably
> > > active, so we can reschedule again.
> > > 
> > > tx_timer and tx_bh are mutually exclusive, so we can re-use the
> > > tx_waiting flag to indicate one or the other needs to be setup.
> > > This allows us to seamlessly migrate between timer and bh TX
> > > handling.
> > > 
> > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > ---
> > > 
> > >  hw/virtio-net.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++---------
> > >  1 files changed, 68 insertions(+), 13 deletions(-)
> > > 
> > > diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> > > index 8b652f2..3288c77 100644
> > > --- a/hw/virtio-net.c
> > > +++ b/hw/virtio-net.c
> > > @@ -36,6 +36,7 @@ typedef struct VirtIONet
> > >      VirtQueue *ctrl_vq;
> > >      NICState *nic;
> > >      QEMUTimer *tx_timer;
> > > +    QEMUBH *tx_bh;
> > >      uint32_t tx_timeout;
> > >      int32_t tx_burst;
> > >      int tx_waiting;
> > > @@ -704,16 +705,25 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
> > >  {
> > >      VirtIONet *n = to_virtio_net(vdev);
> > >  
> > > -    if (n->tx_waiting) {
> > > -        virtio_queue_set_notification(vq, 1);
> > > -        qemu_del_timer(n->tx_timer);
> > > -        n->tx_waiting = 0;
> > > -        virtio_net_flush_tx(n, vq);
> > > +    if (n->tx_timer) {
> > > +        if (n->tx_waiting) {
> > > +            virtio_queue_set_notification(vq, 1);
> > > +            qemu_del_timer(n->tx_timer);
> > > +            n->tx_waiting = 0;
> > > +            virtio_net_flush_tx(n, vq);
> > > +        } else {
> > > +            qemu_mod_timer(n->tx_timer,
> > > +                           qemu_get_clock(vm_clock) + n->tx_timeout);
> > > +            n->tx_waiting = 1;
> > > +            virtio_queue_set_notification(vq, 0);
> > > +        }
> > >      } else {
> > > -        qemu_mod_timer(n->tx_timer,
> > > -                       qemu_get_clock(vm_clock) + n->tx_timeout);
> > > +        if (unlikely(n->tx_waiting)) {
> > > +            return;
> > > +        }
> > > +        virtio_queue_set_notification(n->tx_vq, 0);
> > > +        qemu_bh_schedule(n->tx_bh);
> > >          n->tx_waiting = 1;
> > > -        virtio_queue_set_notification(vq, 0);
> > >      }
> > >  }
> > >  
> > > @@ -731,6 +741,41 @@ static void virtio_net_tx_timer(void *opaque)
> > >      virtio_net_flush_tx(n, n->tx_vq);
> > >  }
> > >  
> > > +static void virtio_net_tx_bh(void *opaque)
> > > +{
> > > +    VirtIONet *n = opaque;
> > > +    int32_t ret;
> > > +
> > > +    n->tx_waiting = 0;
> > > +
> > > +    /* Just in case the driver is not ready on more */
> > > +    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
> > > +        return;
> > > +
> > > +    ret = virtio_net_flush_tx(n, n->tx_vq);
> > > +    if (ret == -EBUSY) {
> > > +        return; /* Notification re-enable handled by tx_complete */
> > > +    }
> > > +
> > > +    /* If we flush a full burst of packets, assume there are
> > > +     * more coming and immediately reschedule */
> > > +    if (ret >= n->tx_burst) {
> > > +        qemu_bh_schedule(n->tx_bh);
> > > +        n->tx_waiting = 1;
> > > +        return;
> > > +    }
> > > +
> > > +    /* If less than a full burst, re-enable notification and flush
> > > +     * anything that may have come in while we weren't looking.  If
> > > +     * we find something, assume the guest is still active and reschedule */
> > > +    virtio_queue_set_notification(n->tx_vq, 1);
> > > +    if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
> > 
> > Shouldn't this be virtio_net_flush_tx(n, n->tx_vq) >= n->tx_burst?
> > If we get less than tx_burst, the ring is empty now so no need to
> > reschedule.
> > Right?
> 
> I suppose it depends on how aggressive we want to be.  If the guest put
> something on the queue between the first flush and this one, then it
> might be actively transmitting, and if we want to optimize latency, we
> anticipate that it might continue to transmit and re-schedule.  This is
> taken straight from markmc's rhel5 patch.  I wouldn't argue that it's
> wrong to not reschedule here, but it's clearly less aggressive.  Thanks,
> 
> Alex

I'm a bit concerned that we are aggressive but not consistently aggressive.
For example if the guest adds a packet before we do disable
notification, we do not reschedule bh, but if it adds a packet
after this, we do. If we get 255 packets, then another 255 packets,
we poll without rescheduling an extra bh, if we get 255*2 packets in one
go we reschedule.

I think it might cause jitter in performance where e.g. slowing
guest down a bit suddenly speeds up networking.

It might be better to be consistent: always poll at most 256 entries,
if we get all of them reschedule, if we get x < 256 we enable notification,
and poll again, if we get 256 - x entries we reschedule, if we get less
stop polling.



> > > +        virtio_queue_set_notification(n->tx_vq, 0);
> > > +        qemu_bh_schedule(n->tx_bh);
> > > +        n->tx_waiting = 1;
> > > +    }
> > > +}
> > > +
> > >  static void virtio_net_save(QEMUFile *f, void *opaque)
> > >  {
> > >      VirtIONet *n = opaque;
> > > @@ -850,8 +895,12 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
> > >      n->mac_table.first_multi = i;
> > >  
> > >      if (n->tx_waiting) {
> > > -        qemu_mod_timer(n->tx_timer,
> > > -                       qemu_get_clock(vm_clock) + n->tx_timeout);
> > > +        if (n->tx_timer) {
> > > +            qemu_mod_timer(n->tx_timer,
> > > +                           qemu_get_clock(vm_clock) + n->tx_timeout);
> > > +        } else {
> > > +            qemu_bh_schedule(n->tx_bh);
> > > +        }
> > >      }
> > >      return 0;
> > >  }
> > > @@ -939,9 +988,9 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> > >  
> > >      qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
> > >  
> > > -    n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
> > >      n->tx_waiting = 0;
> > >      if (txtimer) {
> > > +        n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
> > >          if (txtimer == 1) {
> > >              /* For convenience, 1 = "on" = predefined default, anything else
> > >               * specifies and actual timeout value */
> > > @@ -949,6 +998,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> > >          } else {
> > >              n->tx_timeout = txtimer;
> > >          }
> > > +    } else {
> > > +        n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
> > >      }
> > >      n->tx_burst = txburst;
> > >      n->mergeable_rx_bufs = 0;
> > > @@ -982,8 +1033,12 @@ void virtio_net_exit(VirtIODevice *vdev)
> > >      qemu_free(n->mac_table.macs);
> > >      qemu_free(n->vlans);
> > >  
> > > -    qemu_del_timer(n->tx_timer);
> > > -    qemu_free_timer(n->tx_timer);
> > > +    if (n->tx_timer) {
> > > +        qemu_del_timer(n->tx_timer);
> > > +        qemu_free_timer(n->tx_timer);
> > > +    } else {
> > > +        qemu_bh_delete(n->tx_bh);
> > > +    }
> > >  
> > >      virtio_cleanup(&n->vdev);
> > >      qemu_del_vlan_client(&n->nic->nc);
> > > 
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] virtio-net: More configurability and bh handling for tx
  2010-08-31 22:26     ` Alex Williamson
@ 2010-09-01 10:40       ` Michael S. Tsirkin
  0 siblings, 0 replies; 23+ messages in thread
From: Michael S. Tsirkin @ 2010-09-01 10:40 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Anthony Liguori, qemu-devel, kvm, jes.sorensen, kraxel

On Tue, Aug 31, 2010 at 04:26:07PM -0600, Alex Williamson wrote:
> On Tue, 2010-08-31 at 16:33 -0500, Anthony Liguori wrote:
> > On 08/31/2010 03:27 PM, Michael S. Tsirkin wrote:
> > > On Fri, Aug 27, 2010 at 04:36:59PM -0600, Alex Williamson wrote:
> > >    
> > >> Add the ability to configure the tx_timer timeout and add a bottom
> > >> half tx handler that typically shows a nice perf boost over the
> > >> time based approach.  See last patch for perf details.  Make this
> > >> the new default when the iothread is enabled.  Thanks,
> > >>
> > >> Alex
> > >>      
> > > As a further thought, maybe it would help to have a
> > > separate namespace for unsupported, developer-only parameters.
> > > Thoughts?
> > >    
> > 
> > We already have an undocumented one, just prefix with 'x-' to indicate 
> > that it's experimental.
> 
> Any objection then to x-txburst and x-txtimer then?  Easy enough to
> rename.  Thanks,
> 
> Alex

No objection. Let's put the 256 constant in a header,
with a comment explaining where it came from?

-- 
MST

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2010-09-01 10:46 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-08-27 22:36 [PATCH 0/5] virtio-net: More configurability and bh handling for tx Alex Williamson
2010-08-27 22:37 ` [PATCH 1/5] virtio-net: Make tx_timer timeout configurable Alex Williamson
2010-08-31 18:00   ` Chris Wright
2010-08-31 18:07     ` [Qemu-devel] " Alex Williamson
2010-08-31 19:29       ` Chris Wright
2010-08-27 22:37 ` [PATCH 2/5] virtio-net: Limit number of packets sent per TX flush Alex Williamson
2010-08-31 20:23   ` Michael S. Tsirkin
2010-08-27 22:37 ` [PATCH 3/5] virtio-net: Rename tx_timer_active to tx_waiting Alex Williamson
2010-08-27 22:37 ` [PATCH 4/5] virtio-net: Introduce a new bottom half packet TX Alex Williamson
2010-08-31 20:14   ` Michael S. Tsirkin
2010-08-31 20:33     ` Alex Williamson
2010-09-01  9:47       ` Michael S. Tsirkin
2010-08-27 22:37 ` [PATCH 5/5] virtio-net: Switch default to new bottom half TX handler for iothread Alex Williamson
2010-08-31 20:25   ` Michael S. Tsirkin
2010-08-31 22:32     ` Alex Williamson
2010-08-31 22:46       ` Anthony Liguori
2010-09-01  6:21       ` Stefan Hajnoczi
2010-09-01  8:47       ` Michael S. Tsirkin
2010-08-31 20:20 ` [PATCH 0/5] virtio-net: More configurability and bh handling for tx Michael S. Tsirkin
2010-08-31 20:27 ` Michael S. Tsirkin
2010-08-31 21:33   ` Anthony Liguori
2010-08-31 22:26     ` Alex Williamson
2010-09-01 10:40       ` Michael S. Tsirkin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).