All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] virtio: Report new guest memory statistics pertinent to memory ballooning (V4)
@ 2009-11-19 15:06 Adam Litke
  2009-11-19 15:19   ` [Qemu-devel] " Adam Litke
                   ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Adam Litke @ 2009-11-19 15:06 UTC (permalink / raw)
  To: Avi Kivity, Anthony Liguori; +Cc: qemu-devel

Avi and Anthony,
If you agree that I've addressed all outstanding issues, please consider this
patch for inclusion.  Thanks.

Changes since V3:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat

Changes since V2:
 - Use a virtqueue for communication instead of the device config space

Changes since V1:
 - In the monitor, print all stats on one line with less abbreviated names
 - Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch implements the qemu side of the communication channel.  I will post
the kernel driver modifications in-reply to this message.

Signed-off-by: Adam Litke <agl@us.ibm.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: qemu-devel@nongnu.org

diff --git a/balloon.h b/balloon.h
index 60b4a5d..def4c56 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,12 +16,12 @@
 
 #include "cpu-defs.h"
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+typedef int (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
 void qemu_balloon(ram_addr_t target);
 
-ram_addr_t qemu_balloon_status(void);
+int qemu_balloon_status(void);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index cfd3b41..b90e1f2 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -19,6 +19,7 @@
 #include "balloon.h"
 #include "virtio-balloon.h"
 #include "kvm.h"
+#include "monitor.h"
 
 #if defined(__linux__)
 #include <sys/mman.h>
@@ -27,9 +28,13 @@
 typedef struct VirtIOBalloon
 {
     VirtIODevice vdev;
-    VirtQueue *ivq, *dvq;
+    VirtQueue *ivq, *dvq, *svq;
     uint32_t num_pages;
     uint32_t actual;
+    uint64_t stats[VIRTIO_BALLOON_S_NR];
+    VirtQueueElement stats_vq_elem;
+    size_t stats_vq_offset;
+    uint8_t stats_requested;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -46,6 +51,33 @@ static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+    int i;
+    for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
+}
+
+static inline void print_stat(uint64_t val, const char *label)
+{
+    if (val != -1) {
+        monitor_printf(cur_mon, ",%s=%lld", label, (unsigned long long)val);
+    }
+}
+
+static void virtio_balloon_print_stats(VirtIOBalloon *dev)
+{
+    uint32_t actual = ram_size - (dev->actual << VIRTIO_BALLOON_PFN_SHIFT);
+
+    monitor_printf(cur_mon, "balloon: actual=%d", (int)(actual >> 20));
+    print_stat(dev->stats[VIRTIO_BALLOON_S_SWAP_IN], "mem_swapped_in");
+    print_stat(dev->stats[VIRTIO_BALLOON_S_SWAP_OUT], "mem_swapped_out");
+    print_stat(dev->stats[VIRTIO_BALLOON_S_MAJFLT], "major_page_faults");
+    print_stat(dev->stats[VIRTIO_BALLOON_S_MINFLT], "minor_page_faults");
+    print_stat(dev->stats[VIRTIO_BALLOON_S_MEMFREE], "free_mem");
+    print_stat(dev->stats[VIRTIO_BALLOON_S_MEMTOT], "total_mem");
+    monitor_printf(cur_mon, "\n");
+}
+
 /* FIXME: once we do a virtio refactoring, this will get subsumed into common
  * code */
 static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
@@ -104,6 +136,31 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
     }
 }
 
+static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOBalloon *s = to_virtio_balloon(vdev);
+    VirtQueueElement *elem = &s->stats_vq_elem;
+    VirtIOBalloonStat stat;
+    size_t offset = 0;
+
+    if (!virtqueue_pop(vq, elem))
+        return;
+
+    while (memcpy_from_iovector(&stat, offset, sizeof(stat), elem->out_sg,
+                                elem->out_num) == sizeof(stat)) {
+        offset += sizeof(stat);
+        if (stat.tag < VIRTIO_BALLOON_S_NR)
+            s->stats[stat.tag] = stat.val;
+    }
+    s->stats_vq_offset = offset;
+
+    if (s->stats_requested) {
+        virtio_balloon_print_stats(s);
+        monitor_resume(cur_mon);
+        s->stats_requested = 0;
+    }
+}
+
 static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
 {
     VirtIOBalloon *dev = to_virtio_balloon(vdev);
@@ -126,10 +183,19 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
 
 static uint32_t virtio_balloon_get_features(VirtIODevice *vdev)
 {
-    return 0;
+    return 1 << VIRTIO_BALLOON_F_STATS_VQ;
 }
 
-static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
+static void request_stats(VirtIOBalloon *vb)
+{
+    vb->stats_requested = 1;
+    reset_stats(vb);
+    monitor_suspend(cur_mon);
+    virtqueue_push(vb->svq, &vb->stats_vq_elem, vb->stats_vq_offset);
+    virtio_notify(&vb->vdev, vb->svq);
+}
+
+static int virtio_balloon_to_target(void *opaque, ram_addr_t target)
 {
     VirtIOBalloon *dev = opaque;
 
@@ -139,9 +205,14 @@ static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
     if (target) {
         dev->num_pages = (ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
         virtio_notify_config(&dev->vdev);
+    } else if (dev->vdev.features & (1 << VIRTIO_BALLOON_F_STATS_VQ)) {
+        request_stats(dev);
+    } else {
+        reset_stats(dev);
+        virtio_balloon_print_stats(dev);
     }
 
-    return ram_size - (dev->actual << VIRTIO_BALLOON_PFN_SHIFT);
+    return 0;
 }
 
 static void virtio_balloon_save(QEMUFile *f, void *opaque)
@@ -152,6 +223,9 @@ static void virtio_balloon_save(QEMUFile *f, void *opaque)
 
     qemu_put_be32(f, s->num_pages);
     qemu_put_be32(f, s->actual);
+    qemu_put_buffer(f, (uint8_t *)&s->stats_vq_elem, sizeof(VirtQueueElement));
+    qemu_put_buffer(f, (uint8_t *)&s->stats_vq_offset, sizeof(size_t));
+    qemu_put_byte(f, s->stats_requested);
 }
 
 static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
@@ -165,6 +239,9 @@ static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
 
     s->num_pages = qemu_get_be32(f);
     s->actual = qemu_get_be32(f);
+    qemu_get_buffer(f, (uint8_t *)&s->stats_vq_elem, sizeof(VirtQueueElement));
+    qemu_get_buffer(f, (uint8_t *)&s->stats_vq_offset, sizeof(size_t));
+    s->stats_requested = qemu_get_byte(f);
 
     return 0;
 }
@@ -183,6 +260,7 @@ VirtIODevice *virtio_balloon_init(DeviceState *dev)
 
     s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
     s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
+    s->svq = virtio_add_queue(&s->vdev, 128, virtio_balloon_receive_stats);
 
     qemu_add_balloon_handler(virtio_balloon_to_target, s);
 
diff --git a/hw/virtio-balloon.h b/hw/virtio-balloon.h
index 9a0d119..8a08153 100644
--- a/hw/virtio-balloon.h
+++ b/hw/virtio-balloon.h
@@ -25,6 +25,7 @@
 
 /* The feature bitmap for virtio balloon */
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
+#define VIRTIO_BALLOON_F_STATS_VQ 1       /* Memory stats virtqueue */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -37,4 +38,18 @@ struct virtio_balloon_config
     uint32_t actual;
 };
 
+/* Memory Statistics */
+#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
+#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped out */
+#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
+#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
+#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free memory */
+#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
+#define VIRTIO_BALLOON_S_NR       6
+
+typedef struct VirtIOBalloonStat {
+    uint16_t tag;
+    uint64_t val;
+} VirtIOBalloonStat;
+
 #endif
diff --git a/monitor.c b/monitor.c
index ed1ce6e..73484c8 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1583,16 +1583,14 @@ static void do_balloon(Monitor *mon, int value)
 
 static void do_info_balloon(Monitor *mon)
 {
-    ram_addr_t actual;
+    int ret;
 
-    actual = qemu_balloon_status();
     if (kvm_enabled() && !kvm_has_sync_mmu())
         monitor_printf(mon, "Using KVM without synchronous MMU, "
                        "ballooning disabled\n");
-    else if (actual == 0)
+    ret = qemu_balloon_status();
+    if (ret == -1)
         monitor_printf(mon, "Ballooning not activated in VM\n");
-    else
-        monitor_printf(mon, "balloon: actual=%d\n", (int)(actual >> 20));
 }
 
 static qemu_acl *find_acl(Monitor *mon, const char *name)
diff --git a/vl.c b/vl.c
index 710d52e..aebceab 100644
--- a/vl.c
+++ b/vl.c
@@ -337,11 +337,11 @@ void qemu_balloon(ram_addr_t target)
         qemu_balloon_event(qemu_balloon_event_opaque, target);
 }
 
-ram_addr_t qemu_balloon_status(void)
+int qemu_balloon_status(void)
 {
     if (qemu_balloon_event)
         return qemu_balloon_event(qemu_balloon_event_opaque, 0);
-    return 0;
+    return -1;
 }
 
 /***********************************************************/


-- 
Thanks,
Adam

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 15:06 [Qemu-devel] virtio: Report new guest memory statistics pertinent to memory ballooning (V4) Adam Litke
@ 2009-11-19 15:19   ` Adam Litke
  2009-11-19 15:19 ` Adam Litke
  2009-11-19 15:19 ` [Qemu-devel] Re: virtio: Report new guest memory statistics pertinent to memory ballooning (V4) Avi Kivity
  2 siblings, 0 replies; 33+ messages in thread
From: Adam Litke @ 2009-11-19 15:19 UTC (permalink / raw)
  To: Anthony Liguori, Rusty Russell
  Cc: qemu-devel, Avi Kivity, virtualization, linux-kernel

Rusty and Anthony,
If I've addressed all outstanding issues, please consider this patch for
inclusion.  Thanks.

Changes since V2:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat and fix endianness conversion

Changes since V1:
 - Use a virtqueue instead of the device config space

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch enables the guest-side support by adding stats collection and
reporting to the virtio balloon driver.

Signed-off-by: Adam Litke <agl@us.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Anthony Liguori <anthony@codemonkey.ws>
Cc: virtualization@lists.linux-foundation.org
Cc: linux-kernel@vger.kernel.org

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 200c22f..ebc9d39 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -29,7 +29,7 @@
 struct virtio_balloon
 {
 	struct virtio_device *vdev;
-	struct virtqueue *inflate_vq, *deflate_vq;
+	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
 
 	/* Where the ballooning thread waits for config to change. */
 	wait_queue_head_t config_change;
@@ -50,6 +50,9 @@ struct virtio_balloon
 	/* The array of pfns we tell the Host about. */
 	unsigned int num_pfns;
 	u32 pfns[256];
+
+	/* Memory statistics */
+	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
 };
 
 static struct virtio_device_id id_table[] = {
@@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
 	}
 }
 
+static inline void update_stat(struct virtio_balloon *vb, int idx,
+				__le16 tag, __le64 val)
+{
+	BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
+	vb->stats[idx].tag = cpu_to_le16(tag);
+	vb->stats[idx].val = cpu_to_le64(val);
+}
+
+#define K(x) ((x) << (PAGE_SHIFT - 10))
+static void update_balloon_stats(struct virtio_balloon *vb)
+{
+	unsigned long events[NR_VM_EVENT_ITEMS];
+	struct sysinfo i;
+	int idx = 0;
+
+	all_vm_events(events);
+	si_meminfo(&i);
+
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, K(events[PSWPIN]));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, K(events[PSWPOUT]));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));
+}
+
+/*
+ * While most virtqueues communicate guest-initiated requests to the hypervisor,
+ * the stats queue operates in reverse.  The driver initializes the virtqueue
+ * with a single buffer.  From that point forward, all conversations consist of
+ * a hypervisor request (a call to this function) which directs us to refill
+ * the virtqueue with a fresh stats buffer.
+ */
+static void stats_ack(struct virtqueue *vq)
+{
+	struct virtio_balloon *vb;
+	unsigned int len;
+	struct scatterlist sg;
+
+	vb = vq->vq_ops->get_buf(vq, &len);
+	if (!vb)
+		return;
+
+	update_balloon_stats(vb);
+
+	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
+	if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0)
+		BUG();
+	vq->vq_ops->kick(vq);
+}
+
 static void virtballoon_changed(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb = vdev->priv;
@@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
-	struct virtqueue *vqs[2];
-	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
-	const char *names[] = { "inflate", "deflate" };
-	int err;
+	struct virtqueue *vqs[3];
+	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_ack };
+	const char *names[] = { "inflate", "deflate", "stats" };
+	int err, nvqs;
 
 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
 	if (!vb) {
@@ -221,13 +275,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	init_waitqueue_head(&vb->config_change);
 	vb->vdev = vdev;
 
-	/* We expect two virtqueues. */
-	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+	/* We expect two virtqueues: inflate and deflate,
+	 * and optionally stat. */
+	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
+	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
 	if (err)
 		goto out_free_vb;
 
 	vb->inflate_vq = vqs[0];
 	vb->deflate_vq = vqs[1];
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+		struct scatterlist sg;
+		vb->stats_vq = vqs[2];
+
+		/*
+		 * Prime this virtqueue with one buffer so the hypervisor can
+		 * use it to signal us later.
+		 */
+		sg_init_one(&sg, vb->stats, sizeof(vb->stats));
+		if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq, &sg, 1, 0, vb) < 0)
+			BUG();
+		vb->stats_vq->vq_ops->kick(vb->stats_vq);
+	}
 
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
@@ -265,7 +334,9 @@ static void virtballoon_remove(struct virtio_device *vdev)
 	kfree(vb);
 }
 
-static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
+static unsigned int features[] = {
+	VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ,
+};
 
 static struct virtio_driver virtio_balloon = {
 	.feature_table = features,
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index 09d7300..a5c09e6 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -6,6 +6,7 @@
 
 /* The feature bitmap for virtio balloon */
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
+#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -17,4 +18,19 @@ struct virtio_balloon_config
 	/* Number of pages we've actually got in balloon. */
 	__le32 actual;
 };
+
+#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
+#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped out */
+#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
+#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
+#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free memory */
+#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
+#define VIRTIO_BALLOON_S_NR       6
+
+struct virtio_balloon_stat
+{
+	__le16 tag;
+	__le64 val;
+};
+
 #endif /* _LINUX_VIRTIO_BALLOON_H */


-- 
Thanks,
Adam


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [Qemu-devel] virtio: Add memory statistics reporting to the balloon driver (V3)
@ 2009-11-19 15:19   ` Adam Litke
  0 siblings, 0 replies; 33+ messages in thread
From: Adam Litke @ 2009-11-19 15:19 UTC (permalink / raw)
  To: Anthony Liguori, Rusty Russell
  Cc: linux-kernel, virtualization, qemu-devel, Avi Kivity

Rusty and Anthony,
If I've addressed all outstanding issues, please consider this patch for
inclusion.  Thanks.

Changes since V2:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat and fix endianness conversion

Changes since V1:
 - Use a virtqueue instead of the device config space

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch enables the guest-side support by adding stats collection and
reporting to the virtio balloon driver.

Signed-off-by: Adam Litke <agl@us.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Anthony Liguori <anthony@codemonkey.ws>
Cc: virtualization@lists.linux-foundation.org
Cc: linux-kernel@vger.kernel.org

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 200c22f..ebc9d39 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -29,7 +29,7 @@
 struct virtio_balloon
 {
 	struct virtio_device *vdev;
-	struct virtqueue *inflate_vq, *deflate_vq;
+	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
 
 	/* Where the ballooning thread waits for config to change. */
 	wait_queue_head_t config_change;
@@ -50,6 +50,9 @@ struct virtio_balloon
 	/* The array of pfns we tell the Host about. */
 	unsigned int num_pfns;
 	u32 pfns[256];
+
+	/* Memory statistics */
+	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
 };
 
 static struct virtio_device_id id_table[] = {
@@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
 	}
 }
 
+static inline void update_stat(struct virtio_balloon *vb, int idx,
+				__le16 tag, __le64 val)
+{
+	BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
+	vb->stats[idx].tag = cpu_to_le16(tag);
+	vb->stats[idx].val = cpu_to_le64(val);
+}
+
+#define K(x) ((x) << (PAGE_SHIFT - 10))
+static void update_balloon_stats(struct virtio_balloon *vb)
+{
+	unsigned long events[NR_VM_EVENT_ITEMS];
+	struct sysinfo i;
+	int idx = 0;
+
+	all_vm_events(events);
+	si_meminfo(&i);
+
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, K(events[PSWPIN]));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, K(events[PSWPOUT]));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));
+}
+
+/*
+ * While most virtqueues communicate guest-initiated requests to the hypervisor,
+ * the stats queue operates in reverse.  The driver initializes the virtqueue
+ * with a single buffer.  From that point forward, all conversations consist of
+ * a hypervisor request (a call to this function) which directs us to refill
+ * the virtqueue with a fresh stats buffer.
+ */
+static void stats_ack(struct virtqueue *vq)
+{
+	struct virtio_balloon *vb;
+	unsigned int len;
+	struct scatterlist sg;
+
+	vb = vq->vq_ops->get_buf(vq, &len);
+	if (!vb)
+		return;
+
+	update_balloon_stats(vb);
+
+	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
+	if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0)
+		BUG();
+	vq->vq_ops->kick(vq);
+}
+
 static void virtballoon_changed(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb = vdev->priv;
@@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
-	struct virtqueue *vqs[2];
-	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
-	const char *names[] = { "inflate", "deflate" };
-	int err;
+	struct virtqueue *vqs[3];
+	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_ack };
+	const char *names[] = { "inflate", "deflate", "stats" };
+	int err, nvqs;
 
 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
 	if (!vb) {
@@ -221,13 +275,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	init_waitqueue_head(&vb->config_change);
 	vb->vdev = vdev;
 
-	/* We expect two virtqueues. */
-	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+	/* We expect two virtqueues: inflate and deflate,
+	 * and optionally stat. */
+	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
+	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
 	if (err)
 		goto out_free_vb;
 
 	vb->inflate_vq = vqs[0];
 	vb->deflate_vq = vqs[1];
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+		struct scatterlist sg;
+		vb->stats_vq = vqs[2];
+
+		/*
+		 * Prime this virtqueue with one buffer so the hypervisor can
+		 * use it to signal us later.
+		 */
+		sg_init_one(&sg, vb->stats, sizeof(vb->stats));
+		if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq, &sg, 1, 0, vb) < 0)
+			BUG();
+		vb->stats_vq->vq_ops->kick(vb->stats_vq);
+	}
 
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
@@ -265,7 +334,9 @@ static void virtballoon_remove(struct virtio_device *vdev)
 	kfree(vb);
 }
 
-static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
+static unsigned int features[] = {
+	VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ,
+};
 
 static struct virtio_driver virtio_balloon = {
 	.feature_table = features,
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index 09d7300..a5c09e6 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -6,6 +6,7 @@
 
 /* The feature bitmap for virtio balloon */
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
+#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -17,4 +18,19 @@ struct virtio_balloon_config
 	/* Number of pages we've actually got in balloon. */
 	__le32 actual;
 };
+
+#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
+#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped out */
+#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
+#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
+#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free memory */
+#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
+#define VIRTIO_BALLOON_S_NR       6
+
+struct virtio_balloon_stat
+{
+	__le16 tag;
+	__le64 val;
+};
+
 #endif /* _LINUX_VIRTIO_BALLOON_H */


-- 
Thanks,
Adam

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 15:06 [Qemu-devel] virtio: Report new guest memory statistics pertinent to memory ballooning (V4) Adam Litke
  2009-11-19 15:19   ` [Qemu-devel] " Adam Litke
@ 2009-11-19 15:19 ` Adam Litke
  2009-11-19 15:19 ` [Qemu-devel] Re: virtio: Report new guest memory statistics pertinent to memory ballooning (V4) Avi Kivity
  2 siblings, 0 replies; 33+ messages in thread
From: Adam Litke @ 2009-11-19 15:19 UTC (permalink / raw)
  To: Anthony Liguori, Rusty Russell
  Cc: linux-kernel, virtualization, qemu-devel, Avi Kivity

Rusty and Anthony,
If I've addressed all outstanding issues, please consider this patch for
inclusion.  Thanks.

Changes since V2:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat and fix endianness conversion

Changes since V1:
 - Use a virtqueue instead of the device config space

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch enables the guest-side support by adding stats collection and
reporting to the virtio balloon driver.

Signed-off-by: Adam Litke <agl@us.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Anthony Liguori <anthony@codemonkey.ws>
Cc: virtualization@lists.linux-foundation.org
Cc: linux-kernel@vger.kernel.org

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 200c22f..ebc9d39 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -29,7 +29,7 @@
 struct virtio_balloon
 {
 	struct virtio_device *vdev;
-	struct virtqueue *inflate_vq, *deflate_vq;
+	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
 
 	/* Where the ballooning thread waits for config to change. */
 	wait_queue_head_t config_change;
@@ -50,6 +50,9 @@ struct virtio_balloon
 	/* The array of pfns we tell the Host about. */
 	unsigned int num_pfns;
 	u32 pfns[256];
+
+	/* Memory statistics */
+	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
 };
 
 static struct virtio_device_id id_table[] = {
@@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
 	}
 }
 
+static inline void update_stat(struct virtio_balloon *vb, int idx,
+				__le16 tag, __le64 val)
+{
+	BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
+	vb->stats[idx].tag = cpu_to_le16(tag);
+	vb->stats[idx].val = cpu_to_le64(val);
+}
+
+#define K(x) ((x) << (PAGE_SHIFT - 10))
+static void update_balloon_stats(struct virtio_balloon *vb)
+{
+	unsigned long events[NR_VM_EVENT_ITEMS];
+	struct sysinfo i;
+	int idx = 0;
+
+	all_vm_events(events);
+	si_meminfo(&i);
+
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, K(events[PSWPIN]));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, K(events[PSWPOUT]));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));
+}
+
+/*
+ * While most virtqueues communicate guest-initiated requests to the hypervisor,
+ * the stats queue operates in reverse.  The driver initializes the virtqueue
+ * with a single buffer.  From that point forward, all conversations consist of
+ * a hypervisor request (a call to this function) which directs us to refill
+ * the virtqueue with a fresh stats buffer.
+ */
+static void stats_ack(struct virtqueue *vq)
+{
+	struct virtio_balloon *vb;
+	unsigned int len;
+	struct scatterlist sg;
+
+	vb = vq->vq_ops->get_buf(vq, &len);
+	if (!vb)
+		return;
+
+	update_balloon_stats(vb);
+
+	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
+	if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0)
+		BUG();
+	vq->vq_ops->kick(vq);
+}
+
 static void virtballoon_changed(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb = vdev->priv;
@@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
-	struct virtqueue *vqs[2];
-	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
-	const char *names[] = { "inflate", "deflate" };
-	int err;
+	struct virtqueue *vqs[3];
+	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_ack };
+	const char *names[] = { "inflate", "deflate", "stats" };
+	int err, nvqs;
 
 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
 	if (!vb) {
@@ -221,13 +275,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	init_waitqueue_head(&vb->config_change);
 	vb->vdev = vdev;
 
-	/* We expect two virtqueues. */
-	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+	/* We expect two virtqueues: inflate and deflate,
+	 * and optionally stat. */
+	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
+	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
 	if (err)
 		goto out_free_vb;
 
 	vb->inflate_vq = vqs[0];
 	vb->deflate_vq = vqs[1];
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+		struct scatterlist sg;
+		vb->stats_vq = vqs[2];
+
+		/*
+		 * Prime this virtqueue with one buffer so the hypervisor can
+		 * use it to signal us later.
+		 */
+		sg_init_one(&sg, vb->stats, sizeof(vb->stats));
+		if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq, &sg, 1, 0, vb) < 0)
+			BUG();
+		vb->stats_vq->vq_ops->kick(vb->stats_vq);
+	}
 
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
@@ -265,7 +334,9 @@ static void virtballoon_remove(struct virtio_device *vdev)
 	kfree(vb);
 }
 
-static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
+static unsigned int features[] = {
+	VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ,
+};
 
 static struct virtio_driver virtio_balloon = {
 	.feature_table = features,
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index 09d7300..a5c09e6 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -6,6 +6,7 @@
 
 /* The feature bitmap for virtio balloon */
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
+#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -17,4 +18,19 @@ struct virtio_balloon_config
 	/* Number of pages we've actually got in balloon. */
 	__le32 actual;
 };
+
+#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
+#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped out */
+#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
+#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
+#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free memory */
+#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
+#define VIRTIO_BALLOON_S_NR       6
+
+struct virtio_balloon_stat
+{
+	__le16 tag;
+	__le64 val;
+};
+
 #endif /* _LINUX_VIRTIO_BALLOON_H */


-- 
Thanks,
Adam

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [Qemu-devel] Re: virtio: Report new guest memory statistics pertinent to memory ballooning (V4)
  2009-11-19 15:06 [Qemu-devel] virtio: Report new guest memory statistics pertinent to memory ballooning (V4) Adam Litke
  2009-11-19 15:19   ` [Qemu-devel] " Adam Litke
  2009-11-19 15:19 ` Adam Litke
@ 2009-11-19 15:19 ` Avi Kivity
  2009-11-19 15:56   ` Adam Litke
  2 siblings, 1 reply; 33+ messages in thread
From: Avi Kivity @ 2009-11-19 15:19 UTC (permalink / raw)
  To: Adam Litke; +Cc: Anthony Liguori, qemu-devel

On 11/19/2009 05:06 PM, Adam Litke wrote:
> Avi and Anthony,
> If you agree that I've addressed all outstanding issues, please consider this
> patch for inclusion.  Thanks.
>
>    

I'd like to see this (and all other virtio-ABI-modifying patches) first 
go into the virtio pci spec, then propagated to guest and host.

> Changes since V3:
>   - Increase stat field size to 64 bits
>   - Report all sizes in kb (not pages)
>    

Why not bytes?  It's the most natural unit.

> -static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
> +static void request_stats(VirtIOBalloon *vb)
> +{
> +    vb->stats_requested = 1;
> +    reset_stats(vb);
> +    monitor_suspend(cur_mon);
>    

You allow the guest to kill a monitor here.

> +    virtqueue_push(vb->svq,&vb->stats_vq_elem, vb->stats_vq_offset);
> +    virtio_notify(&vb->vdev, vb->svq);
> +}
> +
>    



> +typedef struct VirtIOBalloonStat {
> +    uint16_t tag;
> +    uint64_t val;
> +} VirtIOBalloonStat;
>    

Alignment here depends on word size.  This needs to be padded to be 
aligned the same way on 32 and 64 bit hosts and guests.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 15:19   ` [Qemu-devel] " Adam Litke
  (?)
@ 2009-11-19 15:22     ` Avi Kivity
  -1 siblings, 0 replies; 33+ messages in thread
From: Avi Kivity @ 2009-11-19 15:22 UTC (permalink / raw)
  To: Adam Litke
  Cc: Anthony Liguori, Rusty Russell, qemu-devel, virtualization, linux-kernel

On 11/19/2009 05:19 PM, Adam Litke wrote:
> Rusty and Anthony,
> If I've addressed all outstanding issues, please consider this patch for
> inclusion.  Thanks.
>
> +struct virtio_balloon_stat
> +{
> +	__le16 tag;
> +	__le64 val;
> +};
> +
>    

You're not doing endian conversion in the host?

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)
@ 2009-11-19 15:22     ` Avi Kivity
  0 siblings, 0 replies; 33+ messages in thread
From: Avi Kivity @ 2009-11-19 15:22 UTC (permalink / raw)
  To: Adam Litke
  Cc: linux-kernel, Anthony Liguori, Rusty Russell, qemu-devel, virtualization

On 11/19/2009 05:19 PM, Adam Litke wrote:
> Rusty and Anthony,
> If I've addressed all outstanding issues, please consider this patch for
> inclusion.  Thanks.
>
> +struct virtio_balloon_stat
> +{
> +	__le16 tag;
> +	__le64 val;
> +};
> +
>    

You're not doing endian conversion in the host?

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: virtio: Add memory statistics reporting to the balloon driver (V3)
@ 2009-11-19 15:22     ` Avi Kivity
  0 siblings, 0 replies; 33+ messages in thread
From: Avi Kivity @ 2009-11-19 15:22 UTC (permalink / raw)
  To: Adam Litke; +Cc: linux-kernel, Anthony Liguori, qemu-devel, virtualization

On 11/19/2009 05:19 PM, Adam Litke wrote:
> Rusty and Anthony,
> If I've addressed all outstanding issues, please consider this patch for
> inclusion.  Thanks.
>
> +struct virtio_balloon_stat
> +{
> +	__le16 tag;
> +	__le64 val;
> +};
> +
>    

You're not doing endian conversion in the host?

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [Qemu-devel] Re: virtio: Report new guest memory statistics pertinent to memory ballooning (V4)
  2009-11-19 15:19 ` [Qemu-devel] Re: virtio: Report new guest memory statistics pertinent to memory ballooning (V4) Avi Kivity
@ 2009-11-19 15:56   ` Adam Litke
  2009-11-19 16:02     ` Avi Kivity
  2009-11-20  1:24     ` Jamie Lokier
  0 siblings, 2 replies; 33+ messages in thread
From: Adam Litke @ 2009-11-19 15:56 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, qemu-devel

On Thu, 2009-11-19 at 17:19 +0200, Avi Kivity wrote:
> On 11/19/2009 05:06 PM, Adam Litke wrote:
> > Avi and Anthony,
> > If you agree that I've addressed all outstanding issues, please consider this
> > patch for inclusion.  Thanks.
> >
> >    
> 
> I'd like to see this (and all other virtio-ABI-modifying patches) first 
> go into the virtio pci spec, then propagated to guest and host.

Where can I find information on the procedure for updating the virtio
pci spec?

> > Changes since V3:
> >   - Increase stat field size to 64 bits
> >   - Report all sizes in kb (not pages)
> >    
> 
> Why not bytes?  It's the most natural unit.

The precision for most of these stats (except major and minor faults)
is 4kb (at least for Linux guests on the platforms I can think of).  I
chose kb units to avoid wasting bits.  I suppose the 64bit fields are
"Bigger than we could ever possibly need (tm)".  Others have suggested
bytes as well so I will be happy to make the change.

> > -static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
> > +static void request_stats(VirtIOBalloon *vb)
> > +{
> > +    vb->stats_requested = 1;
> > +    reset_stats(vb);
> > +    monitor_suspend(cur_mon);
> >    
> 
> You allow the guest to kill a monitor here.

Is there a better way to handle asynchronous communication with the
guest?

> > +    virtqueue_push(vb->svq,&vb->stats_vq_elem, vb->stats_vq_offset);
> > +    virtio_notify(&vb->vdev, vb->svq);
> > +}
> > +
> >    
> 
> 
> 
> > +typedef struct VirtIOBalloonStat {
> > +    uint16_t tag;
> > +    uint64_t val;
> > +} VirtIOBalloonStat;
> >    
> 
> Alignment here depends on word size.  This needs to be padded to be 
> aligned the same way on 32 and 64 bit hosts and guests.

ok.  I assume that means the structure size must be a multiple of 64
bits in size?

-- 
Thanks,
Adam

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 15:22     ` [Qemu-devel] " Avi Kivity
@ 2009-11-19 15:58       ` Adam Litke
  -1 siblings, 0 replies; 33+ messages in thread
From: Adam Litke @ 2009-11-19 15:58 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Anthony Liguori, Rusty Russell, qemu-devel, virtualization, linux-kernel

On Thu, 2009-11-19 at 17:22 +0200, Avi Kivity wrote:
> On 11/19/2009 05:19 PM, Adam Litke wrote:
> > Rusty and Anthony,
> > If I've addressed all outstanding issues, please consider this patch for
> > inclusion.  Thanks.
> >
> > +struct virtio_balloon_stat
> > +{
> > +	__le16 tag;
> > +	__le64 val;
> > +};
> > +
> >    
> 
> You're not doing endian conversion in the host?

No.  I was following by example.  For the virtio_balloon, the existing
code is careful so that the guest always writes data in little endian.

-- 
Thanks,
Adam


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)
@ 2009-11-19 15:58       ` Adam Litke
  0 siblings, 0 replies; 33+ messages in thread
From: Adam Litke @ 2009-11-19 15:58 UTC (permalink / raw)
  To: Avi Kivity
  Cc: linux-kernel, Anthony Liguori, Rusty Russell, qemu-devel, virtualization

On Thu, 2009-11-19 at 17:22 +0200, Avi Kivity wrote:
> On 11/19/2009 05:19 PM, Adam Litke wrote:
> > Rusty and Anthony,
> > If I've addressed all outstanding issues, please consider this patch for
> > inclusion.  Thanks.
> >
> > +struct virtio_balloon_stat
> > +{
> > +	__le16 tag;
> > +	__le64 val;
> > +};
> > +
> >    
> 
> You're not doing endian conversion in the host?

No.  I was following by example.  For the virtio_balloon, the existing
code is careful so that the guest always writes data in little endian.

-- 
Thanks,
Adam

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 15:22     ` [Qemu-devel] " Avi Kivity
                       ` (2 preceding siblings ...)
  (?)
@ 2009-11-19 15:58     ` Adam Litke
  -1 siblings, 0 replies; 33+ messages in thread
From: Adam Litke @ 2009-11-19 15:58 UTC (permalink / raw)
  To: Avi Kivity; +Cc: linux-kernel, Anthony Liguori, qemu-devel, virtualization

On Thu, 2009-11-19 at 17:22 +0200, Avi Kivity wrote:
> On 11/19/2009 05:19 PM, Adam Litke wrote:
> > Rusty and Anthony,
> > If I've addressed all outstanding issues, please consider this patch for
> > inclusion.  Thanks.
> >
> > +struct virtio_balloon_stat
> > +{
> > +	__le16 tag;
> > +	__le64 val;
> > +};
> > +
> >    
> 
> You're not doing endian conversion in the host?

No.  I was following by example.  For the virtio_balloon, the existing
code is careful so that the guest always writes data in little endian.

-- 
Thanks,
Adam

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [Qemu-devel] Re: virtio: Report new guest memory statistics pertinent to memory ballooning (V4)
  2009-11-19 15:56   ` Adam Litke
@ 2009-11-19 16:02     ` Avi Kivity
  2009-11-23 18:47       ` Anthony Liguori
  2009-11-20  1:24     ` Jamie Lokier
  1 sibling, 1 reply; 33+ messages in thread
From: Avi Kivity @ 2009-11-19 16:02 UTC (permalink / raw)
  To: Adam Litke; +Cc: Anthony Liguori, qemu-devel

On 11/19/2009 05:56 PM, Adam Litke wrote:
> On Thu, 2009-11-19 at 17:19 +0200, Avi Kivity wrote:
>    
>> On 11/19/2009 05:06 PM, Adam Litke wrote:
>>      
>>> Avi and Anthony,
>>> If you agree that I've addressed all outstanding issues, please consider this
>>> patch for inclusion.  Thanks.
>>>
>>>
>>>        
>> I'd like to see this (and all other virtio-ABI-modifying patches) first
>> go into the virtio pci spec, then propagated to guest and host.
>>      
> Where can I find information on the procedure for updating the virtio
> pci spec?
>
>    

Send a patch to Rusty (same copy list).

http://ozlabs.org/~rusty/virtio-spec/

>>> Changes since V3:
>>>    - Increase stat field size to 64 bits
>>>    - Report all sizes in kb (not pages)
>>>
>>>        
>> Why not bytes?  It's the most natural unit.
>>      
> The precision for most of these stats (except major and minor faults)
> is 4kb (at least for Linux guests on the platforms I can think of).  I
> chose kb units to avoid wasting bits.  I suppose the 64bit fields are
> "Bigger than we could ever possibly need (tm)".  Others have suggested
> bytes as well so I will be happy to make the change.
>    

It's my engineering backgrounds.  I'd actually prefer them in a kg/m/s 
combo but it doesn't really work out.

>>> -static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
>>> +static void request_stats(VirtIOBalloon *vb)
>>> +{
>>> +    vb->stats_requested = 1;
>>> +    reset_stats(vb);
>>> +    monitor_suspend(cur_mon);
>>>
>>>        
>> You allow the guest to kill a monitor here.
>>      
> Is there a better way to handle asynchronous communication with the
> guest?
>    

With the current monitor, the only option I can think of it a command to 
request stats and a command to report stats (with a version number so we 
can see if it actually worked).

The new monitor will support async command completion but even then I 
don't think we should allow a guest to stop command execution 
indefinitely (it would tie up resources at the client).

>>> +typedef struct VirtIOBalloonStat {
>>> +    uint16_t tag;
>>> +    uint64_t val;
>>> +} VirtIOBalloonStat;
>>>
>>>        
>> Alignment here depends on word size.  This needs to be padded to be
>> aligned the same way on 32 and 64 bit hosts and guests.
>>      
> ok.  I assume that means the structure size must be a multiple of 64
> bits in size

Yes, and all values must be naturally aligned.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 15:58       ` [Qemu-devel] " Adam Litke
@ 2009-11-19 16:13         ` Avi Kivity
  -1 siblings, 0 replies; 33+ messages in thread
From: Avi Kivity @ 2009-11-19 16:13 UTC (permalink / raw)
  To: Adam Litke
  Cc: Anthony Liguori, Rusty Russell, qemu-devel, virtualization, linux-kernel

On 11/19/2009 05:58 PM, Adam Litke wrote:
> On Thu, 2009-11-19 at 17:22 +0200, Avi Kivity wrote:
>    
>> On 11/19/2009 05:19 PM, Adam Litke wrote:
>>      
>>> Rusty and Anthony,
>>> If I've addressed all outstanding issues, please consider this patch for
>>> inclusion.  Thanks.
>>>
>>> +struct virtio_balloon_stat
>>> +{
>>> +	__le16 tag;
>>> +	__le64 val;
>>> +};
>>> +
>>>
>>>        
>> You're not doing endian conversion in the host?
>>      
> No.  I was following by example.  For the virtio_balloon, the existing
> code is careful so that the guest always writes data in little endian.
>    

I don't follow.  If the guest is careful to write little-endian, surely 
the host must be equally careful to read little-endian?

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)
@ 2009-11-19 16:13         ` Avi Kivity
  0 siblings, 0 replies; 33+ messages in thread
From: Avi Kivity @ 2009-11-19 16:13 UTC (permalink / raw)
  To: Adam Litke
  Cc: linux-kernel, Anthony Liguori, Rusty Russell, qemu-devel, virtualization

On 11/19/2009 05:58 PM, Adam Litke wrote:
> On Thu, 2009-11-19 at 17:22 +0200, Avi Kivity wrote:
>    
>> On 11/19/2009 05:19 PM, Adam Litke wrote:
>>      
>>> Rusty and Anthony,
>>> If I've addressed all outstanding issues, please consider this patch for
>>> inclusion.  Thanks.
>>>
>>> +struct virtio_balloon_stat
>>> +{
>>> +	__le16 tag;
>>> +	__le64 val;
>>> +};
>>> +
>>>
>>>        
>> You're not doing endian conversion in the host?
>>      
> No.  I was following by example.  For the virtio_balloon, the existing
> code is careful so that the guest always writes data in little endian.
>    

I don't follow.  If the guest is careful to write little-endian, surely 
the host must be equally careful to read little-endian?

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 15:58       ` [Qemu-devel] " Adam Litke
  (?)
@ 2009-11-19 16:13       ` Avi Kivity
  -1 siblings, 0 replies; 33+ messages in thread
From: Avi Kivity @ 2009-11-19 16:13 UTC (permalink / raw)
  To: Adam Litke; +Cc: linux-kernel, Anthony Liguori, qemu-devel, virtualization

On 11/19/2009 05:58 PM, Adam Litke wrote:
> On Thu, 2009-11-19 at 17:22 +0200, Avi Kivity wrote:
>    
>> On 11/19/2009 05:19 PM, Adam Litke wrote:
>>      
>>> Rusty and Anthony,
>>> If I've addressed all outstanding issues, please consider this patch for
>>> inclusion.  Thanks.
>>>
>>> +struct virtio_balloon_stat
>>> +{
>>> +	__le16 tag;
>>> +	__le64 val;
>>> +};
>>> +
>>>
>>>        
>> You're not doing endian conversion in the host?
>>      
> No.  I was following by example.  For the virtio_balloon, the existing
> code is careful so that the guest always writes data in little endian.
>    

I don't follow.  If the guest is careful to write little-endian, surely 
the host must be equally careful to read little-endian?

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 16:13         ` [Qemu-devel] " Avi Kivity
@ 2009-11-19 16:29           ` Adam Litke
  -1 siblings, 0 replies; 33+ messages in thread
From: Adam Litke @ 2009-11-19 16:29 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Anthony Liguori, Rusty Russell, qemu-devel, virtualization, linux-kernel

On Thu, 2009-11-19 at 18:13 +0200, Avi Kivity wrote:
> On 11/19/2009 05:58 PM, Adam Litke wrote:
> > On Thu, 2009-11-19 at 17:22 +0200, Avi Kivity wrote:
> >    
> >> On 11/19/2009 05:19 PM, Adam Litke wrote:
> >>      
> >>> Rusty and Anthony,
> >>> If I've addressed all outstanding issues, please consider this patch for
> >>> inclusion.  Thanks.
> >>>
> >>> +struct virtio_balloon_stat
> >>> +{
> >>> +	__le16 tag;
> >>> +	__le64 val;
> >>> +};
> >>> +
> >>>
> >>>        
> >> You're not doing endian conversion in the host?
> >>      
> > No.  I was following by example.  For the virtio_balloon, the existing
> > code is careful so that the guest always writes data in little endian.
> >    
> 
> I don't follow.  If the guest is careful to write little-endian, surely 
> the host must be equally careful to read little-endian?

That is true and, by my reading of the existing qemu virtio-balloon
device code, isn't virtio_balloon_set_config() on a big endian host
already broken?


-- 
Thanks,
Adam


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)
@ 2009-11-19 16:29           ` Adam Litke
  0 siblings, 0 replies; 33+ messages in thread
From: Adam Litke @ 2009-11-19 16:29 UTC (permalink / raw)
  To: Avi Kivity
  Cc: linux-kernel, Anthony Liguori, Rusty Russell, qemu-devel, virtualization

On Thu, 2009-11-19 at 18:13 +0200, Avi Kivity wrote:
> On 11/19/2009 05:58 PM, Adam Litke wrote:
> > On Thu, 2009-11-19 at 17:22 +0200, Avi Kivity wrote:
> >    
> >> On 11/19/2009 05:19 PM, Adam Litke wrote:
> >>      
> >>> Rusty and Anthony,
> >>> If I've addressed all outstanding issues, please consider this patch for
> >>> inclusion.  Thanks.
> >>>
> >>> +struct virtio_balloon_stat
> >>> +{
> >>> +	__le16 tag;
> >>> +	__le64 val;
> >>> +};
> >>> +
> >>>
> >>>        
> >> You're not doing endian conversion in the host?
> >>      
> > No.  I was following by example.  For the virtio_balloon, the existing
> > code is careful so that the guest always writes data in little endian.
> >    
> 
> I don't follow.  If the guest is careful to write little-endian, surely 
> the host must be equally careful to read little-endian?

That is true and, by my reading of the existing qemu virtio-balloon
device code, isn't virtio_balloon_set_config() on a big endian host
already broken?


-- 
Thanks,
Adam

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 16:13         ` [Qemu-devel] " Avi Kivity
  (?)
  (?)
@ 2009-11-19 16:29         ` Adam Litke
  -1 siblings, 0 replies; 33+ messages in thread
From: Adam Litke @ 2009-11-19 16:29 UTC (permalink / raw)
  To: Avi Kivity; +Cc: linux-kernel, Anthony Liguori, qemu-devel, virtualization

On Thu, 2009-11-19 at 18:13 +0200, Avi Kivity wrote:
> On 11/19/2009 05:58 PM, Adam Litke wrote:
> > On Thu, 2009-11-19 at 17:22 +0200, Avi Kivity wrote:
> >    
> >> On 11/19/2009 05:19 PM, Adam Litke wrote:
> >>      
> >>> Rusty and Anthony,
> >>> If I've addressed all outstanding issues, please consider this patch for
> >>> inclusion.  Thanks.
> >>>
> >>> +struct virtio_balloon_stat
> >>> +{
> >>> +	__le16 tag;
> >>> +	__le64 val;
> >>> +};
> >>> +
> >>>
> >>>        
> >> You're not doing endian conversion in the host?
> >>      
> > No.  I was following by example.  For the virtio_balloon, the existing
> > code is careful so that the guest always writes data in little endian.
> >    
> 
> I don't follow.  If the guest is careful to write little-endian, surely 
> the host must be equally careful to read little-endian?

That is true and, by my reading of the existing qemu virtio-balloon
device code, isn't virtio_balloon_set_config() on a big endian host
already broken?


-- 
Thanks,
Adam

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 15:19   ` [Qemu-devel] " Adam Litke
@ 2009-11-19 23:53     ` Rusty Russell
  -1 siblings, 0 replies; 33+ messages in thread
From: Rusty Russell @ 2009-11-19 23:53 UTC (permalink / raw)
  To: Adam Litke
  Cc: Anthony Liguori, qemu-devel, Avi Kivity, virtualization, linux-kernel

On Fri, 20 Nov 2009 01:49:05 am Adam Litke wrote:
> Rusty and Anthony,
> If I've addressed all outstanding issues, please consider this patch for
> inclusion.  Thanks.
> 
> Changes since V2:
>  - Increase stat field size to 64 bits
>  - Report all sizes in kb (not pages)

Hi Adam,

   Looks like we're very close.  A few minor things:

Why k?  Why not just do the simplest possible thing, and just report all
stats as straight numbers, now we have 64 bits.

>  - Drop anon_pages stat and fix endianness conversion

Please drop endianness conversion.

> +struct virtio_balloon_stat
> +{
> +	__le16 tag;
> +	__le64 val;
> +};

Let's not introduce padding as well; __attribute__((packed)) here please.

Thanks,
Rusty.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)
@ 2009-11-19 23:53     ` Rusty Russell
  0 siblings, 0 replies; 33+ messages in thread
From: Rusty Russell @ 2009-11-19 23:53 UTC (permalink / raw)
  To: Adam Litke
  Cc: linux-kernel, Anthony Liguori, virtualization, qemu-devel, Avi Kivity

On Fri, 20 Nov 2009 01:49:05 am Adam Litke wrote:
> Rusty and Anthony,
> If I've addressed all outstanding issues, please consider this patch for
> inclusion.  Thanks.
> 
> Changes since V2:
>  - Increase stat field size to 64 bits
>  - Report all sizes in kb (not pages)

Hi Adam,

   Looks like we're very close.  A few minor things:

Why k?  Why not just do the simplest possible thing, and just report all
stats as straight numbers, now we have 64 bits.

>  - Drop anon_pages stat and fix endianness conversion

Please drop endianness conversion.

> +struct virtio_balloon_stat
> +{
> +	__le16 tag;
> +	__le64 val;
> +};

Let's not introduce padding as well; __attribute__((packed)) here please.

Thanks,
Rusty.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 15:19   ` [Qemu-devel] " Adam Litke
  (?)
  (?)
@ 2009-11-19 23:53   ` Rusty Russell
  -1 siblings, 0 replies; 33+ messages in thread
From: Rusty Russell @ 2009-11-19 23:53 UTC (permalink / raw)
  To: Adam Litke
  Cc: linux-kernel, Anthony Liguori, virtualization, qemu-devel, Avi Kivity

On Fri, 20 Nov 2009 01:49:05 am Adam Litke wrote:
> Rusty and Anthony,
> If I've addressed all outstanding issues, please consider this patch for
> inclusion.  Thanks.
> 
> Changes since V2:
>  - Increase stat field size to 64 bits
>  - Report all sizes in kb (not pages)

Hi Adam,

   Looks like we're very close.  A few minor things:

Why k?  Why not just do the simplest possible thing, and just report all
stats as straight numbers, now we have 64 bits.

>  - Drop anon_pages stat and fix endianness conversion

Please drop endianness conversion.

> +struct virtio_balloon_stat
> +{
> +	__le16 tag;
> +	__le64 val;
> +};

Let's not introduce padding as well; __attribute__((packed)) here please.

Thanks,
Rusty.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] Re: virtio: Report new guest memory statistics pertinent to memory ballooning (V4)
  2009-11-19 15:56   ` Adam Litke
  2009-11-19 16:02     ` Avi Kivity
@ 2009-11-20  1:24     ` Jamie Lokier
  1 sibling, 0 replies; 33+ messages in thread
From: Jamie Lokier @ 2009-11-20  1:24 UTC (permalink / raw)
  To: Adam Litke; +Cc: Anthony Liguori, Avi Kivity, qemu-devel

Adam Litke wrote:
> The precision for most of these stats (except major and minor faults)
> is 4kb (at least for Linux guests on the platforms I can think of).

Sparc and Alpha have 8kb pages (minimum).

The stats could be counted in 4kb pages, but what's the point in that?

-- Jamie

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 15:19   ` [Qemu-devel] " Adam Litke
@ 2009-11-23  9:44     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 33+ messages in thread
From: Michael S. Tsirkin @ 2009-11-23  9:44 UTC (permalink / raw)
  To: Adam Litke
  Cc: Anthony Liguori, Rusty Russell, linux-kernel, virtualization,
	qemu-devel, Avi Kivity

On Thu, Nov 19, 2009 at 09:19:05AM -0600, Adam Litke wrote:
> Rusty and Anthony,
> If I've addressed all outstanding issues, please consider this patch for
> inclusion.  Thanks.
> 
> Changes since V2:
>  - Increase stat field size to 64 bits
>  - Report all sizes in kb (not pages)
>  - Drop anon_pages stat and fix endianness conversion
> 
> Changes since V1:
>  - Use a virtqueue instead of the device config space
> 
> When using ballooning to manage overcommitted memory on a host, a system for
> guests to communicate their memory usage to the host can provide information
> that will minimize the impact of ballooning on the guests.  The current method
> employs a daemon running in each guest that communicates memory statistics to a
> host daemon at a specified time interval.  The host daemon aggregates this
> information and inflates and/or deflates balloons according to the level of
> host memory pressure.  This approach is effective but overly complex since a
> daemon must be installed inside each guest and coordinated to communicate with
> the host.  A simpler approach is to collect memory statistics in the virtio
> balloon driver and communicate them directly to the hypervisor.
> 
> This patch enables the guest-side support by adding stats collection and
> reporting to the virtio balloon driver.
> 
> Signed-off-by: Adam Litke <agl@us.ibm.com>
> Cc: Rusty Russell <rusty@rustcorp.com.au>
> Cc: Anthony Liguori <anthony@codemonkey.ws>
> Cc: virtualization@lists.linux-foundation.org
> Cc: linux-kernel@vger.kernel.org

that's pretty clean. some comments: I wrote them while off-line, and now
that I was going to send it out, I see that there's some overlap with
what Rusty wrote.  Still, here it is:

> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 200c22f..ebc9d39 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -29,7 +29,7 @@
>  struct virtio_balloon
>  {
>  	struct virtio_device *vdev;
> -	struct virtqueue *inflate_vq, *deflate_vq;
> +	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
>  
>  	/* Where the ballooning thread waits for config to change. */
>  	wait_queue_head_t config_change;
> @@ -50,6 +50,9 @@ struct virtio_balloon
>  	/* The array of pfns we tell the Host about. */
>  	unsigned int num_pfns;
>  	u32 pfns[256];
> +
> +	/* Memory statistics */
> +	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
>  };
>  
>  static struct virtio_device_id id_table[] = {
> @@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
>  	}
>  }
>  
> +static inline void update_stat(struct virtio_balloon *vb, int idx,
> +				__le16 tag, __le64 val)
> +{
> +	BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
> +	vb->stats[idx].tag = cpu_to_le16(tag);
> +	vb->stats[idx].val = cpu_to_le64(val);

you should do le16_to_cpu etc on __le values.
Try running this patch through sparce checker.

> +}
> +
> +#define K(x) ((x) << (PAGE_SHIFT - 10))

can't this overflow?
also, won't it be simpler to just report memory is
in bytes, then just x * PAGE_SIZE instead of hairy constants?

> +static void update_balloon_stats(struct virtio_balloon *vb)
> +{
> +	unsigned long events[NR_VM_EVENT_ITEMS];

that's about 1/2K worth of stack space on a 64 bit machine.
better keep it as part of struct virtio_balloon.

> +	struct sysinfo i;
> +	int idx = 0;
> +
> +	all_vm_events(events);
> +	si_meminfo(&i);
> +
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, K(events[PSWPIN]));
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, K(events[PSWPOUT]));
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));
> +}
> +
> +/*
> + * While most virtqueues communicate guest-initiated requests to the hypervisor,
> + * the stats queue operates in reverse.  The driver initializes the virtqueue
> + * with a single buffer.  From that point forward, all conversations consist of
> + * a hypervisor request (a call to this function) which directs us to refill
> + * the virtqueue with a fresh stats buffer.
> + */
> +static void stats_ack(struct virtqueue *vq)
> +{
> +	struct virtio_balloon *vb;
> +	unsigned int len;
> +	struct scatterlist sg;
> +
> +	vb = vq->vq_ops->get_buf(vq, &len);
> +	if (!vb)
> +		return;
> +
> +	update_balloon_stats(vb);
> +
> +	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
> +	if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0)
> +		BUG();
> +	vq->vq_ops->kick(vq);
> +}
> +
>  static void virtballoon_changed(struct virtio_device *vdev)
>  {
>  	struct virtio_balloon *vb = vdev->priv;
> @@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
>  static int virtballoon_probe(struct virtio_device *vdev)
>  {
>  	struct virtio_balloon *vb;
> -	struct virtqueue *vqs[2];
> -	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
> -	const char *names[] = { "inflate", "deflate" };
> -	int err;
> +	struct virtqueue *vqs[3];
> +	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_ack };
> +	const char *names[] = { "inflate", "deflate", "stats" };
> +	int err, nvqs;
>  
>  	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
>  	if (!vb) {
> @@ -221,13 +275,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
>  	init_waitqueue_head(&vb->config_change);
>  	vb->vdev = vdev;
>  
> -	/* We expect two virtqueues. */
> -	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
> +	/* We expect two virtqueues: inflate and deflate,
> +	 * and optionally stat. */
> +	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
> +	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
>  	if (err)
>  		goto out_free_vb;
>  
>  	vb->inflate_vq = vqs[0];
>  	vb->deflate_vq = vqs[1];
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> +		struct scatterlist sg;
> +		vb->stats_vq = vqs[2];
> +
> +		/*
> +		 * Prime this virtqueue with one buffer so the hypervisor can
> +		 * use it to signal us later.
> +		 */
> +		sg_init_one(&sg, vb->stats, sizeof(vb->stats));

should be "sizeof vb->stats"

> +		if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq, &sg, 1, 0, vb) < 0)
> +			BUG();
> +		vb->stats_vq->vq_ops->kick(vb->stats_vq);
> +	}
>  
>  	vb->thread = kthread_run(balloon, vb, "vballoon");
>  	if (IS_ERR(vb->thread)) {
> @@ -265,7 +334,9 @@ static void virtballoon_remove(struct virtio_device *vdev)
>  	kfree(vb);
>  }
>  
> -static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
> +static unsigned int features[] = {
> +	VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ,

one per line

> +};
>  
>  static struct virtio_driver virtio_balloon = {
>  	.feature_table = features,
> diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
> index 09d7300..a5c09e6 100644
> --- a/include/linux/virtio_balloon.h
> +++ b/include/linux/virtio_balloon.h
> @@ -6,6 +6,7 @@
>  
>  /* The feature bitmap for virtio balloon */
>  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
> +#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */

align with a tab
>  
>  /* Size of a PFN in the balloon interface. */
>  #define VIRTIO_BALLOON_PFN_SHIFT 12
> @@ -17,4 +18,19 @@ struct virtio_balloon_config
>  	/* Number of pages we've actually got in balloon. */
>  	__le32 actual;
>  };
> +
> +#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
> +#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped out */
> +#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
> +#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
> +#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free memory */
> +#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */

this seems to imply the total amount of memory in bytes?

> +#define VIRTIO_BALLOON_S_NR       6
> +
> +struct virtio_balloon_stat
> +{
> +	__le16 tag;
> +	__le64 val;

This might create padding issues e.g. when guest
is 32 bit and host is 64 bit. Better add padding
manually, or pack the structure (but packed structures
often cause gcc to generate terrible code).


> +};
> +
>  #endif /* _LINUX_VIRTIO_BALLOON_H */
> 
> 
> -- 
> Thanks,
> Adam
> 
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)
@ 2009-11-23  9:44     ` Michael S. Tsirkin
  0 siblings, 0 replies; 33+ messages in thread
From: Michael S. Tsirkin @ 2009-11-23  9:44 UTC (permalink / raw)
  To: Adam Litke
  Cc: Anthony Liguori, Rusty Russell, qemu-devel, linux-kernel,
	Avi Kivity, virtualization

On Thu, Nov 19, 2009 at 09:19:05AM -0600, Adam Litke wrote:
> Rusty and Anthony,
> If I've addressed all outstanding issues, please consider this patch for
> inclusion.  Thanks.
> 
> Changes since V2:
>  - Increase stat field size to 64 bits
>  - Report all sizes in kb (not pages)
>  - Drop anon_pages stat and fix endianness conversion
> 
> Changes since V1:
>  - Use a virtqueue instead of the device config space
> 
> When using ballooning to manage overcommitted memory on a host, a system for
> guests to communicate their memory usage to the host can provide information
> that will minimize the impact of ballooning on the guests.  The current method
> employs a daemon running in each guest that communicates memory statistics to a
> host daemon at a specified time interval.  The host daemon aggregates this
> information and inflates and/or deflates balloons according to the level of
> host memory pressure.  This approach is effective but overly complex since a
> daemon must be installed inside each guest and coordinated to communicate with
> the host.  A simpler approach is to collect memory statistics in the virtio
> balloon driver and communicate them directly to the hypervisor.
> 
> This patch enables the guest-side support by adding stats collection and
> reporting to the virtio balloon driver.
> 
> Signed-off-by: Adam Litke <agl@us.ibm.com>
> Cc: Rusty Russell <rusty@rustcorp.com.au>
> Cc: Anthony Liguori <anthony@codemonkey.ws>
> Cc: virtualization@lists.linux-foundation.org
> Cc: linux-kernel@vger.kernel.org

that's pretty clean. some comments: I wrote them while off-line, and now
that I was going to send it out, I see that there's some overlap with
what Rusty wrote.  Still, here it is:

> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 200c22f..ebc9d39 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -29,7 +29,7 @@
>  struct virtio_balloon
>  {
>  	struct virtio_device *vdev;
> -	struct virtqueue *inflate_vq, *deflate_vq;
> +	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
>  
>  	/* Where the ballooning thread waits for config to change. */
>  	wait_queue_head_t config_change;
> @@ -50,6 +50,9 @@ struct virtio_balloon
>  	/* The array of pfns we tell the Host about. */
>  	unsigned int num_pfns;
>  	u32 pfns[256];
> +
> +	/* Memory statistics */
> +	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
>  };
>  
>  static struct virtio_device_id id_table[] = {
> @@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
>  	}
>  }
>  
> +static inline void update_stat(struct virtio_balloon *vb, int idx,
> +				__le16 tag, __le64 val)
> +{
> +	BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
> +	vb->stats[idx].tag = cpu_to_le16(tag);
> +	vb->stats[idx].val = cpu_to_le64(val);

you should do le16_to_cpu etc on __le values.
Try running this patch through sparce checker.

> +}
> +
> +#define K(x) ((x) << (PAGE_SHIFT - 10))

can't this overflow?
also, won't it be simpler to just report memory is
in bytes, then just x * PAGE_SIZE instead of hairy constants?

> +static void update_balloon_stats(struct virtio_balloon *vb)
> +{
> +	unsigned long events[NR_VM_EVENT_ITEMS];

that's about 1/2K worth of stack space on a 64 bit machine.
better keep it as part of struct virtio_balloon.

> +	struct sysinfo i;
> +	int idx = 0;
> +
> +	all_vm_events(events);
> +	si_meminfo(&i);
> +
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, K(events[PSWPIN]));
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, K(events[PSWPOUT]));
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));
> +}
> +
> +/*
> + * While most virtqueues communicate guest-initiated requests to the hypervisor,
> + * the stats queue operates in reverse.  The driver initializes the virtqueue
> + * with a single buffer.  From that point forward, all conversations consist of
> + * a hypervisor request (a call to this function) which directs us to refill
> + * the virtqueue with a fresh stats buffer.
> + */
> +static void stats_ack(struct virtqueue *vq)
> +{
> +	struct virtio_balloon *vb;
> +	unsigned int len;
> +	struct scatterlist sg;
> +
> +	vb = vq->vq_ops->get_buf(vq, &len);
> +	if (!vb)
> +		return;
> +
> +	update_balloon_stats(vb);
> +
> +	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
> +	if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0)
> +		BUG();
> +	vq->vq_ops->kick(vq);
> +}
> +
>  static void virtballoon_changed(struct virtio_device *vdev)
>  {
>  	struct virtio_balloon *vb = vdev->priv;
> @@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
>  static int virtballoon_probe(struct virtio_device *vdev)
>  {
>  	struct virtio_balloon *vb;
> -	struct virtqueue *vqs[2];
> -	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
> -	const char *names[] = { "inflate", "deflate" };
> -	int err;
> +	struct virtqueue *vqs[3];
> +	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_ack };
> +	const char *names[] = { "inflate", "deflate", "stats" };
> +	int err, nvqs;
>  
>  	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
>  	if (!vb) {
> @@ -221,13 +275,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
>  	init_waitqueue_head(&vb->config_change);
>  	vb->vdev = vdev;
>  
> -	/* We expect two virtqueues. */
> -	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
> +	/* We expect two virtqueues: inflate and deflate,
> +	 * and optionally stat. */
> +	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
> +	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
>  	if (err)
>  		goto out_free_vb;
>  
>  	vb->inflate_vq = vqs[0];
>  	vb->deflate_vq = vqs[1];
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> +		struct scatterlist sg;
> +		vb->stats_vq = vqs[2];
> +
> +		/*
> +		 * Prime this virtqueue with one buffer so the hypervisor can
> +		 * use it to signal us later.
> +		 */
> +		sg_init_one(&sg, vb->stats, sizeof(vb->stats));

should be "sizeof vb->stats"

> +		if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq, &sg, 1, 0, vb) < 0)
> +			BUG();
> +		vb->stats_vq->vq_ops->kick(vb->stats_vq);
> +	}
>  
>  	vb->thread = kthread_run(balloon, vb, "vballoon");
>  	if (IS_ERR(vb->thread)) {
> @@ -265,7 +334,9 @@ static void virtballoon_remove(struct virtio_device *vdev)
>  	kfree(vb);
>  }
>  
> -static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
> +static unsigned int features[] = {
> +	VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ,

one per line

> +};
>  
>  static struct virtio_driver virtio_balloon = {
>  	.feature_table = features,
> diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
> index 09d7300..a5c09e6 100644
> --- a/include/linux/virtio_balloon.h
> +++ b/include/linux/virtio_balloon.h
> @@ -6,6 +6,7 @@
>  
>  /* The feature bitmap for virtio balloon */
>  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
> +#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */

align with a tab
>  
>  /* Size of a PFN in the balloon interface. */
>  #define VIRTIO_BALLOON_PFN_SHIFT 12
> @@ -17,4 +18,19 @@ struct virtio_balloon_config
>  	/* Number of pages we've actually got in balloon. */
>  	__le32 actual;
>  };
> +
> +#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
> +#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped out */
> +#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
> +#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
> +#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free memory */
> +#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */

this seems to imply the total amount of memory in bytes?

> +#define VIRTIO_BALLOON_S_NR       6
> +
> +struct virtio_balloon_stat
> +{
> +	__le16 tag;
> +	__le64 val;

This might create padding issues e.g. when guest
is 32 bit and host is 64 bit. Better add padding
manually, or pack the structure (but packed structures
often cause gcc to generate terrible code).


> +};
> +
>  #endif /* _LINUX_VIRTIO_BALLOON_H */
> 
> 
> -- 
> Thanks,
> Adam
> 
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-19 15:19   ` [Qemu-devel] " Adam Litke
                     ` (3 preceding siblings ...)
  (?)
@ 2009-11-23  9:44   ` Michael S. Tsirkin
  -1 siblings, 0 replies; 33+ messages in thread
From: Michael S. Tsirkin @ 2009-11-23  9:44 UTC (permalink / raw)
  To: Adam Litke
  Cc: Anthony Liguori, qemu-devel, linux-kernel, Avi Kivity, virtualization

On Thu, Nov 19, 2009 at 09:19:05AM -0600, Adam Litke wrote:
> Rusty and Anthony,
> If I've addressed all outstanding issues, please consider this patch for
> inclusion.  Thanks.
> 
> Changes since V2:
>  - Increase stat field size to 64 bits
>  - Report all sizes in kb (not pages)
>  - Drop anon_pages stat and fix endianness conversion
> 
> Changes since V1:
>  - Use a virtqueue instead of the device config space
> 
> When using ballooning to manage overcommitted memory on a host, a system for
> guests to communicate their memory usage to the host can provide information
> that will minimize the impact of ballooning on the guests.  The current method
> employs a daemon running in each guest that communicates memory statistics to a
> host daemon at a specified time interval.  The host daemon aggregates this
> information and inflates and/or deflates balloons according to the level of
> host memory pressure.  This approach is effective but overly complex since a
> daemon must be installed inside each guest and coordinated to communicate with
> the host.  A simpler approach is to collect memory statistics in the virtio
> balloon driver and communicate them directly to the hypervisor.
> 
> This patch enables the guest-side support by adding stats collection and
> reporting to the virtio balloon driver.
> 
> Signed-off-by: Adam Litke <agl@us.ibm.com>
> Cc: Rusty Russell <rusty@rustcorp.com.au>
> Cc: Anthony Liguori <anthony@codemonkey.ws>
> Cc: virtualization@lists.linux-foundation.org
> Cc: linux-kernel@vger.kernel.org

that's pretty clean. some comments: I wrote them while off-line, and now
that I was going to send it out, I see that there's some overlap with
what Rusty wrote.  Still, here it is:

> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 200c22f..ebc9d39 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -29,7 +29,7 @@
>  struct virtio_balloon
>  {
>  	struct virtio_device *vdev;
> -	struct virtqueue *inflate_vq, *deflate_vq;
> +	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
>  
>  	/* Where the ballooning thread waits for config to change. */
>  	wait_queue_head_t config_change;
> @@ -50,6 +50,9 @@ struct virtio_balloon
>  	/* The array of pfns we tell the Host about. */
>  	unsigned int num_pfns;
>  	u32 pfns[256];
> +
> +	/* Memory statistics */
> +	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
>  };
>  
>  static struct virtio_device_id id_table[] = {
> @@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
>  	}
>  }
>  
> +static inline void update_stat(struct virtio_balloon *vb, int idx,
> +				__le16 tag, __le64 val)
> +{
> +	BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
> +	vb->stats[idx].tag = cpu_to_le16(tag);
> +	vb->stats[idx].val = cpu_to_le64(val);

you should do le16_to_cpu etc on __le values.
Try running this patch through sparce checker.

> +}
> +
> +#define K(x) ((x) << (PAGE_SHIFT - 10))

can't this overflow?
also, won't it be simpler to just report memory is
in bytes, then just x * PAGE_SIZE instead of hairy constants?

> +static void update_balloon_stats(struct virtio_balloon *vb)
> +{
> +	unsigned long events[NR_VM_EVENT_ITEMS];

that's about 1/2K worth of stack space on a 64 bit machine.
better keep it as part of struct virtio_balloon.

> +	struct sysinfo i;
> +	int idx = 0;
> +
> +	all_vm_events(events);
> +	si_meminfo(&i);
> +
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, K(events[PSWPIN]));
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, K(events[PSWPOUT]));
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));
> +}
> +
> +/*
> + * While most virtqueues communicate guest-initiated requests to the hypervisor,
> + * the stats queue operates in reverse.  The driver initializes the virtqueue
> + * with a single buffer.  From that point forward, all conversations consist of
> + * a hypervisor request (a call to this function) which directs us to refill
> + * the virtqueue with a fresh stats buffer.
> + */
> +static void stats_ack(struct virtqueue *vq)
> +{
> +	struct virtio_balloon *vb;
> +	unsigned int len;
> +	struct scatterlist sg;
> +
> +	vb = vq->vq_ops->get_buf(vq, &len);
> +	if (!vb)
> +		return;
> +
> +	update_balloon_stats(vb);
> +
> +	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
> +	if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0)
> +		BUG();
> +	vq->vq_ops->kick(vq);
> +}
> +
>  static void virtballoon_changed(struct virtio_device *vdev)
>  {
>  	struct virtio_balloon *vb = vdev->priv;
> @@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
>  static int virtballoon_probe(struct virtio_device *vdev)
>  {
>  	struct virtio_balloon *vb;
> -	struct virtqueue *vqs[2];
> -	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
> -	const char *names[] = { "inflate", "deflate" };
> -	int err;
> +	struct virtqueue *vqs[3];
> +	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_ack };
> +	const char *names[] = { "inflate", "deflate", "stats" };
> +	int err, nvqs;
>  
>  	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
>  	if (!vb) {
> @@ -221,13 +275,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
>  	init_waitqueue_head(&vb->config_change);
>  	vb->vdev = vdev;
>  
> -	/* We expect two virtqueues. */
> -	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
> +	/* We expect two virtqueues: inflate and deflate,
> +	 * and optionally stat. */
> +	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
> +	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
>  	if (err)
>  		goto out_free_vb;
>  
>  	vb->inflate_vq = vqs[0];
>  	vb->deflate_vq = vqs[1];
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> +		struct scatterlist sg;
> +		vb->stats_vq = vqs[2];
> +
> +		/*
> +		 * Prime this virtqueue with one buffer so the hypervisor can
> +		 * use it to signal us later.
> +		 */
> +		sg_init_one(&sg, vb->stats, sizeof(vb->stats));

should be "sizeof vb->stats"

> +		if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq, &sg, 1, 0, vb) < 0)
> +			BUG();
> +		vb->stats_vq->vq_ops->kick(vb->stats_vq);
> +	}
>  
>  	vb->thread = kthread_run(balloon, vb, "vballoon");
>  	if (IS_ERR(vb->thread)) {
> @@ -265,7 +334,9 @@ static void virtballoon_remove(struct virtio_device *vdev)
>  	kfree(vb);
>  }
>  
> -static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
> +static unsigned int features[] = {
> +	VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ,

one per line

> +};
>  
>  static struct virtio_driver virtio_balloon = {
>  	.feature_table = features,
> diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
> index 09d7300..a5c09e6 100644
> --- a/include/linux/virtio_balloon.h
> +++ b/include/linux/virtio_balloon.h
> @@ -6,6 +6,7 @@
>  
>  /* The feature bitmap for virtio balloon */
>  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
> +#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */

align with a tab
>  
>  /* Size of a PFN in the balloon interface. */
>  #define VIRTIO_BALLOON_PFN_SHIFT 12
> @@ -17,4 +18,19 @@ struct virtio_balloon_config
>  	/* Number of pages we've actually got in balloon. */
>  	__le32 actual;
>  };
> +
> +#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
> +#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped out */
> +#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
> +#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
> +#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free memory */
> +#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */

this seems to imply the total amount of memory in bytes?

> +#define VIRTIO_BALLOON_S_NR       6
> +
> +struct virtio_balloon_stat
> +{
> +	__le16 tag;
> +	__le64 val;

This might create padding issues e.g. when guest
is 32 bit and host is 64 bit. Better add padding
manually, or pack the structure (but packed structures
often cause gcc to generate terrible code).


> +};
> +
>  #endif /* _LINUX_VIRTIO_BALLOON_H */
> 
> 
> -- 
> Thanks,
> Adam
> 
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-23  9:44     ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-11-23 11:00       ` Dor Laor
  -1 siblings, 0 replies; 33+ messages in thread
From: Dor Laor @ 2009-11-23 11:00 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Adam Litke, Anthony Liguori, qemu-devel, linux-kernel,
	Avi Kivity, virtualization, Vadim Rozenfeld

On 11/23/2009 11:44 AM, Michael S. Tsirkin wrote:
> On Thu, Nov 19, 2009 at 09:19:05AM -0600, Adam Litke wrote:
>> Rusty and Anthony,
>> If I've addressed all outstanding issues, please consider this patch for
>> inclusion.  Thanks.
>>
>> Changes since V2:
>>   - Increase stat field size to 64 bits
>>   - Report all sizes in kb (not pages)
>>   - Drop anon_pages stat and fix endianness conversion
>>
>> Changes since V1:
>>   - Use a virtqueue instead of the device config space
>>
>> When using ballooning to manage overcommitted memory on a host, a system for
>> guests to communicate their memory usage to the host can provide information
>> that will minimize the impact of ballooning on the guests.  The current method
>> employs a daemon running in each guest that communicates memory statistics to a
>> host daemon at a specified time interval.  The host daemon aggregates this
>> information and inflates and/or deflates balloons according to the level of
>> host memory pressure.  This approach is effective but overly complex since a
>> daemon must be installed inside each guest and coordinated to communicate with
>> the host.  A simpler approach is to collect memory statistics in the virtio
>> balloon driver and communicate them directly to the hypervisor.
>>
>> This patch enables the guest-side support by adding stats collection and
>> reporting to the virtio balloon driver.
>>
>> Signed-off-by: Adam Litke<agl@us.ibm.com>
>> Cc: Rusty Russell<rusty@rustcorp.com.au>
>> Cc: Anthony Liguori<anthony@codemonkey.ws>
>> Cc: virtualization@lists.linux-foundation.org
>> Cc: linux-kernel@vger.kernel.org
>
> that's pretty clean. some comments: I wrote them while off-line, and now
> that I was going to send it out, I see that there's some overlap with
> what Rusty wrote.  Still, here it is:
>
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index 200c22f..ebc9d39 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -29,7 +29,7 @@
>>   struct virtio_balloon
>>   {
>>   	struct virtio_device *vdev;
>> -	struct virtqueue *inflate_vq, *deflate_vq;
>> +	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
>>
>>   	/* Where the ballooning thread waits for config to change. */
>>   	wait_queue_head_t config_change;
>> @@ -50,6 +50,9 @@ struct virtio_balloon
>>   	/* The array of pfns we tell the Host about. */
>>   	unsigned int num_pfns;
>>   	u32 pfns[256];
>> +
>> +	/* Memory statistics */
>> +	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
>>   };
>>
>>   static struct virtio_device_id id_table[] = {
>> @@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
>>   	}
>>   }
>>
>> +static inline void update_stat(struct virtio_balloon *vb, int idx,
>> +				__le16 tag, __le64 val)
>> +{
>> +	BUG_ON(idx>= VIRTIO_BALLOON_S_NR);
>> +	vb->stats[idx].tag = cpu_to_le16(tag);
>> +	vb->stats[idx].val = cpu_to_le64(val);
>
> you should do le16_to_cpu etc on __le values.
> Try running this patch through sparce checker.
>
>> +}
>> +
>> +#define K(x) ((x)<<  (PAGE_SHIFT - 10))
>
> can't this overflow?
> also, won't it be simpler to just report memory is
> in bytes, then just x * PAGE_SIZE instead of hairy constants?
>
>> +static void update_balloon_stats(struct virtio_balloon *vb)
>> +{
>> +	unsigned long events[NR_VM_EVENT_ITEMS];
>
> that's about 1/2K worth of stack space on a 64 bit machine.
> better keep it as part of struct virtio_balloon.
>
>> +	struct sysinfo i;
>> +	int idx = 0;
>> +
>> +	all_vm_events(events);
>> +	si_meminfo(&i);
>> +
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, K(events[PSWPIN]));
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, K(events[PSWPOUT]));
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));


Finally I found some data from our M$ neighbors. This is from 
http://www.microsoft.com/whdc/system/sysperf/Perf_tun_srv-R2.mspx

"When running Windows in the child partition, you can use the following 
performance counters within a child partition to identify whether the 
child partition is experiencing memory pressure and is likely to perform 
better with a higher VM memory size:

Memory – Standby Cache Reserve Bytes:
Sum of Standby Cache Reserve Bytes and Free and Zero Page List Bytes 
should be 200 MB or more on systems with 1 GB, and 300 MB or more on 
systems with 2 GB or more of visible RAM.

Memory – Free & Zero Page List Bytes:
Sum of Standby Cache Reserve Bytes and Free and Zero Page List Bytes 
should be 200 MB or more on systems with 1 GB, and 300 MB or more on 
systems with 2 GB or more of visible RAM.

Memory – Pages Input/Sec:
Average over a 1-hour period is less than 10.
"

The biggest take out of it is that we may get #0-pages in windows.
It's valuable information for the host since KSM will collapse all of 
them anyway, so there is not point in ballooning them.
Vadim, can you check if we can extract it from windows?

>> +}
>> +
>> +/*
>> + * While most virtqueues communicate guest-initiated requests to the hypervisor,
>> + * the stats queue operates in reverse.  The driver initializes the virtqueue
>> + * with a single buffer.  From that point forward, all conversations consist of
>> + * a hypervisor request (a call to this function) which directs us to refill
>> + * the virtqueue with a fresh stats buffer.
>> + */
>> +static void stats_ack(struct virtqueue *vq)
>> +{
>> +	struct virtio_balloon *vb;
>> +	unsigned int len;
>> +	struct scatterlist sg;
>> +
>> +	vb = vq->vq_ops->get_buf(vq,&len);
>> +	if (!vb)
>> +		return;
>> +
>> +	update_balloon_stats(vb);
>> +
>> +	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
>> +	if (vq->vq_ops->add_buf(vq,&sg, 1, 0, vb)<  0)
>> +		BUG();
>> +	vq->vq_ops->kick(vq);
>> +}
>> +
>>   static void virtballoon_changed(struct virtio_device *vdev)
>>   {
>>   	struct virtio_balloon *vb = vdev->priv;
>> @@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
>>   static int virtballoon_probe(struct virtio_device *vdev)
>>   {
>>   	struct virtio_balloon *vb;
>> -	struct virtqueue *vqs[2];
>> -	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
>> -	const char *names[] = { "inflate", "deflate" };
>> -	int err;
>> +	struct virtqueue *vqs[3];
>> +	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_ack };
>> +	const char *names[] = { "inflate", "deflate", "stats" };
>> +	int err, nvqs;
>>
>>   	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
>>   	if (!vb) {
>> @@ -221,13 +275,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
>>   	init_waitqueue_head(&vb->config_change);
>>   	vb->vdev = vdev;
>>
>> -	/* We expect two virtqueues. */
>> -	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
>> +	/* We expect two virtqueues: inflate and deflate,
>> +	 * and optionally stat. */
>> +	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
>> +	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
>>   	if (err)
>>   		goto out_free_vb;
>>
>>   	vb->inflate_vq = vqs[0];
>>   	vb->deflate_vq = vqs[1];
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>> +		struct scatterlist sg;
>> +		vb->stats_vq = vqs[2];
>> +
>> +		/*
>> +		 * Prime this virtqueue with one buffer so the hypervisor can
>> +		 * use it to signal us later.
>> +		 */
>> +		sg_init_one(&sg, vb->stats, sizeof(vb->stats));
>
> should be "sizeof vb->stats"
>
>> +		if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq,&sg, 1, 0, vb)<  0)
>> +			BUG();
>> +		vb->stats_vq->vq_ops->kick(vb->stats_vq);
>> +	}
>>
>>   	vb->thread = kthread_run(balloon, vb, "vballoon");
>>   	if (IS_ERR(vb->thread)) {
>> @@ -265,7 +334,9 @@ static void virtballoon_remove(struct virtio_device *vdev)
>>   	kfree(vb);
>>   }
>>
>> -static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
>> +static unsigned int features[] = {
>> +	VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ,
>
> one per line
>
>> +};
>>
>>   static struct virtio_driver virtio_balloon = {
>>   	.feature_table = features,
>> diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
>> index 09d7300..a5c09e6 100644
>> --- a/include/linux/virtio_balloon.h
>> +++ b/include/linux/virtio_balloon.h
>> @@ -6,6 +6,7 @@
>>
>>   /* The feature bitmap for virtio balloon */
>>   #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
>> +#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
>
> align with a tab
>>
>>   /* Size of a PFN in the balloon interface. */
>>   #define VIRTIO_BALLOON_PFN_SHIFT 12
>> @@ -17,4 +18,19 @@ struct virtio_balloon_config
>>   	/* Number of pages we've actually got in balloon. */
>>   	__le32 actual;
>>   };
>> +
>> +#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
>> +#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped out */
>> +#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
>> +#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
>> +#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free memory */
>> +#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
>
> this seems to imply the total amount of memory in bytes?
>
>> +#define VIRTIO_BALLOON_S_NR       6
>> +
>> +struct virtio_balloon_stat
>> +{
>> +	__le16 tag;
>> +	__le64 val;
>
> This might create padding issues e.g. when guest
> is 32 bit and host is 64 bit. Better add padding
> manually, or pack the structure (but packed structures
> often cause gcc to generate terrible code).
>
>
>> +};
>> +
>>   #endif /* _LINUX_VIRTIO_BALLOON_H */
>>
>>
>> --
>> Thanks,
>> Adam
>>
>> _______________________________________________
>> Virtualization mailing list
>> Virtualization@lists.linux-foundation.org
>> https://lists.linux-foundation.org/mailman/listinfo/virtualization
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/virtualization


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)
@ 2009-11-23 11:00       ` Dor Laor
  0 siblings, 0 replies; 33+ messages in thread
From: Dor Laor @ 2009-11-23 11:00 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Anthony Liguori, qemu-devel, linux-kernel, virtualization,
	Avi Kivity, Adam Litke, Vadim Rozenfeld

On 11/23/2009 11:44 AM, Michael S. Tsirkin wrote:
> On Thu, Nov 19, 2009 at 09:19:05AM -0600, Adam Litke wrote:
>> Rusty and Anthony,
>> If I've addressed all outstanding issues, please consider this patch for
>> inclusion.  Thanks.
>>
>> Changes since V2:
>>   - Increase stat field size to 64 bits
>>   - Report all sizes in kb (not pages)
>>   - Drop anon_pages stat and fix endianness conversion
>>
>> Changes since V1:
>>   - Use a virtqueue instead of the device config space
>>
>> When using ballooning to manage overcommitted memory on a host, a system for
>> guests to communicate their memory usage to the host can provide information
>> that will minimize the impact of ballooning on the guests.  The current method
>> employs a daemon running in each guest that communicates memory statistics to a
>> host daemon at a specified time interval.  The host daemon aggregates this
>> information and inflates and/or deflates balloons according to the level of
>> host memory pressure.  This approach is effective but overly complex since a
>> daemon must be installed inside each guest and coordinated to communicate with
>> the host.  A simpler approach is to collect memory statistics in the virtio
>> balloon driver and communicate them directly to the hypervisor.
>>
>> This patch enables the guest-side support by adding stats collection and
>> reporting to the virtio balloon driver.
>>
>> Signed-off-by: Adam Litke<agl@us.ibm.com>
>> Cc: Rusty Russell<rusty@rustcorp.com.au>
>> Cc: Anthony Liguori<anthony@codemonkey.ws>
>> Cc: virtualization@lists.linux-foundation.org
>> Cc: linux-kernel@vger.kernel.org
>
> that's pretty clean. some comments: I wrote them while off-line, and now
> that I was going to send it out, I see that there's some overlap with
> what Rusty wrote.  Still, here it is:
>
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index 200c22f..ebc9d39 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -29,7 +29,7 @@
>>   struct virtio_balloon
>>   {
>>   	struct virtio_device *vdev;
>> -	struct virtqueue *inflate_vq, *deflate_vq;
>> +	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
>>
>>   	/* Where the ballooning thread waits for config to change. */
>>   	wait_queue_head_t config_change;
>> @@ -50,6 +50,9 @@ struct virtio_balloon
>>   	/* The array of pfns we tell the Host about. */
>>   	unsigned int num_pfns;
>>   	u32 pfns[256];
>> +
>> +	/* Memory statistics */
>> +	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
>>   };
>>
>>   static struct virtio_device_id id_table[] = {
>> @@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
>>   	}
>>   }
>>
>> +static inline void update_stat(struct virtio_balloon *vb, int idx,
>> +				__le16 tag, __le64 val)
>> +{
>> +	BUG_ON(idx>= VIRTIO_BALLOON_S_NR);
>> +	vb->stats[idx].tag = cpu_to_le16(tag);
>> +	vb->stats[idx].val = cpu_to_le64(val);
>
> you should do le16_to_cpu etc on __le values.
> Try running this patch through sparce checker.
>
>> +}
>> +
>> +#define K(x) ((x)<<  (PAGE_SHIFT - 10))
>
> can't this overflow?
> also, won't it be simpler to just report memory is
> in bytes, then just x * PAGE_SIZE instead of hairy constants?
>
>> +static void update_balloon_stats(struct virtio_balloon *vb)
>> +{
>> +	unsigned long events[NR_VM_EVENT_ITEMS];
>
> that's about 1/2K worth of stack space on a 64 bit machine.
> better keep it as part of struct virtio_balloon.
>
>> +	struct sysinfo i;
>> +	int idx = 0;
>> +
>> +	all_vm_events(events);
>> +	si_meminfo(&i);
>> +
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, K(events[PSWPIN]));
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, K(events[PSWPOUT]));
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));


Finally I found some data from our M$ neighbors. This is from 
http://www.microsoft.com/whdc/system/sysperf/Perf_tun_srv-R2.mspx

"When running Windows in the child partition, you can use the following 
performance counters within a child partition to identify whether the 
child partition is experiencing memory pressure and is likely to perform 
better with a higher VM memory size:

Memory – Standby Cache Reserve Bytes:
Sum of Standby Cache Reserve Bytes and Free and Zero Page List Bytes 
should be 200 MB or more on systems with 1 GB, and 300 MB or more on 
systems with 2 GB or more of visible RAM.

Memory – Free & Zero Page List Bytes:
Sum of Standby Cache Reserve Bytes and Free and Zero Page List Bytes 
should be 200 MB or more on systems with 1 GB, and 300 MB or more on 
systems with 2 GB or more of visible RAM.

Memory – Pages Input/Sec:
Average over a 1-hour period is less than 10.
"

The biggest take out of it is that we may get #0-pages in windows.
It's valuable information for the host since KSM will collapse all of 
them anyway, so there is not point in ballooning them.
Vadim, can you check if we can extract it from windows?

>> +}
>> +
>> +/*
>> + * While most virtqueues communicate guest-initiated requests to the hypervisor,
>> + * the stats queue operates in reverse.  The driver initializes the virtqueue
>> + * with a single buffer.  From that point forward, all conversations consist of
>> + * a hypervisor request (a call to this function) which directs us to refill
>> + * the virtqueue with a fresh stats buffer.
>> + */
>> +static void stats_ack(struct virtqueue *vq)
>> +{
>> +	struct virtio_balloon *vb;
>> +	unsigned int len;
>> +	struct scatterlist sg;
>> +
>> +	vb = vq->vq_ops->get_buf(vq,&len);
>> +	if (!vb)
>> +		return;
>> +
>> +	update_balloon_stats(vb);
>> +
>> +	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
>> +	if (vq->vq_ops->add_buf(vq,&sg, 1, 0, vb)<  0)
>> +		BUG();
>> +	vq->vq_ops->kick(vq);
>> +}
>> +
>>   static void virtballoon_changed(struct virtio_device *vdev)
>>   {
>>   	struct virtio_balloon *vb = vdev->priv;
>> @@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
>>   static int virtballoon_probe(struct virtio_device *vdev)
>>   {
>>   	struct virtio_balloon *vb;
>> -	struct virtqueue *vqs[2];
>> -	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
>> -	const char *names[] = { "inflate", "deflate" };
>> -	int err;
>> +	struct virtqueue *vqs[3];
>> +	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_ack };
>> +	const char *names[] = { "inflate", "deflate", "stats" };
>> +	int err, nvqs;
>>
>>   	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
>>   	if (!vb) {
>> @@ -221,13 +275,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
>>   	init_waitqueue_head(&vb->config_change);
>>   	vb->vdev = vdev;
>>
>> -	/* We expect two virtqueues. */
>> -	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
>> +	/* We expect two virtqueues: inflate and deflate,
>> +	 * and optionally stat. */
>> +	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
>> +	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
>>   	if (err)
>>   		goto out_free_vb;
>>
>>   	vb->inflate_vq = vqs[0];
>>   	vb->deflate_vq = vqs[1];
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>> +		struct scatterlist sg;
>> +		vb->stats_vq = vqs[2];
>> +
>> +		/*
>> +		 * Prime this virtqueue with one buffer so the hypervisor can
>> +		 * use it to signal us later.
>> +		 */
>> +		sg_init_one(&sg, vb->stats, sizeof(vb->stats));
>
> should be "sizeof vb->stats"
>
>> +		if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq,&sg, 1, 0, vb)<  0)
>> +			BUG();
>> +		vb->stats_vq->vq_ops->kick(vb->stats_vq);
>> +	}
>>
>>   	vb->thread = kthread_run(balloon, vb, "vballoon");
>>   	if (IS_ERR(vb->thread)) {
>> @@ -265,7 +334,9 @@ static void virtballoon_remove(struct virtio_device *vdev)
>>   	kfree(vb);
>>   }
>>
>> -static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
>> +static unsigned int features[] = {
>> +	VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ,
>
> one per line
>
>> +};
>>
>>   static struct virtio_driver virtio_balloon = {
>>   	.feature_table = features,
>> diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
>> index 09d7300..a5c09e6 100644
>> --- a/include/linux/virtio_balloon.h
>> +++ b/include/linux/virtio_balloon.h
>> @@ -6,6 +6,7 @@
>>
>>   /* The feature bitmap for virtio balloon */
>>   #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
>> +#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
>
> align with a tab
>>
>>   /* Size of a PFN in the balloon interface. */
>>   #define VIRTIO_BALLOON_PFN_SHIFT 12
>> @@ -17,4 +18,19 @@ struct virtio_balloon_config
>>   	/* Number of pages we've actually got in balloon. */
>>   	__le32 actual;
>>   };
>> +
>> +#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
>> +#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped out */
>> +#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
>> +#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
>> +#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free memory */
>> +#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
>
> this seems to imply the total amount of memory in bytes?
>
>> +#define VIRTIO_BALLOON_S_NR       6
>> +
>> +struct virtio_balloon_stat
>> +{
>> +	__le16 tag;
>> +	__le64 val;
>
> This might create padding issues e.g. when guest
> is 32 bit and host is 64 bit. Better add padding
> manually, or pack the structure (but packed structures
> often cause gcc to generate terrible code).
>
>
>> +};
>> +
>>   #endif /* _LINUX_VIRTIO_BALLOON_H */
>>
>>
>> --
>> Thanks,
>> Adam
>>
>> _______________________________________________
>> Virtualization mailing list
>> Virtualization@lists.linux-foundation.org
>> https://lists.linux-foundation.org/mailman/listinfo/virtualization
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-23  9:44     ` [Qemu-devel] " Michael S. Tsirkin
  (?)
@ 2009-11-23 11:00     ` Dor Laor
  -1 siblings, 0 replies; 33+ messages in thread
From: Dor Laor @ 2009-11-23 11:00 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Anthony Liguori, qemu-devel, linux-kernel, virtualization,
	Avi Kivity, Vadim Rozenfeld

On 11/23/2009 11:44 AM, Michael S. Tsirkin wrote:
> On Thu, Nov 19, 2009 at 09:19:05AM -0600, Adam Litke wrote:
>> Rusty and Anthony,
>> If I've addressed all outstanding issues, please consider this patch for
>> inclusion.  Thanks.
>>
>> Changes since V2:
>>   - Increase stat field size to 64 bits
>>   - Report all sizes in kb (not pages)
>>   - Drop anon_pages stat and fix endianness conversion
>>
>> Changes since V1:
>>   - Use a virtqueue instead of the device config space
>>
>> When using ballooning to manage overcommitted memory on a host, a system for
>> guests to communicate their memory usage to the host can provide information
>> that will minimize the impact of ballooning on the guests.  The current method
>> employs a daemon running in each guest that communicates memory statistics to a
>> host daemon at a specified time interval.  The host daemon aggregates this
>> information and inflates and/or deflates balloons according to the level of
>> host memory pressure.  This approach is effective but overly complex since a
>> daemon must be installed inside each guest and coordinated to communicate with
>> the host.  A simpler approach is to collect memory statistics in the virtio
>> balloon driver and communicate them directly to the hypervisor.
>>
>> This patch enables the guest-side support by adding stats collection and
>> reporting to the virtio balloon driver.
>>
>> Signed-off-by: Adam Litke<agl@us.ibm.com>
>> Cc: Rusty Russell<rusty@rustcorp.com.au>
>> Cc: Anthony Liguori<anthony@codemonkey.ws>
>> Cc: virtualization@lists.linux-foundation.org
>> Cc: linux-kernel@vger.kernel.org
>
> that's pretty clean. some comments: I wrote them while off-line, and now
> that I was going to send it out, I see that there's some overlap with
> what Rusty wrote.  Still, here it is:
>
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index 200c22f..ebc9d39 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -29,7 +29,7 @@
>>   struct virtio_balloon
>>   {
>>   	struct virtio_device *vdev;
>> -	struct virtqueue *inflate_vq, *deflate_vq;
>> +	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
>>
>>   	/* Where the ballooning thread waits for config to change. */
>>   	wait_queue_head_t config_change;
>> @@ -50,6 +50,9 @@ struct virtio_balloon
>>   	/* The array of pfns we tell the Host about. */
>>   	unsigned int num_pfns;
>>   	u32 pfns[256];
>> +
>> +	/* Memory statistics */
>> +	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
>>   };
>>
>>   static struct virtio_device_id id_table[] = {
>> @@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
>>   	}
>>   }
>>
>> +static inline void update_stat(struct virtio_balloon *vb, int idx,
>> +				__le16 tag, __le64 val)
>> +{
>> +	BUG_ON(idx>= VIRTIO_BALLOON_S_NR);
>> +	vb->stats[idx].tag = cpu_to_le16(tag);
>> +	vb->stats[idx].val = cpu_to_le64(val);
>
> you should do le16_to_cpu etc on __le values.
> Try running this patch through sparce checker.
>
>> +}
>> +
>> +#define K(x) ((x)<<  (PAGE_SHIFT - 10))
>
> can't this overflow?
> also, won't it be simpler to just report memory is
> in bytes, then just x * PAGE_SIZE instead of hairy constants?
>
>> +static void update_balloon_stats(struct virtio_balloon *vb)
>> +{
>> +	unsigned long events[NR_VM_EVENT_ITEMS];
>
> that's about 1/2K worth of stack space on a 64 bit machine.
> better keep it as part of struct virtio_balloon.
>
>> +	struct sysinfo i;
>> +	int idx = 0;
>> +
>> +	all_vm_events(events);
>> +	si_meminfo(&i);
>> +
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, K(events[PSWPIN]));
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, K(events[PSWPOUT]));
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
>> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));


Finally I found some data from our M$ neighbors. This is from 
http://www.microsoft.com/whdc/system/sysperf/Perf_tun_srv-R2.mspx

"When running Windows in the child partition, you can use the following 
performance counters within a child partition to identify whether the 
child partition is experiencing memory pressure and is likely to perform 
better with a higher VM memory size:

Memory – Standby Cache Reserve Bytes:
Sum of Standby Cache Reserve Bytes and Free and Zero Page List Bytes 
should be 200 MB or more on systems with 1 GB, and 300 MB or more on 
systems with 2 GB or more of visible RAM.

Memory – Free & Zero Page List Bytes:
Sum of Standby Cache Reserve Bytes and Free and Zero Page List Bytes 
should be 200 MB or more on systems with 1 GB, and 300 MB or more on 
systems with 2 GB or more of visible RAM.

Memory – Pages Input/Sec:
Average over a 1-hour period is less than 10.
"

The biggest take out of it is that we may get #0-pages in windows.
It's valuable information for the host since KSM will collapse all of 
them anyway, so there is not point in ballooning them.
Vadim, can you check if we can extract it from windows?

>> +}
>> +
>> +/*
>> + * While most virtqueues communicate guest-initiated requests to the hypervisor,
>> + * the stats queue operates in reverse.  The driver initializes the virtqueue
>> + * with a single buffer.  From that point forward, all conversations consist of
>> + * a hypervisor request (a call to this function) which directs us to refill
>> + * the virtqueue with a fresh stats buffer.
>> + */
>> +static void stats_ack(struct virtqueue *vq)
>> +{
>> +	struct virtio_balloon *vb;
>> +	unsigned int len;
>> +	struct scatterlist sg;
>> +
>> +	vb = vq->vq_ops->get_buf(vq,&len);
>> +	if (!vb)
>> +		return;
>> +
>> +	update_balloon_stats(vb);
>> +
>> +	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
>> +	if (vq->vq_ops->add_buf(vq,&sg, 1, 0, vb)<  0)
>> +		BUG();
>> +	vq->vq_ops->kick(vq);
>> +}
>> +
>>   static void virtballoon_changed(struct virtio_device *vdev)
>>   {
>>   	struct virtio_balloon *vb = vdev->priv;
>> @@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
>>   static int virtballoon_probe(struct virtio_device *vdev)
>>   {
>>   	struct virtio_balloon *vb;
>> -	struct virtqueue *vqs[2];
>> -	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
>> -	const char *names[] = { "inflate", "deflate" };
>> -	int err;
>> +	struct virtqueue *vqs[3];
>> +	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_ack };
>> +	const char *names[] = { "inflate", "deflate", "stats" };
>> +	int err, nvqs;
>>
>>   	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
>>   	if (!vb) {
>> @@ -221,13 +275,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
>>   	init_waitqueue_head(&vb->config_change);
>>   	vb->vdev = vdev;
>>
>> -	/* We expect two virtqueues. */
>> -	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
>> +	/* We expect two virtqueues: inflate and deflate,
>> +	 * and optionally stat. */
>> +	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
>> +	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
>>   	if (err)
>>   		goto out_free_vb;
>>
>>   	vb->inflate_vq = vqs[0];
>>   	vb->deflate_vq = vqs[1];
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>> +		struct scatterlist sg;
>> +		vb->stats_vq = vqs[2];
>> +
>> +		/*
>> +		 * Prime this virtqueue with one buffer so the hypervisor can
>> +		 * use it to signal us later.
>> +		 */
>> +		sg_init_one(&sg, vb->stats, sizeof(vb->stats));
>
> should be "sizeof vb->stats"
>
>> +		if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq,&sg, 1, 0, vb)<  0)
>> +			BUG();
>> +		vb->stats_vq->vq_ops->kick(vb->stats_vq);
>> +	}
>>
>>   	vb->thread = kthread_run(balloon, vb, "vballoon");
>>   	if (IS_ERR(vb->thread)) {
>> @@ -265,7 +334,9 @@ static void virtballoon_remove(struct virtio_device *vdev)
>>   	kfree(vb);
>>   }
>>
>> -static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
>> +static unsigned int features[] = {
>> +	VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ,
>
> one per line
>
>> +};
>>
>>   static struct virtio_driver virtio_balloon = {
>>   	.feature_table = features,
>> diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
>> index 09d7300..a5c09e6 100644
>> --- a/include/linux/virtio_balloon.h
>> +++ b/include/linux/virtio_balloon.h
>> @@ -6,6 +6,7 @@
>>
>>   /* The feature bitmap for virtio balloon */
>>   #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
>> +#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
>
> align with a tab
>>
>>   /* Size of a PFN in the balloon interface. */
>>   #define VIRTIO_BALLOON_PFN_SHIFT 12
>> @@ -17,4 +18,19 @@ struct virtio_balloon_config
>>   	/* Number of pages we've actually got in balloon. */
>>   	__le32 actual;
>>   };
>> +
>> +#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
>> +#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped out */
>> +#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
>> +#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
>> +#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free memory */
>> +#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
>
> this seems to imply the total amount of memory in bytes?
>
>> +#define VIRTIO_BALLOON_S_NR       6
>> +
>> +struct virtio_balloon_stat
>> +{
>> +	__le16 tag;
>> +	__le64 val;
>
> This might create padding issues e.g. when guest
> is 32 bit and host is 64 bit. Better add padding
> manually, or pack the structure (but packed structures
> often cause gcc to generate terrible code).
>
>
>> +};
>> +
>>   #endif /* _LINUX_VIRTIO_BALLOON_H */
>>
>>
>> --
>> Thanks,
>> Adam
>>
>> _______________________________________________
>> Virtualization mailing list
>> Virtualization@lists.linux-foundation.org
>> https://lists.linux-foundation.org/mailman/listinfo/virtualization
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-23 11:00       ` Dor Laor
@ 2009-11-23 11:31         ` Vadim Rozenfeld
  -1 siblings, 0 replies; 33+ messages in thread
From: Vadim Rozenfeld @ 2009-11-23 11:31 UTC (permalink / raw)
  To: dlaor
  Cc: Michael S. Tsirkin, Anthony Liguori, qemu-devel, linux-kernel,
	virtualization, Avi Kivity, Adam Litke

On 11/23/2009 01:00 PM, Dor Laor wrote:
> On 11/23/2009 11:44 AM, Michael S. Tsirkin wrote:
>> On Thu, Nov 19, 2009 at 09:19:05AM -0600, Adam Litke wrote:
>>> Rusty and Anthony,
>>> If I've addressed all outstanding issues, please consider this patch 
>>> for
>>> inclusion.  Thanks.
>>>
>>> Changes since V2:
>>>   - Increase stat field size to 64 bits
>>>   - Report all sizes in kb (not pages)
>>>   - Drop anon_pages stat and fix endianness conversion
>>>
>>> Changes since V1:
>>>   - Use a virtqueue instead of the device config space
>>>
>>> When using ballooning to manage overcommitted memory on a host, a 
>>> system for
>>> guests to communicate their memory usage to the host can provide 
>>> information
>>> that will minimize the impact of ballooning on the guests.  The 
>>> current method
>>> employs a daemon running in each guest that communicates memory 
>>> statistics to a
>>> host daemon at a specified time interval.  The host daemon 
>>> aggregates this
>>> information and inflates and/or deflates balloons according to the 
>>> level of
>>> host memory pressure.  This approach is effective but overly complex 
>>> since a
>>> daemon must be installed inside each guest and coordinated to 
>>> communicate with
>>> the host.  A simpler approach is to collect memory statistics in the 
>>> virtio
>>> balloon driver and communicate them directly to the hypervisor.
>>>
>>> This patch enables the guest-side support by adding stats collection 
>>> and
>>> reporting to the virtio balloon driver.
>>>
>>> Signed-off-by: Adam Litke<agl@us.ibm.com>
>>> Cc: Rusty Russell<rusty@rustcorp.com.au>
>>> Cc: Anthony Liguori<anthony@codemonkey.ws>
>>> Cc: virtualization@lists.linux-foundation.org
>>> Cc: linux-kernel@vger.kernel.org
>>
>> that's pretty clean. some comments: I wrote them while off-line, and now
>> that I was going to send it out, I see that there's some overlap with
>> what Rusty wrote.  Still, here it is:
>>
>>> diff --git a/drivers/virtio/virtio_balloon.c 
>>> b/drivers/virtio/virtio_balloon.c
>>> index 200c22f..ebc9d39 100644
>>> --- a/drivers/virtio/virtio_balloon.c
>>> +++ b/drivers/virtio/virtio_balloon.c
>>> @@ -29,7 +29,7 @@
>>>   struct virtio_balloon
>>>   {
>>>       struct virtio_device *vdev;
>>> -    struct virtqueue *inflate_vq, *deflate_vq;
>>> +    struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
>>>
>>>       /* Where the ballooning thread waits for config to change. */
>>>       wait_queue_head_t config_change;
>>> @@ -50,6 +50,9 @@ struct virtio_balloon
>>>       /* The array of pfns we tell the Host about. */
>>>       unsigned int num_pfns;
>>>       u32 pfns[256];
>>> +
>>> +    /* Memory statistics */
>>> +    struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
>>>   };
>>>
>>>   static struct virtio_device_id id_table[] = {
>>> @@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon 
>>> *vb, size_t num)
>>>       }
>>>   }
>>>
>>> +static inline void update_stat(struct virtio_balloon *vb, int idx,
>>> +                __le16 tag, __le64 val)
>>> +{
>>> +    BUG_ON(idx>= VIRTIO_BALLOON_S_NR);
>>> +    vb->stats[idx].tag = cpu_to_le16(tag);
>>> +    vb->stats[idx].val = cpu_to_le64(val);
>>
>> you should do le16_to_cpu etc on __le values.
>> Try running this patch through sparce checker.
>>
>>> +}
>>> +
>>> +#define K(x) ((x)<<  (PAGE_SHIFT - 10))
>>
>> can't this overflow?
>> also, won't it be simpler to just report memory is
>> in bytes, then just x * PAGE_SIZE instead of hairy constants?
>>
>>> +static void update_balloon_stats(struct virtio_balloon *vb)
>>> +{
>>> +    unsigned long events[NR_VM_EVENT_ITEMS];
>>
>> that's about 1/2K worth of stack space on a 64 bit machine.
>> better keep it as part of struct virtio_balloon.
>>
>>> +    struct sysinfo i;
>>> +    int idx = 0;
>>> +
>>> +    all_vm_events(events);
>>> +    si_meminfo(&i);
>>> +
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, 
>>> K(events[PSWPIN]));
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, 
>>> K(events[PSWPOUT]));
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, 
>>> events[PGMAJFAULT]);
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));
>
>
> Finally I found some data from our M$ neighbors. This is from 
> http://www.microsoft.com/whdc/system/sysperf/Perf_tun_srv-R2.mspx
>
> "When running Windows in the child partition, you can use the 
> following performance counters within a child partition to identify 
> whether the child partition is experiencing memory pressure and is 
> likely to perform better with a higher VM memory size:
>
> Memory – Standby Cache Reserve Bytes:
> Sum of Standby Cache Reserve Bytes and Free and Zero Page List Bytes 
> should be 200 MB or more on systems with 1 GB, and 300 MB or more on 
> systems with 2 GB or more of visible RAM.
>
> Memory – Free & Zero Page List Bytes:
> Sum of Standby Cache Reserve Bytes and Free and Zero Page List Bytes 
> should be 200 MB or more on systems with 1 GB, and 300 MB or more on 
> systems with 2 GB or more of visible RAM.
>
> Memory – Pages Input/Sec:
> Average over a 1-hour period is less than 10.
> "
>
> The biggest take out of it is that we may get #0-pages in windows.
> It's valuable information for the host since KSM will collapse all of 
> them anyway, so there is not point in ballooning them.
> Vadim, can you check if we can extract it from windows?
We can do it from kernel mode by calling ZwQuerySystemInformation 
function. But unfortunately SystemPerformanceInformation class is not 
documented anywhere, except for Gary Nebbet book.
It could be easier to query performance counters from user mode (say, 
some sort of very light service) and pass the information down to 
balloon driver.
Using WMI could be another option, but I need to check whether we can 
read performance  counters from WMI provider at kernel level.
>
>>> +}
>>> +
>>> +/*
>>> + * While most virtqueues communicate guest-initiated requests to 
>>> the hypervisor,
>>> + * the stats queue operates in reverse.  The driver initializes the 
>>> virtqueue
>>> + * with a single buffer.  From that point forward, all 
>>> conversations consist of
>>> + * a hypervisor request (a call to this function) which directs us 
>>> to refill
>>> + * the virtqueue with a fresh stats buffer.
>>> + */
>>> +static void stats_ack(struct virtqueue *vq)
>>> +{
>>> +    struct virtio_balloon *vb;
>>> +    unsigned int len;
>>> +    struct scatterlist sg;
>>> +
>>> +    vb = vq->vq_ops->get_buf(vq,&len);
>>> +    if (!vb)
>>> +        return;
>>> +
>>> +    update_balloon_stats(vb);
>>> +
>>> +    sg_init_one(&sg, vb->stats, sizeof(vb->stats));
>>> +    if (vq->vq_ops->add_buf(vq,&sg, 1, 0, vb)<  0)
>>> +        BUG();
>>> +    vq->vq_ops->kick(vq);
>>> +}
>>> +
>>>   static void virtballoon_changed(struct virtio_device *vdev)
>>>   {
>>>       struct virtio_balloon *vb = vdev->priv;
>>> @@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
>>>   static int virtballoon_probe(struct virtio_device *vdev)
>>>   {
>>>       struct virtio_balloon *vb;
>>> -    struct virtqueue *vqs[2];
>>> -    vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
>>> -    const char *names[] = { "inflate", "deflate" };
>>> -    int err;
>>> +    struct virtqueue *vqs[3];
>>> +    vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, 
>>> stats_ack };
>>> +    const char *names[] = { "inflate", "deflate", "stats" };
>>> +    int err, nvqs;
>>>
>>>       vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
>>>       if (!vb) {
>>> @@ -221,13 +275,28 @@ static int virtballoon_probe(struct 
>>> virtio_device *vdev)
>>>       init_waitqueue_head(&vb->config_change);
>>>       vb->vdev = vdev;
>>>
>>> -    /* We expect two virtqueues. */
>>> -    err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
>>> +    /* We expect two virtqueues: inflate and deflate,
>>> +     * and optionally stat. */
>>> +    nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) 
>>> ? 3 : 2;
>>> +    err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
>>>       if (err)
>>>           goto out_free_vb;
>>>
>>>       vb->inflate_vq = vqs[0];
>>>       vb->deflate_vq = vqs[1];
>>> +    if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>>> +        struct scatterlist sg;
>>> +        vb->stats_vq = vqs[2];
>>> +
>>> +        /*
>>> +         * Prime this virtqueue with one buffer so the hypervisor can
>>> +         * use it to signal us later.
>>> +         */
>>> +        sg_init_one(&sg, vb->stats, sizeof(vb->stats));
>>
>> should be "sizeof vb->stats"
>>
>>> +        if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq,&sg, 1, 0, 
>>> vb)<  0)
>>> +            BUG();
>>> +        vb->stats_vq->vq_ops->kick(vb->stats_vq);
>>> +    }
>>>
>>>       vb->thread = kthread_run(balloon, vb, "vballoon");
>>>       if (IS_ERR(vb->thread)) {
>>> @@ -265,7 +334,9 @@ static void virtballoon_remove(struct 
>>> virtio_device *vdev)
>>>       kfree(vb);
>>>   }
>>>
>>> -static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
>>> +static unsigned int features[] = {
>>> +    VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ,
>>
>> one per line
>>
>>> +};
>>>
>>>   static struct virtio_driver virtio_balloon = {
>>>       .feature_table = features,
>>> diff --git a/include/linux/virtio_balloon.h 
>>> b/include/linux/virtio_balloon.h
>>> index 09d7300..a5c09e6 100644
>>> --- a/include/linux/virtio_balloon.h
>>> +++ b/include/linux/virtio_balloon.h
>>> @@ -6,6 +6,7 @@
>>>
>>>   /* The feature bitmap for virtio balloon */
>>>   #define VIRTIO_BALLOON_F_MUST_TELL_HOST    0 /* Tell before 
>>> reclaiming pages */
>>> +#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
>>
>> align with a tab
>>>
>>>   /* Size of a PFN in the balloon interface. */
>>>   #define VIRTIO_BALLOON_PFN_SHIFT 12
>>> @@ -17,4 +18,19 @@ struct virtio_balloon_config
>>>       /* Number of pages we've actually got in balloon. */
>>>       __le32 actual;
>>>   };
>>> +
>>> +#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped 
>>> in */
>>> +#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped 
>>> out */
>>> +#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
>>> +#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
>>> +#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free 
>>> memory */
>>> +#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
>>
>> this seems to imply the total amount of memory in bytes?
>>
>>> +#define VIRTIO_BALLOON_S_NR       6
>>> +
>>> +struct virtio_balloon_stat
>>> +{
>>> +    __le16 tag;
>>> +    __le64 val;
>>
>> This might create padding issues e.g. when guest
>> is 32 bit and host is 64 bit. Better add padding
>> manually, or pack the structure (but packed structures
>> often cause gcc to generate terrible code).
>>
>>
>>> +};
>>> +
>>>   #endif /* _LINUX_VIRTIO_BALLOON_H */
>>>
>>>
>>> -- 
>>> Thanks,
>>> Adam
>>>
>>> _______________________________________________
>>> Virtualization mailing list
>>> Virtualization@lists.linux-foundation.org
>>> https://lists.linux-foundation.org/mailman/listinfo/virtualization
>> _______________________________________________
>> Virtualization mailing list
>> Virtualization@lists.linux-foundation.org
>> https://lists.linux-foundation.org/mailman/listinfo/virtualization
>
>
>


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)
@ 2009-11-23 11:31         ` Vadim Rozenfeld
  0 siblings, 0 replies; 33+ messages in thread
From: Vadim Rozenfeld @ 2009-11-23 11:31 UTC (permalink / raw)
  To: dlaor
  Cc: Anthony Liguori, Michael S. Tsirkin, qemu-devel, linux-kernel,
	virtualization, Avi Kivity, Adam Litke

On 11/23/2009 01:00 PM, Dor Laor wrote:
> On 11/23/2009 11:44 AM, Michael S. Tsirkin wrote:
>> On Thu, Nov 19, 2009 at 09:19:05AM -0600, Adam Litke wrote:
>>> Rusty and Anthony,
>>> If I've addressed all outstanding issues, please consider this patch 
>>> for
>>> inclusion.  Thanks.
>>>
>>> Changes since V2:
>>>   - Increase stat field size to 64 bits
>>>   - Report all sizes in kb (not pages)
>>>   - Drop anon_pages stat and fix endianness conversion
>>>
>>> Changes since V1:
>>>   - Use a virtqueue instead of the device config space
>>>
>>> When using ballooning to manage overcommitted memory on a host, a 
>>> system for
>>> guests to communicate their memory usage to the host can provide 
>>> information
>>> that will minimize the impact of ballooning on the guests.  The 
>>> current method
>>> employs a daemon running in each guest that communicates memory 
>>> statistics to a
>>> host daemon at a specified time interval.  The host daemon 
>>> aggregates this
>>> information and inflates and/or deflates balloons according to the 
>>> level of
>>> host memory pressure.  This approach is effective but overly complex 
>>> since a
>>> daemon must be installed inside each guest and coordinated to 
>>> communicate with
>>> the host.  A simpler approach is to collect memory statistics in the 
>>> virtio
>>> balloon driver and communicate them directly to the hypervisor.
>>>
>>> This patch enables the guest-side support by adding stats collection 
>>> and
>>> reporting to the virtio balloon driver.
>>>
>>> Signed-off-by: Adam Litke<agl@us.ibm.com>
>>> Cc: Rusty Russell<rusty@rustcorp.com.au>
>>> Cc: Anthony Liguori<anthony@codemonkey.ws>
>>> Cc: virtualization@lists.linux-foundation.org
>>> Cc: linux-kernel@vger.kernel.org
>>
>> that's pretty clean. some comments: I wrote them while off-line, and now
>> that I was going to send it out, I see that there's some overlap with
>> what Rusty wrote.  Still, here it is:
>>
>>> diff --git a/drivers/virtio/virtio_balloon.c 
>>> b/drivers/virtio/virtio_balloon.c
>>> index 200c22f..ebc9d39 100644
>>> --- a/drivers/virtio/virtio_balloon.c
>>> +++ b/drivers/virtio/virtio_balloon.c
>>> @@ -29,7 +29,7 @@
>>>   struct virtio_balloon
>>>   {
>>>       struct virtio_device *vdev;
>>> -    struct virtqueue *inflate_vq, *deflate_vq;
>>> +    struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
>>>
>>>       /* Where the ballooning thread waits for config to change. */
>>>       wait_queue_head_t config_change;
>>> @@ -50,6 +50,9 @@ struct virtio_balloon
>>>       /* The array of pfns we tell the Host about. */
>>>       unsigned int num_pfns;
>>>       u32 pfns[256];
>>> +
>>> +    /* Memory statistics */
>>> +    struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
>>>   };
>>>
>>>   static struct virtio_device_id id_table[] = {
>>> @@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon 
>>> *vb, size_t num)
>>>       }
>>>   }
>>>
>>> +static inline void update_stat(struct virtio_balloon *vb, int idx,
>>> +                __le16 tag, __le64 val)
>>> +{
>>> +    BUG_ON(idx>= VIRTIO_BALLOON_S_NR);
>>> +    vb->stats[idx].tag = cpu_to_le16(tag);
>>> +    vb->stats[idx].val = cpu_to_le64(val);
>>
>> you should do le16_to_cpu etc on __le values.
>> Try running this patch through sparce checker.
>>
>>> +}
>>> +
>>> +#define K(x) ((x)<<  (PAGE_SHIFT - 10))
>>
>> can't this overflow?
>> also, won't it be simpler to just report memory is
>> in bytes, then just x * PAGE_SIZE instead of hairy constants?
>>
>>> +static void update_balloon_stats(struct virtio_balloon *vb)
>>> +{
>>> +    unsigned long events[NR_VM_EVENT_ITEMS];
>>
>> that's about 1/2K worth of stack space on a 64 bit machine.
>> better keep it as part of struct virtio_balloon.
>>
>>> +    struct sysinfo i;
>>> +    int idx = 0;
>>> +
>>> +    all_vm_events(events);
>>> +    si_meminfo(&i);
>>> +
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, 
>>> K(events[PSWPIN]));
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, 
>>> K(events[PSWPOUT]));
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, 
>>> events[PGMAJFAULT]);
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));
>
>
> Finally I found some data from our M$ neighbors. This is from 
> http://www.microsoft.com/whdc/system/sysperf/Perf_tun_srv-R2.mspx
>
> "When running Windows in the child partition, you can use the 
> following performance counters within a child partition to identify 
> whether the child partition is experiencing memory pressure and is 
> likely to perform better with a higher VM memory size:
>
> Memory – Standby Cache Reserve Bytes:
> Sum of Standby Cache Reserve Bytes and Free and Zero Page List Bytes 
> should be 200 MB or more on systems with 1 GB, and 300 MB or more on 
> systems with 2 GB or more of visible RAM.
>
> Memory – Free & Zero Page List Bytes:
> Sum of Standby Cache Reserve Bytes and Free and Zero Page List Bytes 
> should be 200 MB or more on systems with 1 GB, and 300 MB or more on 
> systems with 2 GB or more of visible RAM.
>
> Memory – Pages Input/Sec:
> Average over a 1-hour period is less than 10.
> "
>
> The biggest take out of it is that we may get #0-pages in windows.
> It's valuable information for the host since KSM will collapse all of 
> them anyway, so there is not point in ballooning them.
> Vadim, can you check if we can extract it from windows?
We can do it from kernel mode by calling ZwQuerySystemInformation 
function. But unfortunately SystemPerformanceInformation class is not 
documented anywhere, except for Gary Nebbet book.
It could be easier to query performance counters from user mode (say, 
some sort of very light service) and pass the information down to 
balloon driver.
Using WMI could be another option, but I need to check whether we can 
read performance  counters from WMI provider at kernel level.
>
>>> +}
>>> +
>>> +/*
>>> + * While most virtqueues communicate guest-initiated requests to 
>>> the hypervisor,
>>> + * the stats queue operates in reverse.  The driver initializes the 
>>> virtqueue
>>> + * with a single buffer.  From that point forward, all 
>>> conversations consist of
>>> + * a hypervisor request (a call to this function) which directs us 
>>> to refill
>>> + * the virtqueue with a fresh stats buffer.
>>> + */
>>> +static void stats_ack(struct virtqueue *vq)
>>> +{
>>> +    struct virtio_balloon *vb;
>>> +    unsigned int len;
>>> +    struct scatterlist sg;
>>> +
>>> +    vb = vq->vq_ops->get_buf(vq,&len);
>>> +    if (!vb)
>>> +        return;
>>> +
>>> +    update_balloon_stats(vb);
>>> +
>>> +    sg_init_one(&sg, vb->stats, sizeof(vb->stats));
>>> +    if (vq->vq_ops->add_buf(vq,&sg, 1, 0, vb)<  0)
>>> +        BUG();
>>> +    vq->vq_ops->kick(vq);
>>> +}
>>> +
>>>   static void virtballoon_changed(struct virtio_device *vdev)
>>>   {
>>>       struct virtio_balloon *vb = vdev->priv;
>>> @@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
>>>   static int virtballoon_probe(struct virtio_device *vdev)
>>>   {
>>>       struct virtio_balloon *vb;
>>> -    struct virtqueue *vqs[2];
>>> -    vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
>>> -    const char *names[] = { "inflate", "deflate" };
>>> -    int err;
>>> +    struct virtqueue *vqs[3];
>>> +    vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, 
>>> stats_ack };
>>> +    const char *names[] = { "inflate", "deflate", "stats" };
>>> +    int err, nvqs;
>>>
>>>       vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
>>>       if (!vb) {
>>> @@ -221,13 +275,28 @@ static int virtballoon_probe(struct 
>>> virtio_device *vdev)
>>>       init_waitqueue_head(&vb->config_change);
>>>       vb->vdev = vdev;
>>>
>>> -    /* We expect two virtqueues. */
>>> -    err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
>>> +    /* We expect two virtqueues: inflate and deflate,
>>> +     * and optionally stat. */
>>> +    nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) 
>>> ? 3 : 2;
>>> +    err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
>>>       if (err)
>>>           goto out_free_vb;
>>>
>>>       vb->inflate_vq = vqs[0];
>>>       vb->deflate_vq = vqs[1];
>>> +    if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>>> +        struct scatterlist sg;
>>> +        vb->stats_vq = vqs[2];
>>> +
>>> +        /*
>>> +         * Prime this virtqueue with one buffer so the hypervisor can
>>> +         * use it to signal us later.
>>> +         */
>>> +        sg_init_one(&sg, vb->stats, sizeof(vb->stats));
>>
>> should be "sizeof vb->stats"
>>
>>> +        if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq,&sg, 1, 0, 
>>> vb)<  0)
>>> +            BUG();
>>> +        vb->stats_vq->vq_ops->kick(vb->stats_vq);
>>> +    }
>>>
>>>       vb->thread = kthread_run(balloon, vb, "vballoon");
>>>       if (IS_ERR(vb->thread)) {
>>> @@ -265,7 +334,9 @@ static void virtballoon_remove(struct 
>>> virtio_device *vdev)
>>>       kfree(vb);
>>>   }
>>>
>>> -static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
>>> +static unsigned int features[] = {
>>> +    VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ,
>>
>> one per line
>>
>>> +};
>>>
>>>   static struct virtio_driver virtio_balloon = {
>>>       .feature_table = features,
>>> diff --git a/include/linux/virtio_balloon.h 
>>> b/include/linux/virtio_balloon.h
>>> index 09d7300..a5c09e6 100644
>>> --- a/include/linux/virtio_balloon.h
>>> +++ b/include/linux/virtio_balloon.h
>>> @@ -6,6 +6,7 @@
>>>
>>>   /* The feature bitmap for virtio balloon */
>>>   #define VIRTIO_BALLOON_F_MUST_TELL_HOST    0 /* Tell before 
>>> reclaiming pages */
>>> +#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
>>
>> align with a tab
>>>
>>>   /* Size of a PFN in the balloon interface. */
>>>   #define VIRTIO_BALLOON_PFN_SHIFT 12
>>> @@ -17,4 +18,19 @@ struct virtio_balloon_config
>>>       /* Number of pages we've actually got in balloon. */
>>>       __le32 actual;
>>>   };
>>> +
>>> +#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped 
>>> in */
>>> +#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped 
>>> out */
>>> +#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
>>> +#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
>>> +#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free 
>>> memory */
>>> +#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
>>
>> this seems to imply the total amount of memory in bytes?
>>
>>> +#define VIRTIO_BALLOON_S_NR       6
>>> +
>>> +struct virtio_balloon_stat
>>> +{
>>> +    __le16 tag;
>>> +    __le64 val;
>>
>> This might create padding issues e.g. when guest
>> is 32 bit and host is 64 bit. Better add padding
>> manually, or pack the structure (but packed structures
>> often cause gcc to generate terrible code).
>>
>>
>>> +};
>>> +
>>>   #endif /* _LINUX_VIRTIO_BALLOON_H */
>>>
>>>
>>> -- 
>>> Thanks,
>>> Adam
>>>
>>> _______________________________________________
>>> Virtualization mailing list
>>> Virtualization@lists.linux-foundation.org
>>> https://lists.linux-foundation.org/mailman/listinfo/virtualization
>> _______________________________________________
>> Virtualization mailing list
>> Virtualization@lists.linux-foundation.org
>> https://lists.linux-foundation.org/mailman/listinfo/virtualization
>
>
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)
  2009-11-23 11:00       ` Dor Laor
  (?)
@ 2009-11-23 11:31       ` Vadim Rozenfeld
  -1 siblings, 0 replies; 33+ messages in thread
From: Vadim Rozenfeld @ 2009-11-23 11:31 UTC (permalink / raw)
  To: dlaor
  Cc: Anthony Liguori, Michael S. Tsirkin, qemu-devel, linux-kernel,
	virtualization, Avi Kivity

On 11/23/2009 01:00 PM, Dor Laor wrote:
> On 11/23/2009 11:44 AM, Michael S. Tsirkin wrote:
>> On Thu, Nov 19, 2009 at 09:19:05AM -0600, Adam Litke wrote:
>>> Rusty and Anthony,
>>> If I've addressed all outstanding issues, please consider this patch 
>>> for
>>> inclusion.  Thanks.
>>>
>>> Changes since V2:
>>>   - Increase stat field size to 64 bits
>>>   - Report all sizes in kb (not pages)
>>>   - Drop anon_pages stat and fix endianness conversion
>>>
>>> Changes since V1:
>>>   - Use a virtqueue instead of the device config space
>>>
>>> When using ballooning to manage overcommitted memory on a host, a 
>>> system for
>>> guests to communicate their memory usage to the host can provide 
>>> information
>>> that will minimize the impact of ballooning on the guests.  The 
>>> current method
>>> employs a daemon running in each guest that communicates memory 
>>> statistics to a
>>> host daemon at a specified time interval.  The host daemon 
>>> aggregates this
>>> information and inflates and/or deflates balloons according to the 
>>> level of
>>> host memory pressure.  This approach is effective but overly complex 
>>> since a
>>> daemon must be installed inside each guest and coordinated to 
>>> communicate with
>>> the host.  A simpler approach is to collect memory statistics in the 
>>> virtio
>>> balloon driver and communicate them directly to the hypervisor.
>>>
>>> This patch enables the guest-side support by adding stats collection 
>>> and
>>> reporting to the virtio balloon driver.
>>>
>>> Signed-off-by: Adam Litke<agl@us.ibm.com>
>>> Cc: Rusty Russell<rusty@rustcorp.com.au>
>>> Cc: Anthony Liguori<anthony@codemonkey.ws>
>>> Cc: virtualization@lists.linux-foundation.org
>>> Cc: linux-kernel@vger.kernel.org
>>
>> that's pretty clean. some comments: I wrote them while off-line, and now
>> that I was going to send it out, I see that there's some overlap with
>> what Rusty wrote.  Still, here it is:
>>
>>> diff --git a/drivers/virtio/virtio_balloon.c 
>>> b/drivers/virtio/virtio_balloon.c
>>> index 200c22f..ebc9d39 100644
>>> --- a/drivers/virtio/virtio_balloon.c
>>> +++ b/drivers/virtio/virtio_balloon.c
>>> @@ -29,7 +29,7 @@
>>>   struct virtio_balloon
>>>   {
>>>       struct virtio_device *vdev;
>>> -    struct virtqueue *inflate_vq, *deflate_vq;
>>> +    struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
>>>
>>>       /* Where the ballooning thread waits for config to change. */
>>>       wait_queue_head_t config_change;
>>> @@ -50,6 +50,9 @@ struct virtio_balloon
>>>       /* The array of pfns we tell the Host about. */
>>>       unsigned int num_pfns;
>>>       u32 pfns[256];
>>> +
>>> +    /* Memory statistics */
>>> +    struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
>>>   };
>>>
>>>   static struct virtio_device_id id_table[] = {
>>> @@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon 
>>> *vb, size_t num)
>>>       }
>>>   }
>>>
>>> +static inline void update_stat(struct virtio_balloon *vb, int idx,
>>> +                __le16 tag, __le64 val)
>>> +{
>>> +    BUG_ON(idx>= VIRTIO_BALLOON_S_NR);
>>> +    vb->stats[idx].tag = cpu_to_le16(tag);
>>> +    vb->stats[idx].val = cpu_to_le64(val);
>>
>> you should do le16_to_cpu etc on __le values.
>> Try running this patch through sparce checker.
>>
>>> +}
>>> +
>>> +#define K(x) ((x)<<  (PAGE_SHIFT - 10))
>>
>> can't this overflow?
>> also, won't it be simpler to just report memory is
>> in bytes, then just x * PAGE_SIZE instead of hairy constants?
>>
>>> +static void update_balloon_stats(struct virtio_balloon *vb)
>>> +{
>>> +    unsigned long events[NR_VM_EVENT_ITEMS];
>>
>> that's about 1/2K worth of stack space on a 64 bit machine.
>> better keep it as part of struct virtio_balloon.
>>
>>> +    struct sysinfo i;
>>> +    int idx = 0;
>>> +
>>> +    all_vm_events(events);
>>> +    si_meminfo(&i);
>>> +
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, 
>>> K(events[PSWPIN]));
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, 
>>> K(events[PSWPOUT]));
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, 
>>> events[PGMAJFAULT]);
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
>>> +    update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));
>
>
> Finally I found some data from our M$ neighbors. This is from 
> http://www.microsoft.com/whdc/system/sysperf/Perf_tun_srv-R2.mspx
>
> "When running Windows in the child partition, you can use the 
> following performance counters within a child partition to identify 
> whether the child partition is experiencing memory pressure and is 
> likely to perform better with a higher VM memory size:
>
> Memory – Standby Cache Reserve Bytes:
> Sum of Standby Cache Reserve Bytes and Free and Zero Page List Bytes 
> should be 200 MB or more on systems with 1 GB, and 300 MB or more on 
> systems with 2 GB or more of visible RAM.
>
> Memory – Free & Zero Page List Bytes:
> Sum of Standby Cache Reserve Bytes and Free and Zero Page List Bytes 
> should be 200 MB or more on systems with 1 GB, and 300 MB or more on 
> systems with 2 GB or more of visible RAM.
>
> Memory – Pages Input/Sec:
> Average over a 1-hour period is less than 10.
> "
>
> The biggest take out of it is that we may get #0-pages in windows.
> It's valuable information for the host since KSM will collapse all of 
> them anyway, so there is not point in ballooning them.
> Vadim, can you check if we can extract it from windows?
We can do it from kernel mode by calling ZwQuerySystemInformation 
function. But unfortunately SystemPerformanceInformation class is not 
documented anywhere, except for Gary Nebbet book.
It could be easier to query performance counters from user mode (say, 
some sort of very light service) and pass the information down to 
balloon driver.
Using WMI could be another option, but I need to check whether we can 
read performance  counters from WMI provider at kernel level.
>
>>> +}
>>> +
>>> +/*
>>> + * While most virtqueues communicate guest-initiated requests to 
>>> the hypervisor,
>>> + * the stats queue operates in reverse.  The driver initializes the 
>>> virtqueue
>>> + * with a single buffer.  From that point forward, all 
>>> conversations consist of
>>> + * a hypervisor request (a call to this function) which directs us 
>>> to refill
>>> + * the virtqueue with a fresh stats buffer.
>>> + */
>>> +static void stats_ack(struct virtqueue *vq)
>>> +{
>>> +    struct virtio_balloon *vb;
>>> +    unsigned int len;
>>> +    struct scatterlist sg;
>>> +
>>> +    vb = vq->vq_ops->get_buf(vq,&len);
>>> +    if (!vb)
>>> +        return;
>>> +
>>> +    update_balloon_stats(vb);
>>> +
>>> +    sg_init_one(&sg, vb->stats, sizeof(vb->stats));
>>> +    if (vq->vq_ops->add_buf(vq,&sg, 1, 0, vb)<  0)
>>> +        BUG();
>>> +    vq->vq_ops->kick(vq);
>>> +}
>>> +
>>>   static void virtballoon_changed(struct virtio_device *vdev)
>>>   {
>>>       struct virtio_balloon *vb = vdev->priv;
>>> @@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
>>>   static int virtballoon_probe(struct virtio_device *vdev)
>>>   {
>>>       struct virtio_balloon *vb;
>>> -    struct virtqueue *vqs[2];
>>> -    vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
>>> -    const char *names[] = { "inflate", "deflate" };
>>> -    int err;
>>> +    struct virtqueue *vqs[3];
>>> +    vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, 
>>> stats_ack };
>>> +    const char *names[] = { "inflate", "deflate", "stats" };
>>> +    int err, nvqs;
>>>
>>>       vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
>>>       if (!vb) {
>>> @@ -221,13 +275,28 @@ static int virtballoon_probe(struct 
>>> virtio_device *vdev)
>>>       init_waitqueue_head(&vb->config_change);
>>>       vb->vdev = vdev;
>>>
>>> -    /* We expect two virtqueues. */
>>> -    err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
>>> +    /* We expect two virtqueues: inflate and deflate,
>>> +     * and optionally stat. */
>>> +    nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) 
>>> ? 3 : 2;
>>> +    err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
>>>       if (err)
>>>           goto out_free_vb;
>>>
>>>       vb->inflate_vq = vqs[0];
>>>       vb->deflate_vq = vqs[1];
>>> +    if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>>> +        struct scatterlist sg;
>>> +        vb->stats_vq = vqs[2];
>>> +
>>> +        /*
>>> +         * Prime this virtqueue with one buffer so the hypervisor can
>>> +         * use it to signal us later.
>>> +         */
>>> +        sg_init_one(&sg, vb->stats, sizeof(vb->stats));
>>
>> should be "sizeof vb->stats"
>>
>>> +        if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq,&sg, 1, 0, 
>>> vb)<  0)
>>> +            BUG();
>>> +        vb->stats_vq->vq_ops->kick(vb->stats_vq);
>>> +    }
>>>
>>>       vb->thread = kthread_run(balloon, vb, "vballoon");
>>>       if (IS_ERR(vb->thread)) {
>>> @@ -265,7 +334,9 @@ static void virtballoon_remove(struct 
>>> virtio_device *vdev)
>>>       kfree(vb);
>>>   }
>>>
>>> -static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
>>> +static unsigned int features[] = {
>>> +    VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ,
>>
>> one per line
>>
>>> +};
>>>
>>>   static struct virtio_driver virtio_balloon = {
>>>       .feature_table = features,
>>> diff --git a/include/linux/virtio_balloon.h 
>>> b/include/linux/virtio_balloon.h
>>> index 09d7300..a5c09e6 100644
>>> --- a/include/linux/virtio_balloon.h
>>> +++ b/include/linux/virtio_balloon.h
>>> @@ -6,6 +6,7 @@
>>>
>>>   /* The feature bitmap for virtio balloon */
>>>   #define VIRTIO_BALLOON_F_MUST_TELL_HOST    0 /* Tell before 
>>> reclaiming pages */
>>> +#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
>>
>> align with a tab
>>>
>>>   /* Size of a PFN in the balloon interface. */
>>>   #define VIRTIO_BALLOON_PFN_SHIFT 12
>>> @@ -17,4 +18,19 @@ struct virtio_balloon_config
>>>       /* Number of pages we've actually got in balloon. */
>>>       __le32 actual;
>>>   };
>>> +
>>> +#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped 
>>> in */
>>> +#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped 
>>> out */
>>> +#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
>>> +#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
>>> +#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free 
>>> memory */
>>> +#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
>>
>> this seems to imply the total amount of memory in bytes?
>>
>>> +#define VIRTIO_BALLOON_S_NR       6
>>> +
>>> +struct virtio_balloon_stat
>>> +{
>>> +    __le16 tag;
>>> +    __le64 val;
>>
>> This might create padding issues e.g. when guest
>> is 32 bit and host is 64 bit. Better add padding
>> manually, or pack the structure (but packed structures
>> often cause gcc to generate terrible code).
>>
>>
>>> +};
>>> +
>>>   #endif /* _LINUX_VIRTIO_BALLOON_H */
>>>
>>>
>>> -- 
>>> Thanks,
>>> Adam
>>>
>>> _______________________________________________
>>> Virtualization mailing list
>>> Virtualization@lists.linux-foundation.org
>>> https://lists.linux-foundation.org/mailman/listinfo/virtualization
>> _______________________________________________
>> Virtualization mailing list
>> Virtualization@lists.linux-foundation.org
>> https://lists.linux-foundation.org/mailman/listinfo/virtualization
>
>
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] Re: virtio: Report new guest memory statistics pertinent to memory ballooning (V4)
  2009-11-19 16:02     ` Avi Kivity
@ 2009-11-23 18:47       ` Anthony Liguori
  0 siblings, 0 replies; 33+ messages in thread
From: Anthony Liguori @ 2009-11-23 18:47 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, qemu-devel, Adam Litke

Avi Kivity wrote:
> The new monitor will support async command completion but even then I 
> don't think we should allow a guest to stop command execution 
> indefinitely (it would tie up resources at the client).

Well, I think we're going to end up pushing this to 0.13 as we're 
quickly approaching the 0.12 freeze.  In that case, let's not bother 
doing two commands and let's rely on adding asynchronous result 
reporting in qmp.

Freezing the monitor isn't really a DoS today as we support multiple 
monitors.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2009-11-23 18:47 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-19 15:06 [Qemu-devel] virtio: Report new guest memory statistics pertinent to memory ballooning (V4) Adam Litke
2009-11-19 15:19 ` virtio: Add memory statistics reporting to the balloon driver (V3) Adam Litke
2009-11-19 15:19   ` [Qemu-devel] " Adam Litke
2009-11-19 15:22   ` Avi Kivity
2009-11-19 15:22     ` Avi Kivity
2009-11-19 15:22     ` [Qemu-devel] " Avi Kivity
2009-11-19 15:58     ` Adam Litke
2009-11-19 15:58       ` [Qemu-devel] " Adam Litke
2009-11-19 16:13       ` Avi Kivity
2009-11-19 16:13       ` Avi Kivity
2009-11-19 16:13         ` [Qemu-devel] " Avi Kivity
2009-11-19 16:29         ` Adam Litke
2009-11-19 16:29           ` [Qemu-devel] " Adam Litke
2009-11-19 16:29         ` Adam Litke
2009-11-19 15:58     ` Adam Litke
2009-11-19 23:53   ` Rusty Russell
2009-11-19 23:53   ` Rusty Russell
2009-11-19 23:53     ` [Qemu-devel] " Rusty Russell
2009-11-23  9:44   ` Michael S. Tsirkin
2009-11-23  9:44   ` Michael S. Tsirkin
2009-11-23  9:44     ` [Qemu-devel] " Michael S. Tsirkin
2009-11-23 11:00     ` Dor Laor
2009-11-23 11:00     ` Dor Laor
2009-11-23 11:00       ` Dor Laor
2009-11-23 11:31       ` Vadim Rozenfeld
2009-11-23 11:31       ` Vadim Rozenfeld
2009-11-23 11:31         ` Vadim Rozenfeld
2009-11-19 15:19 ` Adam Litke
2009-11-19 15:19 ` [Qemu-devel] Re: virtio: Report new guest memory statistics pertinent to memory ballooning (V4) Avi Kivity
2009-11-19 15:56   ` Adam Litke
2009-11-19 16:02     ` Avi Kivity
2009-11-23 18:47       ` Anthony Liguori
2009-11-20  1:24     ` Jamie Lokier

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.