linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC kernel] balloon: speed up inflating/deflating process
@ 2016-05-20  9:59 Liang Li
  2016-05-20 10:32 ` Cornelia Huck
                   ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Liang Li @ 2016-05-20  9:59 UTC (permalink / raw)
  To: mst
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm, Liang Li

The implementation of the current virtio-balloon is not very efficient.
Below is the test result of time spent on inflating the balloon to 3GB of
a 4GB idle guest:

a. allocating pages (6.5%, 103ms)
b. sending PFNs to host (68.3%, 787ms)
c. address translation (6.1%, 96ms)
d. madvise (19%, 300ms)

It takes about 1577ms for the whole inflating process to complete. The
test shows that the bottleneck is stage b and stage d.

If using a bitmap to send the page info instead of the PFNs, we can
reduce the overhead spent on stage b quite a lot. Furthermore, it's
possible to do the address translation and do the madvise with a bulk
of pages, instead of the current page-by-page way, so the overhead of
stage c and stage d can also be reduced a lot.

This patch is the kernel side implementation which is intended to speed
up the inflating & deflating process by adding a new feature to the
virtio-balloon device. And now, inflating the balloon to 3GB of a 4GB
idle guest only takes 175ms, it's about 9 times as fast as before.

TODO: optimize stage a by allocating/freeing a chunk of pages instead
of a single page at a time.

Signed-off-by: Liang Li <liang.z.li@intel.com>
---
 drivers/virtio/virtio_balloon.c     | 199 ++++++++++++++++++++++++++++++++++--
 include/uapi/linux/virtio_balloon.h |   1 +
 mm/page_alloc.c                     |   6 ++
 3 files changed, 198 insertions(+), 8 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 7b6d74f..5330b6f 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -45,6 +45,8 @@ static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
 module_param(oom_pages, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
 
+extern unsigned long get_max_pfn(void);
+
 struct virtio_balloon {
 	struct virtio_device *vdev;
 	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
@@ -62,6 +64,9 @@ struct virtio_balloon {
 
 	/* Number of balloon pages we've told the Host we're not using. */
 	unsigned int num_pages;
+	unsigned long *page_bitmap;
+	unsigned long start_pfn, end_pfn;
+	unsigned long bmap_len;
 	/*
 	 * The pages we've told the Host we're not using are enqueued
 	 * at vb_dev_info->pages list.
@@ -111,15 +116,66 @@ static void balloon_ack(struct virtqueue *vq)
 	wake_up(&vb->acked);
 }
 
+static int balloon_page_bitmap_init(struct virtio_balloon *vb)
+{
+	unsigned long max_pfn, bmap_bytes;
+
+	max_pfn = get_max_pfn();
+	bmap_bytes = ALIGN(max_pfn, BITS_PER_LONG) / BITS_PER_BYTE;
+	if (!vb->page_bitmap)
+		vb->page_bitmap = kzalloc(bmap_bytes, GFP_KERNEL);
+	else {
+		if (bmap_bytes <= vb->bmap_len)
+			memset(vb->page_bitmap, 0, bmap_bytes);
+		else {
+			kfree(vb->page_bitmap);
+			vb->page_bitmap = kzalloc(bmap_bytes, GFP_KERNEL);
+		}
+	}
+	if (!vb->page_bitmap) {
+		dev_err(&vb->vdev->dev, "%s failure: allocate page bitmap\n",
+			 __func__);
+		return -ENOMEM;
+	}
+	vb->bmap_len = bmap_bytes;
+	vb->start_pfn = max_pfn;
+	vb->end_pfn = 0;
+
+	return 0;
+}
+
 static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
 {
-	struct scatterlist sg;
 	unsigned int len;
 
-	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) {
+		u32 page_shift = PAGE_SHIFT;
+		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
+		struct scatterlist sg[5];
+
+		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
+		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
+		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG * sizeof(long);
+
+		sg_init_table(sg, 5);
+		sg_set_buf(&sg[0], &flags, sizeof(flags));
+		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
+		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
+		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
+		sg_set_buf(&sg[4], vb->page_bitmap +
+				 (start_pfn / BITS_PER_LONG), bmap_len);
+		virtqueue_add_outbuf(vq, sg, 5, vb, GFP_KERNEL);
+
+	} else {
+		struct scatterlist sg;
+
+		sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+		/* We should always be able to add one buffer to an
+		* empty queue.
+		*/
+		virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
+	}
 
-	/* We should always be able to add one buffer to an empty queue. */
-	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
 	virtqueue_kick(vq);
 
 	/* When host has read buffer, this completes via balloon_ack */
@@ -137,7 +193,21 @@ static void set_page_pfns(u32 pfns[], struct page *page)
 		pfns[i] = page_to_balloon_pfn(page) + i;
 }
 
-static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
+static void set_page_bitmap(struct virtio_balloon *vb, struct page *page)
+{
+	unsigned int i;
+	unsigned long *bitmap = vb->page_bitmap;
+	unsigned long balloon_pfn = page_to_balloon_pfn(page);
+
+	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
+		set_bit(balloon_pfn + i, bitmap);
+	if (balloon_pfn < vb->start_pfn)
+		vb->start_pfn = balloon_pfn;
+	if (balloon_pfn > vb->end_pfn)
+		vb->end_pfn = balloon_pfn;
+}
+
+static unsigned fill_balloon_pfns(struct virtio_balloon *vb, size_t num)
 {
 	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
 	unsigned num_allocated_pages;
@@ -174,7 +244,104 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
 	return num_allocated_pages;
 }
 
-static void release_pages_balloon(struct virtio_balloon *vb)
+static long fill_balloon_bitmap(struct virtio_balloon *vb, size_t num)
+{
+	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
+	long num_allocated_pages = 0;
+
+	if (balloon_page_bitmap_init(vb) < 0)
+		return num;
+
+	mutex_lock(&vb->balloon_lock);
+	for (vb->num_pfns = 0; vb->num_pfns < num;
+	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
+		struct page *page = balloon_page_enqueue(vb_dev_info);
+
+		if (!page) {
+			dev_info_ratelimited(&vb->vdev->dev,
+					     "Out of puff! Can't get %u pages\n",
+					     VIRTIO_BALLOON_PAGES_PER_PAGE);
+			/* Sleep for at least 1/5 of a second before retry. */
+			msleep(200);
+			break;
+		}
+		set_page_bitmap(vb, page);
+		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
+		if (!virtio_has_feature(vb->vdev,
+					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+			adjust_managed_page_count(page, -1);
+	}
+
+	num_allocated_pages = vb->num_pfns;
+	/* Did we get any? */
+	if (vb->num_pfns != 0)
+		tell_host(vb, vb->inflate_vq);
+	mutex_unlock(&vb->balloon_lock);
+
+	return num_allocated_pages;
+}
+
+static long fill_balloon(struct virtio_balloon *vb, size_t num)
+{
+	long num_allocated_pages;
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP))
+		num_allocated_pages = fill_balloon_bitmap(vb, num);
+	else
+		num_allocated_pages = fill_balloon_pfns(vb, num);
+
+	return num_allocated_pages;
+}
+
+static void release_pages_balloon_bitmap(struct virtio_balloon *vb)
+{
+	unsigned long pfn, offset, size;
+	struct page *page;
+
+	size = min(vb->bmap_len * BITS_PER_BYTE, vb->end_pfn);
+	for (offset = vb->start_pfn; offset < size;
+		 offset = pfn + VIRTIO_BALLOON_PAGES_PER_PAGE) {
+		pfn = find_next_bit(vb->page_bitmap, size, offset);
+		if (pfn < size) {
+			page = balloon_pfn_to_page(pfn);
+			if (!virtio_has_feature(vb->vdev,
+					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+				adjust_managed_page_count(page, 1);
+			put_page(page);
+		}
+	}
+}
+
+static unsigned long leak_balloon_bitmap(struct virtio_balloon *vb, size_t num)
+{
+	unsigned long num_freed_pages = num;
+	struct page *page;
+	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
+
+	if (balloon_page_bitmap_init(vb) < 0)
+		return num_freed_pages;
+
+	mutex_lock(&vb->balloon_lock);
+	for (vb->num_pfns = 0; vb->num_pfns < num;
+	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
+		page = balloon_page_dequeue(vb_dev_info);
+		if (!page)
+			break;
+		set_page_bitmap(vb, page);
+		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
+	}
+
+	num_freed_pages = vb->num_pfns;
+
+	if (vb->num_pfns != 0)
+		tell_host(vb, vb->deflate_vq);
+	release_pages_balloon_bitmap(vb);
+	mutex_unlock(&vb->balloon_lock);
+
+	return num_freed_pages;
+}
+
+static void release_pages_balloon_pfns(struct virtio_balloon *vb)
 {
 	unsigned int i;
 
@@ -188,7 +355,7 @@ static void release_pages_balloon(struct virtio_balloon *vb)
 	}
 }
 
-static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
+static unsigned leak_balloon_pfns(struct virtio_balloon *vb, size_t num)
 {
 	unsigned num_freed_pages;
 	struct page *page;
@@ -215,11 +382,23 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
 	 */
 	if (vb->num_pfns != 0)
 		tell_host(vb, vb->deflate_vq);
-	release_pages_balloon(vb);
+	release_pages_balloon_pfns(vb);
 	mutex_unlock(&vb->balloon_lock);
 	return num_freed_pages;
 }
 
+static long leak_balloon(struct virtio_balloon *vb, size_t num)
+{
+	long num_freed_pages;
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP))
+		num_freed_pages = leak_balloon_bitmap(vb, num);
+	else
+		num_freed_pages = leak_balloon_pfns(vb, num);
+
+	return num_freed_pages;
+}
+
 static inline void update_stat(struct virtio_balloon *vb, int idx,
 			       u16 tag, u64 val)
 {
@@ -510,6 +689,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	spin_lock_init(&vb->stop_update_lock);
 	vb->stop_update = false;
 	vb->num_pages = 0;
+	vb->page_bitmap = NULL;
+	vb->bmap_len = 0;
 	mutex_init(&vb->balloon_lock);
 	init_waitqueue_head(&vb->acked);
 	vb->vdev = vdev;
@@ -567,6 +748,7 @@ static void virtballoon_remove(struct virtio_device *vdev)
 	cancel_work_sync(&vb->update_balloon_stats_work);
 
 	remove_common(vb);
+	kfree(vb->page_bitmap);
 	kfree(vb);
 }
 
@@ -605,6 +787,7 @@ static unsigned int features[] = {
 	VIRTIO_BALLOON_F_MUST_TELL_HOST,
 	VIRTIO_BALLOON_F_STATS_VQ,
 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+	VIRTIO_BALLOON_F_PAGE_BITMAP,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 343d7dd..f78fa47 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -34,6 +34,7 @@
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
 #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
+#define VIRTIO_BALLOON_F_PAGE_BITMAP	3 /* Send page info with bitmap */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c1069ef..74b2fc5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2139,6 +2139,12 @@ void drain_all_pages(struct zone *zone)
 								zone, 1);
 }
 
+unsigned long get_max_pfn(void)
+{
+	return max_pfn;
+}
+EXPORT_SYMBOL(get_max_pfn);
+
 #ifdef CONFIG_HIBERNATION
 
 void mark_free_pages(struct zone *zone)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-20  9:59 [PATCH RFC kernel] balloon: speed up inflating/deflating process Liang Li
@ 2016-05-20 10:32 ` Cornelia Huck
  2016-05-24  7:48   ` Li, Liang Z
  2016-05-20 11:19 ` Paolo Bonzini
  2016-05-20 12:00 ` Michael S. Tsirkin
  2 siblings, 1 reply; 23+ messages in thread
From: Cornelia Huck @ 2016-05-20 10:32 UTC (permalink / raw)
  To: Liang Li
  Cc: mst, kvm, qemu-devel, linux-kernel, virtualization, amit.shah,
	pbonzini, akpm, dgilbert

On Fri, 20 May 2016 17:59:46 +0800
Liang Li <liang.z.li@intel.com> wrote:

> The implementation of the current virtio-balloon is not very efficient,
> Bellow is test result of time spends on inflating the balloon to 3GB of
> a 4GB idle guest:
> 
> a. allocating pages (6.5%, 103ms)
> b. sending PFNs to host (68.3%, 787ms)
> c. address translation (6.1%, 96ms)
> d. madvise (19%, 300ms)
> 
> It takes about 1577ms for the whole inflating process to complete. The
> test shows that the bottle neck is the stage b and stage d.
> 
> If using a bitmap to send the page info instead of the PFNs, we can
> reduce the overhead spends on stage b quite a lot. Furthermore, it's
> possible to do the address translation and do the madvise with a bulk
> of pages, instead of the current page per page way, so the overhead of
> stage c and stage d can also be reduced a lot.
> 
> This patch is the kernel side implementation which is intended to speed
> up the inflating & deflating process by adding a new feature to the
> virtio-balloon device. And now, inflating the balloon to 3GB of a 4GB
> idle guest only takes 175ms, it's about 9 times as fast as before.
> 
> TODO: optimize stage a by allocating/freeing a chunk of pages instead
> of a single page at a time.

Not commenting on the approach, but...

> 
> Signed-off-by: Liang Li <liang.z.li@intel.com>
> ---
>  drivers/virtio/virtio_balloon.c     | 199 ++++++++++++++++++++++++++++++++++--
>  include/uapi/linux/virtio_balloon.h |   1 +
>  mm/page_alloc.c                     |   6 ++
>  3 files changed, 198 insertions(+), 8 deletions(-)
> 

>  static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
>  {
> -	struct scatterlist sg;
>  	unsigned int len;
> 
> -	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> +		u32 page_shift = PAGE_SHIFT;
> +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> +		struct scatterlist sg[5];
> +
> +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG * sizeof(long);
> +
> +		sg_init_table(sg, 5);
> +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> +		sg_set_buf(&sg[4], vb->page_bitmap +
> +				 (start_pfn / BITS_PER_LONG), bmap_len);
> +		virtqueue_add_outbuf(vq, sg, 5, vb, GFP_KERNEL);
> +

...you need to take care of the endianness of the data you put on the
queue, otherwise virtio-1 on big endian won't work. (There's just been
a patch for that problem.)

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-20  9:59 [PATCH RFC kernel] balloon: speed up inflating/deflating process Liang Li
  2016-05-20 10:32 ` Cornelia Huck
@ 2016-05-20 11:19 ` Paolo Bonzini
  2016-05-24  7:51   ` Li, Liang Z
  2016-05-20 12:00 ` Michael S. Tsirkin
  2 siblings, 1 reply; 23+ messages in thread
From: Paolo Bonzini @ 2016-05-20 11:19 UTC (permalink / raw)
  To: Liang Li, mst
  Cc: linux-kernel, qemu-devel, virtualization, akpm, dgilbert, amit.shah, kvm



On 20/05/2016 11:59, Liang Li wrote:
> +
> +		sg_init_table(sg, 5);
> +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));

These four should probably be placed in a single struct and therefore a
single sg entry.  It might even be faster to place it together with the
bitmap, thus avoiding the use of indirect descriptors.

You should also test ballooning of a 64GB guest after filling in the
page cache, not just ballooning of a freshly booted 4GB guest.  This
will give you a much more sparse bitmap.  Still, the improvement in
sending PFNs to the host are impressive.

Thanks,

Paolo

> +		sg_set_buf(&sg[4], vb->page_bitmap +
> +				 (start_pfn / BITS_PER_LONG), bmap_len);
> +		virtqueue_add_outbuf(vq, sg, 5, vb, GFP_KERNEL);

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-20  9:59 [PATCH RFC kernel] balloon: speed up inflating/deflating process Liang Li
  2016-05-20 10:32 ` Cornelia Huck
  2016-05-20 11:19 ` Paolo Bonzini
@ 2016-05-20 12:00 ` Michael S. Tsirkin
  2016-05-24  9:51   ` Li, Liang Z
  2 siblings, 1 reply; 23+ messages in thread
From: Michael S. Tsirkin @ 2016-05-20 12:00 UTC (permalink / raw)
  To: Liang Li
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

On Fri, May 20, 2016 at 05:59:46PM +0800, Liang Li wrote:
> The implementation of the current virtio-balloon is not very efficient,
> Bellow is test result of time spends on inflating the balloon to 3GB of
> a 4GB idle guest:
> 
> a. allocating pages (6.5%, 103ms)
> b. sending PFNs to host (68.3%, 787ms)
> c. address translation (6.1%, 96ms)
> d. madvise (19%, 300ms)
> 
> It takes about 1577ms for the whole inflating process to complete. The
> test shows that the bottle neck is the stage b and stage d.
> 
> If using a bitmap to send the page info instead of the PFNs, we can
> reduce the overhead spends on stage b quite a lot. Furthermore, it's
> possible to do the address translation and do the madvise with a bulk
> of pages, instead of the current page per page way, so the overhead of
> stage c and stage d can also be reduced a lot.
> 
> This patch is the kernel side implementation which is intended to speed
> up the inflating & deflating process by adding a new feature to the
> virtio-balloon device. And now, inflating the balloon to 3GB of a 4GB
> idle guest only takes 175ms, it's about 9 times as fast as before.
> 
> TODO: optimize stage a by allocating/freeing a chunk of pages instead
> of a single page at a time.
> 
> Signed-off-by: Liang Li <liang.z.li@intel.com>
> ---
>  drivers/virtio/virtio_balloon.c     | 199 ++++++++++++++++++++++++++++++++++--
>  include/uapi/linux/virtio_balloon.h |   1 +
>  mm/page_alloc.c                     |   6 ++
>  3 files changed, 198 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 7b6d74f..5330b6f 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -45,6 +45,8 @@ static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
>  module_param(oom_pages, int, S_IRUSR | S_IWUSR);
>  MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
>  
> +extern unsigned long get_max_pfn(void);
> +
>  struct virtio_balloon {
>  	struct virtio_device *vdev;
>  	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
> @@ -62,6 +64,9 @@ struct virtio_balloon {
>  
>  	/* Number of balloon pages we've told the Host we're not using. */
>  	unsigned int num_pages;
> +	unsigned long *page_bitmap;
> +	unsigned long start_pfn, end_pfn;
> +	unsigned long bmap_len;
>  	/*
>  	 * The pages we've told the Host we're not using are enqueued
>  	 * at vb_dev_info->pages list.
> @@ -111,15 +116,66 @@ static void balloon_ack(struct virtqueue *vq)
>  	wake_up(&vb->acked);
>  }
>  
> +static int balloon_page_bitmap_init(struct virtio_balloon *vb)
> +{
> +	unsigned long max_pfn, bmap_bytes;
> +
> +	max_pfn = get_max_pfn();

This is racy. max_pfn could be increased by memory hotplug
after you got it.


> +	bmap_bytes = ALIGN(max_pfn, BITS_PER_LONG) / BITS_PER_BYTE;
> +	if (!vb->page_bitmap)
> +		vb->page_bitmap = kzalloc(bmap_bytes, GFP_KERNEL);

Likely to fail for a huge busy guest.
Why not init on device probe?
this way
	- probe will fail, or we can clear the feature bit
	- free memory is more likely to be available


> +	else {
> +		if (bmap_bytes <= vb->bmap_len)
> +			memset(vb->page_bitmap, 0, bmap_bytes);
> +		else {
> +			kfree(vb->page_bitmap);
> +			vb->page_bitmap = kzalloc(bmap_bytes, GFP_KERNEL);
> +		}
> +	}
> +	if (!vb->page_bitmap) {
> +		dev_err(&vb->vdev->dev, "%s failure: allocate page bitmap\n",
> +			 __func__);
> +		return -ENOMEM;
> +	}
> +	vb->bmap_len = bmap_bytes;
> +	vb->start_pfn = max_pfn;
> +	vb->end_pfn = 0;
> +
> +	return 0;
> +}
> +

>  {
> -	struct scatterlist sg;
>  	unsigned int len;
>  
> -	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> +		u32 page_shift = PAGE_SHIFT;
> +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> +		struct scatterlist sg[5];
> +
> +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG * sizeof(long);
> +
> +		sg_init_table(sg, 5);
> +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> +		sg_set_buf(&sg[4], vb->page_bitmap +
> +				 (start_pfn / BITS_PER_LONG), bmap_len);

This can be pre-initialized, correct?

> +		virtqueue_add_outbuf(vq, sg, 5, vb, GFP_KERNEL);
> +
> +	} else {
> +		struct scatterlist sg;
> +
> +		sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> +		/* We should always be able to add one buffer to an
> +		* empty queue.
> +		*/
> +		virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
> +	}
>  
> -	/* We should always be able to add one buffer to an empty queue. */
> -	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
>  	virtqueue_kick(vq);
>  
>  	/* When host has read buffer, this completes via balloon_ack */
> @@ -137,7 +193,21 @@ static void set_page_pfns(u32 pfns[], struct page *page)
>  		pfns[i] = page_to_balloon_pfn(page) + i;
>  }
>  
> -static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
> +static void set_page_bitmap(struct virtio_balloon *vb, struct page *page)
> +{
> +	unsigned int i;
> +	unsigned long *bitmap = vb->page_bitmap;
> +	unsigned long balloon_pfn = page_to_balloon_pfn(page);
> +
> +	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
> +		set_bit(balloon_pfn + i, bitmap);
> +	if (balloon_pfn < vb->start_pfn)
> +		vb->start_pfn = balloon_pfn;
> +	if (balloon_pfn > vb->end_pfn)
> +		vb->end_pfn = balloon_pfn;
> +}
> +
> +static unsigned fill_balloon_pfns(struct virtio_balloon *vb, size_t num)
>  {
>  	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
>  	unsigned num_allocated_pages;
> @@ -174,7 +244,104 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
>  	return num_allocated_pages;
>  }
>  
> -static void release_pages_balloon(struct virtio_balloon *vb)
> +static long fill_balloon_bitmap(struct virtio_balloon *vb, size_t num)
> +{
> +	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> +	long num_allocated_pages = 0;
> +
> +	if (balloon_page_bitmap_init(vb) < 0)
> +		return num;
> +
> +	mutex_lock(&vb->balloon_lock);
> +	for (vb->num_pfns = 0; vb->num_pfns < num;
> +	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> +		struct page *page = balloon_page_enqueue(vb_dev_info);
> +
> +		if (!page) {
> +			dev_info_ratelimited(&vb->vdev->dev,
> +					     "Out of puff! Can't get %u pages\n",
> +					     VIRTIO_BALLOON_PAGES_PER_PAGE);
> +			/* Sleep for at least 1/5 of a second before retry. */
> +			msleep(200);
> +			break;
> +		}
> +		set_page_bitmap(vb, page);
> +		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
> +		if (!virtio_has_feature(vb->vdev,
> +					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> +			adjust_managed_page_count(page, -1);
> +	}

This is grossly inefficient if you only requested a single page.
And it's also allocating memory very aggressively without ever telling
the host what is going on.
> +
> +	num_allocated_pages = vb->num_pfns;
> +	/* Did we get any? */
> +	if (vb->num_pfns != 0)
> +		tell_host(vb, vb->inflate_vq);
> +	mutex_unlock(&vb->balloon_lock);
> +
> +	return num_allocated_pages;
> +}
> +
> +static long fill_balloon(struct virtio_balloon *vb, size_t num)
> +{
> +	long num_allocated_pages;
> +
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP))
> +		num_allocated_pages = fill_balloon_bitmap(vb, num);
> +	else
> +		num_allocated_pages = fill_balloon_pfns(vb, num);
> +
> +	return num_allocated_pages;
> +}
> +
> +static void release_pages_balloon_bitmap(struct virtio_balloon *vb)
> +{
> +	unsigned long pfn, offset, size;
> +	struct page *page;
> +
> +	size = min(vb->bmap_len * BITS_PER_BYTE, vb->end_pfn);
> +	for (offset = vb->start_pfn; offset < size;
> +		 offset = pfn + VIRTIO_BALLOON_PAGES_PER_PAGE) {
> +		pfn = find_next_bit(vb->page_bitmap, size, offset);
> +		if (pfn < size) {
> +			page = balloon_pfn_to_page(pfn);
> +			if (!virtio_has_feature(vb->vdev,
> +					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> +				adjust_managed_page_count(page, 1);
> +			put_page(page);
> +		}
> +	}
> +}
> +
> +static unsigned long leak_balloon_bitmap(struct virtio_balloon *vb, size_t num)
> +{
> +	unsigned long num_freed_pages = num;
> +	struct page *page;
> +	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> +
> +	if (balloon_page_bitmap_init(vb) < 0)
> +		return num_freed_pages;
> +
> +	mutex_lock(&vb->balloon_lock);
> +	for (vb->num_pfns = 0; vb->num_pfns < num;
> +	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> +		page = balloon_page_dequeue(vb_dev_info);
> +		if (!page)
> +			break;
> +		set_page_bitmap(vb, page);
> +		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
> +	}
> +
> +	num_freed_pages = vb->num_pfns;
> +
> +	if (vb->num_pfns != 0)
> +		tell_host(vb, vb->deflate_vq);
> +	release_pages_balloon_bitmap(vb);
> +	mutex_unlock(&vb->balloon_lock);
> +
> +	return num_freed_pages;
> +}
> +
> +static void release_pages_balloon_pfns(struct virtio_balloon *vb)
>  {
>  	unsigned int i;
>  
> @@ -188,7 +355,7 @@ static void release_pages_balloon(struct virtio_balloon *vb)
>  	}
>  }
>  
> -static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
> +static unsigned leak_balloon_pfns(struct virtio_balloon *vb, size_t num)
>  {
>  	unsigned num_freed_pages;
>  	struct page *page;
> @@ -215,11 +382,23 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
>  	 */
>  	if (vb->num_pfns != 0)
>  		tell_host(vb, vb->deflate_vq);
> -	release_pages_balloon(vb);
> +	release_pages_balloon_pfns(vb);
>  	mutex_unlock(&vb->balloon_lock);
>  	return num_freed_pages;
>  }
>  
> +static long leak_balloon(struct virtio_balloon *vb, size_t num)
> +{
> +	long num_freed_pages;
> +
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP))
> +		num_freed_pages = leak_balloon_bitmap(vb, num);
> +	else
> +		num_freed_pages = leak_balloon_pfns(vb, num);
> +
> +	return num_freed_pages;
> +}
> +
>  static inline void update_stat(struct virtio_balloon *vb, int idx,
>  			       u16 tag, u64 val)
>  {
> @@ -510,6 +689,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
>  	spin_lock_init(&vb->stop_update_lock);
>  	vb->stop_update = false;
>  	vb->num_pages = 0;
> +	vb->page_bitmap = NULL;
> +	vb->bmap_len = 0;
>  	mutex_init(&vb->balloon_lock);
>  	init_waitqueue_head(&vb->acked);
>  	vb->vdev = vdev;
> @@ -567,6 +748,7 @@ static void virtballoon_remove(struct virtio_device *vdev)
>  	cancel_work_sync(&vb->update_balloon_stats_work);
>  
>  	remove_common(vb);
> +	kfree(vb->page_bitmap);
>  	kfree(vb);
>  }
>  
> @@ -605,6 +787,7 @@ static unsigned int features[] = {
>  	VIRTIO_BALLOON_F_MUST_TELL_HOST,
>  	VIRTIO_BALLOON_F_STATS_VQ,
>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
> +	VIRTIO_BALLOON_F_PAGE_BITMAP,
>  };
>  
>  static struct virtio_driver virtio_balloon_driver = {
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index 343d7dd..f78fa47 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -34,6 +34,7 @@
>  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
>  #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
> +#define VIRTIO_BALLOON_F_PAGE_BITMAP	3 /* Send page info with bitmap */
>  
>  /* Size of a PFN in the balloon interface. */
>  #define VIRTIO_BALLOON_PFN_SHIFT 12
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index c1069ef..74b2fc5 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2139,6 +2139,12 @@ void drain_all_pages(struct zone *zone)
>  								zone, 1);
>  }
>  
> +unsigned long get_max_pfn(void)
> +{
> +	return max_pfn;
> +}
> +EXPORT_SYMBOL(get_max_pfn);
> +
>  #ifdef CONFIG_HIBERNATION
>  
>  void mark_free_pages(struct zone *zone)

Suggestion to address all above comments:
	1. allocate a bunch of pages and link them up,
	   calculating the min and the max pfn.
	   if max-min exceeds the allocated bitmap size,
	   tell host.
	2. limit allocated bitmap size to something reasonable.
	   How about 32Kbytes? This is 256kilo bit in the map, which comes
	   out to 1Giga bytes of memory in the balloon.
> -- 
> 1.9.1

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-20 10:32 ` Cornelia Huck
@ 2016-05-24  7:48   ` Li, Liang Z
  0 siblings, 0 replies; 23+ messages in thread
From: Li, Liang Z @ 2016-05-24  7:48 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: mst, kvm, qemu-devel, linux-kernel, virtualization, amit.shah,
	pbonzini, akpm, dgilbert

> On Fri, 20 May 2016 17:59:46 +0800
> Liang Li <liang.z.li@intel.com> wrote:
> 
> > The implementation of the current virtio-balloon is not very
> > efficient, Bellow is test result of time spends on inflating the
> > balloon to 3GB of a 4GB idle guest:
> >
> > a. allocating pages (6.5%, 103ms)
> > b. sending PFNs to host (68.3%, 787ms) c. address translation (6.1%,
> > 96ms) d. madvise (19%, 300ms)
> >
> > It takes about 1577ms for the whole inflating process to complete. The
> > test shows that the bottle neck is the stage b and stage d.
> >
> > If using a bitmap to send the page info instead of the PFNs, we can
> > reduce the overhead spends on stage b quite a lot. Furthermore, it's
> > possible to do the address translation and do the madvise with a bulk
> > of pages, instead of the current page per page way, so the overhead of
> > stage c and stage d can also be reduced a lot.
> >
> > This patch is the kernel side implementation which is intended to
> > speed up the inflating & deflating process by adding a new feature to
> > the virtio-balloon device. And now, inflating the balloon to 3GB of a
> > 4GB idle guest only takes 175ms, it's about 9 times as fast as before.
> >
> > TODO: optimize stage a by allocating/freeing a chunk of pages instead
> > of a single page at a time.
> 
> Not commenting on the approach, but...
> 
> >
> > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > ---
> >  drivers/virtio/virtio_balloon.c     | 199
> ++++++++++++++++++++++++++++++++++--
> >  include/uapi/linux/virtio_balloon.h |   1 +
> >  mm/page_alloc.c                     |   6 ++
> >  3 files changed, 198 insertions(+), 8 deletions(-)
> >
> 
> >  static void tell_host(struct virtio_balloon *vb, struct virtqueue
> > *vq)  {
> > -	struct scatterlist sg;
> >  	unsigned int len;
> >
> > -	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> > +	if (virtio_has_feature(vb->vdev,
> VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> > +		u32 page_shift = PAGE_SHIFT;
> > +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> > +		struct scatterlist sg[5];
> > +
> > +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> > +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> > +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG *
> sizeof(long);
> > +
> > +		sg_init_table(sg, 5);
> > +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> > +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> > +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> > +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> > +		sg_set_buf(&sg[4], vb->page_bitmap +
> > +				 (start_pfn / BITS_PER_LONG), bmap_len);
> > +		virtqueue_add_outbuf(vq, sg, 5, vb, GFP_KERNEL);
> > +
> 
> ...you need to take care of the endianness of the data you put on the queue,
> otherwise virtio-1 on big endian won't work. (There's just been a patch for
> that problem.)

OK, thanks for your reminding.

Liang

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-20 11:19 ` Paolo Bonzini
@ 2016-05-24  7:51   ` Li, Liang Z
  0 siblings, 0 replies; 23+ messages in thread
From: Li, Liang Z @ 2016-05-24  7:51 UTC (permalink / raw)
  To: Paolo Bonzini, mst
  Cc: linux-kernel, qemu-devel, virtualization, akpm, dgilbert, amit.shah, kvm

> On 20/05/2016 11:59, Liang Li wrote:
> > +
> > +		sg_init_table(sg, 5);
> > +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> > +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> > +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> > +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> 
> These four should probably be placed in a single struct and therefore a single
> sg entry.  It might even be faster to place it together with the bitmap, thus
> avoiding the use of indirect descriptors.
> 

Yes, thanks for your suggestion.

> You should also test ballooning of a 64GB guest after filling in the page cache,
> not just ballooning of a freshly booted 4GB guest.  This will give you a much
> more sparse bitmap.  Still, the improvement in sending PFNs to the host are

I will include the test result for that case in next version.


Thanks,

Liang

> impressive.
> 
> Thanks,
> 
> Paolo
> 
> > +		sg_set_buf(&sg[4], vb->page_bitmap +
> > +				 (start_pfn / BITS_PER_LONG), bmap_len);
> > +		virtqueue_add_outbuf(vq, sg, 5, vb, GFP_KERNEL);

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-20 12:00 ` Michael S. Tsirkin
@ 2016-05-24  9:51   ` Li, Liang Z
  2016-05-24  9:55     ` Li, Liang Z
  2016-05-24 10:08     ` Michael S. Tsirkin
  0 siblings, 2 replies; 23+ messages in thread
From: Li, Liang Z @ 2016-05-24  9:51 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

> On Fri, May 20, 2016 at 05:59:46PM +0800, Liang Li wrote:
> > The implementation of the current virtio-balloon is not very
> > efficient, Bellow is test result of time spends on inflating the
> > balloon to 3GB of a 4GB idle guest:
> >
> > a. allocating pages (6.5%, 103ms)
> > b. sending PFNs to host (68.3%, 787ms) c. address translation (6.1%,
> > 96ms) d. madvise (19%, 300ms)
> >
> > It takes about 1577ms for the whole inflating process to complete. The
> > test shows that the bottle neck is the stage b and stage d.
> >
> > If using a bitmap to send the page info instead of the PFNs, we can
> > reduce the overhead spends on stage b quite a lot. Furthermore, it's
> > possible to do the address translation and do the madvise with a bulk
> > of pages, instead of the current page per page way, so the overhead of
> > stage c and stage d can also be reduced a lot.
> >
> > This patch is the kernel side implementation which is intended to
> > speed up the inflating & deflating process by adding a new feature to
> > the virtio-balloon device. And now, inflating the balloon to 3GB of a
> > 4GB idle guest only takes 175ms, it's about 9 times as fast as before.
> >
> > TODO: optimize stage a by allocating/freeing a chunk of pages instead
> > of a single page at a time.
> >
> > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > ---
> >  drivers/virtio/virtio_balloon.c     | 199
> ++++++++++++++++++++++++++++++++++--
> >  include/uapi/linux/virtio_balloon.h |   1 +
> >  mm/page_alloc.c                     |   6 ++
> >  3 files changed, 198 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_balloon.c
> > b/drivers/virtio/virtio_balloon.c index 7b6d74f..5330b6f 100644
> > --- a/drivers/virtio/virtio_balloon.c
> > +++ b/drivers/virtio/virtio_balloon.c
> > @@ -45,6 +45,8 @@ static int oom_pages =
> OOM_VBALLOON_DEFAULT_PAGES;
> > module_param(oom_pages, int, S_IRUSR | S_IWUSR);
> > MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
> >
> > +extern unsigned long get_max_pfn(void);
> > +
> >  struct virtio_balloon {
> >  	struct virtio_device *vdev;
> >  	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; @@ -62,6 +64,9
> > @@ struct virtio_balloon {
> >
> >  	/* Number of balloon pages we've told the Host we're not using. */
> >  	unsigned int num_pages;
> > +	unsigned long *page_bitmap;
> > +	unsigned long start_pfn, end_pfn;
> > +	unsigned long bmap_len;
> >  	/*
> >  	 * The pages we've told the Host we're not using are enqueued
> >  	 * at vb_dev_info->pages list.
> > @@ -111,15 +116,66 @@ static void balloon_ack(struct virtqueue *vq)
> >  	wake_up(&vb->acked);
> >  }
> >
> > +static int balloon_page_bitmap_init(struct virtio_balloon *vb) {
> > +	unsigned long max_pfn, bmap_bytes;
> > +
> > +	max_pfn = get_max_pfn();
> 
> This is racy. max_pfn could be increased by memory hotplug after you got it.
> 
> 
> > +	bmap_bytes = ALIGN(max_pfn, BITS_PER_LONG) / BITS_PER_BYTE;
> > +	if (!vb->page_bitmap)
> > +		vb->page_bitmap = kzalloc(bmap_bytes, GFP_KERNEL);
> 
> Likely to fail for a huge busy guest.
> Why not init on device probe?
> this way
> 	- probe will fail, or we can clear the feature bit
> 	- free memory is more likely to be available
> 

Very good suggestion!

> 
> > +	else {
> > +		if (bmap_bytes <= vb->bmap_len)
> > +			memset(vb->page_bitmap, 0, bmap_bytes);
> > +		else {
> > +			kfree(vb->page_bitmap);
> > +			vb->page_bitmap = kzalloc(bmap_bytes,
> GFP_KERNEL);
> > +		}
> > +	}
> > +	if (!vb->page_bitmap) {
> > +		dev_err(&vb->vdev->dev, "%s failure: allocate page
> bitmap\n",
> > +			 __func__);
> > +		return -ENOMEM;
> > +	}
> > +	vb->bmap_len = bmap_bytes;
> > +	vb->start_pfn = max_pfn;
> > +	vb->end_pfn = 0;
> > +
> > +	return 0;
> > +}
> > +
> 
> >  {
> > -	struct scatterlist sg;
> >  	unsigned int len;
> >
> > -	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> > +	if (virtio_has_feature(vb->vdev,
> VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> > +		u32 page_shift = PAGE_SHIFT;
> > +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> > +		struct scatterlist sg[5];
> > +
> > +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> > +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> > +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG *
> sizeof(long);
> > +
> > +		sg_init_table(sg, 5);
> > +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> > +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> > +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> > +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> > +		sg_set_buf(&sg[4], vb->page_bitmap +
> > +				 (start_pfn / BITS_PER_LONG), bmap_len);
> 
> This can be pre-initialized, correct?

pre-initialized? I don't quite understand what you mean.

> 
> > +		virtqueue_add_outbuf(vq, sg, 5, vb, GFP_KERNEL);
> > +
> > +	} else {
> > +		struct scatterlist sg;
> > +
> > +		sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb-
> >num_pfns);
> > +		/* We should always be able to add one buffer to an
> > +		* empty queue.
> > +		*/
> > +		virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
> > +	}
> >
> > -	/* We should always be able to add one buffer to an empty queue.
> */
> > -	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
> >  	virtqueue_kick(vq);
> >
> >  	/* When host has read buffer, this completes via balloon_ack */ @@
> > -137,7 +193,21 @@ static void set_page_pfns(u32 pfns[], struct page *page)
> >  		pfns[i] = page_to_balloon_pfn(page) + i;  }
> >
> > -static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
> > +static void set_page_bitmap(struct virtio_balloon *vb, struct page
> > +*page) {
> > +	unsigned int i;
> > +	unsigned long *bitmap = vb->page_bitmap;
> > +	unsigned long balloon_pfn = page_to_balloon_pfn(page);
> > +
> > +	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
> > +		set_bit(balloon_pfn + i, bitmap);
> > +	if (balloon_pfn < vb->start_pfn)
> > +		vb->start_pfn = balloon_pfn;
> > +	if (balloon_pfn > vb->end_pfn)
> > +		vb->end_pfn = balloon_pfn;
> > +}
> > +
> > +static unsigned fill_balloon_pfns(struct virtio_balloon *vb, size_t
> > +num)
> >  {
> >  	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> >  	unsigned num_allocated_pages;
> > @@ -174,7 +244,104 @@ static unsigned fill_balloon(struct virtio_balloon
> *vb, size_t num)
> >  	return num_allocated_pages;
> >  }
> >
> > -static void release_pages_balloon(struct virtio_balloon *vb)
> > +static long fill_balloon_bitmap(struct virtio_balloon *vb, size_t
> > +num) {
> > +	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> > +	long num_allocated_pages = 0;
> > +
> > +	if (balloon_page_bitmap_init(vb) < 0)
> > +		return num;
> > +
> > +	mutex_lock(&vb->balloon_lock);
> > +	for (vb->num_pfns = 0; vb->num_pfns < num;
> > +	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> > +		struct page *page = balloon_page_enqueue(vb_dev_info);
> > +
> > +		if (!page) {
> > +			dev_info_ratelimited(&vb->vdev->dev,
> > +					     "Out of puff! Can't get %u
> pages\n",
> > +
> VIRTIO_BALLOON_PAGES_PER_PAGE);
> > +			/* Sleep for at least 1/5 of a second before retry. */
> > +			msleep(200);
> > +			break;
> > +		}
> > +		set_page_bitmap(vb, page);
> > +		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
> > +		if (!virtio_has_feature(vb->vdev,
> > +
> 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> > +			adjust_managed_page_count(page, -1);
> > +	}
> 
> This is grossly inefficient if you only requested a single page.
> And it's also allocating memory very aggressively without ever telling the host
> what is going on.

If only a single page is requested, there is no need to send the entire page bitmap;
this RFC patch has already taken that into account. But it can't work very well when requesting
several pages which are spread across a large range.

> > +
> > +	num_allocated_pages = vb->num_pfns;
> > +	/* Did we get any? */
> > +	if (vb->num_pfns != 0)
> > +		tell_host(vb, vb->inflate_vq);
> > +	mutex_unlock(&vb->balloon_lock);
> > +
> > +	return num_allocated_pages;
> > +}
> > +
> > +static long fill_balloon(struct virtio_balloon *vb, size_t num) {
> > +	long num_allocated_pages;
> > +
> > +	if (virtio_has_feature(vb->vdev,
> VIRTIO_BALLOON_F_PAGE_BITMAP))
> > +		num_allocated_pages = fill_balloon_bitmap(vb, num);
> > +	else
> > +		num_allocated_pages = fill_balloon_pfns(vb, num);
> > +
> > +	return num_allocated_pages;
> > +}
> > +
> > +static void release_pages_balloon_bitmap(struct virtio_balloon *vb) {
> > +	unsigned long pfn, offset, size;
> > +	struct page *page;
> > +
> > +	size = min(vb->bmap_len * BITS_PER_BYTE, vb->end_pfn);
> > +	for (offset = vb->start_pfn; offset < size;
> > +		 offset = pfn + VIRTIO_BALLOON_PAGES_PER_PAGE) {
> > +		pfn = find_next_bit(vb->page_bitmap, size, offset);
> > +		if (pfn < size) {
> > +			page = balloon_pfn_to_page(pfn);
> > +			if (!virtio_has_feature(vb->vdev,
> > +
> 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> > +				adjust_managed_page_count(page, 1);
> > +			put_page(page);
> > +		}
> > +	}
> > +}
> > +
> > +static unsigned long leak_balloon_bitmap(struct virtio_balloon *vb,
> > +size_t num) {
> > +	unsigned long num_freed_pages = num;
> > +	struct page *page;
> > +	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> > +
> > +	if (balloon_page_bitmap_init(vb) < 0)
> > +		return num_freed_pages;
> > +
> > +	mutex_lock(&vb->balloon_lock);
> > +	for (vb->num_pfns = 0; vb->num_pfns < num;
> > +	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> > +		page = balloon_page_dequeue(vb_dev_info);
> > +		if (!page)
> > +			break;
> > +		set_page_bitmap(vb, page);
> > +		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
> > +	}
> > +
> > +	num_freed_pages = vb->num_pfns;
> > +
> > +	if (vb->num_pfns != 0)
> > +		tell_host(vb, vb->deflate_vq);
> > +	release_pages_balloon_bitmap(vb);
> > +	mutex_unlock(&vb->balloon_lock);
> > +
> > +	return num_freed_pages;
> > +}
> > +
> > +static void release_pages_balloon_pfns(struct virtio_balloon *vb)
> >  {
> >  	unsigned int i;
> >
> > @@ -188,7 +355,7 @@ static void release_pages_balloon(struct
> virtio_balloon *vb)
> >  	}
> >  }
> >
> > -static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
> > +static unsigned leak_balloon_pfns(struct virtio_balloon *vb, size_t
> > +num)
> >  {
> >  	unsigned num_freed_pages;
> >  	struct page *page;
> > @@ -215,11 +382,23 @@ static unsigned leak_balloon(struct virtio_balloon
> *vb, size_t num)
> >  	 */
> >  	if (vb->num_pfns != 0)
> >  		tell_host(vb, vb->deflate_vq);
> > -	release_pages_balloon(vb);
> > +	release_pages_balloon_pfns(vb);
> >  	mutex_unlock(&vb->balloon_lock);
> >  	return num_freed_pages;
> >  }
> >
> > +static long leak_balloon(struct virtio_balloon *vb, size_t num) {
> > +	long num_freed_pages;
> > +
> > +	if (virtio_has_feature(vb->vdev,
> VIRTIO_BALLOON_F_PAGE_BITMAP))
> > +		num_freed_pages = leak_balloon_bitmap(vb, num);
> > +	else
> > +		num_freed_pages = leak_balloon_pfns(vb, num);
> > +
> > +	return num_freed_pages;
> > +}
> > +
> >  static inline void update_stat(struct virtio_balloon *vb, int idx,
> >  			       u16 tag, u64 val)
> >  {
> > @@ -510,6 +689,8 @@ static int virtballoon_probe(struct virtio_device
> *vdev)
> >  	spin_lock_init(&vb->stop_update_lock);
> >  	vb->stop_update = false;
> >  	vb->num_pages = 0;
> > +	vb->page_bitmap = NULL;
> > +	vb->bmap_len = 0;
> >  	mutex_init(&vb->balloon_lock);
> >  	init_waitqueue_head(&vb->acked);
> >  	vb->vdev = vdev;
> > @@ -567,6 +748,7 @@ static void virtballoon_remove(struct virtio_device
> *vdev)
> >  	cancel_work_sync(&vb->update_balloon_stats_work);
> >
> >  	remove_common(vb);
> > +	kfree(vb->page_bitmap);
> >  	kfree(vb);
> >  }
> >
> > @@ -605,6 +787,7 @@ static unsigned int features[] = {
> >  	VIRTIO_BALLOON_F_MUST_TELL_HOST,
> >  	VIRTIO_BALLOON_F_STATS_VQ,
> >  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
> > +	VIRTIO_BALLOON_F_PAGE_BITMAP,
> >  };
> >
> >  static struct virtio_driver virtio_balloon_driver = { diff --git
> > a/include/uapi/linux/virtio_balloon.h
> > b/include/uapi/linux/virtio_balloon.h
> > index 343d7dd..f78fa47 100644
> > --- a/include/uapi/linux/virtio_balloon.h
> > +++ b/include/uapi/linux/virtio_balloon.h
> > @@ -34,6 +34,7 @@
> >  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before
> reclaiming pages */
> >  #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue
> */
> >  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon
> on OOM */
> > +#define VIRTIO_BALLOON_F_PAGE_BITMAP	3 /* Send page info
> with bitmap */
> >
> >  /* Size of a PFN in the balloon interface. */  #define
> > VIRTIO_BALLOON_PFN_SHIFT 12 diff --git a/mm/page_alloc.c
> > b/mm/page_alloc.c index c1069ef..74b2fc5 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -2139,6 +2139,12 @@ void drain_all_pages(struct zone *zone)
> >  								zone, 1);
> >  }
> >
> > +unsigned long get_max_pfn(void)
> > +{
> > +	return max_pfn;
> > +}
> > +EXPORT_SYMBOL(get_max_pfn);
> > +
> >  #ifdef CONFIG_HIBERNATION
> >
> >  void mark_free_pages(struct zone *zone)
> 
> Suggestion to address all above comments:
> 	1. allocate a bunch of pages and link them up,
> 	   calculating the min and the max pfn.
> 	   if max-min exceeds the allocated bitmap size,
> 	   tell host.

I am not sure it works well in some cases, e.g. when the allocated pages
are spread across a wide range and max-min > limit is very frequently true.
Then there will be many virtio transmissions, which is bad for the performance
improvement. Right?


> 	2. limit allocated bitmap size to something reasonable.
> 	   How about 32Kbytes? This is 256kilo bit in the map, which comes
> 	   out to 1Giga bytes of memory in the balloon.

So, even if the VM has 1TB of RAM, the page bitmap will only take 32MB of memory.
Maybe it's better to use a big page bitmap to save the pages allocated by the balloon,
and split the big page bitmap into 32K-byte units, then transfer one unit at a time.


Should we use a page bitmap to replace 'vb->pages' ?

How about rolling back to use PFNs if the count of requested pages is a small number?

Liang
> > --
> > 1.9.1
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in the body of
> a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-24  9:51   ` Li, Liang Z
@ 2016-05-24  9:55     ` Li, Liang Z
  2016-05-24 10:08     ` Michael S. Tsirkin
  1 sibling, 0 replies; 23+ messages in thread
From: Li, Liang Z @ 2016-05-24  9:55 UTC (permalink / raw)
  To: Li, Liang Z, Michael S. Tsirkin
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

> > On Fri, May 20, 2016 at 05:59:46PM +0800, Liang Li wrote:
> > > The implementation of the current virtio-balloon is not very
> > > efficient, Bellow is test result of time spends on inflating the
> > > balloon to 3GB of a 4GB idle guest:
> > >
> > > a. allocating pages (6.5%, 103ms)
> > > b. sending PFNs to host (68.3%, 787ms) c. address translation (6.1%,
> > > 96ms) d. madvise (19%, 300ms)
> > >
> > > It takes about 1577ms for the whole inflating process to complete.
> > > The test shows that the bottle neck is the stage b and stage d.
> > >
> > > If using a bitmap to send the page info instead of the PFNs, we can
> > > reduce the overhead spends on stage b quite a lot. Furthermore, it's
> > > possible to do the address translation and do the madvise with a
> > > bulk of pages, instead of the current page per page way, so the
> > > overhead of stage c and stage d can also be reduced a lot.
> > >
> > > This patch is the kernel side implementation which is intended to
> > > speed up the inflating & deflating process by adding a new feature
> > > to the virtio-balloon device. And now, inflating the balloon to 3GB
> > > of a 4GB idle guest only takes 175ms, it's about 9 times as fast as before.
> > >
> > > TODO: optimize stage a by allocating/freeing a chunk of pages
> > > instead of a single page at a time.
> > >
> > > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > > ---
> > >  drivers/virtio/virtio_balloon.c     | 199
> > ++++++++++++++++++++++++++++++++++--
> > >  include/uapi/linux/virtio_balloon.h |   1 +
> > >  mm/page_alloc.c                     |   6 ++
> > >  3 files changed, 198 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/virtio/virtio_balloon.c
> > > b/drivers/virtio/virtio_balloon.c index 7b6d74f..5330b6f 100644
> > > --- a/drivers/virtio/virtio_balloon.c
> > > +++ b/drivers/virtio/virtio_balloon.c
> > > @@ -45,6 +45,8 @@ static int oom_pages =
> > OOM_VBALLOON_DEFAULT_PAGES;
> > > module_param(oom_pages, int, S_IRUSR | S_IWUSR);
> > > MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
> > >
> > > +extern unsigned long get_max_pfn(void);
> > > +
> > >  struct virtio_balloon {
> > >  	struct virtio_device *vdev;
> > >  	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; @@ -62,6
> > > +64,9 @@ struct virtio_balloon {
> > >
> > >  	/* Number of balloon pages we've told the Host we're not using. */
> > >  	unsigned int num_pages;
> > > +	unsigned long *page_bitmap;
> > > +	unsigned long start_pfn, end_pfn;
> > > +	unsigned long bmap_len;
> > >  	/*
> > >  	 * The pages we've told the Host we're not using are enqueued
> > >  	 * at vb_dev_info->pages list.
> > > @@ -111,15 +116,66 @@ static void balloon_ack(struct virtqueue *vq)
> > >  	wake_up(&vb->acked);
> > >  }
> > >
> > > +static int balloon_page_bitmap_init(struct virtio_balloon *vb) {
> > > +	unsigned long max_pfn, bmap_bytes;
> > > +
> > > +	max_pfn = get_max_pfn();
> >
> > This is racy. max_pfn could be increased by memory hotplug after you got it.
> >
> >
> > > +	bmap_bytes = ALIGN(max_pfn, BITS_PER_LONG) / BITS_PER_BYTE;
> > > +	if (!vb->page_bitmap)
> > > +		vb->page_bitmap = kzalloc(bmap_bytes, GFP_KERNEL);
> >
> > Likely to fail for a huge busy guest.
> > Why not init on device probe?
> > this way
> > 	- probe will fail, or we can clear the feature bit
> > 	- free memory is more likely to be available
> >
> 
> Very good suggestion!
> 
> >
> > > +	else {
> > > +		if (bmap_bytes <= vb->bmap_len)
> > > +			memset(vb->page_bitmap, 0, bmap_bytes);
> > > +		else {
> > > +			kfree(vb->page_bitmap);
> > > +			vb->page_bitmap = kzalloc(bmap_bytes,
> > GFP_KERNEL);
> > > +		}
> > > +	}
> > > +	if (!vb->page_bitmap) {
> > > +		dev_err(&vb->vdev->dev, "%s failure: allocate page
> > bitmap\n",
> > > +			 __func__);
> > > +		return -ENOMEM;
> > > +	}
> > > +	vb->bmap_len = bmap_bytes;
> > > +	vb->start_pfn = max_pfn;
> > > +	vb->end_pfn = 0;
> > > +
> > > +	return 0;
> > > +}
> > > +
> >
> > >  {
> > > -	struct scatterlist sg;
> > >  	unsigned int len;
> > >
> > > -	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> > > +	if (virtio_has_feature(vb->vdev,
> > VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> > > +		u32 page_shift = PAGE_SHIFT;
> > > +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> > > +		struct scatterlist sg[5];
> > > +
> > > +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> > > +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> > > +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG *
> > sizeof(long);
> > > +
> > > +		sg_init_table(sg, 5);
> > > +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> > > +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> > > +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> > > +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> > > +		sg_set_buf(&sg[4], vb->page_bitmap +
> > > +				 (start_pfn / BITS_PER_LONG), bmap_len);
> >
> > This can be pre-initialized, correct?
> 
> pre-initialized? I am not quite understand your mean.
> 
> >
> > > +		virtqueue_add_outbuf(vq, sg, 5, vb, GFP_KERNEL);
> > > +
> > > +	} else {
> > > +		struct scatterlist sg;
> > > +
> > > +		sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb-
> > >num_pfns);
> > > +		/* We should always be able to add one buffer to an
> > > +		* empty queue.
> > > +		*/
> > > +		virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
> > > +	}
> > >
> > > -	/* We should always be able to add one buffer to an empty queue.
> > */
> > > -	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
> > >  	virtqueue_kick(vq);
> > >
> > >  	/* When host has read buffer, this completes via balloon_ack */ @@
> > > -137,7 +193,21 @@ static void set_page_pfns(u32 pfns[], struct page
> *page)
> > >  		pfns[i] = page_to_balloon_pfn(page) + i;  }
> > >
> > > -static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
> > > +static void set_page_bitmap(struct virtio_balloon *vb, struct page
> > > +*page) {
> > > +	unsigned int i;
> > > +	unsigned long *bitmap = vb->page_bitmap;
> > > +	unsigned long balloon_pfn = page_to_balloon_pfn(page);
> > > +
> > > +	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
> > > +		set_bit(balloon_pfn + i, bitmap);
> > > +	if (balloon_pfn < vb->start_pfn)
> > > +		vb->start_pfn = balloon_pfn;
> > > +	if (balloon_pfn > vb->end_pfn)
> > > +		vb->end_pfn = balloon_pfn;
> > > +}
> > > +
> > > +static unsigned fill_balloon_pfns(struct virtio_balloon *vb, size_t
> > > +num)
> > >  {
> > >  	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> > >  	unsigned num_allocated_pages;
> > > @@ -174,7 +244,104 @@ static unsigned fill_balloon(struct
> > > virtio_balloon
> > *vb, size_t num)
> > >  	return num_allocated_pages;
> > >  }
> > >
> > > -static void release_pages_balloon(struct virtio_balloon *vb)
> > > +static long fill_balloon_bitmap(struct virtio_balloon *vb, size_t
> > > +num) {
> > > +	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> > > +	long num_allocated_pages = 0;
> > > +
> > > +	if (balloon_page_bitmap_init(vb) < 0)
> > > +		return num;
> > > +
> > > +	mutex_lock(&vb->balloon_lock);
> > > +	for (vb->num_pfns = 0; vb->num_pfns < num;
> > > +	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> > > +		struct page *page = balloon_page_enqueue(vb_dev_info);
> > > +
> > > +		if (!page) {
> > > +			dev_info_ratelimited(&vb->vdev->dev,
> > > +					     "Out of puff! Can't get %u
> > pages\n",
> > > +
> > VIRTIO_BALLOON_PAGES_PER_PAGE);
> > > +			/* Sleep for at least 1/5 of a second before retry. */
> > > +			msleep(200);
> > > +			break;
> > > +		}
> > > +		set_page_bitmap(vb, page);
> > > +		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
> > > +		if (!virtio_has_feature(vb->vdev,
> > > +
> > 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> > > +			adjust_managed_page_count(page, -1);
> > > +	}
> >
> > This is grossly inefficient if you only requested a single page.
> > And it's also allocating memory very aggressively without ever telling
> > the host what is going on.
> 
> If only requested a single page, there is no need  to send the entire page
> bitmap, This RFC patch has already considered about this. But it can works

s/can/can't
> very well if requesting several pages  which across a large range.
> 
> > > +
> > > +	num_allocated_pages = vb->num_pfns;
> > > +	/* Did we get any? */
> > > +	if (vb->num_pfns != 0)
> > > +		tell_host(vb, vb->inflate_vq);
> > > +	mutex_unlock(&vb->balloon_lock);
> > > +
> > > +	return num_allocated_pages;
> > > +}
> > > +
> > > +static long fill_balloon(struct virtio_balloon *vb, size_t num) {
> > > +	long num_allocated_pages;
> > > +
> > > +	if (virtio_has_feature(vb->vdev,
> > VIRTIO_BALLOON_F_PAGE_BITMAP))
> > > +		num_allocated_pages = fill_balloon_bitmap(vb, num);
> > > +	else
> > > +		num_allocated_pages = fill_balloon_pfns(vb, num);
> > > +
> > > +	return num_allocated_pages;
> > > +}
> > > +
> > > +static void release_pages_balloon_bitmap(struct virtio_balloon *vb) {
> > > +	unsigned long pfn, offset, size;
> > > +	struct page *page;
> > > +
> > > +	size = min(vb->bmap_len * BITS_PER_BYTE, vb->end_pfn);
> > > +	for (offset = vb->start_pfn; offset < size;
> > > +		 offset = pfn + VIRTIO_BALLOON_PAGES_PER_PAGE) {
> > > +		pfn = find_next_bit(vb->page_bitmap, size, offset);
> > > +		if (pfn < size) {
> > > +			page = balloon_pfn_to_page(pfn);
> > > +			if (!virtio_has_feature(vb->vdev,
> > > +
> > 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> > > +				adjust_managed_page_count(page, 1);
> > > +			put_page(page);
> > > +		}
> > > +	}
> > > +}
> > > +
> > > +static unsigned long leak_balloon_bitmap(struct virtio_balloon *vb,
> > > +size_t num) {
> > > +	unsigned long num_freed_pages = num;
> > > +	struct page *page;
> > > +	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> > > +
> > > +	if (balloon_page_bitmap_init(vb) < 0)
> > > +		return num_freed_pages;
> > > +
> > > +	mutex_lock(&vb->balloon_lock);
> > > +	for (vb->num_pfns = 0; vb->num_pfns < num;
> > > +	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> > > +		page = balloon_page_dequeue(vb_dev_info);
> > > +		if (!page)
> > > +			break;
> > > +		set_page_bitmap(vb, page);
> > > +		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
> > > +	}
> > > +
> > > +	num_freed_pages = vb->num_pfns;
> > > +
> > > +	if (vb->num_pfns != 0)
> > > +		tell_host(vb, vb->deflate_vq);
> > > +	release_pages_balloon_bitmap(vb);
> > > +	mutex_unlock(&vb->balloon_lock);
> > > +
> > > +	return num_freed_pages;
> > > +}
> > > +
> > > +static void release_pages_balloon_pfns(struct virtio_balloon *vb)
> > >  {
> > >  	unsigned int i;
> > >
> > > @@ -188,7 +355,7 @@ static void release_pages_balloon(struct
> > virtio_balloon *vb)
> > >  	}
> > >  }
> > >
> > > -static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
> > > +static unsigned leak_balloon_pfns(struct virtio_balloon *vb, size_t
> > > +num)
> > >  {
> > >  	unsigned num_freed_pages;
> > >  	struct page *page;
> > > @@ -215,11 +382,23 @@ static unsigned leak_balloon(struct
> > > virtio_balloon
> > *vb, size_t num)
> > >  	 */
> > >  	if (vb->num_pfns != 0)
> > >  		tell_host(vb, vb->deflate_vq);
> > > -	release_pages_balloon(vb);
> > > +	release_pages_balloon_pfns(vb);
> > >  	mutex_unlock(&vb->balloon_lock);
> > >  	return num_freed_pages;
> > >  }
> > >
> > > +static long leak_balloon(struct virtio_balloon *vb, size_t num) {
> > > +	long num_freed_pages;
> > > +
> > > +	if (virtio_has_feature(vb->vdev,
> > VIRTIO_BALLOON_F_PAGE_BITMAP))
> > > +		num_freed_pages = leak_balloon_bitmap(vb, num);
> > > +	else
> > > +		num_freed_pages = leak_balloon_pfns(vb, num);
> > > +
> > > +	return num_freed_pages;
> > > +}
> > > +
> > >  static inline void update_stat(struct virtio_balloon *vb, int idx,
> > >  			       u16 tag, u64 val)
> > >  {
> > > @@ -510,6 +689,8 @@ static int virtballoon_probe(struct
> > > virtio_device
> > *vdev)
> > >  	spin_lock_init(&vb->stop_update_lock);
> > >  	vb->stop_update = false;
> > >  	vb->num_pages = 0;
> > > +	vb->page_bitmap = NULL;
> > > +	vb->bmap_len = 0;
> > >  	mutex_init(&vb->balloon_lock);
> > >  	init_waitqueue_head(&vb->acked);
> > >  	vb->vdev = vdev;
> > > @@ -567,6 +748,7 @@ static void virtballoon_remove(struct
> > > virtio_device
> > *vdev)
> > >  	cancel_work_sync(&vb->update_balloon_stats_work);
> > >
> > >  	remove_common(vb);
> > > +	kfree(vb->page_bitmap);
> > >  	kfree(vb);
> > >  }
> > >
> > > @@ -605,6 +787,7 @@ static unsigned int features[] = {
> > >  	VIRTIO_BALLOON_F_MUST_TELL_HOST,
> > >  	VIRTIO_BALLOON_F_STATS_VQ,
> > >  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
> > > +	VIRTIO_BALLOON_F_PAGE_BITMAP,
> > >  };
> > >
> > >  static struct virtio_driver virtio_balloon_driver = { diff --git
> > > a/include/uapi/linux/virtio_balloon.h
> > > b/include/uapi/linux/virtio_balloon.h
> > > index 343d7dd..f78fa47 100644
> > > --- a/include/uapi/linux/virtio_balloon.h
> > > +++ b/include/uapi/linux/virtio_balloon.h
> > > @@ -34,6 +34,7 @@
> > >  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before
> > reclaiming pages */
> > >  #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue
> > */
> > >  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon
> > on OOM */
> > > +#define VIRTIO_BALLOON_F_PAGE_BITMAP	3 /* Send page info
> > with bitmap */
> > >
> > >  /* Size of a PFN in the balloon interface. */  #define
> > > VIRTIO_BALLOON_PFN_SHIFT 12 diff --git a/mm/page_alloc.c
> > > b/mm/page_alloc.c index c1069ef..74b2fc5 100644
> > > --- a/mm/page_alloc.c
> > > +++ b/mm/page_alloc.c
> > > @@ -2139,6 +2139,12 @@ void drain_all_pages(struct zone *zone)
> > >  								zone, 1);
> > >  }
> > >
> > > +unsigned long get_max_pfn(void)
> > > +{
> > > +	return max_pfn;
> > > +}
> > > +EXPORT_SYMBOL(get_max_pfn);
> > > +
> > >  #ifdef CONFIG_HIBERNATION
> > >
> > >  void mark_free_pages(struct zone *zone)
> >
> > Suggestion to address all above comments:
> > 	1. allocate a bunch of pages and link them up,
> > 	   calculating the min and the max pfn.
> > 	   if max-min exceeds the allocated bitmap size,
> > 	   tell host.
> 
> I am not sure if it works well in some cases, e.g. The allocated pages are
> across a wide range and the max-min > limit is very frequently to be true.
> Then, there will be many times of virtio transmission and it's bad for
> performance improvement. Right?
> 
> 
> > 	2. limit allocated bitmap size to something reasonable.
> > 	   How about 32Kbytes? This is 256kilo bit in the map, which comes
> > 	   out to 1Giga bytes of memory in the balloon.
> 
> So, even the VM has 1TB of RAM, the page bitmap will take 32MB of memory.
> Maybe it's better to use a big page bitmap the save the pages allocated by
> balloon,
> and split the big page bitmap to 32K bytes unit, then transfer one unit at a
> time.
> 
> 
> Should we use a page bitmap to replace 'vb->pages' ?
> 
> How about rolling back to use PFNs if the count of requested pages is a small
> number?
> 
> Liang
> > > --
> > > 1.9.1
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in the body
> of
> > a message to majordomo@vger.kernel.org More majordomo info at
> > http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-24  9:51   ` Li, Liang Z
  2016-05-24  9:55     ` Li, Liang Z
@ 2016-05-24 10:08     ` Michael S. Tsirkin
  2016-05-24 10:38       ` Li, Liang Z
  2016-05-25  8:48       ` Li, Liang Z
  1 sibling, 2 replies; 23+ messages in thread
From: Michael S. Tsirkin @ 2016-05-24 10:08 UTC (permalink / raw)
  To: Li, Liang Z
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

On Tue, May 24, 2016 at 09:51:46AM +0000, Li, Liang Z wrote:
> > On Fri, May 20, 2016 at 05:59:46PM +0800, Liang Li wrote:
> > > The implementation of the current virtio-balloon is not very
> > > efficient, Bellow is test result of time spends on inflating the
> > > balloon to 3GB of a 4GB idle guest:
> > >
> > > a. allocating pages (6.5%, 103ms)
> > > b. sending PFNs to host (68.3%, 787ms) c. address translation (6.1%,
> > > 96ms) d. madvise (19%, 300ms)
> > >
> > > It takes about 1577ms for the whole inflating process to complete. The
> > > test shows that the bottle neck is the stage b and stage d.
> > >
> > > If using a bitmap to send the page info instead of the PFNs, we can
> > > reduce the overhead spends on stage b quite a lot. Furthermore, it's
> > > possible to do the address translation and do the madvise with a bulk
> > > of pages, instead of the current page per page way, so the overhead of
> > > stage c and stage d can also be reduced a lot.
> > >
> > > This patch is the kernel side implementation which is intended to
> > > speed up the inflating & deflating process by adding a new feature to
> > > the virtio-balloon device. And now, inflating the balloon to 3GB of a
> > > 4GB idle guest only takes 175ms, it's about 9 times as fast as before.
> > >
> > > TODO: optimize stage a by allocating/freeing a chunk of pages instead
> > > of a single page at a time.
> > >
> > > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > > ---
> > >  drivers/virtio/virtio_balloon.c     | 199
> > ++++++++++++++++++++++++++++++++++--
> > >  include/uapi/linux/virtio_balloon.h |   1 +
> > >  mm/page_alloc.c                     |   6 ++
> > >  3 files changed, 198 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/virtio/virtio_balloon.c
> > > b/drivers/virtio/virtio_balloon.c index 7b6d74f..5330b6f 100644
> > > --- a/drivers/virtio/virtio_balloon.c
> > > +++ b/drivers/virtio/virtio_balloon.c
> > > @@ -45,6 +45,8 @@ static int oom_pages =
> > OOM_VBALLOON_DEFAULT_PAGES;
> > > module_param(oom_pages, int, S_IRUSR | S_IWUSR);
> > > MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
> > >
> > > +extern unsigned long get_max_pfn(void);
> > > +
> > >  struct virtio_balloon {
> > >  	struct virtio_device *vdev;
> > >  	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; @@ -62,6 +64,9
> > > @@ struct virtio_balloon {
> > >
> > >  	/* Number of balloon pages we've told the Host we're not using. */
> > >  	unsigned int num_pages;
> > > +	unsigned long *page_bitmap;
> > > +	unsigned long start_pfn, end_pfn;
> > > +	unsigned long bmap_len;
> > >  	/*
> > >  	 * The pages we've told the Host we're not using are enqueued
> > >  	 * at vb_dev_info->pages list.
> > > @@ -111,15 +116,66 @@ static void balloon_ack(struct virtqueue *vq)
> > >  	wake_up(&vb->acked);
> > >  }
> > >
> > > +static int balloon_page_bitmap_init(struct virtio_balloon *vb) {
> > > +	unsigned long max_pfn, bmap_bytes;
> > > +
> > > +	max_pfn = get_max_pfn();
> > 
> > This is racy. max_pfn could be increased by memory hotplug after you got it.
> > 
> > 
> > > +	bmap_bytes = ALIGN(max_pfn, BITS_PER_LONG) / BITS_PER_BYTE;
> > > +	if (!vb->page_bitmap)
> > > +		vb->page_bitmap = kzalloc(bmap_bytes, GFP_KERNEL);
> > 
> > Likely to fail for a huge busy guest.
> > Why not init on device probe?
> > this way
> > 	- probe will fail, or we can clear the feature bit
> > 	- free memory is more likely to be available
> > 
> 
> Very good suggestion!
> 
> > 
> > > +	else {
> > > +		if (bmap_bytes <= vb->bmap_len)
> > > +			memset(vb->page_bitmap, 0, bmap_bytes);
> > > +		else {
> > > +			kfree(vb->page_bitmap);
> > > +			vb->page_bitmap = kzalloc(bmap_bytes,
> > GFP_KERNEL);
> > > +		}
> > > +	}
> > > +	if (!vb->page_bitmap) {
> > > +		dev_err(&vb->vdev->dev, "%s failure: allocate page
> > bitmap\n",
> > > +			 __func__);
> > > +		return -ENOMEM;
> > > +	}
> > > +	vb->bmap_len = bmap_bytes;
> > > +	vb->start_pfn = max_pfn;
> > > +	vb->end_pfn = 0;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > 
> > >  {
> > > -	struct scatterlist sg;
> > >  	unsigned int len;
> > >
> > > -	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> > > +	if (virtio_has_feature(vb->vdev,
> > VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> > > +		u32 page_shift = PAGE_SHIFT;
> > > +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> > > +		struct scatterlist sg[5];
> > > +
> > > +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> > > +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> > > +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG *
> > sizeof(long);
> > > +
> > > +		sg_init_table(sg, 5);
> > > +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> > > +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> > > +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> > > +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> > > +		sg_set_buf(&sg[4], vb->page_bitmap +
> > > +				 (start_pfn / BITS_PER_LONG), bmap_len);
> > 
> > This can be pre-initialized, correct?
> 
> pre-initialized? I am not quite understand your mean.

I think you can maintain sg as part of device state
and init sg with the bitmap.

> > 
> > > +		virtqueue_add_outbuf(vq, sg, 5, vb, GFP_KERNEL);
> > > +
> > > +	} else {
> > > +		struct scatterlist sg;
> > > +
> > > +		sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb-
> > >num_pfns);
> > > +		/* We should always be able to add one buffer to an
> > > +		* empty queue.
> > > +		*/
> > > +		virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
> > > +	}
> > >
> > > -	/* We should always be able to add one buffer to an empty queue.
> > */
> > > -	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
> > >  	virtqueue_kick(vq);
> > >
> > >  	/* When host has read buffer, this completes via balloon_ack */ @@
> > > -137,7 +193,21 @@ static void set_page_pfns(u32 pfns[], struct page *page)
> > >  		pfns[i] = page_to_balloon_pfn(page) + i;  }
> > >
> > > -static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
> > > +static void set_page_bitmap(struct virtio_balloon *vb, struct page
> > > +*page) {
> > > +	unsigned int i;
> > > +	unsigned long *bitmap = vb->page_bitmap;
> > > +	unsigned long balloon_pfn = page_to_balloon_pfn(page);
> > > +
> > > +	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
> > > +		set_bit(balloon_pfn + i, bitmap);
> > > +	if (balloon_pfn < vb->start_pfn)
> > > +		vb->start_pfn = balloon_pfn;
> > > +	if (balloon_pfn > vb->end_pfn)
> > > +		vb->end_pfn = balloon_pfn;
> > > +}
> > > +
> > > +static unsigned fill_balloon_pfns(struct virtio_balloon *vb, size_t
> > > +num)
> > >  {
> > >  	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> > >  	unsigned num_allocated_pages;
> > > @@ -174,7 +244,104 @@ static unsigned fill_balloon(struct virtio_balloon
> > *vb, size_t num)
> > >  	return num_allocated_pages;
> > >  }
> > >
> > > -static void release_pages_balloon(struct virtio_balloon *vb)
> > > +static long fill_balloon_bitmap(struct virtio_balloon *vb, size_t
> > > +num) {
> > > +	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> > > +	long num_allocated_pages = 0;
> > > +
> > > +	if (balloon_page_bitmap_init(vb) < 0)
> > > +		return num;
> > > +
> > > +	mutex_lock(&vb->balloon_lock);
> > > +	for (vb->num_pfns = 0; vb->num_pfns < num;
> > > +	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> > > +		struct page *page = balloon_page_enqueue(vb_dev_info);
> > > +
> > > +		if (!page) {
> > > +			dev_info_ratelimited(&vb->vdev->dev,
> > > +					     "Out of puff! Can't get %u
> > pages\n",
> > > +
> > VIRTIO_BALLOON_PAGES_PER_PAGE);
> > > +			/* Sleep for at least 1/5 of a second before retry. */
> > > +			msleep(200);
> > > +			break;
> > > +		}
> > > +		set_page_bitmap(vb, page);
> > > +		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
> > > +		if (!virtio_has_feature(vb->vdev,
> > > +
> > 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> > > +			adjust_managed_page_count(page, -1);
> > > +	}
> > 
> > This is grossly inefficient if you only requested a single page.
> > And it's also allocating memory very aggressively without ever telling the host
> > what is going on.
> 
> If only requested a single page, there is no need  to send the entire page bitmap,
> This RFC patch has already considered about this.

where's that addressed in code?

> But it can works very well if requesting
> several pages  which across a large range.

Some kind of limit on range would make sense though.
It need not cover max pfn.

> > > +
> > > +	num_allocated_pages = vb->num_pfns;
> > > +	/* Did we get any? */
> > > +	if (vb->num_pfns != 0)
> > > +		tell_host(vb, vb->inflate_vq);
> > > +	mutex_unlock(&vb->balloon_lock);
> > > +
> > > +	return num_allocated_pages;
> > > +}
> > > +
> > > +static long fill_balloon(struct virtio_balloon *vb, size_t num) {
> > > +	long num_allocated_pages;
> > > +
> > > +	if (virtio_has_feature(vb->vdev,
> > VIRTIO_BALLOON_F_PAGE_BITMAP))
> > > +		num_allocated_pages = fill_balloon_bitmap(vb, num);
> > > +	else
> > > +		num_allocated_pages = fill_balloon_pfns(vb, num);
> > > +
> > > +	return num_allocated_pages;
> > > +}
> > > +
> > > +static void release_pages_balloon_bitmap(struct virtio_balloon *vb) {
> > > +	unsigned long pfn, offset, size;
> > > +	struct page *page;
> > > +
> > > +	size = min(vb->bmap_len * BITS_PER_BYTE, vb->end_pfn);
> > > +	for (offset = vb->start_pfn; offset < size;
> > > +		 offset = pfn + VIRTIO_BALLOON_PAGES_PER_PAGE) {
> > > +		pfn = find_next_bit(vb->page_bitmap, size, offset);
> > > +		if (pfn < size) {
> > > +			page = balloon_pfn_to_page(pfn);
> > > +			if (!virtio_has_feature(vb->vdev,
> > > +
> > 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> > > +				adjust_managed_page_count(page, 1);
> > > +			put_page(page);
> > > +		}
> > > +	}
> > > +}
> > > +
> > > +static unsigned long leak_balloon_bitmap(struct virtio_balloon *vb,
> > > +size_t num) {
> > > +	unsigned long num_freed_pages = num;
> > > +	struct page *page;
> > > +	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> > > +
> > > +	if (balloon_page_bitmap_init(vb) < 0)
> > > +		return num_freed_pages;
> > > +
> > > +	mutex_lock(&vb->balloon_lock);
> > > +	for (vb->num_pfns = 0; vb->num_pfns < num;
> > > +	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> > > +		page = balloon_page_dequeue(vb_dev_info);
> > > +		if (!page)
> > > +			break;
> > > +		set_page_bitmap(vb, page);
> > > +		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
> > > +	}
> > > +
> > > +	num_freed_pages = vb->num_pfns;
> > > +
> > > +	if (vb->num_pfns != 0)
> > > +		tell_host(vb, vb->deflate_vq);
> > > +	release_pages_balloon_bitmap(vb);
> > > +	mutex_unlock(&vb->balloon_lock);
> > > +
> > > +	return num_freed_pages;
> > > +}
> > > +
> > > +static void release_pages_balloon_pfns(struct virtio_balloon *vb)
> > >  {
> > >  	unsigned int i;
> > >
> > > @@ -188,7 +355,7 @@ static void release_pages_balloon(struct
> > virtio_balloon *vb)
> > >  	}
> > >  }
> > >
> > > -static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
> > > +static unsigned leak_balloon_pfns(struct virtio_balloon *vb, size_t
> > > +num)
> > >  {
> > >  	unsigned num_freed_pages;
> > >  	struct page *page;
> > > @@ -215,11 +382,23 @@ static unsigned leak_balloon(struct virtio_balloon
> > *vb, size_t num)
> > >  	 */
> > >  	if (vb->num_pfns != 0)
> > >  		tell_host(vb, vb->deflate_vq);
> > > -	release_pages_balloon(vb);
> > > +	release_pages_balloon_pfns(vb);
> > >  	mutex_unlock(&vb->balloon_lock);
> > >  	return num_freed_pages;
> > >  }
> > >
> > > +static long leak_balloon(struct virtio_balloon *vb, size_t num) {
> > > +	long num_freed_pages;
> > > +
> > > +	if (virtio_has_feature(vb->vdev,
> > VIRTIO_BALLOON_F_PAGE_BITMAP))
> > > +		num_freed_pages = leak_balloon_bitmap(vb, num);
> > > +	else
> > > +		num_freed_pages = leak_balloon_pfns(vb, num);
> > > +
> > > +	return num_freed_pages;
> > > +}
> > > +
> > >  static inline void update_stat(struct virtio_balloon *vb, int idx,
> > >  			       u16 tag, u64 val)
> > >  {
> > > @@ -510,6 +689,8 @@ static int virtballoon_probe(struct virtio_device
> > *vdev)
> > >  	spin_lock_init(&vb->stop_update_lock);
> > >  	vb->stop_update = false;
> > >  	vb->num_pages = 0;
> > > +	vb->page_bitmap = NULL;
> > > +	vb->bmap_len = 0;
> > >  	mutex_init(&vb->balloon_lock);
> > >  	init_waitqueue_head(&vb->acked);
> > >  	vb->vdev = vdev;
> > > @@ -567,6 +748,7 @@ static void virtballoon_remove(struct virtio_device
> > *vdev)
> > >  	cancel_work_sync(&vb->update_balloon_stats_work);
> > >
> > >  	remove_common(vb);
> > > +	kfree(vb->page_bitmap);
> > >  	kfree(vb);
> > >  }
> > >
> > > @@ -605,6 +787,7 @@ static unsigned int features[] = {
> > >  	VIRTIO_BALLOON_F_MUST_TELL_HOST,
> > >  	VIRTIO_BALLOON_F_STATS_VQ,
> > >  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
> > > +	VIRTIO_BALLOON_F_PAGE_BITMAP,
> > >  };
> > >
> > >  static struct virtio_driver virtio_balloon_driver = { diff --git
> > > a/include/uapi/linux/virtio_balloon.h
> > > b/include/uapi/linux/virtio_balloon.h
> > > index 343d7dd..f78fa47 100644
> > > --- a/include/uapi/linux/virtio_balloon.h
> > > +++ b/include/uapi/linux/virtio_balloon.h
> > > @@ -34,6 +34,7 @@
> > >  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before
> > reclaiming pages */
> > >  #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue
> > */
> > >  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon
> > on OOM */
> > > +#define VIRTIO_BALLOON_F_PAGE_BITMAP	3 /* Send page info
> > with bitmap */
> > >
> > >  /* Size of a PFN in the balloon interface. */  #define
> > > VIRTIO_BALLOON_PFN_SHIFT 12 diff --git a/mm/page_alloc.c
> > > b/mm/page_alloc.c index c1069ef..74b2fc5 100644
> > > --- a/mm/page_alloc.c
> > > +++ b/mm/page_alloc.c
> > > @@ -2139,6 +2139,12 @@ void drain_all_pages(struct zone *zone)
> > >  								zone, 1);
> > >  }
> > >
> > > +unsigned long get_max_pfn(void)
> > > +{
> > > +	return max_pfn;
> > > +}
> > > +EXPORT_SYMBOL(get_max_pfn);
> > > +
> > >  #ifdef CONFIG_HIBERNATION
> > >
> > >  void mark_free_pages(struct zone *zone)
> > 
> > Suggestion to address all above comments:
> > 	1. allocate a bunch of pages and link them up,
> > 	   calculating the min and the max pfn.
> > 	   if max-min exceeds the allocated bitmap size,
> > 	   tell host.
> 
> I am not sure if it works well in some cases, e.g. The allocated pages 
> are across a wide range and the max-min > limit is very frequently to be true.
> Then, there will be many times of virtio transmission and it's bad for performance
> improvement. Right?

It's a tradeoff for sure. Measure it, see what the overhead is.

> 
> > 	2. limit allocated bitmap size to something reasonable.
> > 	   How about 32Kbytes? This is 256kilo bit in the map, which comes
> > 	   out to 1Giga bytes of memory in the balloon.
> 
> So, even the VM has 1TB of RAM, the page bitmap will take 32MB of memory.
> Maybe it's better to use a big page bitmap the save the pages allocated by balloon,
> and split the big page bitmap to 32K bytes unit, then transfer one unit at a time.

How is this different from what I said?

> 
> Should we use a page bitmap to replace 'vb->pages' ?
> 
> How about rolling back to use PFNs if the count of requested pages is a small number?
> 
> Liang

That's why we have start pfn. You can use that to pass
even a single page without a lot of overhead.

> > > --
> > > 1.9.1
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in the body of
> > a message to majordomo@vger.kernel.org More majordomo info at
> > http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-24 10:08     ` Michael S. Tsirkin
@ 2016-05-24 10:38       ` Li, Liang Z
  2016-05-24 11:11         ` Michael S. Tsirkin
  2016-05-25  8:48       ` Li, Liang Z
  1 sibling, 1 reply; 23+ messages in thread
From: Li, Liang Z @ 2016-05-24 10:38 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

> > > >  {
> > > > -	struct scatterlist sg;
> > > >  	unsigned int len;
> > > >
> > > > -	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> > > > +	if (virtio_has_feature(vb->vdev,
> > > VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> > > > +		u32 page_shift = PAGE_SHIFT;
> > > > +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> > > > +		struct scatterlist sg[5];
> > > > +
> > > > +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> > > > +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> > > > +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG *
> > > sizeof(long);
> > > > +
> > > > +		sg_init_table(sg, 5);
> > > > +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> > > > +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> > > > +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> > > > +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> > > > +		sg_set_buf(&sg[4], vb->page_bitmap +
> > > > +				 (start_pfn / BITS_PER_LONG), bmap_len);
> > >
> > > This can be pre-initialized, correct?
> >
> > pre-initialized? I am not quite understand your mean.
> 
> I think you can maintain sg as part of device state and init sg with the bitmap.
> 

I got it.

> > > This is grossly inefficient if you only requested a single page.
> > > And it's also allocating memory very aggressively without ever
> > > telling the host what is going on.
> >
> > If only requested a single page, there is no need  to send the entire
> > page bitmap, This RFC patch has already considered about this.
> 
> where's that addressed in code?
> 

By recording the start_pfn and end_pfn.

The start_pfn & end_pfn will be updated in set_page_bitmap()
and will be used in the function tell_host():

---------------------------------------------------------------------------------
+static void set_page_bitmap(struct virtio_balloon *vb, struct page 
+*page) {
+	unsigned int i;
+	unsigned long *bitmap = vb->page_bitmap;
+	unsigned long balloon_pfn = page_to_balloon_pfn(page);
+
+	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
+		set_bit(balloon_pfn + i, bitmap);
+	if (balloon_pfn < vb->start_pfn)
+		vb->start_pfn = balloon_pfn;
+	if (balloon_pfn > vb->end_pfn)
+		vb->end_pfn = balloon_pfn;
+}


+		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
+		struct scatterlist sg[5];
+
+		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
+		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
+		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG * sizeof(long);
+
+		sg_init_table(sg, 5);
+		sg_set_buf(&sg[0], &flags, sizeof(flags));
+		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
+		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
+		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
+		sg_set_buf(&sg[4], vb->page_bitmap +
+				 (start_pfn / BITS_PER_LONG), bmap_len);
+		virtqueue_add_outbuf(vq, sg, 5, vb, GFP_KERNEL);
-------------------------------------------------------------------------------------------
> > But it can works very well if requesting several pages  which across a
> > large range.
> 
> Some kind of limit on range would make sense though.
> It need not cover max pfn.
> 

Yes, agree.

> > > Suggestion to address all above comments:
> > > 	1. allocate a bunch of pages and link them up,
> > > 	   calculating the min and the max pfn.
> > > 	   if max-min exceeds the allocated bitmap size,
> > > 	   tell host.
> >
> > I am not sure if it works well in some cases, e.g. The allocated pages
> > are across a wide range and the max-min > limit is very frequently to be
> true.
> > Then, there will be many times of virtio transmission and it's bad for
> > performance improvement. Right?
> 
> It's a tradeoff for sure. Measure it, see what the overhead is.

OK, I will try and get back to you.

> 
> >
> > > 	2. limit allocated bitmap size to something reasonable.
> > > 	   How about 32Kbytes? This is 256kilo bit in the map, which comes
> > > 	   out to 1Giga bytes of memory in the balloon.
> >
> > So, even the VM has 1TB of RAM, the page bitmap will take 32MB of
> memory.
> > Maybe it's better to use a big page bitmap the save the pages
> > allocated by balloon, and split the big page bitmap to 32K bytes unit, then
> transfer one unit at a time.
> 
> How is this different from what I said?
> 

It's good if it's the same as you said.

Thanks!
Liang

> >
> > Should we use a page bitmap to replace 'vb->pages' ?
> >
> > How about rolling back to use PFNs if the count of requested pages is a
> small number?
> >
> > Liang
> 
> That's why we have start pfn. you can use that to pass even a single page
> without a lot of overhead.
> 
> > > > --
> > > > 1.9.1
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > the body of a message to majordomo@vger.kernel.org More majordomo
> > > info at http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-24 10:38       ` Li, Liang Z
@ 2016-05-24 11:11         ` Michael S. Tsirkin
  2016-05-24 14:36           ` Li, Liang Z
  0 siblings, 1 reply; 23+ messages in thread
From: Michael S. Tsirkin @ 2016-05-24 11:11 UTC (permalink / raw)
  To: Li, Liang Z
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

On Tue, May 24, 2016 at 10:38:43AM +0000, Li, Liang Z wrote:
> > > > >  {
> > > > > -	struct scatterlist sg;
> > > > >  	unsigned int len;
> > > > >
> > > > > -	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> > > > > +	if (virtio_has_feature(vb->vdev,
> > > > VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> > > > > +		u32 page_shift = PAGE_SHIFT;
> > > > > +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> > > > > +		struct scatterlist sg[5];
> > > > > +
> > > > > +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> > > > > +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> > > > > +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG *
> > > > sizeof(long);
> > > > > +
> > > > > +		sg_init_table(sg, 5);
> > > > > +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> > > > > +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> > > > > +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> > > > > +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> > > > > +		sg_set_buf(&sg[4], vb->page_bitmap +
> > > > > +				 (start_pfn / BITS_PER_LONG), bmap_len);
> > > >
> > > > This can be pre-initialized, correct?
> > >
> > > pre-initialized? I am not quite understand your mean.
> > 
> > I think you can maintain sg as part of device state and init sg with the bitmap.
> > 
> 
> I got it.
> 
> > > > This is grossly inefficient if you only requested a single page.
> > > > And it's also allocating memory very aggressively without ever
> > > > telling the host what is going on.
> > >
> > > If only requested a single page, there is no need  to send the entire
> > > page bitmap, This RFC patch has already considered about this.
> > 
> > where's that addressed in code?
> > 
> 
> By record the start_pfn and end_pfn.
> 
> The start_pfn & end_pfn will be updated in set_page_bitmap()
> and will be used in the function tell_host():
> 
> ---------------------------------------------------------------------------------
> +static void set_page_bitmap(struct virtio_balloon *vb, struct page 
> +*page) {
> +	unsigned int i;
> +	unsigned long *bitmap = vb->page_bitmap;
> +	unsigned long balloon_pfn = page_to_balloon_pfn(page);
> +
> +	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
> +		set_bit(balloon_pfn + i, bitmap);

BTW, there's a page size value in the header, so there
is no longer a need to set multiple bits per page.

> +	if (balloon_pfn < vb->start_pfn)
> +		vb->start_pfn = balloon_pfn;
> +	if (balloon_pfn > vb->end_pfn)
> +		vb->end_pfn = balloon_pfn;
> +}

Sounds good, but you also need to limit by
allocated bitmap size.

> 
> +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> +		struct scatterlist sg[5];
> +
> +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG * sizeof(long);
> +
> +		sg_init_table(sg, 5);
> +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> +		sg_set_buf(&sg[4], vb->page_bitmap +
> +				 (start_pfn / BITS_PER_LONG), bmap_len);

Looks wrong. start_pfn should start at offset 0 I think ...

> +		virtqueue_add_outbuf(vq, sg, 5, vb, GFP_KERNEL);
> -------------------------------------------------------------------------------------------
> > > But it can works very well if requesting several pages  which across a
> > > large range.
> > 
> > Some kind of limit on range would make sense though.
> > It need not cover max pfn.
> > 
> 
> Yes, agree.
> 
> > > > Suggestion to address all above comments:
> > > > 	1. allocate a bunch of pages and link them up,
> > > > 	   calculating the min and the max pfn.
> > > > 	   if max-min exceeds the allocated bitmap size,
> > > > 	   tell host.
> > >
> > > I am not sure if it works well in some cases, e.g. The allocated pages
> > > are across a wide range and the max-min > limit is very frequently to be
> > true.
> > > Then, there will be many times of virtio transmission and it's bad for
> > > performance improvement. Right?
> > 
> > It's a tradeoff for sure. Measure it, see what the overhead is.
> 
> OK, I will try and get back to you.
> 
> > 
> > >
> > > > 	2. limit allocated bitmap size to something reasonable.
> > > > 	   How about 32Kbytes? This is 256kilo bit in the map, which comes
> > > > 	   out to 1Giga bytes of memory in the balloon.
> > >
> > > So, even the VM has 1TB of RAM, the page bitmap will take 32MB of
> > memory.
> > > Maybe it's better to use a big page bitmap the save the pages
> > > allocated by balloon, and split the big page bitmap to 32K bytes unit, then
> > transfer one unit at a time.
> > 
> > How is this different from what I said?
> > 
> 
> It's good if it's the same as you said.
> 
> Thanks!
> Liang
> 
> > >
> > > Should we use a page bitmap to replace 'vb->pages' ?
> > >
> > > How about rolling back to use PFNs if the count of requested pages is a
> > small number?
> > >
> > > Liang
> > 
> > That's why we have start pfn. you can use that to pass even a single page
> > without a lot of overhead.
> > 
> > > > > --
> > > > > 1.9.1
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > > the body of a message to majordomo@vger.kernel.org More majordomo
> > > > info at http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-24 11:11         ` Michael S. Tsirkin
@ 2016-05-24 14:36           ` Li, Liang Z
  2016-05-24 15:12             ` Michael S. Tsirkin
  0 siblings, 1 reply; 23+ messages in thread
From: Li, Liang Z @ 2016-05-24 14:36 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

> > > > > This can be pre-initialized, correct?
> > > >
> > > > pre-initialized? I am not quite understand your mean.
> > >
> > > I think you can maintain sg as part of device state and init sg with the
> bitmap.
> > >
> >
> > I got it.
> >
> > > > > This is grossly inefficient if you only requested a single page.
> > > > > And it's also allocating memory very aggressively without ever
> > > > > telling the host what is going on.
> > > >
> > > > If only requested a single page, there is no need  to send the
> > > > entire page bitmap, This RFC patch has already considered about this.
> > >
> > > where's that addressed in code?
> > >
> >
> > By record the start_pfn and end_pfn.
> >
> > The start_pfn & end_pfn will be updated in set_page_bitmap() and will
> > be used in the function tell_host():
> >
> > ----------------------------------------------------------------------
> > -----------
> > +static void set_page_bitmap(struct virtio_balloon *vb, struct page
> > +*page) {
> > +	unsigned int i;
> > +	unsigned long *bitmap = vb->page_bitmap;
> > +	unsigned long balloon_pfn = page_to_balloon_pfn(page);
> > +
> > +	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
> > +		set_bit(balloon_pfn + i, bitmap);
> 
> BTW, there's a page size value in header so there is no longer need to set
> multiple bits per page.

Yes, you are right.

> 
> > +	if (balloon_pfn < vb->start_pfn)
> > +		vb->start_pfn = balloon_pfn;
> > +	if (balloon_pfn > vb->end_pfn)
> > +		vb->end_pfn = balloon_pfn;
> > +}
> 
> Sounds good, but you also need to limit by allocated bitmap size.

Why should we limit the page bitmap size? Is it bad to send a large page bitmap?
Or is it to save the memory used for the page bitmap? Or some other reason?
> 
> >
> > +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> > +		struct scatterlist sg[5];
> > +
> > +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> > +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> > +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG *
> sizeof(long);
> > +
> > +		sg_init_table(sg, 5);
> > +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> > +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> > +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> > +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> > +		sg_set_buf(&sg[4], vb->page_bitmap +
> > +				 (start_pfn / BITS_PER_LONG), bmap_len);
> 
> Looks wrong. start_pfn should start at offset 0 I think ...

I don't know what is wrong here; could you tell me why?

Thanks!

Liang

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-24 14:36           ` Li, Liang Z
@ 2016-05-24 15:12             ` Michael S. Tsirkin
  2016-05-25  0:52               ` Li, Liang Z
  2016-05-25  1:00               ` Li, Liang Z
  0 siblings, 2 replies; 23+ messages in thread
From: Michael S. Tsirkin @ 2016-05-24 15:12 UTC (permalink / raw)
  To: Li, Liang Z
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

On Tue, May 24, 2016 at 02:36:08PM +0000, Li, Liang Z wrote:
> > > > > > This can be pre-initialized, correct?
> > > > >
> > > > > pre-initialized? I am not quite understand your mean.
> > > >
> > > > I think you can maintain sg as part of device state and init sg with the
> > bitmap.
> > > >
> > >
> > > I got it.
> > >
> > > > > > This is grossly inefficient if you only requested a single page.
> > > > > > And it's also allocating memory very aggressively without ever
> > > > > > telling the host what is going on.
> > > > >
> > > > > If only requested a single page, there is no need  to send the
> > > > > entire page bitmap, This RFC patch has already considered about this.
> > > >
> > > > where's that addressed in code?
> > > >
> > >
> > > By record the start_pfn and end_pfn.
> > >
> > > The start_pfn & end_pfn will be updated in set_page_bitmap() and will
> > > be used in the function tell_host():
> > >
> > > ----------------------------------------------------------------------
> > > -----------
> > > +static void set_page_bitmap(struct virtio_balloon *vb, struct page
> > > +*page) {
> > > +	unsigned int i;
> > > +	unsigned long *bitmap = vb->page_bitmap;
> > > +	unsigned long balloon_pfn = page_to_balloon_pfn(page);
> > > +
> > > +	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
> > > +		set_bit(balloon_pfn + i, bitmap);
> > 
> > BTW, there's a page size value in header so there is no longer need to set
> > multiple bits per page.
> 
> Yes, you are right.
> 
> > 
> > > +	if (balloon_pfn < vb->start_pfn)
> > > +		vb->start_pfn = balloon_pfn;
> > > +	if (balloon_pfn > vb->end_pfn)
> > > +		vb->end_pfn = balloon_pfn;
> > > +}
> > 
> > Sounds good, but you also need to limit by allocated bitmap size.
> 
> Why should we limit the page bitmap size? Is it no good to send a large page bitmap?
> or to save the memory used for page bitmap? Or some other reason?

To save memory. First, allocating a large bitmap can fail; second, it is
pinned memory that is wasted - it's unused most of the time while the guest
is running.

> > 
> > >
> > > +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> > > +		struct scatterlist sg[5];
> > > +
> > > +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> > > +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> > > +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG *
> > sizeof(long);
> > > +
> > > +		sg_init_table(sg, 5);
> > > +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> > > +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> > > +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> > > +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> > > +		sg_set_buf(&sg[4], vb->page_bitmap +
> > > +				 (start_pfn / BITS_PER_LONG), bmap_len);
> > 
> > Looks wrong. start_pfn should start at offset 0 I think ...
> 
> I don't know what is wrong here, could you tell me why?
> 
> Thanks!
> 
> Liang

start_pfn should mean "bit 0 in bitmap refers to pfn X".
So it does not make sense to also add it as offset within bitmap.

-- 
MST

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-24 15:12             ` Michael S. Tsirkin
@ 2016-05-25  0:52               ` Li, Liang Z
  2016-05-25  1:00               ` Li, Liang Z
  1 sibling, 0 replies; 23+ messages in thread
From: Li, Liang Z @ 2016-05-25  0:52 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

> On Tue, May 24, 2016 at 02:36:08PM +0000, Li, Liang Z wrote:
> > > > > > > This can be pre-initialized, correct?
> > > > > >
> > > > > > pre-initialized? I am not quite understand your mean.
> > > > >
> > > > > I think you can maintain sg as part of device state and init sg
> > > > > with the
> > > bitmap.
> > > > >
> > > >
> > > > I got it.
> > > >
> > > > > > > This is grossly inefficient if you only requested a single page.
> > > > > > > And it's also allocating memory very aggressively without
> > > > > > > ever telling the host what is going on.
> > > > > >
> > > > > > If only requested a single page, there is no need  to send the
> > > > > > entire page bitmap, This RFC patch has already considered about
> this.
> > > > >
> > > > > where's that addressed in code?
> > > > >
> > > >
> > > > By record the start_pfn and end_pfn.
> > > >
> > > > The start_pfn & end_pfn will be updated in set_page_bitmap() and
> > > > will be used in the function tell_host():
> > > >
> > > > ------------------------------------------------------------------
> > > > ----
> > > > -----------
> > > > +static void set_page_bitmap(struct virtio_balloon *vb, struct
> > > > +page
> > > > +*page) {
> > > > +	unsigned int i;
> > > > +	unsigned long *bitmap = vb->page_bitmap;
> > > > +	unsigned long balloon_pfn = page_to_balloon_pfn(page);
> > > > +
> > > > +	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
> > > > +		set_bit(balloon_pfn + i, bitmap);
> > >
> > > BTW, there's a page size value in header so there is no longer need
> > > to set multiple bits per page.
> >
> > Yes, you are right.
> >
> > >
> > > > +	if (balloon_pfn < vb->start_pfn)
> > > > +		vb->start_pfn = balloon_pfn;
> > > > +	if (balloon_pfn > vb->end_pfn)
> > > > +		vb->end_pfn = balloon_pfn;
> > > > +}
> > >
> > > Sounds good, but you also need to limit by allocated bitmap size.
> >
> > Why should we limit the page bitmap size? Is it no good to send a large
> page bitmap?
> > or to save the memory used for page bitmap? Or some other reason?
> 
> To save memory. First allocating a large bitmap can fail, second this is pinned
> memory that is wasted - it's unused most of the time while guest is running.
> 
> > >
> > > >
> > > > +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> > > > +		struct scatterlist sg[5];
> > > > +
> > > > +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> > > > +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> > > > +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG *
> > > sizeof(long);
> > > > +
> > > > +		sg_init_table(sg, 5);
> > > > +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> > > > +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> > > > +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> > > > +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> > > > +		sg_set_buf(&sg[4], vb->page_bitmap +
> > > > +				 (start_pfn / BITS_PER_LONG), bmap_len);
> > >
> > > Looks wrong. start_pfn should start at offset 0 I think ...
> >
> > I don't know what is wrong here, could you tell me why?
> >
> > Thanks!
> >
> > Liang
> 
> start_pfn should mean "bit 0 in bitmap refers to pfn X".
> So it does not make sense to also add it as offset within bitmap.
> 
> --
> MST

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-24 15:12             ` Michael S. Tsirkin
  2016-05-25  0:52               ` Li, Liang Z
@ 2016-05-25  1:00               ` Li, Liang Z
  2016-05-25  8:35                 ` Michael S. Tsirkin
  1 sibling, 1 reply; 23+ messages in thread
From: Li, Liang Z @ 2016-05-25  1:00 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

> > > > > > > This is grossly inefficient if you only requested a single page.
> > > > > > > And it's also allocating memory very aggressively without
> > > > > > > ever telling the host what is going on.
> > > > > >
> > > > > > If only requested a single page, there is no need  to send the
> > > > > > entire page bitmap, This RFC patch has already considered about
> this.
> > > > >
> > > > > where's that addressed in code?
> > > > >
> > > >
> > > > By record the start_pfn and end_pfn.
> > > >
> > > > The start_pfn & end_pfn will be updated in set_page_bitmap() and
> > > > will be used in the function tell_host():
> > > >
> > > > ------------------------------------------------------------------
> > > > ----
> > > > -----------
> > > > +static void set_page_bitmap(struct virtio_balloon *vb, struct
> > > > +page
> > > > +*page) {
> > > > +	unsigned int i;
> > > > +	unsigned long *bitmap = vb->page_bitmap;
> > > > +	unsigned long balloon_pfn = page_to_balloon_pfn(page);
> > > > +
> > > > +	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
> > > > +		set_bit(balloon_pfn + i, bitmap);
> > >
> > > BTW, there's a page size value in header so there is no longer need
> > > to set multiple bits per page.
> >
> > Yes, you are right.
> >
> > >
> > > > +	if (balloon_pfn < vb->start_pfn)
> > > > +		vb->start_pfn = balloon_pfn;
> > > > +	if (balloon_pfn > vb->end_pfn)
> > > > +		vb->end_pfn = balloon_pfn;
> > > > +}
> > >
> > > Sounds good, but you also need to limit by allocated bitmap size.
> >
> > Why should we limit the page bitmap size? Is it no good to send a large
> page bitmap?
> > or to save the memory used for page bitmap? Or some other reason?
> 
> To save memory. First allocating a large bitmap can fail, second this is pinned
> memory that is wasted - it's unused most of the time while guest is running.

Makes sense.

> 
> > >
> > > >
> > > > +		unsigned long start_pfn, end_pfn, flags = 0, bmap_len;
> > > > +		struct scatterlist sg[5];
> > > > +
> > > > +		start_pfn = rounddown(vb->start_pfn, BITS_PER_LONG);
> > > > +		end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> > > > +		bmap_len = (end_pfn - start_pfn) / BITS_PER_LONG *
> > > sizeof(long);
> > > > +
> > > > +		sg_init_table(sg, 5);
> > > > +		sg_set_buf(&sg[0], &flags, sizeof(flags));
> > > > +		sg_set_buf(&sg[1], &start_pfn, sizeof(start_pfn));
> > > > +		sg_set_buf(&sg[2], &page_shift, sizeof(page_shift));
> > > > +		sg_set_buf(&sg[3], &bmap_len, sizeof(bmap_len));
> > > > +		sg_set_buf(&sg[4], vb->page_bitmap +
> > > > +				 (start_pfn / BITS_PER_LONG), bmap_len);
> > >
> > > Looks wrong. start_pfn should start at offset 0 I think ...
> >
> > I don't know what is wrong here, could you tell me why?
> >
> > Thanks!
> >
> > Liang
> 
> start_pfn should mean "bit 0 in bitmap refers to pfn X".
> So it does not make sense to also add it as offset within bitmap.

I got it, but in my code it's correct, because the page_bitmap
ranges from pfn 0 to max_pfn.
It should be changed if we are going to use a small page bitmap.

Thanks!

Liang


> 
> --
> MST

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-25  1:00               ` Li, Liang Z
@ 2016-05-25  8:35                 ` Michael S. Tsirkin
  0 siblings, 0 replies; 23+ messages in thread
From: Michael S. Tsirkin @ 2016-05-25  8:35 UTC (permalink / raw)
  To: Li, Liang Z
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

On Wed, May 25, 2016 at 01:00:27AM +0000, Li, Liang Z wrote:
> It should be changed if we are going to use a small page bitmap.

Exactly.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-24 10:08     ` Michael S. Tsirkin
  2016-05-24 10:38       ` Li, Liang Z
@ 2016-05-25  8:48       ` Li, Liang Z
  2016-05-25  8:57         ` Michael S. Tsirkin
  1 sibling, 1 reply; 23+ messages in thread
From: Li, Liang Z @ 2016-05-25  8:48 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

> > > Suggestion to address all above comments:
> > > 	1. allocate a bunch of pages and link them up,
> > > 	   calculating the min and the max pfn.
> > > 	   if max-min exceeds the allocated bitmap size,
> > > 	   tell host.
> >
> > I am not sure if it works well in some cases, e.g. The allocated pages
> > are across a wide range and the max-min > limit is very frequently to be
> true.
> > Then, there will be many times of virtio transmission and it's bad for
> > performance improvement. Right?
> 
> It's a tradeoff for sure. Measure it, see what the overhead is.
> 

Hi MST,

I have measured the performance when using a 32K page bitmap, inflating the balloon to 3GB
of an idle guest with 4GB RAM.

Now: 
total inflating time: 338ms
the count of virtio data transmission:  373
the call count of madvise: 865

before:
total inflating time: 175ms
the count of virtio data transmission: 1
the call count of madvise: 42

Maybe the result will be worse if the guest is not idle, or the guest has more RAM.
Do you want more data?

Is it worth to do that?

Liang

> >
> > > 	2. limit allocated bitmap size to something reasonable.
> > > 	   How about 32Kbytes? This is 256kilo bit in the map, which comes
> > > 	   out to 1Giga bytes of memory in the balloon.
> >
> > So, even the VM has 1TB of RAM, the page bitmap will take 32MB of
> memory.
> > Maybe it's better to use a big page bitmap the save the pages
> > allocated by balloon, and split the big page bitmap to 32K bytes unit, then
> transfer one unit at a time.
> 
> How is this different from what I said?
> 
> >
> > Should we use a page bitmap to replace 'vb->pages' ?
> >
> > How about rolling back to use PFNs if the count of requested pages is a
> small number?
> >
> > Liang
> 
> That's why we have start pfn. you can use that to pass even a single page
> without a lot of overhead.
> 
> > > > --
> > > > 1.9.1
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > the body of a message to majordomo@vger.kernel.org More majordomo
> > > info at http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-25  8:48       ` Li, Liang Z
@ 2016-05-25  8:57         ` Michael S. Tsirkin
  2016-05-25  9:28           ` Li, Liang Z
  0 siblings, 1 reply; 23+ messages in thread
From: Michael S. Tsirkin @ 2016-05-25  8:57 UTC (permalink / raw)
  To: Li, Liang Z
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

On Wed, May 25, 2016 at 08:48:17AM +0000, Li, Liang Z wrote:
> > > > Suggestion to address all above comments:
> > > > 	1. allocate a bunch of pages and link them up,
> > > > 	   calculating the min and the max pfn.
> > > > 	   if max-min exceeds the allocated bitmap size,
> > > > 	   tell host.
> > >
> > > I am not sure if it works well in some cases, e.g. The allocated pages
> > > are across a wide range and the max-min > limit is very frequently to be
> > true.
> > > Then, there will be many times of virtio transmission and it's bad for
> > > performance improvement. Right?
> > 
> > It's a tradeoff for sure. Measure it, see what the overhead is.
> > 
> 
> Hi MST,
> 
> I have measured the performance when using a 32K page bitmap,

Just to make sure. Do you mean a 32Kbyte bitmap?
Covering 1Gbyte of memory?

> and inflate the balloon to 3GB
> of an idle guest with 4GB RAM.

Should take 3 requests then, right?

> Now: 
> total inflating time: 338ms
> the count of virtio data transmission:  373

Why was this so high? I would expect 3 transmissions.

> the call count of madvise: 865
> 
> before:
> total inflating time: 175ms
> the count of virtio data transmission: 1
> the call count of madvise: 42
> 
> Maybe the result will be worse if the guest is not idle, or the guest has more RAM.
> Do you want more data?
> 
> Is it worth to do that?
> 
> Liang

Either my math is wrong or there's an implementation bug.

> > >
> > > > 	2. limit allocated bitmap size to something reasonable.
> > > > 	   How about 32Kbytes? This is 256kilo bit in the map, which comes
> > > > 	   out to 1Giga bytes of memory in the balloon.
> > >
> > > So, even the VM has 1TB of RAM, the page bitmap will take 32MB of
> > memory.
> > > Maybe it's better to use a big page bitmap the save the pages
> > > allocated by balloon, and split the big page bitmap to 32K bytes unit, then
> > transfer one unit at a time.
> > 
> > How is this different from what I said?
> > 
> > >
> > > Should we use a page bitmap to replace 'vb->pages' ?
> > >
> > > How about rolling back to use PFNs if the count of requested pages is a
> > small number?
> > >
> > > Liang
> > 
> > That's why we have start pfn. you can use that to pass even a single page
> > without a lot of overhead.
> > 
> > > > > --
> > > > > 1.9.1
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > > the body of a message to majordomo@vger.kernel.org More majordomo
> > > > info at http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-25  8:57         ` Michael S. Tsirkin
@ 2016-05-25  9:28           ` Li, Liang Z
  2016-05-25  9:40             ` Michael S. Tsirkin
  0 siblings, 1 reply; 23+ messages in thread
From: Li, Liang Z @ 2016-05-25  9:28 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

> On Wed, May 25, 2016 at 08:48:17AM +0000, Li, Liang Z wrote:
> > > > > Suggestion to address all above comments:
> > > > > 	1. allocate a bunch of pages and link them up,
> > > > > 	   calculating the min and the max pfn.
> > > > > 	   if max-min exceeds the allocated bitmap size,
> > > > > 	   tell host.
> > > >
> > > > I am not sure if it works well in some cases, e.g. The allocated
> > > > pages are across a wide range and the max-min > limit is very
> > > > frequently to be
> > > true.
> > > > Then, there will be many times of virtio transmission and it's bad
> > > > for performance improvement. Right?
> > >
> > > It's a tradeoff for sure. Measure it, see what the overhead is.
> > >
> >
> > Hi MST,
> >
> > I have measured the performance when using a 32K page bitmap,
> 
> Just to make sure. Do you mean a 32Kbyte bitmap?
> Covering 1Gbyte of memory?
Yes.

> 
> > and inflate the balloon to 3GB
> > of an idle guest with 4GB RAM.
> 
> Should take 3 requests then, right?
> 

No, we can't assign the PFN when allocating a page in the balloon driver,
so the PFNs of the allocated pages may be spread across a large range. We will
tell the host once pfn_max - pfn_min >= 0x40000 (a 1GB range),
so the request count is most likely to be more than 3.


> > Now:
> > total inflating time: 338ms
> > the count of virtio data transmission:  373
> 
> Why was this so high? I would expect 3 transmissions.

I follow your suggestion:
------------------------------------------------------------------------------------
Suggestion to address all above comments:
	1. allocate a bunch of pages and link them up,
	   calculating the min and the max pfn.
	   if max-min exceeds the allocated bitmap size,
	   tell host.
	2. limit allocated bitmap size to something reasonable.
	   How about 32Kbytes? This is 256kilo bit in the map, which comes
	   out to 1Giga bytes of memory in the balloon.
-------------------------------------------------------------------------------------
Because the PFNs of the allocated pages do not increase linearly, 3 transmissions
are impossible.


Liang

> 
> > the call count of madvise: 865
> >
> > before:
> > total inflating time: 175ms
> > the count of virtio data transmission: 1 the call count of madvise: 42
> >
> > Maybe the result will be worse if the guest is not idle, or the guest has
> more RAM.
> > Do you want more data?
> >
> > Is it worth to do that?
> >
> > Liang
> 
> Either my math is wrong or there's an implementation bug.
> 
> > > >
> > > > > 	2. limit allocated bitmap size to something reasonable.
> > > > > 	   How about 32Kbytes? This is 256kilo bit in the map, which comes
> > > > > 	   out to 1Giga bytes of memory in the balloon.
> > > >
> > > > So, even the VM has 1TB of RAM, the page bitmap will take 32MB of
> > > memory.
> > > > Maybe it's better to use a big page bitmap the save the pages
> > > > allocated by balloon, and split the big page bitmap to 32K bytes
> > > > unit, then
> > > transfer one unit at a time.
> > >
> > > How is this different from what I said?
> > >
> > > >
> > > > Should we use a page bitmap to replace 'vb->pages' ?
> > > >
> > > > How about rolling back to use PFNs if the count of requested pages
> > > > is a
> > > small number?
> > > >
> > > > Liang
> > >
> > > That's why we have start pfn. you can use that to pass even a single
> > > page without a lot of overhead.
> > >
> > > > > > --
> > > > > > 1.9.1
> > > > > --
> > > > > To unsubscribe from this list: send the line "unsubscribe kvm"
> > > > > in the body of a message to majordomo@vger.kernel.org More
> > > > > majordomo info at http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in the body of
> a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-25  9:28           ` Li, Liang Z
@ 2016-05-25  9:40             ` Michael S. Tsirkin
  2016-05-25 10:10               ` Li, Liang Z
  0 siblings, 1 reply; 23+ messages in thread
From: Michael S. Tsirkin @ 2016-05-25  9:40 UTC (permalink / raw)
  To: Li, Liang Z
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

On Wed, May 25, 2016 at 09:28:58AM +0000, Li, Liang Z wrote:
> > On Wed, May 25, 2016 at 08:48:17AM +0000, Li, Liang Z wrote:
> > > > > > Suggestion to address all above comments:
> > > > > > 	1. allocate a bunch of pages and link them up,
> > > > > > 	   calculating the min and the max pfn.
> > > > > > 	   if max-min exceeds the allocated bitmap size,
> > > > > > 	   tell host.
> > > > >
> > > > > I am not sure if it works well in some cases, e.g. The allocated
> > > > > pages are across a wide range and the max-min > limit is very
> > > > > frequently to be
> > > > true.
> > > > > Then, there will be many times of virtio transmission and it's bad
> > > > > for performance improvement. Right?
> > > >
> > > > It's a tradeoff for sure. Measure it, see what the overhead is.
> > > >
> > >
> > > Hi MST,
> > >
> > > I have measured the performance when using a 32K page bitmap,
> > 
> > Just to make sure. Do you mean a 32Kbyte bitmap?
> > Covering 1Gbyte of memory?
> Yes.
> 
> > 
> > > and inflate the balloon to 3GB
> > > of an idle guest with 4GB RAM.
> > 
> > Should take 3 requests then, right?
> > 
> 
> No,  we can't assign the PFN when allocating page in balloon driver,
> So the PFNs of pages allocated may be across a large range,  we will
> tell the host once the pfn_max -pfn_min >= 0x40000(1GB range),
> so the requests count is most likely to be more than 3. 
> 
> > > Now:
> > > total inflating time: 338ms
> > > the count of virtio data transmission:  373
> > 
> > Why was this so high? I would expect 3 transmissions.
> 
> I follow your suggestion:
> ------------------------------------------------------------------------------------
> Suggestion to address all above comments:
> 	1. allocate a bunch of pages and link them up,
> 	   calculating the min and the max pfn.
> 	   if max-min exceeds the allocated bitmap size,
> 	   tell host.
> 	2. limit allocated bitmap size to something reasonable.
> 	   How about 32Kbytes? This is 256kilo bit in the map, which comes
> 	   out to 1Giga bytes of memory in the balloon.
> -------------------------------------------------------------------------------------
> Because the PFNs of the allocated pages are not linear increased, so 3 transmissions
> are  impossible.
> 
> 
> Liang

Interesting. How about instead of tell host, we do multiple scans, each
time ignoring pages out of range?

	for (pfn = min pfn; pfn < max pfn; pfn += 1G) {
		foreach page
			if page pfn < pfn || page pfn >= pfn + 1G
				continue
			set bit
		tell host
	}

> 
> > 
> > > the call count of madvise: 865
> > >
> > > before:
> > > total inflating time: 175ms
> > > the count of virtio data transmission: 1 the call count of madvise: 42
> > >
> > > Maybe the result will be worse if the guest is not idle, or the guest has
> > more RAM.
> > > Do you want more data?
> > >
> > > Is it worth to do that?
> > >
> > > Liang
> > 
> > Either my math is wrong or there's an implementation bug.
> > 
> > > > >
> > > > > > 	2. limit allocated bitmap size to something reasonable.
> > > > > > 	   How about 32Kbytes? This is 256kilo bit in the map, which comes
> > > > > > 	   out to 1Giga bytes of memory in the balloon.
> > > > >
> > > > > So, even the VM has 1TB of RAM, the page bitmap will take 32MB of
> > > > memory.
> > > > > Maybe it's better to use a big page bitmap the save the pages
> > > > > allocated by balloon, and split the big page bitmap to 32K bytes
> > > > > unit, then
> > > > transfer one unit at a time.
> > > >
> > > > How is this different from what I said?
> > > >
> > > > >
> > > > > Should we use a page bitmap to replace 'vb->pages' ?
> > > > >
> > > > > How about rolling back to use PFNs if the count of requested pages
> > > > > is a
> > > > small number?
> > > > >
> > > > > Liang
> > > >
> > > > That's why we have start pfn. you can use that to pass even a single
> > > > page without a lot of overhead.
> > > >
> > > > > > > --
> > > > > > > 1.9.1
> > > > > > --
> > > > > > To unsubscribe from this list: send the line "unsubscribe kvm"
> > > > > > in the body of a message to majordomo@vger.kernel.org More
> > > > > > majordomo info at http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in the body of
> > a message to majordomo@vger.kernel.org More majordomo info at
> > http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-25  9:40             ` Michael S. Tsirkin
@ 2016-05-25 10:10               ` Li, Liang Z
  2016-05-25 10:37                 ` Michael S. Tsirkin
  0 siblings, 1 reply; 23+ messages in thread
From: Li, Liang Z @ 2016-05-25 10:10 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

> > > >
> > > > Hi MST,
> > > >
> > > > I have measured the performance when using a 32K page bitmap,
> > >
> > > Just to make sure. Do you mean a 32Kbyte bitmap?
> > > Covering 1Gbyte of memory?
> > Yes.
> >
> > >
> > > > and inflate the balloon to 3GB
> > > > of an idle guest with 4GB RAM.
> > >
> > > Should take 3 requests then, right?
> > >
> >
> > No,  we can't assign the PFN when allocating page in balloon driver,
> > So the PFNs of pages allocated may be across a large range,  we will
> > tell the host once the pfn_max -pfn_min >= 0x40000(1GB range), so the
> > requests count is most likely to be more than 3.
> >
> > > > Now:
> > > > total inflating time: 338ms
> > > > the count of virtio data transmission:  373
> > >
> > > Why was this so high? I would expect 3 transmissions.
> >
> > I follow your suggestion:
> > ----------------------------------------------------------------------
> > -------------- Suggestion to address all above comments:
> > 	1. allocate a bunch of pages and link them up,
> > 	   calculating the min and the max pfn.
> > 	   if max-min exceeds the allocated bitmap size,
> > 	   tell host.
> > 	2. limit allocated bitmap size to something reasonable.
> > 	   How about 32Kbytes? This is 256kilo bit in the map, which comes
> > 	   out to 1Giga bytes of memory in the balloon.
> > ----------------------------------------------------------------------
> > --------------- Because the PFNs of the allocated pages are not linear
> > increased, so 3 transmissions are  impossible.
> >
> >
> > Liang
> 
> Interesting. How about instead of tell host, we do multiple scans, each time
> ignoring pages out of range?
> 
> 	for (pfn = min pfn; pfn < max pfn; pfn += 1G) {
> 		foreach page
> 			if page pfn < pfn || page pfn >= pfn + 1G
> 				continue
> 			set bit
> 		tell host
> 	}
> 

That means we have to allocate/free all the requested pages first, and then tell the host.
It works fine for inflating, but deflating is a problem: because the page has been deleted from vb->vb_dev_info->pages,
we have to use a struct to save the dequeued pages before calling release_pages_balloon().
I think a page bitmap is the best struct to save these pages, because it consumes less memory.
And that bitmap should be large enough to cover pfn 0 to max_pfn.

If the above is true, then we are back to square one. We really need a large page bitmap. Right?

Liang

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-25 10:10               ` Li, Liang Z
@ 2016-05-25 10:37                 ` Michael S. Tsirkin
  2016-05-25 14:29                   ` Li, Liang Z
  0 siblings, 1 reply; 23+ messages in thread
From: Michael S. Tsirkin @ 2016-05-25 10:37 UTC (permalink / raw)
  To: Li, Liang Z
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

On Wed, May 25, 2016 at 10:10:47AM +0000, Li, Liang Z wrote:
> > > > >
> > > > > Hi MST,
> > > > >
> > > > > I have measured the performance when using a 32K page bitmap,
> > > >
> > > > Just to make sure. Do you mean a 32Kbyte bitmap?
> > > > Covering 1Gbyte of memory?
> > > Yes.
> > >
> > > >
> > > > > and inflate the balloon to 3GB
> > > > > of an idle guest with 4GB RAM.
> > > >
> > > > Should take 3 requests then, right?
> > > >
> > >
> > > No,  we can't assign the PFN when allocating page in balloon driver,
> > > So the PFNs of pages allocated may be across a large range,  we will
> > > tell the host once the pfn_max -pfn_min >= 0x40000(1GB range), so the
> > > requests count is most likely to be more than 3.
> > >
> > > > > Now:
> > > > > total inflating time: 338ms
> > > > > the count of virtio data transmission:  373
> > > >
> > > > Why was this so high? I would expect 3 transmissions.
> > >
> > > I follow your suggestion:
> > > ----------------------------------------------------------------------
> > > -------------- Suggestion to address all above comments:
> > > 	1. allocate a bunch of pages and link them up,
> > > 	   calculating the min and the max pfn.
> > > 	   if max-min exceeds the allocated bitmap size,
> > > 	   tell host.
> > > 	2. limit allocated bitmap size to something reasonable.
> > > 	   How about 32Kbytes? This is 256kilo bit in the map, which comes
> > > 	   out to 1Giga bytes of memory in the balloon.
> > > ----------------------------------------------------------------------
> > > --------------- Because the PFNs of the allocated pages are not linear
> > > increased, so 3 transmissions are  impossible.
> > >
> > >
> > > Liang
> > 
> > Interesting. How about instead of tell host, we do multiple scans, each time
> > ignoring pages out of range?
> > 
> > 	for (pfn = min pfn; pfn < max pfn; pfn += 1G) {
> > 		foreach page
> > 			if page pfn < pfn || page pfn >= pfn + 1G
> > 				continue
> > 			set bit
> > 		tell host
> > 	}
> > 
> 
> That means we have to allocate/free all the requested pages first, and then tell the host.
> It works fine for inflating, but for deflating, because the page has been deleted from the vb-> vb_dev_info->pages,
> so, we have to use a struct to save the dequeued pages before calling release_pages_balloon(),

struct list_head?  I think you can just replace set_page_pfns with
        list_add(&page->lru, &page_list);



>  I think a page bitmap is the best struct to save these pages, because it consumes less memory.
> And that bitmap should be large enough to save pfn 0 to max_pfn.
> 
> If the above is true, then we are back to the square one. we really need a large page bitmap. Right?
> 
> Liang

These look like implementation issues to me.

I think the below might be helpful (completely untested),
your work can go on top.

--->

virtio-balloon: rework deflate to add page to a tmp list

Will allow faster notifications using a bitmap down the road.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

---

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 476c0e3..44050a3 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -195,8 +195,9 @@ static void release_pages_balloon(struct virtio_balloon *vb)
 static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
 {
 	unsigned num_freed_pages;
-	struct page *page;
+	struct page *page, *next;
 	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
+	LIST_HEAD(pages);		/* Pages dequeued for handing to Host */
 
 	/* We can only do one array worth at a time. */
 	num = min(num, ARRAY_SIZE(vb->pfns));
@@ -207,10 +208,13 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
 		page = balloon_page_dequeue(vb_dev_info);
 		if (!page)
 			break;
-		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
+		list_add(&page->lru, &pages);
 		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
 	}
 
+	list_for_each_entry_safe(page, next, &pages, lru)
+		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
+
 	num_freed_pages = vb->num_pfns;
 	/*
 	 * Note that if
-- 
MST

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* RE: [PATCH RFC kernel] balloon: speed up inflating/deflating process
  2016-05-25 10:37                 ` Michael S. Tsirkin
@ 2016-05-25 14:29                   ` Li, Liang Z
  0 siblings, 0 replies; 23+ messages in thread
From: Li, Liang Z @ 2016-05-25 14:29 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-kernel, qemu-devel, virtualization, akpm, pbonzini,
	dgilbert, amit.shah, kvm

> > > Interesting. How about instead of tell host, we do multiple scans,
> > > each time ignoring pages out of range?
> > >
> > > 	for (pfn = min pfn; pfn < max pfn; pfn += 1G) {
> > > 		foreach page
> > > 			if page pfn < pfn || page pfn >= pfn + 1G
> > > 				continue
> > > 			set bit
> > > 		tell host
> > > 	}
> > >
> >
> > That means we have to allocate/free all the requested pages first, and then
> tell the host.
> > It works fine for inflating, but for deflating, because the page has
> > already been deleted from vb->vb_dev_info->pages, we have to use a
> > struct to save the dequeued pages before calling
> > release_pages_balloon(),
> 
> struct list_head?  I think you can just replace set_page_pfns with
>         list_add(&page->lru, &page_list);
> 
> 

That is fine, I will retry and get back to you.

Thanks!

Liang
> 
> >  I think a page bitmap is the best struct to save these pages, because it
> consumes less memory.
> > And that bitmap should be large enough to save pfn 0 to max_pfn.
> >
> > If the above is true, then we are back to the square one. we really need a
> large page bitmap. Right?
> >
> > Liang
> 
> These look like implementation issues to me.
> 
> I think the below might be helpful (completely untested), your work can go
> on top.
> 
> --->
> 
> virtio-balloon: rework deflate to add page to a tmp list
> 
> Will allow faster notifications using a bitmap down the road.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> 
> ---
> 
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 476c0e3..44050a3 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -195,8 +195,9 @@ static void release_pages_balloon(struct
> virtio_balloon *vb)  static unsigned leak_balloon(struct virtio_balloon *vb,
> size_t num)  {
>  	unsigned num_freed_pages;
> -	struct page *page;
> +	struct page *page, *next;
>  	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> +	LIST_HEAD(pages);		/* Pages dequeued for handing to
> Host */
> 
>  	/* We can only do one array worth at a time. */
>  	num = min(num, ARRAY_SIZE(vb->pfns));
> @@ -207,10 +208,13 @@ static unsigned leak_balloon(struct virtio_balloon
> *vb, size_t num)
>  		page = balloon_page_dequeue(vb_dev_info);
>  		if (!page)
>  			break;
> -		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
> +		list_add(&page->lru, &pages);
>  		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
>  	}
> 
> +	list_for_each_entry_safe(page, next, &pages, lru)
> +		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
> +
>  	num_freed_pages = vb->num_pfns;
>  	/*
>  	 * Note that if
> --
> MST

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2016-05-25 14:30 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-20  9:59 [PATCH RFC kernel] balloon: speed up inflating/deflating process Liang Li
2016-05-20 10:32 ` Cornelia Huck
2016-05-24  7:48   ` Li, Liang Z
2016-05-20 11:19 ` Paolo Bonzini
2016-05-24  7:51   ` Li, Liang Z
2016-05-20 12:00 ` Michael S. Tsirkin
2016-05-24  9:51   ` Li, Liang Z
2016-05-24  9:55     ` Li, Liang Z
2016-05-24 10:08     ` Michael S. Tsirkin
2016-05-24 10:38       ` Li, Liang Z
2016-05-24 11:11         ` Michael S. Tsirkin
2016-05-24 14:36           ` Li, Liang Z
2016-05-24 15:12             ` Michael S. Tsirkin
2016-05-25  0:52               ` Li, Liang Z
2016-05-25  1:00               ` Li, Liang Z
2016-05-25  8:35                 ` Michael S. Tsirkin
2016-05-25  8:48       ` Li, Liang Z
2016-05-25  8:57         ` Michael S. Tsirkin
2016-05-25  9:28           ` Li, Liang Z
2016-05-25  9:40             ` Michael S. Tsirkin
2016-05-25 10:10               ` Li, Liang Z
2016-05-25 10:37                 ` Michael S. Tsirkin
2016-05-25 14:29                   ` Li, Liang Z

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).