KVM Archive on lore.kernel.org
 help / color / Atom feed
From: Alexander Duyck <alexander.duyck@gmail.com>
To: nitesh@redhat.com, kvm@vger.kernel.org, david@redhat.com,
	mst@redhat.com, dave.hansen@intel.com,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	akpm@linux-foundation.org
Cc: yang.zhang.wz@gmail.com, pagupta@redhat.com, riel@surriel.com,
	konrad.wilk@oracle.com, lcapitulino@redhat.com,
	wei.w.wang@intel.com, aarcange@redhat.com, pbonzini@redhat.com,
	dan.j.williams@intel.com, alexander.h.duyck@linux.intel.com
Subject: [PATCH v1 QEMU] QEMU: Provide an interface for hinting based off of the balloon infrastructure
Date: Wed, 19 Jun 2019 15:37:13 -0700
Message-ID: <20190619223535.1403.32612.stgit@localhost.localdomain> (raw)
In-Reply-To: <20190619222922.1231.27432.stgit@localhost.localdomain>

From: Alexander Duyck <alexander.h.duyck@linux.intel.com>

Add support for what I am referring to as "bubble hinting". The idea is
to function very similarly to how the balloon works, in that we end up
madvising the page as not being used. However, we don't really need to
bother with any deflate-type logic, since the page will be faulted back
into the guest when it is read or written to.

This is meant to be a simplification of the existing balloon interface
for providing hints as to what memory needs to be freed. I am assuming
this is safe to do, as the deflate logic does not actually appear to do
much other than track which subpages have been released and which ones
haven't.

Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
---
 hw/virtio/trace-events                          |    1 
 hw/virtio/virtio-balloon.c                      |   73 +++++++++++++++++++++++
 include/hw/virtio/virtio-balloon.h              |    2 -
 include/standard-headers/linux/virtio_balloon.h |    1 
 4 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index e28ba48da621..b56daf460769 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -46,6 +46,7 @@ virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s g
 virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d"
 virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d"
 virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: 0x%"PRIx64" num_pages: %d"
+virtio_bubble_handle_output(const char *name, uint64_t gpa, uint64_t size) "section name: %s gpa: 0x%" PRIx64 " size: %" PRIx64
 
 # virtio-mmio.c
 virtio_mmio_read(uint64_t offset) "virtio_mmio_read offset 0x%" PRIx64
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 2112874055fb..93ee165d2db2 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -328,6 +328,75 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
     balloon_stats_change_timer(s, 0);
 }
 
+static void bubble_inflate_page(VirtIOBalloon *balloon,
+                                MemoryRegion *mr, hwaddr offset, size_t size)
+{
+    void *addr = memory_region_get_ram_ptr(mr) + offset;
+    ram_addr_t ram_offset;
+    size_t rb_page_size;
+    RAMBlock *rb;
+
+    rb = qemu_ram_block_from_host(addr, false, &ram_offset);
+    rb_page_size = qemu_ram_pagesize(rb);
+
+    /* For now we will simply ignore unaligned memory regions */
+    if ((ram_offset | size) & (rb_page_size - 1))
+        return;
+
+    ram_block_discard_range(rb, ram_offset, size);
+}
+
+static void virtio_bubble_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+    VirtQueueElement *elem;
+    MemoryRegionSection section;
+
+    for (;;) {
+        size_t offset = 0;
+	uint64_t pa_order;
+
+        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+        if (!elem) {
+            return;
+        }
+
+        while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pa_order, 8) == 8) {
+            hwaddr pa = virtio_ldq_p(vdev, &pa_order);
+            size_t size = 1ul << (VIRTIO_BALLOON_PFN_SHIFT + (pa & 0xFF));
+
+            pa -= pa & 0xFF;
+            offset += 8;
+
+            if (qemu_balloon_is_inhibited())
+                continue;
+
+            section = memory_region_find(get_system_memory(), pa, size);
+            if (!section.mr) {
+                trace_virtio_balloon_bad_addr(pa);
+                continue;
+            }
+
+            if (!memory_region_is_ram(section.mr) ||
+                memory_region_is_rom(section.mr) ||
+                memory_region_is_romd(section.mr)) {
+                trace_virtio_balloon_bad_addr(pa);
+            } else {
+                trace_virtio_bubble_handle_output(memory_region_name(section.mr),
+                                                  pa, size);
+                bubble_inflate_page(s, section.mr,
+                                    section.offset_within_region, size);
+            }
+
+            memory_region_unref(section.mr);
+        }
+
+        virtqueue_push(vq, elem, offset);
+        virtio_notify(vdev, vq);
+        g_free(elem);
+    }
+}
+
 static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
@@ -694,6 +763,7 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
     VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
     f |= dev->host_features;
     virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
+    virtio_add_feature(&f, VIRTIO_BALLOON_F_HINTING);
 
     return f;
 }
@@ -780,6 +850,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
     s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
     s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
     s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
+    s->hvq = virtio_add_queue(vdev, 128, virtio_bubble_handle_output);
 
     if (virtio_has_feature(s->host_features,
                            VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
@@ -875,6 +946,8 @@ static void virtio_balloon_instance_init(Object *obj)
 
     object_property_add(obj, "guest-stats", "guest statistics",
                         balloon_stats_get_all, NULL, NULL, s, NULL);
+    object_property_add(obj, "guest-page-hinting", "guest page hinting",
+                        NULL, NULL, NULL, s, NULL);
 
     object_property_add(obj, "guest-stats-polling-interval", "int",
                         balloon_stats_get_poll_interval,
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index 1afafb12f6bc..dd6d4d0e45fd 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -44,7 +44,7 @@ enum virtio_balloon_free_page_report_status {
 
 typedef struct VirtIOBalloon {
     VirtIODevice parent_obj;
-    VirtQueue *ivq, *dvq, *svq, *free_page_vq;
+    VirtQueue *ivq, *dvq, *svq, *hvq, *free_page_vq;
     uint32_t free_page_report_status;
     uint32_t num_pages;
     uint32_t actual;
diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h
index 9375ca2a70de..f9e3e8256261 100644
--- a/include/standard-headers/linux/virtio_balloon.h
+++ b/include/standard-headers/linux/virtio_balloon.h
@@ -36,6 +36,7 @@
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
 #define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
 #define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */
+#define VIRTIO_BALLOON_F_HINTING	5 /* Page hinting virtqueue */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12


  parent reply index

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-19 22:32 [PATCH v1 0/6] mm / virtio: Provide support for paravirtual waste page treatment Alexander Duyck
2019-06-19 22:33 ` [PATCH v1 1/6] mm: Adjust shuffle code to allow for future coalescing Alexander Duyck
2019-06-25  7:55   ` David Hildenbrand
2019-06-28 19:49     ` Alexander Duyck
2019-06-25 18:25   ` Dave Hansen
2019-06-25 18:26   ` Dave Hansen
2019-06-19 22:33 ` [PATCH v1 2/6] mm: Move set/get_pcppage_migratetype to mmzone.h Alexander Duyck
2019-06-25 18:28   ` Dave Hansen
2019-06-28 19:55     ` Alexander Duyck
2019-06-19 22:33 ` [PATCH v1 3/6] mm: Use zone and order instead of free area in free_list manipulators Alexander Duyck
2019-06-25 18:36   ` Dave Hansen
2019-06-19 22:33 ` [PATCH v1 4/6] mm: Introduce "aerated" pages Alexander Duyck
2019-06-25 19:45   ` Dave Hansen
2019-07-08 17:32     ` Alexander Duyck
2019-06-19 22:33 ` [PATCH v1 5/6] mm: Add logic for separating "aerated" pages from "raw" pages Alexander Duyck
2019-06-25 20:24   ` Dave Hansen
2019-07-08 19:02     ` Alexander Duyck
2019-07-08 19:36       ` Dave Hansen
2019-07-08 22:02         ` Alexander Duyck
2019-06-19 22:33 ` [PATCH v1 6/6] virtio-balloon: Add support for aerating memory via hinting Alexander Duyck
2019-07-16  9:55   ` Michael S. Tsirkin
2019-07-16 14:00     ` Dave Hansen
2019-07-16 14:12       ` David Hildenbrand
2019-07-16 14:17         ` David Hildenbrand
2019-07-16 15:04           ` Michael S. Tsirkin
2019-07-16 14:41         ` Dave Hansen
2019-07-16 15:01           ` Wang, Wei W
2019-07-16 16:12             ` Michael S. Tsirkin
2019-07-16 15:02           ` David Hildenbrand
2019-07-16 15:37     ` Alexander Duyck
2019-07-16 16:07       ` Michael S. Tsirkin
2019-07-16 16:54         ` Alexander Duyck
2019-07-16 17:41           ` Michael S. Tsirkin
2019-07-16 21:06             ` Alexander Duyck
2019-07-17 10:28               ` Michael S. Tsirkin
2019-07-17 16:43                 ` Alexander Duyck
2019-07-18  5:13                   ` Michael S. Tsirkin
2019-07-18 15:34                     ` Alexander Duyck
2019-07-18 16:03                       ` Nitesh Narayan Lal
2019-07-18 20:27                         ` Michael S. Tsirkin
2019-07-18 16:07                       ` Michael S. Tsirkin
2019-07-18 20:29                         ` Alexander Duyck
2019-07-18 20:37                           ` Michael S. Tsirkin
2019-07-18 20:54                             ` Alexander Duyck
2019-07-18 20:24                       ` Michael S. Tsirkin
2019-07-18 20:34                         ` Alexander Duyck
2019-07-18 20:48                           ` Michael S. Tsirkin
2019-07-18 21:09                             ` Alexander Duyck
2019-06-19 22:37 ` Alexander Duyck [this message]
2019-06-25  7:42 ` [PATCH v1 0/6] mm / virtio: Provide support for paravirtual waste page treatment David Hildenbrand
2019-06-25 14:10   ` Dave Hansen
2019-06-25 17:00     ` Alexander Duyck
2019-06-25 18:12       ` David Hildenbrand
2019-06-25 18:22       ` Dave Hansen
2019-07-15  9:41         ` David Hildenbrand
2019-07-15 14:57           ` Alexander Duyck
2019-06-25 16:09   ` Alexander Duyck
2019-06-26  9:01   ` Christophe de Dinechin
2019-06-26  9:12     ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190619223535.1403.32612.stgit@localhost.localdomain \
    --to=alexander.duyck@gmail.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.h.duyck@linux.intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=david@redhat.com \
    --cc=konrad.wilk@oracle.com \
    --cc=kvm@vger.kernel.org \
    --cc=lcapitulino@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mst@redhat.com \
    --cc=nitesh@redhat.com \
    --cc=pagupta@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=riel@surriel.com \
    --cc=wei.w.wang@intel.com \
    --cc=yang.zhang.wz@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

KVM Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/kvm/0 kvm/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 kvm kvm/ https://lore.kernel.org/kvm \
		kvm@vger.kernel.org
	public-inbox-index kvm

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.kvm


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git