LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Alexander Duyck <alexander.duyck@gmail.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: rkrcmar@redhat.com, alexander.h.duyck@linux.intel.com,
	x86@kernel.org, mingo@redhat.com, bp@alien8.de, hpa@zytor.com,
	pbonzini@redhat.com, tglx@linutronix.de,
	akpm@linux-foundation.org
Subject: [RFC PATCH 2/4] kvm: Add host side support for free memory hints
Date: Mon, 04 Feb 2019 10:15:46 -0800
Message-ID: <20190204181546.12095.81356.stgit@localhost.localdomain> (raw)
In-Reply-To: <20190204181118.12095.38300.stgit@localhost.localdomain>

From: Alexander Duyck <alexander.h.duyck@linux.intel.com>

Add the host side of the KVM memory hinting support. With this we expose a
feature bit indicating that the host will pass the messages along to the
new madvise function.

This functionality is mutually exclusive with device assignment. If a
device is assigned we will disable the functionality as it could lead to a
potential memory corruption if a device writes to a page after KVM has
flagged it as not being used.

The logic as it is currently defined limits the hint to only supporting a
hugepage or larger notifications. This is meant to help prevent us from
potentially breaking up huge pages by hinting that only a portion of the
page is not needed.

Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
---
 Documentation/virtual/kvm/cpuid.txt      |    4 +++
 Documentation/virtual/kvm/hypercalls.txt |   14 ++++++++++++
 arch/x86/include/uapi/asm/kvm_para.h     |    3 +++
 arch/x86/kvm/cpuid.c                     |    6 ++++-
 arch/x86/kvm/x86.c                       |   35 ++++++++++++++++++++++++++++++
 include/uapi/linux/kvm_para.h            |    1 +
 6 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 97ca1940a0dc..fe3395a58b7e 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -66,6 +66,10 @@ KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
                                    ||       || before using paravirtualized
                                    ||       || send IPIs.
 ------------------------------------------------------------------------------
+KVM_FEATURE_PV_UNUSED_PAGE_HINT    ||    12 || guest checks this feature bit
+                                   ||       || before using paravirtualized
+                                   ||       || unused page hints.
+------------------------------------------------------------------------------
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index da24c138c8d1..b374678ac1f9 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -141,3 +141,17 @@ a0 corresponds to the APIC ID in the third argument (a2), bit 1
 corresponds to the APIC ID a2+1, and so on.
 
 Returns the number of CPUs to which the IPIs were delivered successfully.
+
+7. KVM_HC_UNUSED_PAGE_HINT
+------------------------
+Architecture: x86
+Status: active
+Purpose: Send unused page hint to host
+
+a0: physical address of region unused, page aligned
+a1: size of unused region, page aligned
+
+The hypercall lets a guest send notifications to the host that it will no
+longer be using a given page in memory. Multiple pages can be hinted at by
+using the size field to hint that a higher order page is available by
+specifying the higher order page size.
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 19980ec1a316..f066c23060df 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -29,6 +29,7 @@
 #define KVM_FEATURE_PV_TLB_FLUSH	9
 #define KVM_FEATURE_ASYNC_PF_VMEXIT	10
 #define KVM_FEATURE_PV_SEND_IPI	11
+#define KVM_FEATURE_PV_UNUSED_PAGE_HINT	12
 
 #define KVM_HINTS_REALTIME      0
 
@@ -119,4 +120,6 @@ struct kvm_vcpu_pv_apf_data {
 #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
 #define KVM_PV_EOI_DISABLED 0x0
 
+#define KVM_PV_UNUSED_PAGE_HINT_MIN_ORDER	HUGETLB_PAGE_ORDER
+
 #endif /* _UAPI_ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bbffa6c54697..b82bcbfbc420 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -136,6 +136,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
 		(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
 		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+	if (kvm_arch_has_assigned_device(vcpu->kvm) && best &&
+		(best->eax & KVM_FEATURE_PV_UNUSED_PAGE_HINT))
+		best->eax &= ~(1 << KVM_FEATURE_PV_UNUSED_PAGE_HINT);
 
 	/* Update physical-address width */
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
@@ -637,7 +640,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			     (1 << KVM_FEATURE_PV_UNHALT) |
 			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
 			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
-			     (1 << KVM_FEATURE_PV_SEND_IPI);
+			     (1 << KVM_FEATURE_PV_SEND_IPI) |
+			     (1 << KVM_FEATURE_PV_UNUSED_PAGE_HINT);
 
 		if (sched_info_on())
 			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3d27206f6c01..3ec75ab849e2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -55,6 +55,7 @@
 #include <linux/irqbypass.h>
 #include <linux/sched/stat.h>
 #include <linux/mem_encrypt.h>
+#include <linux/mm.h>
 
 #include <trace/events/kvm.h>
 
@@ -7052,6 +7053,37 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
 }
 
+static int kvm_pv_unused_page_hint_op(struct kvm *kvm, gpa_t gpa, size_t len)
+{
+	unsigned long start;
+
+	/*
+	 * Guarantee the following:
+	 *	len meets minimum size
+	 *	len is a power of 2
+	 *	gpa is aligned to len
+	 */
+	if (len < (PAGE_SIZE << KVM_PV_UNUSED_PAGE_HINT_MIN_ORDER))
+		return -KVM_EINVAL;
+	if (!is_power_of_2(len) || !IS_ALIGNED(gpa, len))
+		return -KVM_EINVAL;
+
+	/*
+	 * If a device is assigned we cannot use use madvise as memory
+	 * is shared with the device and could lead to memory corruption
+	 * if the device writes to it after free.
+	 */
+	if (kvm_arch_has_assigned_device(kvm))
+		return -KVM_EOPNOTSUPP;
+
+	start = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+	if (kvm_is_error_hva(start + len))
+		return -KVM_EFAULT;
+
+	return do_madvise_dontneed(start, len);
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
@@ -7098,6 +7130,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	case KVM_HC_SEND_IPI:
 		ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
 		break;
+	case KVM_HC_UNUSED_PAGE_HINT:
+		ret = kvm_pv_unused_page_hint_op(vcpu->kvm, a0, a1);
+		break;
 	default:
 		ret = -KVM_ENOSYS;
 		break;
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 6c0ce49931e5..75643b862a4e 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -28,6 +28,7 @@
 #define KVM_HC_MIPS_CONSOLE_OUTPUT	8
 #define KVM_HC_CLOCK_PAIRING		9
 #define KVM_HC_SEND_IPI		10
+#define KVM_HC_UNUSED_PAGE_HINT		11
 
 /*
  * hypercalls use architecture specific


  parent reply index

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-04 18:15 [RFC PATCH 0/4] kvm: Report unused guest pages to host Alexander Duyck
2019-02-04 18:15 ` [RFC PATCH 1/4] madvise: Expose ability to set dontneed from kernel Alexander Duyck
2019-02-04 18:15 ` Alexander Duyck [this message]
2019-02-10  0:44   ` [RFC PATCH 2/4] kvm: Add host side support for free memory hints Michael S. Tsirkin
2019-02-11 17:34     ` Alexander Duyck
2019-02-11 17:36       ` Michael S. Tsirkin
2019-02-11 17:41     ` Dave Hansen
2019-02-11 17:48       ` Michael S. Tsirkin
2019-02-11 18:30         ` Alexander Duyck
2019-02-11 19:24           ` Michael S. Tsirkin
2019-02-04 18:15 ` [RFC PATCH 3/4] kvm: Add guest " Alexander Duyck
2019-02-04 19:44   ` Dave Hansen
2019-02-04 20:42     ` Alexander Duyck
2019-02-04 23:00   ` Nadav Amit
2019-02-04 23:37     ` Alexander Duyck
2019-02-05  0:03       ` Nadav Amit
2019-02-05  0:16         ` Alexander Duyck
2019-02-05  1:46           ` Nadav Amit
2019-02-05 18:09             ` Alexander Duyck
2019-02-07 18:21   ` Luiz Capitulino
2019-02-07 18:44     ` Alexander Duyck
2019-02-07 20:02       ` Luiz Capitulino
2019-02-08 21:05       ` Nitesh Narayan Lal
2019-02-08 21:31         ` Alexander Duyck
2019-02-10  0:49   ` Michael S. Tsirkin
2019-02-11 16:31     ` Alexander Duyck
2019-02-11 17:36       ` Michael S. Tsirkin
2019-02-11 18:10         ` Alexander Duyck
2019-02-11 19:54           ` Michael S. Tsirkin
2019-02-11 21:00             ` Alexander Duyck
2019-02-11 22:52               ` Michael S. Tsirkin
     [not found]                 ` <94462313ccd927d25675f69de459456cf066c1a2.camel@linux.intel.com>
2019-02-12  0:34                   ` Michael S. Tsirkin
2019-02-11 17:48     ` Dave Hansen
2019-02-11 17:58       ` Michael S. Tsirkin
2019-02-11 18:19         ` Dave Hansen
2019-02-11 19:56           ` Michael S. Tsirkin
2019-02-04 18:15 ` [RFC PATCH 4/4] mm: Add merge page notifier Alexander Duyck
2019-02-04 19:40   ` Dave Hansen
2019-02-04 19:51     ` Alexander Duyck
2019-02-10  0:57   ` Michael S. Tsirkin
2019-02-11 13:30     ` Nitesh Narayan Lal
2019-02-11 14:17       ` Michael S. Tsirkin
2019-02-11 16:24         ` Nitesh Narayan Lal
2019-02-11 17:41           ` Michael S. Tsirkin
2019-02-11 18:09             ` Nitesh Narayan Lal
2019-02-11  6:40   ` Aaron Lu
2019-02-11 15:58     ` Alexander Duyck
2019-02-12  2:09       ` Aaron Lu
2019-02-12 17:20         ` Alexander Duyck
2019-02-04 18:19 ` [RFC PATCH QEMU] i386/kvm: Enable paravirtual unused page hint mechanism Alexander Duyck
2019-02-05 17:25 ` [RFC PATCH 0/4] kvm: Report unused guest pages to host Nitesh Narayan Lal
2019-02-05 18:43   ` Alexander Duyck
2019-02-07 14:48 ` Nitesh Narayan Lal
2019-02-07 16:56   ` Alexander Duyck
2019-02-10  0:51 ` Michael S. Tsirkin

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190204181546.12095.81356.stgit@localhost.localdomain \
    --to=alexander.duyck@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.h.duyck@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=hpa@zytor.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=rkrcmar@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org linux-kernel@archiver.kernel.org
	public-inbox-index lkml


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox