kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] kvm: x86: mmu: Make NX huge page recovery period configurable
@ 2021-10-19  1:39 Junaid Shahid
  2021-10-19 23:48 ` Ben Gardon
  0 siblings, 1 reply; 4+ messages in thread
From: Junaid Shahid @ 2021-10-19  1:39 UTC (permalink / raw)
  To: kvm, pbonzini; +Cc: jmattson, seanjc, bgardon, dmatlack

v2:
 - A configured period of 0 now automatically derives the actual period
   from the ratio.

Currently, the NX huge page recovery thread wakes up every minute and
zaps 1/nx_huge_pages_recovery_ratio of the total number of split NX
huge pages at a time. This is intended to ensure that only a
relatively small number of pages get zapped at a time. But for very
large VMs (or more specifically, VMs with a large number of
executable pages), a period of 1 minute could still result in this
number being too high (unless the ratio is changed significantly,
but that can result in split pages lingering on for too long).

This change makes the period configurable instead of fixing it at
1 minute. Users of large VMs can then adjust the period and/or the
ratio to reduce the number of pages zapped at one time while still
maintaining the same overall duration for cycling through the
entire list. By default, KVM derives a period from the ratio such
that the entire list can be zapped in 1 hour.

Signed-off-by: Junaid Shahid <junaids@google.com>
---
 .../admin-guide/kernel-parameters.txt         | 10 +++++-
 arch/x86/kvm/mmu/mmu.c                        | 33 +++++++++++++------
 2 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 91ba391f9b32..dd47336a525f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2353,7 +2353,15 @@
 			[KVM] Controls how many 4KiB pages are periodically zapped
 			back to huge pages.  0 disables the recovery, otherwise if
 			the value is N KVM will zap 1/Nth of the 4KiB pages every
-			minute.  The default is 60.
+			period (see below).  The default is 60.
+
+	kvm.nx_huge_pages_recovery_period_ms=
+			[KVM] Controls the time period at which KVM zaps 4KiB pages
+			back to huge pages. If the value is a non-zero N, KVM will
+			zap a portion (see ratio above) of the pages every N msecs.
+			If the value is 0 (the default), KVM will pick a period based
+			on the ratio such that the entire set of pages can be zapped
+			in approximately 1 hour on average.
 
 	kvm-amd.nested=	[KVM,AMD] Allow nested virtualization in KVM/SVM.
 			Default is 1 (enabled)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 24a9f4c3f5e7..273b43272c4d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -58,6 +58,7 @@
 extern bool itlb_multihit_kvm_mitigation;
 
 int __read_mostly nx_huge_pages = -1;
+static uint __read_mostly nx_huge_pages_recovery_period_ms;
 #ifdef CONFIG_PREEMPT_RT
 /* Recovery can cause latency spikes, disable it for PREEMPT_RT.  */
 static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
@@ -66,23 +67,26 @@ static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
 #endif
 
 static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
-static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);
+static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp);
 
 static const struct kernel_param_ops nx_huge_pages_ops = {
 	.set = set_nx_huge_pages,
 	.get = param_get_bool,
 };
 
-static const struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
-	.set = set_nx_huge_pages_recovery_ratio,
+static const struct kernel_param_ops nx_huge_pages_recovery_param_ops = {
+	.set = set_nx_huge_pages_recovery_param,
 	.get = param_get_uint,
 };
 
 module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
 __MODULE_PARM_TYPE(nx_huge_pages, "bool");
-module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
+module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_param_ops,
 		&nx_huge_pages_recovery_ratio, 0644);
 __MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
+module_param_cb(nx_huge_pages_recovery_period_ms, &nx_huge_pages_recovery_param_ops,
+		&nx_huge_pages_recovery_period_ms, 0644);
+__MODULE_PARM_TYPE(nx_huge_pages_recovery_period_ms, "uint");
 
 static bool __read_mostly force_flush_and_sync_on_reuse;
 module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644);
@@ -6088,18 +6092,21 @@ void kvm_mmu_module_exit(void)
 	mmu_audit_disable();
 }
 
-static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
+static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp)
 {
-	unsigned int old_val;
+	bool was_recovery_enabled, is_recovery_enabled;
 	int err;
 
-	old_val = nx_huge_pages_recovery_ratio;
+	was_recovery_enabled = nx_huge_pages_recovery_ratio;
+
 	err = param_set_uint(val, kp);
 	if (err)
 		return err;
 
+	is_recovery_enabled = nx_huge_pages_recovery_ratio;
+
 	if (READ_ONCE(nx_huge_pages) &&
-	    !old_val && nx_huge_pages_recovery_ratio) {
+	    !was_recovery_enabled && is_recovery_enabled) {
 		struct kvm *kvm;
 
 		mutex_lock(&kvm_lock);
@@ -6162,8 +6169,14 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 
 static long get_nx_lpage_recovery_timeout(u64 start_time)
 {
-	return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
-		? start_time + 60 * HZ - get_jiffies_64()
+	uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+	uint period = READ_ONCE(nx_huge_pages_recovery_period_ms);
+
+	if (!period && ratio)
+		period = 60 * 60 * 1000 / ratio;
+
+	return READ_ONCE(nx_huge_pages) && ratio
+		? start_time + msecs_to_jiffies(period) - get_jiffies_64()
 		: MAX_SCHEDULE_TIMEOUT;
 }
 
-- 
2.33.0.1079.g6e70778dc9-goog


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] kvm: x86: mmu: Make NX huge page recovery period configurable
  2021-10-19  1:39 [PATCH v2] kvm: x86: mmu: Make NX huge page recovery period configurable Junaid Shahid
@ 2021-10-19 23:48 ` Ben Gardon
  2021-10-20  0:07   ` Junaid Shahid
  0 siblings, 1 reply; 4+ messages in thread
From: Ben Gardon @ 2021-10-19 23:48 UTC (permalink / raw)
  To: Junaid Shahid
  Cc: kvm, Paolo Bonzini, Jim Mattson, Sean Christopherson, David Matlack

On Mon, Oct 18, 2021 at 6:40 PM Junaid Shahid <junaids@google.com> wrote:
>
> v2:
>  - A configured period of 0 now automatically derives the actual period
>    from the ratio.
>
> Currently, the NX huge page recovery thread wakes up every minute and
> zaps 1/nx_huge_pages_recovery_ratio of the total number of split NX
> huge pages at a time. This is intended to ensure that only a
> relatively small number of pages get zapped at a time. But for very
> large VMs (or more specifically, VMs with a large number of
> executable pages), a period of 1 minute could still result in this
> number being too high (unless the ratio is changed significantly,
> but that can result in split pages lingering on for too long).
>
> This change makes the period configurable instead of fixing it at
> 1 minute. Users of large VMs can then adjust the period and/or the
> ratio to reduce the number of pages zapped at one time while still
> maintaining the same overall duration for cycling through the
> entire list. By default, KVM derives a period from the ratio such
> that the entire list can be zapped in 1 hour.
>
> Signed-off-by: Junaid Shahid <junaids@google.com>
> ---
>  .../admin-guide/kernel-parameters.txt         | 10 +++++-
>  arch/x86/kvm/mmu/mmu.c                        | 33 +++++++++++++------
>  2 files changed, 32 insertions(+), 11 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 91ba391f9b32..dd47336a525f 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2353,7 +2353,15 @@
>                         [KVM] Controls how many 4KiB pages are periodically zapped
>                         back to huge pages.  0 disables the recovery, otherwise if
>                         the value is N KVM will zap 1/Nth of the 4KiB pages every
> -                       minute.  The default is 60.
> +                       period (see below).  The default is 60.
> +
> +       kvm.nx_huge_pages_recovery_period_ms=
> +                       [KVM] Controls the time period at which KVM zaps 4KiB pages
> +                       back to huge pages. If the value is a non-zero N, KVM will
> +                       zap a portion (see ratio above) of the pages every N msecs.
> +                       If the value is 0 (the default), KVM will pick a period based
> +                       on the ratio such that the entire set of pages can be zapped
> +                       in approximately 1 hour on average.
>
>         kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
>                         Default is 1 (enabled)
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 24a9f4c3f5e7..273b43272c4d 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -58,6 +58,7 @@
>  extern bool itlb_multihit_kvm_mitigation;
>
>  int __read_mostly nx_huge_pages = -1;
> +static uint __read_mostly nx_huge_pages_recovery_period_ms;
>  #ifdef CONFIG_PREEMPT_RT
>  /* Recovery can cause latency spikes, disable it for PREEMPT_RT.  */
>  static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
> @@ -66,23 +67,26 @@ static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
>  #endif
>
>  static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
> -static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);
> +static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp);
>
>  static const struct kernel_param_ops nx_huge_pages_ops = {
>         .set = set_nx_huge_pages,
>         .get = param_get_bool,
>  };
>
> -static const struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
> -       .set = set_nx_huge_pages_recovery_ratio,
> +static const struct kernel_param_ops nx_huge_pages_recovery_param_ops = {
> +       .set = set_nx_huge_pages_recovery_param,
>         .get = param_get_uint,
>  };
>
>  module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
>  __MODULE_PARM_TYPE(nx_huge_pages, "bool");
> -module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
> +module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_param_ops,
>                 &nx_huge_pages_recovery_ratio, 0644);
>  __MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
> +module_param_cb(nx_huge_pages_recovery_period_ms, &nx_huge_pages_recovery_param_ops,
> +               &nx_huge_pages_recovery_period_ms, 0644);
> +__MODULE_PARM_TYPE(nx_huge_pages_recovery_period_ms, "uint");
>
>  static bool __read_mostly force_flush_and_sync_on_reuse;
>  module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644);
> @@ -6088,18 +6092,21 @@ void kvm_mmu_module_exit(void)
>         mmu_audit_disable();
>  }
>
> -static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
> +static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp)
>  {
> -       unsigned int old_val;
> +       bool was_recovery_enabled, is_recovery_enabled;
>         int err;
>
> -       old_val = nx_huge_pages_recovery_ratio;
> +       was_recovery_enabled = nx_huge_pages_recovery_ratio;
> +
>         err = param_set_uint(val, kp);
>         if (err)
>                 return err;
>
> +       is_recovery_enabled = nx_huge_pages_recovery_ratio;
> +
>         if (READ_ONCE(nx_huge_pages) &&
> -           !old_val && nx_huge_pages_recovery_ratio) {
> +           !was_recovery_enabled && is_recovery_enabled) {
>                 struct kvm *kvm;
>
>                 mutex_lock(&kvm_lock);

I might be missing something, but it seems like setting
nx_huge_pages_recovery_period_ms through this function won't do
anything special. Is there any reason to use this function for it
versus param_set_uint?

> @@ -6162,8 +6169,14 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
>
>  static long get_nx_lpage_recovery_timeout(u64 start_time)
>  {
> -       return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
> -               ? start_time + 60 * HZ - get_jiffies_64()
> +       uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
> +       uint period = READ_ONCE(nx_huge_pages_recovery_period_ms);
> +
> +       if (!period && ratio)
> +               period = 60 * 60 * 1000 / ratio;
> +
> +       return READ_ONCE(nx_huge_pages) && ratio
> +               ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
>                 : MAX_SCHEDULE_TIMEOUT;
>  }
>
> --
> 2.33.0.1079.g6e70778dc9-goog
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] kvm: x86: mmu: Make NX huge page recovery period configurable
  2021-10-19 23:48 ` Ben Gardon
@ 2021-10-20  0:07   ` Junaid Shahid
  2021-10-20  0:51     ` Junaid Shahid
  0 siblings, 1 reply; 4+ messages in thread
From: Junaid Shahid @ 2021-10-20  0:07 UTC (permalink / raw)
  To: Ben Gardon
  Cc: kvm, Paolo Bonzini, Jim Mattson, Sean Christopherson, David Matlack

On 10/19/21 4:48 PM, Ben Gardon wrote:
> On Mon, Oct 18, 2021 at 6:40 PM Junaid Shahid <junaids@google.com> wrote:
>>
>> -static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
>> +static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp)
>>   {
>> -       unsigned int old_val;
>> +       bool was_recovery_enabled, is_recovery_enabled;
>>          int err;
>>
>> -       old_val = nx_huge_pages_recovery_ratio;
>> +       was_recovery_enabled = nx_huge_pages_recovery_ratio;
>> +
>>          err = param_set_uint(val, kp);
>>          if (err)
>>                  return err;
>>
>> +       is_recovery_enabled = nx_huge_pages_recovery_ratio;
>> +
>>          if (READ_ONCE(nx_huge_pages) &&
>> -           !old_val && nx_huge_pages_recovery_ratio) {
>> +           !was_recovery_enabled && is_recovery_enabled) {
>>                  struct kvm *kvm;
>>
>>                  mutex_lock(&kvm_lock);
> 
> I might be missing something, but it seems like setting
> nx_huge_pages_recovery_period_ms through this function won't do
> anything special. Is there any reason to use this function for it
> versus param_set_uint?
> 

Yes, you are right. The original patch was using a 0 period to mean that recovery is disabled, but v2 no longer does that, so we indeed don't need to handle the period parameter through this function.

Thanks,
Junaid

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] kvm: x86: mmu: Make NX huge page recovery period configurable
  2021-10-20  0:07   ` Junaid Shahid
@ 2021-10-20  0:51     ` Junaid Shahid
  0 siblings, 0 replies; 4+ messages in thread
From: Junaid Shahid @ 2021-10-20  0:51 UTC (permalink / raw)
  To: Ben Gardon
  Cc: kvm, Paolo Bonzini, Jim Mattson, Sean Christopherson, David Matlack

On 10/19/21 5:07 PM, Junaid Shahid wrote:
> On 10/19/21 4:48 PM, Ben Gardon wrote:
>> On Mon, Oct 18, 2021 at 6:40 PM Junaid Shahid <junaids@google.com> wrote:
>>>
>>> -static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
>>> +static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp)
>>>   {
>>> -       unsigned int old_val;
>>> +       bool was_recovery_enabled, is_recovery_enabled;
>>>          int err;
>>>
>>> -       old_val = nx_huge_pages_recovery_ratio;
>>> +       was_recovery_enabled = nx_huge_pages_recovery_ratio;
>>> +
>>>          err = param_set_uint(val, kp);
>>>          if (err)
>>>                  return err;
>>>
>>> +       is_recovery_enabled = nx_huge_pages_recovery_ratio;
>>> +
>>>          if (READ_ONCE(nx_huge_pages) &&
>>> -           !old_val && nx_huge_pages_recovery_ratio) {
>>> +           !was_recovery_enabled && is_recovery_enabled) {
>>>                  struct kvm *kvm;
>>>
>>>                  mutex_lock(&kvm_lock);
>>
>> I might be missing something, but it seems like setting
>> nx_huge_pages_recovery_period_ms through this function won't do
>> anything special. Is there any reason to use this function for it
>> versus param_set_uint?
>>
> 
> Yes, you are right. The original patch was using a 0 period to mean that recovery is disabled, but v2 no longer does that, so we indeed don't need to handle the period parameter through this function.
> 

Actually, I think that it may be a good idea to still use a slightly modified form of that function. If the period was originally set to a large value and is now set to a small value, then ideally we should wake up the thread rather than having it wait for the original period to expire.


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-10-20  0:51 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-19  1:39 [PATCH v2] kvm: x86: mmu: Make NX huge page recovery period configurable Junaid Shahid
2021-10-19 23:48 ` Ben Gardon
2021-10-20  0:07   ` Junaid Shahid
2021-10-20  0:51     ` Junaid Shahid

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).