All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] KVM: x86/mmu: Add RET_PF_CONTINUE to eliminate bool+int* "returns"
@ 2022-04-15  0:51 Sean Christopherson
  2022-04-15 16:56 ` David Matlack
  2022-04-18 13:36 ` Chao Peng
  0 siblings, 2 replies; 4+ messages in thread
From: Sean Christopherson @ 2022-04-15  0:51 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, linux-kernel, David Matlack, Chao Peng

Add RET_PF_CONTINUE and use it in handle_abnormal_pfn() and
kvm_faultin_pfn() to signal that the page fault handler should continue
doing its thing.  Aside from being gross and inefficient, using a boolean
return to signal continue vs. stop makes it extremely difficult to add
more helpers and/or move existing code to a helper.

E.g. hypothetically, if nested MMUs were to gain a separate page fault
handler in the future, everything up to the "is self-modifying PTE" check
can be shared by all shadow MMUs, but communicating up the stack whether
to continue on or stop becomes a nightmare.

More concretely, proposed support for private guest memory ran into a
similar issue, where it'll be forced to forego a helper in order to yield
sane code: https://lore.kernel.org/all/YkJbxiL%2FAz7olWlq@google.com.

No functional change intended.

Cc: David Matlack <dmatlack@google.com>
Cc: Chao Peng <chao.p.peng@linux.intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/mmu/mmu.c          | 51 ++++++++++++++-------------------
 arch/x86/kvm/mmu/mmu_internal.h |  9 +++++-
 arch/x86/kvm/mmu/paging_tmpl.h  |  6 ++--
 3 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 69a30d6d1e2b..cb2982c6b513 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2972,14 +2972,12 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
 	return -EFAULT;
 }
 
-static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
-				unsigned int access, int *ret_val)
+static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
+			       unsigned int access)
 {
 	/* The pfn is invalid, report the error! */
-	if (unlikely(is_error_pfn(fault->pfn))) {
-		*ret_val = kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);
-		return true;
-	}
+	if (unlikely(is_error_pfn(fault->pfn)))
+		return kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);
 
 	if (unlikely(!fault->slot)) {
 		gva_t gva = fault->is_tdp ? 0 : fault->addr;
@@ -2991,13 +2989,11 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
 		 * touching the shadow page tables as attempting to install an
 		 * MMIO SPTE will just be an expensive nop.
 		 */
-		if (unlikely(!shadow_mmio_value)) {
-			*ret_val = RET_PF_EMULATE;
-			return true;
-		}
+		if (unlikely(!shadow_mmio_value))
+			return RET_PF_EMULATE;
 	}
 
-	return false;
+	return RET_PF_CONTINUE;
 }
 
 static bool page_fault_can_be_fast(struct kvm_page_fault *fault)
@@ -3888,7 +3884,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 				  kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
 }
 
-static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int *r)
+static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
 	struct kvm_memory_slot *slot = fault->slot;
 	bool async;
@@ -3899,7 +3895,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 	 * be zapped before KVM inserts a new MMIO SPTE for the gfn.
 	 */
 	if (slot && (slot->flags & KVM_MEMSLOT_INVALID))
-		goto out_retry;
+		return RET_PF_RETRY;
 
 	if (!kvm_is_visible_memslot(slot)) {
 		/* Don't expose private memslots to L2. */
@@ -3907,7 +3903,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 			fault->slot = NULL;
 			fault->pfn = KVM_PFN_NOSLOT;
 			fault->map_writable = false;
-			return false;
+			return RET_PF_CONTINUE;
 		}
 		/*
 		 * If the APIC access page exists but is disabled, go directly
@@ -3916,10 +3912,8 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 		 * when the AVIC is re-enabled.
 		 */
 		if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT &&
-		    !kvm_apicv_activated(vcpu->kvm)) {
-			*r = RET_PF_EMULATE;
-			return true;
-		}
+		    !kvm_apicv_activated(vcpu->kvm))
+			return RET_PF_EMULATE;
 	}
 
 	async = false;
@@ -3927,26 +3921,23 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 					  fault->write, &fault->map_writable,
 					  &fault->hva);
 	if (!async)
-		return false; /* *pfn has correct page already */
+		return RET_PF_CONTINUE; /* *pfn has correct page already */
 
 	if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) {
 		trace_kvm_try_async_get_page(fault->addr, fault->gfn);
 		if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) {
 			trace_kvm_async_pf_doublefault(fault->addr, fault->gfn);
 			kvm_make_request(KVM_REQ_APF_HALT, vcpu);
-			goto out_retry;
-		} else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn))
-			goto out_retry;
+			return RET_PF_RETRY;
+		} else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
+			return RET_PF_RETRY;
+		}
 	}
 
 	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, NULL,
 					  fault->write, &fault->map_writable,
 					  &fault->hva);
-	return false;
-
-out_retry:
-	*r = RET_PF_RETRY;
-	return true;
+	return RET_PF_CONTINUE;
 }
 
 /*
@@ -4001,10 +3992,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	if (kvm_faultin_pfn(vcpu, fault, &r))
+	r = kvm_faultin_pfn(vcpu, fault);
+	if (r != RET_PF_CONTINUE)
 		return r;
 
-	if (handle_abnormal_pfn(vcpu, fault, ACC_ALL, &r))
+	r = handle_abnormal_pfn(vcpu, fault, ACC_ALL);
+	if (r != RET_PF_CONTINUE)
 		return r;
 
 	r = RET_PF_RETRY;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 1bff453f7cbe..c0e502b17ef7 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -143,6 +143,7 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
 /*
  * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
  *
+ * RET_PF_CONTINUE: So far, so good, keep handling the page fault.
  * RET_PF_RETRY: let CPU fault again on the address.
  * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
  * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
@@ -151,9 +152,15 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
  *
  * Any names added to this enum should be exported to userspace for use in
  * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h
+ *
+ * Note, all values must be greater than or equal to zero so as not to encroach
+ * on -errno return values.  Somewhat arbitrarily use '0' for CONTINUE, which
+ * will allow for efficient machine code when checking for CONTINUE, e.g.
+ * "TEST %rax, %rax, JNZ", as all "stop!" values are non-zero.
  */
 enum {
-	RET_PF_RETRY = 0,
+	RET_PF_CONTINUE = 0,
+	RET_PF_RETRY,
 	RET_PF_EMULATE,
 	RET_PF_INVALID,
 	RET_PF_FIXED,
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 66f1acf153c4..7038273d04ab 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -838,10 +838,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	if (kvm_faultin_pfn(vcpu, fault, &r))
+	r = kvm_faultin_pfn(vcpu, fault);
+	if (r != RET_PF_CONTINUE)
 		return r;
 
-	if (handle_abnormal_pfn(vcpu, fault, walker.pte_access, &r))
+	r = handle_abnormal_pfn(vcpu, fault, walker.pte_access);
+	if (r != RET_PF_CONTINUE)
 		return r;
 
 	/*

base-commit: 150866cd0ec871c765181d145aa0912628289c8a
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] KVM: x86/mmu: Add RET_PF_CONTINUE to eliminate bool+int* "returns"
  2022-04-15  0:51 [PATCH] KVM: x86/mmu: Add RET_PF_CONTINUE to eliminate bool+int* "returns" Sean Christopherson
@ 2022-04-15 16:56 ` David Matlack
  2022-04-15 19:28   ` Sean Christopherson
  2022-04-18 13:36 ` Chao Peng
  1 sibling, 1 reply; 4+ messages in thread
From: David Matlack @ 2022-04-15 16:56 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Paolo Bonzini, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm list, LKML, Chao Peng

On Thu, Apr 14, 2022 at 5:51 PM Sean Christopherson <seanjc@google.com> wrote:
>
> Add RET_PF_CONTINUE and use it in handle_abnormal_pfn() and
> kvm_faultin_pfn() to signal that the page fault handler should continue
> doing its thing.  Aside from being gross and inefficient, using a boolean
> return to signal continue vs. stop makes it extremely difficult to add
> more helpers and/or move existing code to a helper.
>
> E.g. hypothetically, if nested MMUs were to gain a separate page fault
> handler in the future, everything up to the "is self-modifying PTE" check
> can be shared by all shadow MMUs, but communicating up the stack whether
> to continue on or stop becomes a nightmare.
>
> More concretely, proposed support for private guest memory ran into a
> similar issue, where it'll be forced to forego a helper in order to yield
> sane code: https://lore.kernel.org/all/YkJbxiL%2FAz7olWlq@google.com.
>
> No functional change intended.
>
> Cc: David Matlack <dmatlack@google.com>
> Cc: Chao Peng <chao.p.peng@linux.intel.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/mmu/mmu.c          | 51 ++++++++++++++-------------------
>  arch/x86/kvm/mmu/mmu_internal.h |  9 +++++-
>  arch/x86/kvm/mmu/paging_tmpl.h  |  6 ++--
>  3 files changed, 34 insertions(+), 32 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 69a30d6d1e2b..cb2982c6b513 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2972,14 +2972,12 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
>         return -EFAULT;
>  }
>
> -static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> -                               unsigned int access, int *ret_val)
> +static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> +                              unsigned int access)
>  {
>         /* The pfn is invalid, report the error! */
> -       if (unlikely(is_error_pfn(fault->pfn))) {
> -               *ret_val = kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);
> -               return true;
> -       }
> +       if (unlikely(is_error_pfn(fault->pfn)))
> +               return kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);
>
>         if (unlikely(!fault->slot)) {
>                 gva_t gva = fault->is_tdp ? 0 : fault->addr;
> @@ -2991,13 +2989,11 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
>                  * touching the shadow page tables as attempting to install an
>                  * MMIO SPTE will just be an expensive nop.
>                  */
> -               if (unlikely(!shadow_mmio_value)) {
> -                       *ret_val = RET_PF_EMULATE;
> -                       return true;
> -               }
> +               if (unlikely(!shadow_mmio_value))
> +                       return RET_PF_EMULATE;
>         }
>
> -       return false;
> +       return RET_PF_CONTINUE;
>  }
>
>  static bool page_fault_can_be_fast(struct kvm_page_fault *fault)
> @@ -3888,7 +3884,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>                                   kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
>  }
>
> -static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int *r)
> +static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  {
>         struct kvm_memory_slot *slot = fault->slot;
>         bool async;
> @@ -3899,7 +3895,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>          * be zapped before KVM inserts a new MMIO SPTE for the gfn.
>          */
>         if (slot && (slot->flags & KVM_MEMSLOT_INVALID))
> -               goto out_retry;
> +               return RET_PF_RETRY;
>
>         if (!kvm_is_visible_memslot(slot)) {
>                 /* Don't expose private memslots to L2. */
> @@ -3907,7 +3903,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>                         fault->slot = NULL;
>                         fault->pfn = KVM_PFN_NOSLOT;
>                         fault->map_writable = false;
> -                       return false;
> +                       return RET_PF_CONTINUE;
>                 }
>                 /*
>                  * If the APIC access page exists but is disabled, go directly
> @@ -3916,10 +3912,8 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>                  * when the AVIC is re-enabled.
>                  */
>                 if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT &&
> -                   !kvm_apicv_activated(vcpu->kvm)) {
> -                       *r = RET_PF_EMULATE;
> -                       return true;
> -               }
> +                   !kvm_apicv_activated(vcpu->kvm))
> +                       return RET_PF_EMULATE;
>         }
>
>         async = false;
> @@ -3927,26 +3921,23 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>                                           fault->write, &fault->map_writable,
>                                           &fault->hva);
>         if (!async)
> -               return false; /* *pfn has correct page already */
> +               return RET_PF_CONTINUE; /* *pfn has correct page already */
>
>         if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) {
>                 trace_kvm_try_async_get_page(fault->addr, fault->gfn);
>                 if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) {
>                         trace_kvm_async_pf_doublefault(fault->addr, fault->gfn);
>                         kvm_make_request(KVM_REQ_APF_HALT, vcpu);
> -                       goto out_retry;
> -               } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn))
> -                       goto out_retry;
> +                       return RET_PF_RETRY;
> +               } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
> +                       return RET_PF_RETRY;
> +               }
>         }
>
>         fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, NULL,
>                                           fault->write, &fault->map_writable,
>                                           &fault->hva);
> -       return false;
> -
> -out_retry:
> -       *r = RET_PF_RETRY;
> -       return true;
> +       return RET_PF_CONTINUE;
>  }
>
>  /*
> @@ -4001,10 +3992,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>         mmu_seq = vcpu->kvm->mmu_notifier_seq;
>         smp_rmb();
>
> -       if (kvm_faultin_pfn(vcpu, fault, &r))
> +       r = kvm_faultin_pfn(vcpu, fault);
> +       if (r != RET_PF_CONTINUE)
>                 return r;
>
> -       if (handle_abnormal_pfn(vcpu, fault, ACC_ALL, &r))
> +       r = handle_abnormal_pfn(vcpu, fault, ACC_ALL);
> +       if (r != RET_PF_CONTINUE)
>                 return r;
>
>         r = RET_PF_RETRY;
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 1bff453f7cbe..c0e502b17ef7 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -143,6 +143,7 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
>  /*
>   * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
>   *
> + * RET_PF_CONTINUE: So far, so good, keep handling the page fault.
>   * RET_PF_RETRY: let CPU fault again on the address.
>   * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
>   * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.

RET_PF_CONTINUE and RET_PF_INVALID are pretty similar, they both
indicate to the PF handler that it should keep going. What do you
think about taking this a step further and removing RET_PF_INVALID and
RET_PF_CONTINUE and using 0 instead? That way we can replace:

  if (ret != RET_PF_CONTINUE)
          return r;

and

  if (ret != RET_PF_INVALID)
          return r;

with

  if (r)
          return r;

?

> @@ -151,9 +152,15 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
>   *
>   * Any names added to this enum should be exported to userspace for use in
>   * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h

Please add RET_PF_CONTINUE to mmutrace.h, e.g.

  TRACE_DEFINE_ENUM(RET_PF_CONTINUE);

It doesn't matter in practice (yet) since the trace enums are only
used for trace_fast_page_fault, which does not return RET_PF_CONTINUE.
But it would be good to keep it up to date.

> + *
> + * Note, all values must be greater than or equal to zero so as not to encroach
> + * on -errno return values.  Somewhat arbitrarily use '0' for CONTINUE, which
> + * will allow for efficient machine code when checking for CONTINUE, e.g.
> + * "TEST %rax, %rax, JNZ", as all "stop!" values are non-zero.
>   */
>  enum {
> -       RET_PF_RETRY = 0,
> +       RET_PF_CONTINUE = 0,
> +       RET_PF_RETRY,
>         RET_PF_EMULATE,
>         RET_PF_INVALID,
>         RET_PF_FIXED,
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 66f1acf153c4..7038273d04ab 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -838,10 +838,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>         mmu_seq = vcpu->kvm->mmu_notifier_seq;
>         smp_rmb();
>
> -       if (kvm_faultin_pfn(vcpu, fault, &r))
> +       r = kvm_faultin_pfn(vcpu, fault);
> +       if (r != RET_PF_CONTINUE)
>                 return r;
>
> -       if (handle_abnormal_pfn(vcpu, fault, walker.pte_access, &r))
> +       r = handle_abnormal_pfn(vcpu, fault, walker.pte_access);
> +       if (r != RET_PF_CONTINUE)
>                 return r;
>
>         /*
>
> base-commit: 150866cd0ec871c765181d145aa0912628289c8a
> --
> 2.36.0.rc0.470.gd361397f0d-goog
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] KVM: x86/mmu: Add RET_PF_CONTINUE to eliminate bool+int* "returns"
  2022-04-15 16:56 ` David Matlack
@ 2022-04-15 19:28   ` Sean Christopherson
  0 siblings, 0 replies; 4+ messages in thread
From: Sean Christopherson @ 2022-04-15 19:28 UTC (permalink / raw)
  To: David Matlack
  Cc: Paolo Bonzini, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm list, LKML, Chao Peng

On Fri, Apr 15, 2022, David Matlack wrote:
> On Thu, Apr 14, 2022 at 5:51 PM Sean Christopherson <seanjc@google.com> wrote:
> > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > index 1bff453f7cbe..c0e502b17ef7 100644
> > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > @@ -143,6 +143,7 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> >  /*
> >   * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
> >   *
> > + * RET_PF_CONTINUE: So far, so good, keep handling the page fault.
> >   * RET_PF_RETRY: let CPU fault again on the address.
> >   * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
> >   * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
> 
> RET_PF_CONTINUE and RET_PF_INVALID are pretty similar, they both
> indicate to the PF handler that it should keep going. What do you
> think about taking this a step further and removing RET_PF_INVALID and
> RET_PF_CONTINUE and using 0 instead?

RET_PF_INVALID is useful because it allows kvm_mmu_page_fault() to sanity check
that the page fault handler didn't barf:

		r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
					  lower_32_bits(error_code), false);
		if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
			return -EIO;

It's a bit odd that fast_page_fault() returns RET_PF_INVALID instead of RET_PF_CONTINUE,
but at the same time the fault _is_ indeed invalid for fast handling.

> That way we can replace:
> 
>   if (ret != RET_PF_CONTINUE)
>           return r;
> 
> and
> 
>   if (ret != RET_PF_INVALID)
>           return r;
> 
> with
> 
>   if (r)
>           return r;
> 
> ?

We could actually do that now, at least for RET_PF_CONTINUE, but I deliberately
avoided doing that so as to not take a hard dependency on the underlying value of
RET_PF_CONTINUE.  KVM's magic "0 == exit to userspace, 1 == resume guest" logic
is dangerous, e.g. it's caused real bugs in the past.  But in that case, the
return value is multiplexed with -errno, i.e. there's a good reason for using
magic values.

For this code, there's no such requirement because the caller must convert the
RET_PF_* return to the aforementioned magic returns.

And the even bigger motivation was to discourage "if (r)" for this case because
that suggests that this code _does_ utilize KVM's magic return value.  I actually
wrote it that way at first and then got completely turned around and forgot what
value was being returned :-)

Heh, and thinking about it, I'd be tempted to assign RET_PF_CONTINUE a non-zero
value if some debug Kconfig is enabled to really enforce that KVM explicitly checks
for RET_PF_CONTINUE instead of assuming a non-zero value means "bail".

> > @@ -151,9 +152,15 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> >   *
> >   * Any names added to this enum should be exported to userspace for use in
> >   * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h
> 
> Please add RET_PF_CONTINUE to mmutrace.h, e.g.
> 
>   TRACE_DEFINE_ENUM(RET_PF_CONTINUE);
> 
> It doesn't matter in practice (yet) since the trace enums are only
> used for trace_fast_page_fault, which does not return RET_PF_CONTINUE.
> But it would be good to keep it up to date.

Drat, missed that.  Thanks!

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] KVM: x86/mmu: Add RET_PF_CONTINUE to eliminate bool+int* "returns"
  2022-04-15  0:51 [PATCH] KVM: x86/mmu: Add RET_PF_CONTINUE to eliminate bool+int* "returns" Sean Christopherson
  2022-04-15 16:56 ` David Matlack
@ 2022-04-18 13:36 ` Chao Peng
  1 sibling, 0 replies; 4+ messages in thread
From: Chao Peng @ 2022-04-18 13:36 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Paolo Bonzini, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, linux-kernel, David Matlack

On Fri, Apr 15, 2022 at 12:51:07AM +0000, Sean Christopherson wrote:
> Add RET_PF_CONTINUE and use it in handle_abnormal_pfn() and
> kvm_faultin_pfn() to signal that the page fault handler should continue
> doing its thing.  Aside from being gross and inefficient, using a boolean
> return to signal continue vs. stop makes it extremely difficult to add
> more helpers and/or move existing code to a helper.
> 
> E.g. hypothetically, if nested MMUs were to gain a separate page fault
> handler in the future, everything up to the "is self-modifying PTE" check
> can be shared by all shadow MMUs, but communicating up the stack whether
> to continue on or stop becomes a nightmare.
> 
> More concretely, proposed support for private guest memory ran into a
> similar issue, where it'll be forced to forego a helper in order to yield
> sane code: https://lore.kernel.org/all/YkJbxiL%2FAz7olWlq@google.com.

Thanks for cooking this patch, it makes private memory patch much
easier.

> 
> No functional change intended.
> 
> Cc: David Matlack <dmatlack@google.com>
> Cc: Chao Peng <chao.p.peng@linux.intel.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/mmu/mmu.c          | 51 ++++++++++++++-------------------
>  arch/x86/kvm/mmu/mmu_internal.h |  9 +++++-
>  arch/x86/kvm/mmu/paging_tmpl.h  |  6 ++--
>  3 files changed, 34 insertions(+), 32 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 69a30d6d1e2b..cb2982c6b513 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2972,14 +2972,12 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
>  	return -EFAULT;
>  }
>  
> -static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> -				unsigned int access, int *ret_val)
> +static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> +			       unsigned int access)
>  {
>  	/* The pfn is invalid, report the error! */
> -	if (unlikely(is_error_pfn(fault->pfn))) {
> -		*ret_val = kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);
> -		return true;
> -	}
> +	if (unlikely(is_error_pfn(fault->pfn)))
> +		return kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);
>  
>  	if (unlikely(!fault->slot)) {
>  		gva_t gva = fault->is_tdp ? 0 : fault->addr;
> @@ -2991,13 +2989,11 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
>  		 * touching the shadow page tables as attempting to install an
>  		 * MMIO SPTE will just be an expensive nop.
>  		 */
> -		if (unlikely(!shadow_mmio_value)) {
> -			*ret_val = RET_PF_EMULATE;
> -			return true;
> -		}
> +		if (unlikely(!shadow_mmio_value))
> +			return RET_PF_EMULATE;
>  	}
>  
> -	return false;
> +	return RET_PF_CONTINUE;
>  }
>  
>  static bool page_fault_can_be_fast(struct kvm_page_fault *fault)
> @@ -3888,7 +3884,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>  				  kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
>  }
>  
> -static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int *r)
> +static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  {
>  	struct kvm_memory_slot *slot = fault->slot;
>  	bool async;
> @@ -3899,7 +3895,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>  	 * be zapped before KVM inserts a new MMIO SPTE for the gfn.
>  	 */
>  	if (slot && (slot->flags & KVM_MEMSLOT_INVALID))
> -		goto out_retry;
> +		return RET_PF_RETRY;
>  
>  	if (!kvm_is_visible_memslot(slot)) {
>  		/* Don't expose private memslots to L2. */
> @@ -3907,7 +3903,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>  			fault->slot = NULL;
>  			fault->pfn = KVM_PFN_NOSLOT;
>  			fault->map_writable = false;
> -			return false;
> +			return RET_PF_CONTINUE;
>  		}
>  		/*
>  		 * If the APIC access page exists but is disabled, go directly
> @@ -3916,10 +3912,8 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>  		 * when the AVIC is re-enabled.
>  		 */
>  		if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT &&
> -		    !kvm_apicv_activated(vcpu->kvm)) {
> -			*r = RET_PF_EMULATE;
> -			return true;
> -		}
> +		    !kvm_apicv_activated(vcpu->kvm))
> +			return RET_PF_EMULATE;
>  	}
>  
>  	async = false;
> @@ -3927,26 +3921,23 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>  					  fault->write, &fault->map_writable,
>  					  &fault->hva);
>  	if (!async)
> -		return false; /* *pfn has correct page already */
> +		return RET_PF_CONTINUE; /* *pfn has correct page already */
>  
>  	if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) {
>  		trace_kvm_try_async_get_page(fault->addr, fault->gfn);
>  		if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) {
>  			trace_kvm_async_pf_doublefault(fault->addr, fault->gfn);
>  			kvm_make_request(KVM_REQ_APF_HALT, vcpu);
> -			goto out_retry;
> -		} else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn))
> -			goto out_retry;
> +			return RET_PF_RETRY;
> +		} else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
> +			return RET_PF_RETRY;
> +		}
>  	}
>  
>  	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, NULL,
>  					  fault->write, &fault->map_writable,
>  					  &fault->hva);
> -	return false;
> -
> -out_retry:
> -	*r = RET_PF_RETRY;
> -	return true;
> +	return RET_PF_CONTINUE;
>  }
>  
>  /*
> @@ -4001,10 +3992,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>  	mmu_seq = vcpu->kvm->mmu_notifier_seq;
>  	smp_rmb();
>  
> -	if (kvm_faultin_pfn(vcpu, fault, &r))
> +	r = kvm_faultin_pfn(vcpu, fault);
> +	if (r != RET_PF_CONTINUE)
>  		return r;
>  
> -	if (handle_abnormal_pfn(vcpu, fault, ACC_ALL, &r))
> +	r = handle_abnormal_pfn(vcpu, fault, ACC_ALL);
> +	if (r != RET_PF_CONTINUE)
>  		return r;
>  
>  	r = RET_PF_RETRY;
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 1bff453f7cbe..c0e502b17ef7 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -143,6 +143,7 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
>  /*
>   * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
>   *
> + * RET_PF_CONTINUE: So far, so good, keep handling the page fault.
>   * RET_PF_RETRY: let CPU fault again on the address.
>   * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
>   * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
> @@ -151,9 +152,15 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
>   *
>   * Any names added to this enum should be exported to userspace for use in
>   * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h
> + *
> + * Note, all values must be greater than or equal to zero so as not to encroach
> + * on -errno return values.  Somewhat arbitrarily use '0' for CONTINUE, which
> + * will allow for efficient machine code when checking for CONTINUE, e.g.
> + * "TEST %rax, %rax, JNZ", as all "stop!" values are non-zero.
>   */
>  enum {
> -	RET_PF_RETRY = 0,
> +	RET_PF_CONTINUE = 0,
> +	RET_PF_RETRY,
>  	RET_PF_EMULATE,
>  	RET_PF_INVALID,
>  	RET_PF_FIXED,

Unrelated to this patch but related to private memory patch, when
implicit conversion happens, I'm thinking which return value I should
use. Current -1 does not make much sense since it causes KVM_RUN
returns an error while it's expected to be 0. None of the above
including RET_PF_CONTINUE seems appropriate. Maybe we should go further
to introduce another RET_PF_USER indicating we should exit to
userspace for handling with a return value of 0 instead of -error in
kvm_mmu_page_fault().

Thanks,
Chao
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 66f1acf153c4..7038273d04ab 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -838,10 +838,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>  	mmu_seq = vcpu->kvm->mmu_notifier_seq;
>  	smp_rmb();
>  
> -	if (kvm_faultin_pfn(vcpu, fault, &r))
> +	r = kvm_faultin_pfn(vcpu, fault);
> +	if (r != RET_PF_CONTINUE)
>  		return r;
>  
> -	if (handle_abnormal_pfn(vcpu, fault, walker.pte_access, &r))
> +	r = handle_abnormal_pfn(vcpu, fault, walker.pte_access);
> +	if (r != RET_PF_CONTINUE)
>  		return r;
>  
>  	/*
> 
> base-commit: 150866cd0ec871c765181d145aa0912628289c8a
> -- 
> 2.36.0.rc0.470.gd361397f0d-goog

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-04-18 14:46 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-15  0:51 [PATCH] KVM: x86/mmu: Add RET_PF_CONTINUE to eliminate bool+int* "returns" Sean Christopherson
2022-04-15 16:56 ` David Matlack
2022-04-15 19:28   ` Sean Christopherson
2022-04-18 13:36 ` Chao Peng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.