All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] viridian: fix the HvFlushVirtualAddress/List hypercall implementation
@ 2019-02-14 12:10 Paul Durrant
  2019-02-14 12:34 ` Juergen Gross
  0 siblings, 1 reply; 5+ messages in thread
From: Paul Durrant @ 2019-02-14 12:10 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, Paul Durrant, Wei Liu, Jan Beulich, Roger Pau Monné

The current code uses hvm_asid_flush_vcpu() but this is insufficient for
a guest running in shadow mode, which results in guest crashes early in
boot if the 'hcall_remote_tlb_flush' option is enabled.

This patch, instead of open coding a new flush algorithm, adapts the one
already used by the HVMOP_flush_tlbs Xen hypercall. The implementation is
modified to allow TLB flushing a subset of a domain's vCPUs. A callback
function determines whether or not a vCPU requires flushing. This mechanism
was chosen because, while it is the case that the currently implemented
viridian hypercalls specify a vCPU mask, there are newer variants which
specify a sparse HV_VP_SET and thus use of a callback will avoid needing to
expose details of this outside of the viridian subsystem if and when those
newer variants are implemented.

NOTE: Use of the common flush function requires that the hypercalls are
      restartable and so, with this patch applied, viridian_hypercall()
      can now return HVM_HCALL_preempted. This is safe as no modification
      to struct cpu_user_regs is done before the return.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
---
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: "Roger Pau Monné" <roger.pau@citrix.com>

v2:
 - Use cpumask_scratch
 - Make sure local CPU flush isn't missed
---
 xen/arch/x86/hvm/hvm.c               | 48 +++++++++++++++++++++-------
 xen/arch/x86/hvm/viridian/viridian.c | 45 ++++++++------------------
 xen/include/asm-x86/hvm/hvm.h        |  2 ++
 3 files changed, 53 insertions(+), 42 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 410623d437..4ab4409242 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -38,6 +38,7 @@
 #include <xen/warning.h>
 #include <xen/vpci.h>
 #include <xen/nospec.h>
+#include <xen/sched-if.h>
 #include <asm/shadow.h>
 #include <asm/hap.h>
 #include <asm/current.h>
@@ -3964,26 +3965,27 @@ static void hvm_s3_resume(struct domain *d)
     }
 }
 
-static int hvmop_flush_tlb_all(void)
+bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
+                        void *ctxt)
 {
+    cpumask_t *mask = cpumask_scratch;
     struct domain *d = current->domain;
     struct vcpu *v;
 
-    if ( !is_hvm_domain(d) )
-        return -EINVAL;
-
     /* Avoid deadlock if more than one vcpu tries this at the same time. */
     if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
-        return -ERESTART;
+        return false;
 
     /* Pause all other vcpus. */
     for_each_vcpu ( d, v )
-        if ( v != current )
+        if ( v != current && flush_vcpu(ctxt, v) )
             vcpu_pause_nosync(v);
 
+    cpumask_clear(mask);
+
     /* Now that all VCPUs are signalled to deschedule, we wait... */
     for_each_vcpu ( d, v )
-        if ( v != current )
+        if ( v != current && flush_vcpu(ctxt, v) )
             while ( !vcpu_runnable(v) && v->is_running )
                 cpu_relax();
 
@@ -3992,17 +3994,41 @@ static int hvmop_flush_tlb_all(void)
 
     /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
     for_each_vcpu ( d, v )
+    {
+        unsigned int cpu;
+
+        if ( !flush_vcpu(ctxt, v) )
+            continue;
+
         paging_update_cr3(v, false);
 
-    /* Flush all dirty TLBs. */
-    flush_tlb_mask(d->dirty_cpumask);
+        cpu = read_atomic(&v->dirty_cpu);
+        if ( is_vcpu_dirty_cpu(cpu) )
+            __cpumask_set_cpu(cpu, mask);
+    }
+
+    /* Flush TLBs on all CPUs with dirty vcpu state. */
+    flush_tlb_mask(mask);
 
     /* Done. */
     for_each_vcpu ( d, v )
-        if ( v != current )
+        if ( v != current && flush_vcpu(ctxt, v) )
             vcpu_unpause(v);
 
-    return 0;
+    return true;
+}
+
+static bool always_flush(void *ctxt, struct vcpu *v)
+{
+    return true;
+}
+
+static int hvmop_flush_tlb_all(void)
+{
+    if ( !is_hvm_domain(current->domain) )
+        return -EINVAL;
+
+    return hvm_flush_vcpu_tlb(always_flush, NULL) ? 0 : -ERESTART;
 }
 
 static int hvmop_set_evtchn_upcall_vector(
diff --git a/xen/arch/x86/hvm/viridian/viridian.c b/xen/arch/x86/hvm/viridian/viridian.c
index c78b2918d9..425af56856 100644
--- a/xen/arch/x86/hvm/viridian/viridian.c
+++ b/xen/arch/x86/hvm/viridian/viridian.c
@@ -430,7 +430,16 @@ void viridian_domain_deinit(struct domain *d)
         viridian_vcpu_deinit(v);
 }
 
-static DEFINE_PER_CPU(cpumask_t, ipi_cpumask);
+/*
+ * Windows should not issue the hypercalls requiring this callback in the
+ * case where vcpu_id would exceed the size of the mask.
+ */
+static bool need_flush(void *ctxt, struct vcpu *v)
+{
+    uint64_t vcpu_mask = *(uint64_t *)ctxt;
+
+    return vcpu_mask & (1ul << v->vcpu_id);
+}
 
 int viridian_hypercall(struct cpu_user_regs *regs)
 {
@@ -494,8 +503,6 @@ int viridian_hypercall(struct cpu_user_regs *regs)
     case HvFlushVirtualAddressSpace:
     case HvFlushVirtualAddressList:
     {
-        cpumask_t *pcpu_mask;
-        struct vcpu *v;
         struct {
             uint64_t address_space;
             uint64_t flags;
@@ -521,36 +528,12 @@ int viridian_hypercall(struct cpu_user_regs *regs)
         if ( input_params.flags & HV_FLUSH_ALL_PROCESSORS )
             input_params.vcpu_mask = ~0ul;
 
-        pcpu_mask = &this_cpu(ipi_cpumask);
-        cpumask_clear(pcpu_mask);
-
-        /*
-         * For each specified virtual CPU flush all ASIDs to invalidate
-         * TLB entries the next time it is scheduled and then, if it
-         * is currently running, add its physical CPU to a mask of
-         * those which need to be interrupted to force a flush.
-         */
-        for_each_vcpu ( currd, v )
-        {
-            if ( v->vcpu_id >= (sizeof(input_params.vcpu_mask) * 8) )
-                break;
-
-            if ( !(input_params.vcpu_mask & (1ul << v->vcpu_id)) )
-                continue;
-
-            hvm_asid_flush_vcpu(v);
-            if ( v != curr && v->is_running )
-                __cpumask_set_cpu(v->processor, pcpu_mask);
-        }
-
         /*
-         * Since ASIDs have now been flushed it just remains to
-         * force any CPUs currently running target vCPUs out of non-
-         * root mode. It's possible that re-scheduling has taken place
-         * so we may unnecessarily IPI some CPUs.
+         * A false return means that another vcpu is currently trying
+         * a similar operation, so back off.
          */
-        if ( !cpumask_empty(pcpu_mask) )
-            smp_send_event_check_mask(pcpu_mask);
+        if ( !hvm_flush_vcpu_tlb(need_flush, &input_params.vcpu_mask) )
+            return HVM_HCALL_preempted;
 
         output.rep_complete = input.rep_count;
 
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 0a10b51554..53ffebb2c5 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -338,6 +338,8 @@ const char *hvm_efer_valid(const struct vcpu *v, uint64_t value,
                            signed int cr0_pg);
 unsigned long hvm_cr4_guest_valid_bits(const struct domain *d, bool restore);
 
+bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
+                        void *ctxt);
 
 #ifdef CONFIG_HVM
 
-- 
2.20.1.2.gb21ebb671


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] viridian: fix the HvFlushVirtualAddress/List hypercall implementation
  2019-02-14 12:10 [PATCH v2] viridian: fix the HvFlushVirtualAddress/List hypercall implementation Paul Durrant
@ 2019-02-14 12:34 ` Juergen Gross
  2019-02-14 12:38   ` Paul Durrant
  0 siblings, 1 reply; 5+ messages in thread
From: Juergen Gross @ 2019-02-14 12:34 UTC (permalink / raw)
  To: Paul Durrant, xen-devel
  Cc: Andrew Cooper, Wei Liu, Jan Beulich, Roger Pau Monné

On 14/02/2019 13:10, Paul Durrant wrote:
> The current code uses hvm_asid_flush_vcpu() but this is insufficient for
> a guest running in shadow mode, which results in guest crashes early in
> boot if the 'hcall_remote_tlb_flush' option is enabled.
> 
> This patch, instead of open coding a new flush algorithm, adapts the one
> already used by the HVMOP_flush_tlbs Xen hypercall. The implementation is
> modified to allow TLB flushing a subset of a domain's vCPUs. A callback
> function determines whether or not a vCPU requires flushing. This mechanism
> was chosen because, while it is the case that the currently implemented
> viridian hypercalls specify a vCPU mask, there are newer variants which
> specify a sparse HV_VP_SET and thus use of a callback will avoid needing to
> expose details of this outside of the viridian subsystem if and when those
> newer variants are implemented.
> 
> NOTE: Use of the common flush function requires that the hypercalls are
>       restartable and so, with this patch applied, viridian_hypercall()
>       can now return HVM_HCALL_preempted. This is safe as no modification
>       to struct cpu_user_regs is done before the return.
> 
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> ---
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Wei Liu <wei.liu2@citrix.com>
> Cc: "Roger Pau Monné" <roger.pau@citrix.com>
> 
> v2:
>  - Use cpumask_scratch

That's not a good idea. cpumask_scratch may be used from other cpus as
long as the respective scheduler lock is being held. See the comment in
include/xen/sched-if.h:

/*
 * Scratch space, for avoiding having too many cpumask_t on the stack.
 * Within each scheduler, when using the scratch mask of one pCPU:
 * - the pCPU must belong to the scheduler,
 * - the caller must own the per-pCPU scheduler lock (a.k.a. runqueue
 *   lock).
 */

So please don't use cpumask_scratch outside the scheduler!


Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] viridian: fix the HvFlushVirtualAddress/List hypercall implementation
  2019-02-14 12:34 ` Juergen Gross
@ 2019-02-14 12:38   ` Paul Durrant
  2019-02-14 13:12     ` Jan Beulich
  0 siblings, 1 reply; 5+ messages in thread
From: Paul Durrant @ 2019-02-14 12:38 UTC (permalink / raw)
  To: 'Juergen Gross', xen-devel
  Cc: Andrew Cooper, Wei Liu, Jan Beulich, Roger Pau Monne

> -----Original Message-----
> From: Juergen Gross [mailto:jgross@suse.com]
> Sent: 14 February 2019 12:35
> To: Paul Durrant <Paul.Durrant@citrix.com>; xen-devel@lists.xenproject.org
> Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>; Wei Liu
> <wei.liu2@citrix.com>; Jan Beulich <jbeulich@suse.com>; Roger Pau Monne
> <roger.pau@citrix.com>
> Subject: Re: [Xen-devel] [PATCH v2] viridian: fix the
> HvFlushVirtualAddress/List hypercall implementation
> 
> On 14/02/2019 13:10, Paul Durrant wrote:
> > The current code uses hvm_asid_flush_vcpu() but this is insufficient for
> > a guest running in shadow mode, which results in guest crashes early in
> > boot if the 'hcall_remote_tlb_flush' option is enabled.
> >
> > This patch, instead of open coding a new flush algorithm, adapts the one
> > already used by the HVMOP_flush_tlbs Xen hypercall. The implementation
> is
> > modified to allow TLB flushing a subset of a domain's vCPUs. A callback
> > function determines whether or not a vCPU requires flushing. This
> mechanism
> > was chosen because, while it is the case that the currently implemented
> > viridian hypercalls specify a vCPU mask, there are newer variants which
> > specify a sparse HV_VP_SET and thus use of a callback will avoid needing
> to
> > expose details of this outside of the viridian subsystem if and when
> those
> > newer variants are implemented.
> >
> > NOTE: Use of the common flush function requires that the hypercalls are
> >       restartable and so, with this patch applied, viridian_hypercall()
> >       can now return HVM_HCALL_preempted. This is safe as no
> modification
> >       to struct cpu_user_regs is done before the return.
> >
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > ---
> > Cc: Jan Beulich <jbeulich@suse.com>
> > Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> > Cc: Wei Liu <wei.liu2@citrix.com>
> > Cc: "Roger Pau Monné" <roger.pau@citrix.com>
> >
> > v2:
> >  - Use cpumask_scratch
> 
> That's not a good idea. cpumask_scratch may be used from other cpus as
> long as the respective scheduler lock is being held. See the comment in
> include/xen/sched-if.h:
> 
> /*
>  * Scratch space, for avoiding having too many cpumask_t on the stack.
>  * Within each scheduler, when using the scratch mask of one pCPU:
>  * - the pCPU must belong to the scheduler,
>  * - the caller must own the per-pCPU scheduler lock (a.k.a. runqueue
>  *   lock).
>  */
> 
> So please don't use cpumask_scratch outside the scheduler!

Ah, yes, it's because of cpumask_scratch_cpu()... I'd indeed missed that. In which case a dedicated flush_cpumask is still required.

  Paul

> 
> 
> Juergen
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] viridian: fix the HvFlushVirtualAddress/List hypercall implementation
  2019-02-14 12:38   ` Paul Durrant
@ 2019-02-14 13:12     ` Jan Beulich
  2019-02-14 13:42       ` Andrew Cooper
  0 siblings, 1 reply; 5+ messages in thread
From: Jan Beulich @ 2019-02-14 13:12 UTC (permalink / raw)
  To: Paul Durrant, Juergen Gross
  Cc: Andrew Cooper, Wei Liu, xen-devel, Roger Pau Monne

>>> On 14.02.19 at 13:38, <Paul.Durrant@citrix.com> wrote:
>> From: Juergen Gross [mailto:jgross@suse.com]
>> Sent: 14 February 2019 12:35
>> 
>> On 14/02/2019 13:10, Paul Durrant wrote:
>> > v2:
>> >  - Use cpumask_scratch
>> 
>> That's not a good idea. cpumask_scratch may be used from other cpus as
>> long as the respective scheduler lock is being held. See the comment in
>> include/xen/sched-if.h:
>> 
>> /*
>>  * Scratch space, for avoiding having too many cpumask_t on the stack.
>>  * Within each scheduler, when using the scratch mask of one pCPU:
>>  * - the pCPU must belong to the scheduler,
>>  * - the caller must own the per-pCPU scheduler lock (a.k.a. runqueue
>>  *   lock).
>>  */
>> 
>> So please don't use cpumask_scratch outside the scheduler!
> 
> Ah, yes, it's because of cpumask_scratch_cpu()... I'd indeed missed that. In 
> which case a dedicated flush_cpumask is still required.

And I didn't recall this aspect either - I'm sorry for misguiding you.
So Jürgen - thanks for spotting!

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] viridian: fix the HvFlushVirtualAddress/List hypercall implementation
  2019-02-14 13:12     ` Jan Beulich
@ 2019-02-14 13:42       ` Andrew Cooper
  0 siblings, 0 replies; 5+ messages in thread
From: Andrew Cooper @ 2019-02-14 13:42 UTC (permalink / raw)
  To: Jan Beulich, Paul Durrant, Juergen Gross
  Cc: xen-devel, Wei Liu, Roger Pau Monne

On 14/02/2019 13:12, Jan Beulich wrote:
>>>> On 14.02.19 at 13:38, <Paul.Durrant@citrix.com> wrote:
>>> From: Juergen Gross [mailto:jgross@suse.com]
>>> Sent: 14 February 2019 12:35
>>>
>>> On 14/02/2019 13:10, Paul Durrant wrote:
>>>> v2:
>>>>  - Use cpumask_scratch
>>> That's not a good idea. cpumask_scratch may be used from other cpus as
>>> long as the respective scheduler lock is being held. See the comment in
>>> include/xen/sched-if.h:
>>>
>>> /*
>>>  * Scratch space, for avoiding having too many cpumask_t on the stack.
>>>  * Within each scheduler, when using the scratch mask of one pCPU:
>>>  * - the pCPU must belong to the scheduler,
>>>  * - the caller must own the per-pCPU scheduler lock (a.k.a. runqueue
>>>  *   lock).
>>>  */
>>>
>>> So please don't use cpumask_scratch outside the scheduler!
>> Ah, yes, it's because of cpumask_scratch_cpu()... I'd indeed missed that. In 
>> which case a dedicated flush_cpumask is still required.
> And I didn't recall this aspect either - I'm sorry for misguiding you.
> So Jürgen - thanks for spotting!

Can cpumask_scratch move into a scheduler private header file so the
compiler won't let us accidentally make this mistake again?

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-02-14 13:42 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-14 12:10 [PATCH v2] viridian: fix the HvFlushVirtualAddress/List hypercall implementation Paul Durrant
2019-02-14 12:34 ` Juergen Gross
2019-02-14 12:38   ` Paul Durrant
2019-02-14 13:12     ` Jan Beulich
2019-02-14 13:42       ` Andrew Cooper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.