All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH for-4.12 V3] x86/altp2m: fix HVMOP_altp2m_set_domain_state race
@ 2019-02-11  9:13 Razvan Cojocaru
  2019-02-11 10:10 ` Jan Beulich
  0 siblings, 1 reply; 10+ messages in thread
From: Razvan Cojocaru @ 2019-02-11  9:13 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, wei.liu2, jbeulich, Razvan Cojocaru, george.dunlap,
	andrew.cooper3, jun.nakajima, roger.pau

HVMOP_altp2m_set_domain_state does not domain_pause(), presumably
on purpose (as it was originally supposed to cater to a in-guest
agent, and a domain pausing itself is not a good idea).

This can lead to domain crashes in the vmx_vmexit_handler() code
that checks if the guest has the ability to switch EPTP without an
exit. That code can __vmread() the host p2m's EPT_POINTER
(before HVMOP_altp2m_set_domain_state "for_each_vcpu()" has a
chance to run altp2m_vcpu_initialise(), but after
d->arch.altp2m_active is set).

This patch reorganizes the code so that d->arch.altp2m_active
is set to true only after all the init work has been done, and
to false before the uninit work begins. This required adding
a new bool parameter altp2m_vcpu_update_p2m(), which relied
on d->arch.altp2m_active being set before it's called.

p2m_get_altp2m() now returns NULL if !altp2m_active(), to
prevent it from returning a "valid" altp2m pointer between
the moment where d->arch.altp2m_active = false and the
point when v->p2midx actually becomes INVALID_ALTP2M.

While at it, I've changed a couple of bool_t's to bool's.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>

---
Changes since V2:
 - Removed leftover asm-x86/altp2m.h changes.
 - nstate = !!a.u.domain_state.state; becomes
   nstate = a.u.domain_state.state;
 - Removed the if() and else in do_altp2m_op() as
   recommended by Jan.
 - Using true explicitly instead of altp2m_active(d) for
   altp2m_vcpu_update_p2m() in p2m_switch_vcpu_altp2m_by_id()
   and p2m_switch_domain_altp2m_by_id().
 - Updated patch description.
 - Modified p2m_get_altp2m() to return NULL if !altp2m_active().
---
 xen/arch/x86/hvm/hvm.c        | 16 +++++++++++++---
 xen/arch/x86/hvm/vmx/vmx.c    |  4 ++--
 xen/arch/x86/mm/altp2m.c      |  4 ++--
 xen/arch/x86/mm/p2m.c         |  4 ++--
 xen/include/asm-x86/domain.h  |  2 +-
 xen/include/asm-x86/hvm/hvm.h |  6 +++---
 xen/include/asm-x86/p2m.h     |  8 +++++++-
 7 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 21944e9..50d896d 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4525,7 +4525,7 @@ static int do_altp2m_op(
     case HVMOP_altp2m_set_domain_state:
     {
         struct vcpu *v;
-        bool_t ostate;
+        bool ostate, nstate;
 
         if ( nestedhvm_enabled(d) )
         {
@@ -4534,12 +4534,15 @@ static int do_altp2m_op(
         }
 
         ostate = d->arch.altp2m_active;
-        d->arch.altp2m_active = !!a.u.domain_state.state;
+        nstate = a.u.domain_state.state;
 
         /* If the alternate p2m state has changed, handle appropriately */
-        if ( d->arch.altp2m_active != ostate &&
+        if ( nstate != ostate &&
              (ostate || !(rc = p2m_init_altp2m_by_id(d, 0))) )
         {
+            /* First mark altp2m as disabled, then altp2m_vcpu_destroy(). */
+            d->arch.altp2m_active = false;
+
             for_each_vcpu( d, v )
             {
                 if ( !ostate )
@@ -4550,7 +4553,14 @@ static int do_altp2m_op(
 
             if ( ostate )
                 p2m_flush_altp2m(d);
+
+            /*
+             * Wait until altp2m_vcpu_initialise() is done before marking
+             * altp2m as being enabled for the domain.
+             */
+            d->arch.altp2m_active = nstate;
         }
+
         break;
     }
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 64af8bf..e542568 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2150,13 +2150,13 @@ static bool_t vmx_is_singlestep_supported(void)
     return !!cpu_has_monitor_trap_flag;
 }
 
-static void vmx_vcpu_update_eptp(struct vcpu *v)
+static void vmx_vcpu_update_eptp(struct vcpu *v, bool altp2m_enabled)
 {
     struct domain *d = v->domain;
     struct p2m_domain *p2m = NULL;
     struct ept_data *ept;
 
-    if ( altp2m_active(d) )
+    if ( altp2m_enabled )
         p2m = p2m_get_altp2m(v);
     if ( !p2m )
         p2m = p2m_get_hostp2m(d);
diff --git a/xen/arch/x86/mm/altp2m.c b/xen/arch/x86/mm/altp2m.c
index 930bdc2..c51303b 100644
--- a/xen/arch/x86/mm/altp2m.c
+++ b/xen/arch/x86/mm/altp2m.c
@@ -39,7 +39,7 @@ altp2m_vcpu_initialise(struct vcpu *v)
     vcpu_altp2m(v).p2midx = 0;
     atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
 
-    altp2m_vcpu_update_p2m(v);
+    altp2m_vcpu_update_p2m(v, true);
 
     if ( v != current )
         vcpu_unpause(v);
@@ -58,7 +58,7 @@ altp2m_vcpu_destroy(struct vcpu *v)
 
     altp2m_vcpu_reset(v);
 
-    altp2m_vcpu_update_p2m(v);
+    altp2m_vcpu_update_p2m(v, false);
     altp2m_vcpu_update_vmfunc_ve(v);
 
     if ( v != current )
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index d14ce57..6f991f8 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -2332,7 +2332,7 @@ bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx)
             atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
             vcpu_altp2m(v).p2midx = idx;
             atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
-            altp2m_vcpu_update_p2m(v);
+            altp2m_vcpu_update_p2m(v, true);
         }
         rc = 1;
     }
@@ -2573,7 +2573,7 @@ int p2m_switch_domain_altp2m_by_id(struct domain *d, unsigned int idx)
                 atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
                 vcpu_altp2m(v).p2midx = idx;
                 atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
-                altp2m_vcpu_update_p2m(v);
+                altp2m_vcpu_update_p2m(v, true);
             }
 
         rc = 0;
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 277f99f..a4e8f5a 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -337,7 +337,7 @@ struct arch_domain
     mm_lock_t nested_p2m_lock;
 
     /* altp2m: allow multiple copies of host p2m */
-    bool_t altp2m_active;
+    bool altp2m_active;
     struct p2m_domain *altp2m_p2m[MAX_ALTP2M];
     mm_lock_t altp2m_list_lock;
     uint64_t *altp2m_eptp;
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 0a10b51..de0969b 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -214,7 +214,7 @@ struct hvm_function_table {
     bool_t (*is_singlestep_supported)(void);
 
     /* Alternate p2m */
-    void (*altp2m_vcpu_update_p2m)(struct vcpu *v);
+    void (*altp2m_vcpu_update_p2m)(struct vcpu *v, bool altp2m_enabled);
     void (*altp2m_vcpu_update_vmfunc_ve)(struct vcpu *v);
     bool_t (*altp2m_vcpu_emulate_ve)(struct vcpu *v);
     int (*altp2m_vcpu_emulate_vmfunc)(const struct cpu_user_regs *regs);
@@ -639,10 +639,10 @@ static inline bool hvm_altp2m_supported(void)
 }
 
 /* updates the current hardware p2m */
-static inline void altp2m_vcpu_update_p2m(struct vcpu *v)
+static inline void altp2m_vcpu_update_p2m(struct vcpu *v, bool altp2m_enabled)
 {
     if ( hvm_funcs.altp2m_vcpu_update_p2m )
-        hvm_funcs.altp2m_vcpu_update_p2m(v);
+        hvm_funcs.altp2m_vcpu_update_p2m(v, altp2m_enabled);
 }
 
 /* updates VMCS fields related to VMFUNC and #VE */
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 2095076..a683d20 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -28,6 +28,7 @@
 
 #include <xen/paging.h>
 #include <xen/mem_access.h>
+#include <asm/altp2m.h>
 #include <asm/mem_sharing.h>
 #include <asm/page.h>    /* for pagetable_t */
 
@@ -847,7 +848,12 @@ void nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
 /* get current alternate p2m table */
 static inline struct p2m_domain *p2m_get_altp2m(struct vcpu *v)
 {
-    unsigned int index = vcpu_altp2m(v).p2midx;
+    unsigned int index;
+
+    if ( !altp2m_active(v->domain) )
+        return NULL;
+
+    index = vcpu_altp2m(v).p2midx;
 
     if ( index == INVALID_ALTP2M )
         return NULL;
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH for-4.12 V3] x86/altp2m: fix HVMOP_altp2m_set_domain_state race
  2019-02-11  9:13 [PATCH for-4.12 V3] x86/altp2m: fix HVMOP_altp2m_set_domain_state race Razvan Cojocaru
@ 2019-02-11 10:10 ` Jan Beulich
  2019-02-11 10:57   ` Razvan Cojocaru
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Beulich @ 2019-02-11 10:10 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: Kevin Tian, Wei Liu, George Dunlap, Andrew Cooper, Jun Nakajima,
	xen-devel, Roger Pau Monne

>>> On 11.02.19 at 10:13, <rcojocaru@bitdefender.com> wrote:
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2150,13 +2150,13 @@ static bool_t vmx_is_singlestep_supported(void)
>      return !!cpu_has_monitor_trap_flag;
>  }
>  
> -static void vmx_vcpu_update_eptp(struct vcpu *v)
> +static void vmx_vcpu_update_eptp(struct vcpu *v, bool altp2m_enabled)
>  {
>      struct domain *d = v->domain;
>      struct p2m_domain *p2m = NULL;
>      struct ept_data *ept;
>  
> -    if ( altp2m_active(d) )
> +    if ( altp2m_enabled )
>          p2m = p2m_get_altp2m(v);
>      if ( !p2m )
>          p2m = p2m_get_hostp2m(d);

With the change you now make to p2m_get_altp2m(), this looks to be
a benign change. Which to me would suggest to either leave the code
alone, or to drop the if() (but - again - not its body) altogether. At
which point the code could be further streamlined, as then the NULL
initializer can go away and the assignment (or then perhaps initializer)
could become "p2m = p2m_get_altp2m(v) ?: p2m_get_hostp2m(d)".
(Generally I'd recommend to leave out the change here, and do the
transformation in a follow-on patch.)

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH for-4.12 V3] x86/altp2m: fix HVMOP_altp2m_set_domain_state race
  2019-02-11 10:10 ` Jan Beulich
@ 2019-02-11 10:57   ` Razvan Cojocaru
  2019-02-11 13:46     ` Razvan Cojocaru
  0 siblings, 1 reply; 10+ messages in thread
From: Razvan Cojocaru @ 2019-02-11 10:57 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Wei Liu, George Dunlap, Andrew Cooper, Jun Nakajima,
	xen-devel, Roger Pau Monne

On 2/11/19 12:10 PM, Jan Beulich wrote:
>>>> On 11.02.19 at 10:13, <rcojocaru@bitdefender.com> wrote:
>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> @@ -2150,13 +2150,13 @@ static bool_t vmx_is_singlestep_supported(void)
>>       return !!cpu_has_monitor_trap_flag;
>>   }
>>   
>> -static void vmx_vcpu_update_eptp(struct vcpu *v)
>> +static void vmx_vcpu_update_eptp(struct vcpu *v, bool altp2m_enabled)
>>   {
>>       struct domain *d = v->domain;
>>       struct p2m_domain *p2m = NULL;
>>       struct ept_data *ept;
>>   
>> -    if ( altp2m_active(d) )
>> +    if ( altp2m_enabled )
>>           p2m = p2m_get_altp2m(v);
>>       if ( !p2m )
>>           p2m = p2m_get_hostp2m(d);
> 
> With the change you now make to p2m_get_altp2m(), this looks to be
> a benign change. Which to me would suggest to either leave the code
> alone, or to drop the if() (but - again - not its body) altogether. At
> which point the code could be further streamlined, as then the NULL
> initializer can go away and the assignment (or then perhaps initializer)
> could become "p2m = p2m_get_altp2m(v) ?: p2m_get_hostp2m(d)".
> (Generally I'd recommend to leave out the change here, and do the
> transformation in a follow-on patch.)

Thanks for noticing, actually this appears to invalidate the whole 
purpose of the patch (I should have tested this more before sumbitting 
V3, sorry).

The whole point of the new boolean is to have p2m assigned an altp2m 
regardless of altp2m_active() (hence the change) - which now no longer 
happens. I got carried away with this change.

The fact that this is so easy to get wrong is the reason why I've 
preferred the domain_pause() solution. There appears to be no clean way 
to fix this otherwise, and if this is so easy to misunderstand it'll 
break just as easily with further changes.

I suppose I could just pass the bool along to p2m_get_altp2m() (and 
indeed remove the if())...


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH for-4.12 V3] x86/altp2m: fix HVMOP_altp2m_set_domain_state race
  2019-02-11 10:57   ` Razvan Cojocaru
@ 2019-02-11 13:46     ` Razvan Cojocaru
  2019-02-11 16:59       ` Jan Beulich
  0 siblings, 1 reply; 10+ messages in thread
From: Razvan Cojocaru @ 2019-02-11 13:46 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Wei Liu, George Dunlap, Andrew Cooper, Jun Nakajima,
	xen-devel, Roger Pau Monne

On 2/11/19 12:57 PM, Razvan Cojocaru wrote:
> On 2/11/19 12:10 PM, Jan Beulich wrote:
>>>>> On 11.02.19 at 10:13, <rcojocaru@bitdefender.com> wrote:
>>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>>> @@ -2150,13 +2150,13 @@ static bool_t vmx_is_singlestep_supported(void)
>>>       return !!cpu_has_monitor_trap_flag;
>>>   }
>>> -static void vmx_vcpu_update_eptp(struct vcpu *v)
>>> +static void vmx_vcpu_update_eptp(struct vcpu *v, bool altp2m_enabled)
>>>   {
>>>       struct domain *d = v->domain;
>>>       struct p2m_domain *p2m = NULL;
>>>       struct ept_data *ept;
>>> -    if ( altp2m_active(d) )
>>> +    if ( altp2m_enabled )
>>>           p2m = p2m_get_altp2m(v);
>>>       if ( !p2m )
>>>           p2m = p2m_get_hostp2m(d);
>>
>> With the change you now make to p2m_get_altp2m(), this looks to be
>> a benign change. Which to me would suggest to either leave the code
>> alone, or to drop the if() (but - again - not its body) altogether. At
>> which point the code could be further streamlined, as then the NULL
>> initializer can go away and the assignment (or then perhaps initializer)
>> could become "p2m = p2m_get_altp2m(v) ?: p2m_get_hostp2m(d)".
>> (Generally I'd recommend to leave out the change here, and do the
>> transformation in a follow-on patch.)
> 
> Thanks for noticing, actually this appears to invalidate the whole 
> purpose of the patch (I should have tested this more before sumbitting 
> V3, sorry).
> 
> The whole point of the new boolean is to have p2m assigned an altp2m 
> regardless of altp2m_active() (hence the change) - which now no longer 
> happens. I got carried away with this change.
> 
> The fact that this is so easy to get wrong is the reason why I've 
> preferred the domain_pause() solution. There appears to be no clean way 
> to fix this otherwise, and if this is so easy to misunderstand it'll 
> break just as easily with further changes.
> 
> I suppose I could just pass the bool along to p2m_get_altp2m() (and 
> indeed remove the if())...

I think the best that can be done here is to check if altp2m_active() 
early in p2m_switch_vcpu_altp2m_by_id() and 
p2m_switch_domain_altp2m_by_id(), then bail if it's not active. Since 
these are only called by HVMOP_altp2m_* (and thus serialized by the 
domain lock), it should be enough WRT HVMOP_altp2m_set_domain_state.

This of course means reverting p2m_get_altp2m() to its original 
non-intuitive state of returning a valid altp2m pointer even when 
altp2m_active() is false.

I see no other way out of this (aside from the domain_pause() fix).


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH for-4.12 V3] x86/altp2m: fix HVMOP_altp2m_set_domain_state race
  2019-02-11 13:46     ` Razvan Cojocaru
@ 2019-02-11 16:59       ` Jan Beulich
  2019-02-11 17:21         ` Razvan Cojocaru
  2019-02-12 10:11         ` Razvan Cojocaru
  0 siblings, 2 replies; 10+ messages in thread
From: Jan Beulich @ 2019-02-11 16:59 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: Kevin Tian, Wei Liu, George Dunlap, Andrew Cooper, Jun Nakajima,
	xen-devel, Roger Pau Monne

>>> On 11.02.19 at 14:46, <rcojocaru@bitdefender.com> wrote:
> On 2/11/19 12:57 PM, Razvan Cojocaru wrote:
>> On 2/11/19 12:10 PM, Jan Beulich wrote:
>>>>>> On 11.02.19 at 10:13, <rcojocaru@bitdefender.com> wrote:
>>>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>>>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>>>> @@ -2150,13 +2150,13 @@ static bool_t vmx_is_singlestep_supported(void)
>>>>       return !!cpu_has_monitor_trap_flag;
>>>>   }
>>>> -static void vmx_vcpu_update_eptp(struct vcpu *v)
>>>> +static void vmx_vcpu_update_eptp(struct vcpu *v, bool altp2m_enabled)
>>>>   {
>>>>       struct domain *d = v->domain;
>>>>       struct p2m_domain *p2m = NULL;
>>>>       struct ept_data *ept;
>>>> -    if ( altp2m_active(d) )
>>>> +    if ( altp2m_enabled )
>>>>           p2m = p2m_get_altp2m(v);
>>>>       if ( !p2m )
>>>>           p2m = p2m_get_hostp2m(d);
>>>
>>> With the change you now make to p2m_get_altp2m(), this looks to be
>>> a benign change. Which to me would suggest to either leave the code
>>> alone, or to drop the if() (but - again - not its body) altogether. At
>>> which point the code could be further streamlined, as then the NULL
>>> initializer can go away and the assignment (or then perhaps initializer)
>>> could become "p2m = p2m_get_altp2m(v) ?: p2m_get_hostp2m(d)".
>>> (Generally I'd recommend to leave out the change here, and do the
>>> transformation in a follow-on patch.)
>> 
>> Thanks for noticing, actually this appears to invalidate the whole 
>> purpose of the patch (I should have tested this more before sumbitting 
>> V3, sorry).
>> 
>> The whole point of the new boolean is to have p2m assigned an altp2m 
>> regardless of altp2m_active() (hence the change) - which now no longer 
>> happens. I got carried away with this change.
>> 
>> The fact that this is so easy to get wrong is the reason why I've 
>> preferred the domain_pause() solution. There appears to be no clean way 
>> to fix this otherwise, and if this is so easy to misunderstand it'll 
>> break just as easily with further changes.
>> 
>> I suppose I could just pass the bool along to p2m_get_altp2m() (and 
>> indeed remove the if())...
> 
> I think the best that can be done here is to check if altp2m_active() 
> early in p2m_switch_vcpu_altp2m_by_id() and 
> p2m_switch_domain_altp2m_by_id(), then bail if it's not active. Since 
> these are only called by HVMOP_altp2m_* (and thus serialized by the 
> domain lock), it should be enough WRT HVMOP_altp2m_set_domain_state.

I'm confused: Where do you see the domain lock used there?
Plus I can't see p2m_switch_vcpu_altp2m_by_id() called for
any HVMOP_altp2m_* at all. One of the actual callers is guarded
by altp2m_active(), but the other isn't.

> This of course means reverting p2m_get_altp2m() to its original 
> non-intuitive state of returning a valid altp2m pointer even when 
> altp2m_active() is false.

Yeah, this looks to be unavoidable.

> I see no other way out of this (aside from the domain_pause() fix).

If only that one would have been a complete fix, rather than just a
partial one.

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH for-4.12 V3] x86/altp2m: fix HVMOP_altp2m_set_domain_state race
  2019-02-11 16:59       ` Jan Beulich
@ 2019-02-11 17:21         ` Razvan Cojocaru
  2019-02-12  7:31           ` Jan Beulich
  2019-02-12 10:11         ` Razvan Cojocaru
  1 sibling, 1 reply; 10+ messages in thread
From: Razvan Cojocaru @ 2019-02-11 17:21 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Wei Liu, George Dunlap, Andrew Cooper, Jun Nakajima,
	xen-devel, Roger Pau Monne

On 2/11/19 6:59 PM, Jan Beulich wrote:
>>> Thanks for noticing, actually this appears to invalidate the whole 
>>> purpose of the patch (I should have tested this more before sumbitting 
>>> V3, sorry).
>>>
>>> The whole point of the new boolean is to have p2m assigned an altp2m 
>>> regardless of altp2m_active() (hence the change) - which now no longer 
>>> happens. I got carried away with this change.
>>>
>>> The fact that this is so easy to get wrong is the reason why I've 
>>> preferred the domain_pause() solution. There appears to be no clean way 
>>> to fix this otherwise, and if this is so easy to misunderstand it'll 
>>> break just as easily with further changes.
>>>
>>> I suppose I could just pass the bool along to p2m_get_altp2m() (and 
>>> indeed remove the if())...
>>
>> I think the best that can be done here is to check if altp2m_active() 
>> early in p2m_switch_vcpu_altp2m_by_id() and 
>> p2m_switch_domain_altp2m_by_id(), then bail if it's not active. Since 
>> these are only called by HVMOP_altp2m_* (and thus serialized by the 
>> domain lock), it should be enough WRT HVMOP_altp2m_set_domain_state.
> 
> I'm confused: Where do you see the domain lock used there?
> Plus I can't see p2m_switch_vcpu_altp2m_by_id() called for
> any HVMOP_altp2m_* at all. One of the actual callers is guarded
> by altp2m_active(), but the other isn't.

do_altp2m_op() does d = rcu_lock_domain_by_any_id(a.domain);, and
unlocks it before it exits.

But you're right, p2m_switch_vcpu_altp2m_by_id() is not called for any
HVMOP_altp2m_*, I've misread that. Hence, I believe both callers
ofp2m_switch_vcpu_altp2m_by_id() may race with HVMOP_altp2m_*.

Would you like me to add the altp2m_active() check in both
p2m_switch_domain_altp2m_by_id() and p2m_switch_vcpu_altp2m_by_id(), and
make it harder to race (it still won't be impossible, since the bool may
become false between the check and the actual function logic for
p2m_switch_vcpu_altp2m_by_id(), as you've noticed)?

>> I see no other way out of this (aside from the domain_pause() fix).
> 
> If only that one would have been a complete fix, rather than just a
> partial one.

Agreed, but that one at least clearly fixes the external case, whereas
this doesn't seem to cover all corner cases for any situation.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH for-4.12 V3] x86/altp2m: fix HVMOP_altp2m_set_domain_state race
  2019-02-11 17:21         ` Razvan Cojocaru
@ 2019-02-12  7:31           ` Jan Beulich
  2019-02-12  9:23             ` Razvan Cojocaru
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Beulich @ 2019-02-12  7:31 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: Kevin Tian, Wei Liu, George Dunlap, Andrew Cooper, Jun Nakajima,
	xen-devel, Roger Pau Monne

>>> On 11.02.19 at 18:21, <rcojocaru@bitdefender.com> wrote:
> On 2/11/19 6:59 PM, Jan Beulich wrote:
>>>> Thanks for noticing, actually this appears to invalidate the whole 
>>>> purpose of the patch (I should have tested this more before sumbitting 
>>>> V3, sorry).
>>>>
>>>> The whole point of the new boolean is to have p2m assigned an altp2m 
>>>> regardless of altp2m_active() (hence the change) - which now no longer 
>>>> happens. I got carried away with this change.
>>>>
>>>> The fact that this is so easy to get wrong is the reason why I've 
>>>> preferred the domain_pause() solution. There appears to be no clean way 
>>>> to fix this otherwise, and if this is so easy to misunderstand it'll 
>>>> break just as easily with further changes.
>>>>
>>>> I suppose I could just pass the bool along to p2m_get_altp2m() (and 
>>>> indeed remove the if())...
>>>
>>> I think the best that can be done here is to check if altp2m_active() 
>>> early in p2m_switch_vcpu_altp2m_by_id() and 
>>> p2m_switch_domain_altp2m_by_id(), then bail if it's not active. Since 
>>> these are only called by HVMOP_altp2m_* (and thus serialized by the 
>>> domain lock), it should be enough WRT HVMOP_altp2m_set_domain_state.
>> 
>> I'm confused: Where do you see the domain lock used there?
>> Plus I can't see p2m_switch_vcpu_altp2m_by_id() called for
>> any HVMOP_altp2m_* at all. One of the actual callers is guarded
>> by altp2m_active(), but the other isn't.
> 
> do_altp2m_op() does d = rcu_lock_domain_by_any_id(a.domain);, and
> unlocks it before it exits.

But that's not the "domain lock". This is just making sure the domain
won't disappear behind your back.

> But you're right, p2m_switch_vcpu_altp2m_by_id() is not called for any
> HVMOP_altp2m_*, I've misread that. Hence, I believe both callers
> ofp2m_switch_vcpu_altp2m_by_id() may race with HVMOP_altp2m_*.
> 
> Would you like me to add the altp2m_active() check in both
> p2m_switch_domain_altp2m_by_id() and p2m_switch_vcpu_altp2m_by_id(), and
> make it harder to race (it still won't be impossible, since the bool may
> become false between the check and the actual function logic for
> p2m_switch_vcpu_altp2m_by_id(), as you've noticed)?

Well, altp2m being Tech Preview, any partial fix _could_ do in
principle. But personally I dislike such half hearted approaches,
and hence I'd recommend to address the issue in full, i.e.
eliminate race potential altogether. In the end you're going to
be bitten more than me by not getting this into proper shape.

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH for-4.12 V3] x86/altp2m: fix HVMOP_altp2m_set_domain_state race
  2019-02-12  7:31           ` Jan Beulich
@ 2019-02-12  9:23             ` Razvan Cojocaru
  0 siblings, 0 replies; 10+ messages in thread
From: Razvan Cojocaru @ 2019-02-12  9:23 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Wei Liu, George Dunlap, Andrew Cooper, Jun Nakajima,
	xen-devel, Roger Pau Monne

On 2/12/19 9:31 AM, Jan Beulich wrote:
>>>> On 11.02.19 at 18:21, <rcojocaru@bitdefender.com> wrote:
>> On 2/11/19 6:59 PM, Jan Beulich wrote:
>>>>> Thanks for noticing, actually this appears to invalidate the whole
>>>>> purpose of the patch (I should have tested this more before sumbitting
>>>>> V3, sorry).
>>>>>
>>>>> The whole point of the new boolean is to have p2m assigned an altp2m
>>>>> regardless of altp2m_active() (hence the change) - which now no longer
>>>>> happens. I got carried away with this change.
>>>>>
>>>>> The fact that this is so easy to get wrong is the reason why I've
>>>>> preferred the domain_pause() solution. There appears to be no clean way
>>>>> to fix this otherwise, and if this is so easy to misunderstand it'll
>>>>> break just as easily with further changes.
>>>>>
>>>>> I suppose I could just pass the bool along to p2m_get_altp2m() (and
>>>>> indeed remove the if())...
>>>>
>>>> I think the best that can be done here is to check if altp2m_active()
>>>> early in p2m_switch_vcpu_altp2m_by_id() and
>>>> p2m_switch_domain_altp2m_by_id(), then bail if it's not active. Since
>>>> these are only called by HVMOP_altp2m_* (and thus serialized by the
>>>> domain lock), it should be enough WRT HVMOP_altp2m_set_domain_state.
>>>
>>> I'm confused: Where do you see the domain lock used there?
>>> Plus I can't see p2m_switch_vcpu_altp2m_by_id() called for
>>> any HVMOP_altp2m_* at all. One of the actual callers is guarded
>>> by altp2m_active(), but the other isn't.
>>
>> do_altp2m_op() does d = rcu_lock_domain_by_any_id(a.domain);, and
>> unlocks it before it exits.
> 
> But that's not the "domain lock". This is just making sure the domain
> won't disappear behind your back.
> 
>> But you're right, p2m_switch_vcpu_altp2m_by_id() is not called for any
>> HVMOP_altp2m_*, I've misread that. Hence, I believe both callers
>> ofp2m_switch_vcpu_altp2m_by_id() may race with HVMOP_altp2m_*.
>>
>> Would you like me to add the altp2m_active() check in both
>> p2m_switch_domain_altp2m_by_id() and p2m_switch_vcpu_altp2m_by_id(), and
>> make it harder to race (it still won't be impossible, since the bool may
>> become false between the check and the actual function logic for
>> p2m_switch_vcpu_altp2m_by_id(), as you've noticed)?
> 
> Well, altp2m being Tech Preview, any partial fix _could_ do in
> principle. But personally I dislike such half hearted approaches,
> and hence I'd recommend to address the issue in full, i.e.
> eliminate race potential altogether. In the end you're going to
> be bitten more than me by not getting this into proper shape.

I'll attempt a V4 doing my best to check altp2m_active() then, and see 
if that's at least satisfactory for sane use-cases.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH for-4.12 V3] x86/altp2m: fix HVMOP_altp2m_set_domain_state race
  2019-02-11 16:59       ` Jan Beulich
  2019-02-11 17:21         ` Razvan Cojocaru
@ 2019-02-12 10:11         ` Razvan Cojocaru
  2019-02-12 11:05           ` Jan Beulich
  1 sibling, 1 reply; 10+ messages in thread
From: Razvan Cojocaru @ 2019-02-12 10:11 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Wei Liu, George Dunlap, Andrew Cooper, Jun Nakajima,
	xen-devel, Roger Pau Monne

On 2/11/19 6:59 PM, Jan Beulich wrote:
> Plus I can't see p2m_switch_vcpu_altp2m_by_id() called for
> any HVMOP_altp2m_* at all. One of the actual callers is guarded
> by altp2m_active(), but the other isn't.

Actually I see that both places are guarded by altp2m_active().

In p2m.c we have:

2312 void p2m_altp2m_check(struct vcpu *v, uint16_t idx)
2313 {
2314     if ( altp2m_active(v->domain) )
2315         p2m_switch_vcpu_altp2m_by_id(v, idx);
2316 }

and in vmx.c:

2225 static int vmx_vcpu_emulate_vmfunc(const struct cpu_user_regs *regs)
2226 {
2227     int rc = X86EMUL_EXCEPTION;
2228     struct vcpu *curr = current;
2229
2230     if ( !cpu_has_vmx_vmfunc && altp2m_active(curr->domain) &&
2231          regs->eax == 0 &&
2232          p2m_switch_vcpu_altp2m_by_id(curr, regs->ecx) )
2233         rc = X86EMUL_OKAY;
2234
2235     return rc;
2236 }

here there's an "&& altp2m_active(curr->domain)" in the if().

So I suppose in our scenario all that's needed it a similar check here:

4636     case HVMOP_altp2m_switch_p2m:
4637         rc = p2m_switch_domain_altp2m_by_id(d, a.u.view.view);
4638         break;

for the other function, p2m_switch_domain_altp2m_by_id().

Unless I'm missing something.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH for-4.12 V3] x86/altp2m: fix HVMOP_altp2m_set_domain_state race
  2019-02-12 10:11         ` Razvan Cojocaru
@ 2019-02-12 11:05           ` Jan Beulich
  0 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2019-02-12 11:05 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: Kevin Tian, Wei Liu, George Dunlap, Andrew Cooper, Jun Nakajima,
	xen-devel, Roger Pau Monne

>>> On 12.02.19 at 11:11, <rcojocaru@bitdefender.com> wrote:
> On 2/11/19 6:59 PM, Jan Beulich wrote:
>> Plus I can't see p2m_switch_vcpu_altp2m_by_id() called for
>> any HVMOP_altp2m_* at all. One of the actual callers is guarded
>> by altp2m_active(), but the other isn't.
> 
> Actually I see that both places are guarded by altp2m_active().
> 
> In p2m.c we have:
> 
> 2312 void p2m_altp2m_check(struct vcpu *v, uint16_t idx)
> 2313 {
> 2314     if ( altp2m_active(v->domain) )
> 2315         p2m_switch_vcpu_altp2m_by_id(v, idx);
> 2316 }
> 
> and in vmx.c:
> 
> 2225 static int vmx_vcpu_emulate_vmfunc(const struct cpu_user_regs *regs)
> 2226 {
> 2227     int rc = X86EMUL_EXCEPTION;
> 2228     struct vcpu *curr = current;
> 2229
> 2230     if ( !cpu_has_vmx_vmfunc && altp2m_active(curr->domain) &&
> 2231          regs->eax == 0 &&
> 2232          p2m_switch_vcpu_altp2m_by_id(curr, regs->ecx) )
> 2233         rc = X86EMUL_OKAY;
> 2234
> 2235     return rc;
> 2236 }
> 
> here there's an "&& altp2m_active(curr->domain)" in the if().

Oh, so I must have overlooked one of the two, sorry.

> So I suppose in our scenario all that's needed it a similar check here:
> 
> 4636     case HVMOP_altp2m_switch_p2m:
> 4637         rc = p2m_switch_domain_altp2m_by_id(d, a.u.view.view);
> 4638         break;
> 
> for the other function, p2m_switch_domain_altp2m_by_id().
> 
> Unless I'm missing something.

Perhaps. Question is whether outside of your scenario similar checks
are missing elsewhere.

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2019-02-12 11:06 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-11  9:13 [PATCH for-4.12 V3] x86/altp2m: fix HVMOP_altp2m_set_domain_state race Razvan Cojocaru
2019-02-11 10:10 ` Jan Beulich
2019-02-11 10:57   ` Razvan Cojocaru
2019-02-11 13:46     ` Razvan Cojocaru
2019-02-11 16:59       ` Jan Beulich
2019-02-11 17:21         ` Razvan Cojocaru
2019-02-12  7:31           ` Jan Beulich
2019-02-12  9:23             ` Razvan Cojocaru
2019-02-12 10:11         ` Razvan Cojocaru
2019-02-12 11:05           ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.