* [PATCH 0/2] x86/mm: XSA-389 follow-up
@ 2021-12-01 11:00 Jan Beulich
  2021-12-01 11:01 ` [PATCH 1/2] x86/mm: don't open-code p2m_is_pod() Jan Beulich
                   ` (6 more replies)
  0 siblings, 7 replies; 24+ messages in thread
From: Jan Beulich @ 2021-12-01 11:00 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, George Dunlap

1: mm: don't open-code p2m_is_pod()
2: PoD: move increment of entry count

Jan




* [PATCH 1/2] x86/mm: don't open-code p2m_is_pod()
  2021-12-01 11:00 [PATCH 0/2] x86/mm: XSA-389 follow-up Jan Beulich
@ 2021-12-01 11:01 ` Jan Beulich
  2021-12-01 11:22   ` Andrew Cooper
  2021-12-02 19:10   ` Tim Deegan
  2021-12-01 11:02 ` [PATCH 2/2] x86/PoD: move increment of entry count Jan Beulich
                   ` (5 subsequent siblings)
  6 siblings, 2 replies; 24+ messages in thread
From: Jan Beulich @ 2021-12-01 11:01 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné,
	George Dunlap, Kevin Tian, Jun Nakajima, Tim Deegan

Replace all comparisons against p2m_populate_on_demand (outside of
switch() statements) with the designated predicate.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
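
For readers not familiar with the p2m type predicates: they are simple
mask tests over the p2m type value. Below is a minimal sketch of the
shape p2m_is_pod() takes, modelled on the existing p2m_is_*() helpers;
the actual macro bodies live in asm/p2m.h and may differ in detail.

```c
/* Illustrative sketch only -- not the literal asm/p2m.h definitions. */
#define p2m_to_mask(t)  (1UL << (t))                          /* type -> bit */
#define P2M_POD_TYPES   (p2m_to_mask(p2m_populate_on_demand))
#define p2m_is_pod(t)   (p2m_to_mask(t) & P2M_POD_TYPES)      /* mask test   */
```

Using the predicate keeps these call sites uniform with the other
p2m_is_*() checks and would transparently cover any further types added
to P2M_POD_TYPES later.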

--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -344,7 +344,7 @@ static int ept_next_level(struct p2m_dom
     {
         int rc;
 
-        if ( e.sa_p2mt == p2m_populate_on_demand )
+        if ( p2m_is_pod(e.sa_p2mt) )
             return GUEST_TABLE_POD_PAGE;
 
         if ( read_only )
@@ -1071,7 +1071,7 @@ static mfn_t ept_get_entry(struct p2m_do
     index = gfn_remainder >> (i * EPT_TABLE_ORDER);
     ept_entry = table + index;
 
-    if ( ept_entry->sa_p2mt == p2m_populate_on_demand )
+    if ( p2m_is_pod(ept_entry->sa_p2mt) )
     {
         if ( !(q & P2M_ALLOC) )
         {
@@ -1478,7 +1478,7 @@ static void ept_dump_p2m_table(unsigned
             ept_entry = table + (gfn_remainder >> order);
             if ( ret != GUEST_TABLE_MAP_FAILED && is_epte_valid(ept_entry) )
             {
-                if ( ept_entry->sa_p2mt == p2m_populate_on_demand )
+                if ( p2m_is_pod(ept_entry->sa_p2mt) )
                     printk("gfn: %13lx order: %2d PoD\n", gfn, order);
                 else
                     printk("gfn: %13lx order: %2d mfn: %13lx %c%c%c %c%c%c\n",
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -543,7 +543,7 @@ decrease_reservation(struct domain *d, g
 
         p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0, &cur_order, NULL);
         n = 1UL << min(order, cur_order);
-        if ( t == p2m_populate_on_demand )
+        if ( p2m_is_pod(t) )
             pod += n;
         else if ( p2m_is_ram(t) )
             ram += n;
@@ -618,7 +618,7 @@ decrease_reservation(struct domain *d, g
         if ( order < cur_order )
             cur_order = order;
         n = 1UL << cur_order;
-        if ( t == p2m_populate_on_demand )
+        if ( p2m_is_pod(t) )
         {
             /* This shouldn't be able to fail */
             if ( p2m_set_entry(p2m, gfn_add(gfn, i), INVALID_MFN, cur_order,
@@ -1332,7 +1332,7 @@ mark_populate_on_demand(struct domain *d
 
         p2m->get_entry(p2m, gfn_add(gfn, i), &ot, &a, 0, &cur_order, NULL);
         n = 1UL << min(order, cur_order);
-        if ( ot == p2m_populate_on_demand )
+        if ( p2m_is_pod(ot) )
         {
             /* Count how many PoD entries we'll be replacing if successful */
             pod_count += n;
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -841,7 +841,7 @@ pod_retry_l3:
         flags = l3e_get_flags(*l3e);
         if ( !(flags & _PAGE_PRESENT) )
         {
-            if ( p2m_flags_to_type(flags) == p2m_populate_on_demand )
+            if ( p2m_is_pod(p2m_flags_to_type(flags)) )
             {
                 if ( q & P2M_ALLOC )
                 {
@@ -884,7 +884,7 @@ pod_retry_l2:
     if ( !(flags & _PAGE_PRESENT) )
     {
         /* PoD: Try to populate a 2-meg chunk */
-        if ( p2m_flags_to_type(flags) == p2m_populate_on_demand )
+        if ( p2m_is_pod(p2m_flags_to_type(flags)) )
         {
             if ( q & P2M_ALLOC ) {
                 if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_2M) )
@@ -923,7 +923,7 @@ pod_retry_l1:
     if ( !(flags & _PAGE_PRESENT) && !p2m_is_paging(l1t) )
     {
         /* PoD: Try to populate */
-        if ( l1t == p2m_populate_on_demand )
+        if ( p2m_is_pod(l1t) )
         {
             if ( q & P2M_ALLOC ) {
                 if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_4K) )
@@ -1094,8 +1094,7 @@ static long p2m_pt_audit_p2m(struct p2m_
                     if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
                     {
                         if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
-                             && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
-                                  == p2m_populate_on_demand ) )
+                             && p2m_is_pod(p2m_flags_to_type(l2e_get_flags(l2e[i2]))) )
                             entry_count+=SUPERPAGE_PAGES;
                         gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
                         continue;
@@ -1132,7 +1131,7 @@ static long p2m_pt_audit_p2m(struct p2m_
                         type = p2m_flags_to_type(l1e_get_flags(l1e[i1]));
                         if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
                         {
-                            if ( type == p2m_populate_on_demand )
+                            if ( p2m_is_pod(type) )
                                 entry_count++;
                             continue;
                         }
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -992,7 +992,7 @@ guest_physmap_add_entry(struct domain *d
             ASSERT(mfn_valid(omfn));
             set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
         }
-        else if ( ot == p2m_populate_on_demand )
+        else if ( p2m_is_pod(ot) )
         {
             /* Count how man PoD entries we'll be replacing if successful */
             pod_count++;
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -1476,7 +1476,7 @@ static int validate_gl4e(struct vcpu *v,
         mfn_t gl3mfn = get_gfn_query_unlocked(d, gfn_x(gl3gfn), &p2mt);
         if ( p2m_is_ram(p2mt) )
             sl3mfn = get_shadow_status(d, gl3mfn, SH_type_l3_shadow);
-        else if ( p2mt != p2m_populate_on_demand )
+        else if ( !p2m_is_pod(p2mt) )
             result |= SHADOW_SET_ERROR;
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
@@ -1535,7 +1535,7 @@ static int validate_gl3e(struct vcpu *v,
         mfn_t gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt);
         if ( p2m_is_ram(p2mt) )
             sl2mfn = get_shadow_status(d, gl2mfn, SH_type_l2_shadow);
-        else if ( p2mt != p2m_populate_on_demand )
+        else if ( !p2m_is_pod(p2mt) )
             result |= SHADOW_SET_ERROR;
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
@@ -1586,7 +1586,7 @@ static int validate_gl2e(struct vcpu *v,
             mfn_t gl1mfn = get_gfn_query_unlocked(d, gfn_x(gl1gfn), &p2mt);
             if ( p2m_is_ram(p2mt) )
                 sl1mfn = get_shadow_status(d, gl1mfn, SH_type_l1_shadow);
-            else if ( p2mt != p2m_populate_on_demand )
+            else if ( !p2m_is_pod(p2mt) )
                 result |= SHADOW_SET_ERROR;
         }
     }




* [PATCH 2/2] x86/PoD: move increment of entry count
  2021-12-01 11:00 [PATCH 0/2] x86/mm: XSA-389 follow-up Jan Beulich
  2021-12-01 11:01 ` [PATCH 1/2] x86/mm: don't open-code p2m_is_pod() Jan Beulich
@ 2021-12-01 11:02 ` Jan Beulich
  2021-12-01 11:27   ` Andrew Cooper
  2021-12-01 11:52 ` Jan Beulich
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2021-12-01 11:02 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, George Dunlap

When not holding the PoD lock across the entire region covering P2M
update and stats update, the entry count should indicate too large a
value in preference to a too small one, to avoid functions bailing early
when they find the count is zero. Hence increments should happen ahead
of P2M updates, while decrements should happen only after. Deal with the
one place where this hasn't been the case yet.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
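
The "functions bailing early" mentioned above are PoD paths which use a
zero entry count as a cheap "nothing to do" check. A minimal sketch of
such a consumer is below; it assumes only the p2m->pod.entry_count field
and the pod_lock()/pod_unlock() helpers visible in the patch, while the
function itself is hypothetical rather than actual Xen code.

```c
/* Hypothetical consumer: bails early when the entry count reads zero. */
static bool pod_has_work(struct p2m_domain *p2m)
{
    bool work;

    pod_lock(p2m);
    /*
     * If entry_count lagged behind an already performed P2M update, this
     * would wrongly report "no PoD entries" and callers would skip work.
     */
    work = p2m->pod.entry_count != 0;
    pod_unlock(p2m);

    return work;
}
```

Hence, if the count can be transiently stale at all, it should only be
stale on the high side: an over-count merely causes some wasted
scanning, while an under-count (in particular zero) makes such paths
miss PoD entries which do exist.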

--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -1345,19 +1345,15 @@ mark_populate_on_demand(struct domain *d
         }
     }
 
+    pod_lock(p2m);
+    p2m->pod.entry_count += (1UL << order) - pod_count;
+    pod_unlock(p2m);
+
     /* Now, actually do the two-way mapping */
     rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
                        p2m_populate_on_demand, p2m->default_access);
     if ( rc == 0 )
-    {
-        pod_lock(p2m);
-        p2m->pod.entry_count += 1UL << order;
-        p2m->pod.entry_count -= pod_count;
-        BUG_ON(p2m->pod.entry_count < 0);
-        pod_unlock(p2m);
-
         ioreq_request_mapcache_invalidate(d);
-    }
     else if ( order )
     {
         /*
@@ -1369,6 +1365,13 @@ mark_populate_on_demand(struct domain *d
                d, gfn_l, order, rc);
         domain_crash(d);
     }
+    else if ( !pod_count )
+    {
+        pod_lock(p2m);
+        BUG_ON(!p2m->pod.entry_count);
+        --p2m->pod.entry_count;
+        pod_unlock(p2m);
+    }
 
 out:
     gfn_unlock(p2m, gfn, order);
When not holding the PoD lock across the entire region covering P2M
update and stats update, the entry count should indicate too large a
value in preference to a too small one, to avoid functions bailing early
when they find the count is zero. Hence increments should happen ahead
of P2M updates, while decrements should happen only after. Deal with the
one place where this hasn't been the case yet.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -1345,19 +1345,15 @@ mark_populate_on_demand(struct domain *d
         }
     }
 
+    pod_lock(p2m);
+    p2m->pod.entry_count += (1UL << order) - pod_count;
+    pod_unlock(p2m);
+
     /* Now, actually do the two-way mapping */
     rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
                        p2m_populate_on_demand, p2m->default_access);
     if ( rc == 0 )
-    {
-        pod_lock(p2m);
-        p2m->pod.entry_count += 1UL << order;
-        p2m->pod.entry_count -= pod_count;
-        BUG_ON(p2m->pod.entry_count < 0);
-        pod_unlock(p2m);
-
         ioreq_request_mapcache_invalidate(d);
-    }
     else if ( order )
     {
         /*
@@ -1369,6 +1365,13 @@ mark_populate_on_demand(struct domain *d
                d, gfn_l, order, rc);
         domain_crash(d);
     }
+    else if ( !pod_count )
+    {
+        pod_lock(p2m);
+        BUG_ON(!p2m->pod.entry_count);
+        --p2m->pod.entry_count;
+        pod_unlock(p2m);
+    }
 
 out:
     gfn_unlock(p2m, gfn, order);




* Re: [PATCH 1/2] x86/mm: don't open-code p2m_is_pod()
  2021-12-01 11:01 ` [PATCH 1/2] x86/mm: don't open-code p2m_is_pod() Jan Beulich
@ 2021-12-01 11:22   ` Andrew Cooper
  2021-12-02 19:10   ` Tim Deegan
  1 sibling, 0 replies; 24+ messages in thread
From: Andrew Cooper @ 2021-12-01 11:22 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné,
	George Dunlap, Kevin Tian, Jun Nakajima, Tim Deegan

On 01/12/2021 11:01, Jan Beulich wrote:
> Replace all comparisons against p2m_populate_on_demand (outside of
> switch() statements) with the designated predicate.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>



* Re: [PATCH 2/2] x86/PoD: move increment of entry count
  2021-12-01 11:02 ` [PATCH 2/2] x86/PoD: move increment of entry count Jan Beulich
@ 2021-12-01 11:27   ` Andrew Cooper
  2021-12-01 11:50     ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Andrew Cooper @ 2021-12-01 11:27 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, George Dunlap

On 01/12/2021 11:02, Jan Beulich wrote:
> When not holding the PoD lock across the entire region covering P2M
> update and stats update, the entry count should indicate too large a
> value in preference to a too small one, to avoid functions bailing early
> when they find the count is zero. Hence increments should happen ahead
> of P2M updates, while decrements should happen only after. Deal with the
> one place where this hasn't been the case yet.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -1345,19 +1345,15 @@ mark_populate_on_demand(struct domain *d
>          }
>      }
>  
> +    pod_lock(p2m);
> +    p2m->pod.entry_count += (1UL << order) - pod_count;
> +    pod_unlock(p2m);
> +
>      /* Now, actually do the two-way mapping */
>      rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
>                         p2m_populate_on_demand, p2m->default_access);
>      if ( rc == 0 )
> -    {
> -        pod_lock(p2m);
> -        p2m->pod.entry_count += 1UL << order;
> -        p2m->pod.entry_count -= pod_count;
> -        BUG_ON(p2m->pod.entry_count < 0);
> -        pod_unlock(p2m);
> -
>          ioreq_request_mapcache_invalidate(d);
> -    }
>      else if ( order )
>      {
>          /*
> @@ -1369,6 +1365,13 @@ mark_populate_on_demand(struct domain *d
>                 d, gfn_l, order, rc);
>          domain_crash(d);
>      }
> +    else if ( !pod_count )
> +    {
> +        pod_lock(p2m);
> +        BUG_ON(!p2m->pod.entry_count);
> +        --p2m->pod.entry_count;
> +        pod_unlock(p2m);
> +    }
>  
>  out:
>      gfn_unlock(p2m, gfn, order);

This email appears to contain the same patch twice, presumably split at
this point.

Which one should be reviewed?

~Andrew

> When not holding the PoD lock across the entire region covering P2M
> update and stats update, the entry count should indicate too large a
> value in preference to a too small one, to avoid functions bailing early
> when they find the count is zero. Hence increments should happen ahead
> of P2M updates, while decrements should happen only after. Deal with the
> one place where this hasn't been the case yet.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -1345,19 +1345,15 @@ mark_populate_on_demand(struct domain *d
>          }
>      }
>  
> +    pod_lock(p2m);
> +    p2m->pod.entry_count += (1UL << order) - pod_count;
> +    pod_unlock(p2m);
> +
>      /* Now, actually do the two-way mapping */
>      rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
>                         p2m_populate_on_demand, p2m->default_access);
>      if ( rc == 0 )
> -    {
> -        pod_lock(p2m);
> -        p2m->pod.entry_count += 1UL << order;
> -        p2m->pod.entry_count -= pod_count;
> -        BUG_ON(p2m->pod.entry_count < 0);
> -        pod_unlock(p2m);
> -
>          ioreq_request_mapcache_invalidate(d);
> -    }
>      else if ( order )
>      {
>          /*
> @@ -1369,6 +1365,13 @@ mark_populate_on_demand(struct domain *d
>                 d, gfn_l, order, rc);
>          domain_crash(d);
>      }
> +    else if ( !pod_count )
> +    {
> +        pod_lock(p2m);
> +        BUG_ON(!p2m->pod.entry_count);
> +        --p2m->pod.entry_count;
> +        pod_unlock(p2m);
> +    }
>  
>  out:
>      gfn_unlock(p2m, gfn, order);
>
>




* Re: [PATCH 2/2] x86/PoD: move increment of entry count
  2021-12-01 11:27   ` Andrew Cooper
@ 2021-12-01 11:50     ` Jan Beulich
  0 siblings, 0 replies; 24+ messages in thread
From: Jan Beulich @ 2021-12-01 11:50 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, George Dunlap, xen-devel

On 01.12.2021 12:27, Andrew Cooper wrote:
> On 01/12/2021 11:02, Jan Beulich wrote:
>> When not holding the PoD lock across the entire region covering P2M
>> update and stats update, the entry count should indicate too large a
>> value in preference to a too small one, to avoid functions bailing early
>> when they find the count is zero. Hence increments should happen ahead
>> of P2M updates, while decrements should happen only after. Deal with the
>> one place where this hasn't been the case yet.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> --- a/xen/arch/x86/mm/p2m-pod.c
>> +++ b/xen/arch/x86/mm/p2m-pod.c
>> @@ -1345,19 +1345,15 @@ mark_populate_on_demand(struct domain *d
>>          }
>>      }
>>  
>> +    pod_lock(p2m);
>> +    p2m->pod.entry_count += (1UL << order) - pod_count;
>> +    pod_unlock(p2m);
>> +
>>      /* Now, actually do the two-way mapping */
>>      rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
>>                         p2m_populate_on_demand, p2m->default_access);
>>      if ( rc == 0 )
>> -    {
>> -        pod_lock(p2m);
>> -        p2m->pod.entry_count += 1UL << order;
>> -        p2m->pod.entry_count -= pod_count;
>> -        BUG_ON(p2m->pod.entry_count < 0);
>> -        pod_unlock(p2m);
>> -
>>          ioreq_request_mapcache_invalidate(d);
>> -    }
>>      else if ( order )
>>      {
>>          /*
>> @@ -1369,6 +1365,13 @@ mark_populate_on_demand(struct domain *d
>>                 d, gfn_l, order, rc);
>>          domain_crash(d);
>>      }
>> +    else if ( !pod_count )
>> +    {
>> +        pod_lock(p2m);
>> +        BUG_ON(!p2m->pod.entry_count);
>> +        --p2m->pod.entry_count;
>> +        pod_unlock(p2m);
>> +    }
>>  
>>  out:
>>      gfn_unlock(p2m, gfn, order);
> 
> This email appears to contain the same patch twice, presumably split at
> this point.

Urgh - no idea how this has happened.

> Which one should be reviewed?

Just everything up from here. Or let me simply resend.

Jan




* [PATCH 2/2] x86/PoD: move increment of entry count
  2021-12-01 11:00 [PATCH 0/2] x86/mm: XSA-389 follow-up Jan Beulich
  2021-12-01 11:01 ` [PATCH 1/2] x86/mm: don't open-code p2m_is_pod() Jan Beulich
  2021-12-01 11:02 ` [PATCH 2/2] x86/PoD: move increment of entry count Jan Beulich
@ 2021-12-01 11:52 ` Jan Beulich
  2021-12-01 15:40   ` Andrew Cooper
  2022-01-04 10:57 ` [PATCH v2] " Jan Beulich
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2021-12-01 11:52 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, George Dunlap

When not holding the PoD lock across the entire region covering P2M
update and stats update, the entry count should indicate too large a
value in preference to a too small one, to avoid functions bailing early
when they find the count is zero. Hence increments should happen ahead
of P2M updates, while decrements should happen only after. Deal with the
one place where this hasn't been the case yet.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Resending due to the original submission having had the same contents
twice.

--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -1345,19 +1345,15 @@ mark_populate_on_demand(struct domain *d
         }
     }
 
+    pod_lock(p2m);
+    p2m->pod.entry_count += (1UL << order) - pod_count;
+    pod_unlock(p2m);
+
     /* Now, actually do the two-way mapping */
     rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
                        p2m_populate_on_demand, p2m->default_access);
     if ( rc == 0 )
-    {
-        pod_lock(p2m);
-        p2m->pod.entry_count += 1UL << order;
-        p2m->pod.entry_count -= pod_count;
-        BUG_ON(p2m->pod.entry_count < 0);
-        pod_unlock(p2m);
-
         ioreq_request_mapcache_invalidate(d);
-    }
     else if ( order )
     {
         /*
@@ -1369,6 +1365,13 @@ mark_populate_on_demand(struct domain *d
                d, gfn_l, order, rc);
         domain_crash(d);
     }
+    else if ( !pod_count )
+    {
+        pod_lock(p2m);
+        BUG_ON(!p2m->pod.entry_count);
+        --p2m->pod.entry_count;
+        pod_unlock(p2m);
+    }
 
 out:
     gfn_unlock(p2m, gfn, order);




* Re: [PATCH 2/2] x86/PoD: move increment of entry count
  2021-12-01 11:52 ` Jan Beulich
@ 2021-12-01 15:40   ` Andrew Cooper
  0 siblings, 0 replies; 24+ messages in thread
From: Andrew Cooper @ 2021-12-01 15:40 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, George Dunlap

On 01/12/2021 11:52, Jan Beulich wrote:
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -1345,19 +1345,15 @@ mark_populate_on_demand(struct domain *d
>          }
>      }
>  
> +    pod_lock(p2m);
> +    p2m->pod.entry_count += (1UL << order) - pod_count;
> +    pod_unlock(p2m);
> +
>      /* Now, actually do the two-way mapping */
>      rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
>                         p2m_populate_on_demand, p2m->default_access);
>      if ( rc == 0 )
> -    {
> -        pod_lock(p2m);
> -        p2m->pod.entry_count += 1UL << order;
> -        p2m->pod.entry_count -= pod_count;
> -        BUG_ON(p2m->pod.entry_count < 0);
> -        pod_unlock(p2m);
> -
>          ioreq_request_mapcache_invalidate(d);
> -    }
>      else if ( order )
>      {
>          /*
> @@ -1369,6 +1365,13 @@ mark_populate_on_demand(struct domain *d
>                 d, gfn_l, order, rc);
>          domain_crash(d);
>      }
> +    else if ( !pod_count )
> +    {
> +        pod_lock(p2m);
> +        BUG_ON(!p2m->pod.entry_count);
> +        --p2m->pod.entry_count;
> +        pod_unlock(p2m);
> +    }

This is very confusing logic to follow at the best of times.  Given that
the p2m is already locked, what's wrong with holding the pod_lock()
across p2m_set_entry()?

At the very least, this needs some comments helping to explain why these
two hunks are undo's of each other, given how differently they're written.

~Andrew



* Re: [PATCH 1/2] x86/mm: don't open-code p2m_is_pod()
  2021-12-01 11:01 ` [PATCH 1/2] x86/mm: don't open-code p2m_is_pod() Jan Beulich
  2021-12-01 11:22   ` Andrew Cooper
@ 2021-12-02 19:10   ` Tim Deegan
  1 sibling, 0 replies; 24+ messages in thread
From: Tim Deegan @ 2021-12-02 19:10 UTC (permalink / raw)
  To: Jan Beulich
  Cc: xen-devel, Andrew Cooper, Wei Liu, Roger Pau Monné,
	George Dunlap, Kevin Tian, Jun Nakajima

At 12:01 +0100 on 01 Dec (1638360084), Jan Beulich wrote:
> Replace all comparisons against p2m_populate_on_demand (outside of
> switch() statements) with the designated predicate.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Tim Deegan <tim@xen.org>



* [PATCH v2] x86/PoD: move increment of entry count
  2021-12-01 11:00 [PATCH 0/2] x86/mm: XSA-389 follow-up Jan Beulich
                   ` (2 preceding siblings ...)
  2021-12-01 11:52 ` Jan Beulich
@ 2022-01-04 10:57 ` Jan Beulich
  2022-01-27 15:04   ` Ping: " Jan Beulich
  2024-03-11 16:04   ` George Dunlap
  2024-03-12 15:22 ` [PATCH v3] x86/PoD: tie together P2M update and " Jan Beulich
                   ` (2 subsequent siblings)
  6 siblings, 2 replies; 24+ messages in thread
From: Jan Beulich @ 2022-01-04 10:57 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, George Dunlap

When not holding the PoD lock across the entire region covering P2M
update and stats update, the entry count should indicate too large a
value in preference to a too small one, to avoid functions bailing early
when they find the count is zero. Hence increments should happen ahead
of P2M updates, while decrements should happen only after. Deal with the
one place where this hasn't been the case yet.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Add comments.
---
While it might be possible to hold the PoD lock over the entire
operation, I didn't want to chance introducing a lock order violation on
a perhaps rarely taken code path.

--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -1342,19 +1342,22 @@ mark_populate_on_demand(struct domain *d
         }
     }
 
+    /*
+     * Without holding the PoD lock across the entire operation, bump the
+     * entry count up front assuming success of p2m_set_entry(), undoing the
+     * bump as necessary upon failure.  Bumping only upon success would risk
+     * code elsewhere observing entry count being zero despite there actually
+     * still being PoD entries.
+     */
+    pod_lock(p2m);
+    p2m->pod.entry_count += (1UL << order) - pod_count;
+    pod_unlock(p2m);
+
     /* Now, actually do the two-way mapping */
     rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
                        p2m_populate_on_demand, p2m->default_access);
     if ( rc == 0 )
-    {
-        pod_lock(p2m);
-        p2m->pod.entry_count += 1UL << order;
-        p2m->pod.entry_count -= pod_count;
-        BUG_ON(p2m->pod.entry_count < 0);
-        pod_unlock(p2m);
-
         ioreq_request_mapcache_invalidate(d);
-    }
     else if ( order )
     {
         /*
@@ -1366,6 +1369,14 @@ mark_populate_on_demand(struct domain *d
                d, gfn_l, order, rc);
         domain_crash(d);
     }
+    else if ( !pod_count )
+    {
+        /* Undo earlier increment; see comment above. */
+        pod_lock(p2m);
+        BUG_ON(!p2m->pod.entry_count);
+        --p2m->pod.entry_count;
+        pod_unlock(p2m);
+    }
 
 out:
     gfn_unlock(p2m, gfn, order);




* Ping: [PATCH v2] x86/PoD: move increment of entry count
  2022-01-04 10:57 ` [PATCH v2] " Jan Beulich
@ 2022-01-27 15:04   ` Jan Beulich
  2022-02-24 13:04     ` Ping²: " Jan Beulich
  2024-03-11 16:04   ` George Dunlap
  1 sibling, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2022-01-27 15:04 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Wei Liu, Roger Pau Monné, George Dunlap, xen-devel

On 04.01.2022 11:57, Jan Beulich wrote:
> When not holding the PoD lock across the entire region covering P2M
> update and stats update, the entry count should indicate too large a
> value in preference to a too small one, to avoid functions bailing early
> when they find the count is zero. Hence increments should happen ahead
> of P2M updates, while decrements should happen only after. Deal with the
> one place where this hasn't been the case yet.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v2: Add comments.

Ping?

Jan

> ---
> While it might be possible to hold the PoD lock over the entire
> operation, I didn't want to chance introducing a lock order violation on
> a perhaps rarely taken code path.
> 
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -1342,19 +1342,22 @@ mark_populate_on_demand(struct domain *d
>          }
>      }
>  
> +    /*
> +     * Without holding the PoD lock across the entire operation, bump the
> +     * entry count up front assuming success of p2m_set_entry(), undoing the
> +     * bump as necessary upon failure.  Bumping only upon success would risk
> +     * code elsewhere observing entry count being zero despite there actually
> +     * still being PoD entries.
> +     */
> +    pod_lock(p2m);
> +    p2m->pod.entry_count += (1UL << order) - pod_count;
> +    pod_unlock(p2m);
> +
>      /* Now, actually do the two-way mapping */
>      rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
>                         p2m_populate_on_demand, p2m->default_access);
>      if ( rc == 0 )
> -    {
> -        pod_lock(p2m);
> -        p2m->pod.entry_count += 1UL << order;
> -        p2m->pod.entry_count -= pod_count;
> -        BUG_ON(p2m->pod.entry_count < 0);
> -        pod_unlock(p2m);
> -
>          ioreq_request_mapcache_invalidate(d);
> -    }
>      else if ( order )
>      {
>          /*
> @@ -1366,6 +1369,14 @@ mark_populate_on_demand(struct domain *d
>                 d, gfn_l, order, rc);
>          domain_crash(d);
>      }
> +    else if ( !pod_count )
> +    {
> +        /* Undo earlier increment; see comment above. */
> +        pod_lock(p2m);
> +        BUG_ON(!p2m->pod.entry_count);
> +        --p2m->pod.entry_count;
> +        pod_unlock(p2m);
> +    }
>  
>  out:
>      gfn_unlock(p2m, gfn, order);
> 
> 




* Ping²: [PATCH v2] x86/PoD: move increment of entry count
  2022-01-27 15:04   ` Ping: " Jan Beulich
@ 2022-02-24 13:04     ` Jan Beulich
  0 siblings, 0 replies; 24+ messages in thread
From: Jan Beulich @ 2022-02-24 13:04 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Wei Liu, Roger Pau Monné, George Dunlap, xen-devel

On 27.01.2022 16:04, Jan Beulich wrote:
> On 04.01.2022 11:57, Jan Beulich wrote:
>> When not holding the PoD lock across the entire region covering P2M
>> update and stats update, the entry count should indicate too large a
>> value in preference to a too small one, to avoid functions bailing early
>> when they find the count is zero. Hence increments should happen ahead
>> of P2M updates, while decrements should happen only after. Deal with the
>> one place where this hasn't been the case yet.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> v2: Add comments.
> 
> Ping?

Similarly here: Another 4 weeks have passed ...

Thanks for feedback either way, Jan

>> ---
>> While it might be possible to hold the PoD lock over the entire
>> operation, I didn't want to chance introducing a lock order violation on
>> a perhaps rarely taken code path.
>>
>> --- a/xen/arch/x86/mm/p2m-pod.c
>> +++ b/xen/arch/x86/mm/p2m-pod.c
>> @@ -1342,19 +1342,22 @@ mark_populate_on_demand(struct domain *d
>>          }
>>      }
>>  
>> +    /*
>> +     * Without holding the PoD lock across the entire operation, bump the
>> +     * entry count up front assuming success of p2m_set_entry(), undoing the
>> +     * bump as necessary upon failure.  Bumping only upon success would risk
>> +     * code elsewhere observing entry count being zero despite there actually
>> +     * still being PoD entries.
>> +     */
>> +    pod_lock(p2m);
>> +    p2m->pod.entry_count += (1UL << order) - pod_count;
>> +    pod_unlock(p2m);
>> +
>>      /* Now, actually do the two-way mapping */
>>      rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
>>                         p2m_populate_on_demand, p2m->default_access);
>>      if ( rc == 0 )
>> -    {
>> -        pod_lock(p2m);
>> -        p2m->pod.entry_count += 1UL << order;
>> -        p2m->pod.entry_count -= pod_count;
>> -        BUG_ON(p2m->pod.entry_count < 0);
>> -        pod_unlock(p2m);
>> -
>>          ioreq_request_mapcache_invalidate(d);
>> -    }
>>      else if ( order )
>>      {
>>          /*
>> @@ -1366,6 +1369,14 @@ mark_populate_on_demand(struct domain *d
>>                 d, gfn_l, order, rc);
>>          domain_crash(d);
>>      }
>> +    else if ( !pod_count )
>> +    {
>> +        /* Undo earlier increment; see comment above. */
>> +        pod_lock(p2m);
>> +        BUG_ON(!p2m->pod.entry_count);
>> +        --p2m->pod.entry_count;
>> +        pod_unlock(p2m);
>> +    }
>>  
>>  out:
>>      gfn_unlock(p2m, gfn, order);
>>
>>
> 




* Re: [PATCH v2] x86/PoD: move increment of entry count
  2022-01-04 10:57 ` [PATCH v2] " Jan Beulich
  2022-01-27 15:04   ` Ping: " Jan Beulich
@ 2024-03-11 16:04   ` George Dunlap
  2024-03-12 13:17     ` Jan Beulich
  1 sibling, 1 reply; 24+ messages in thread
From: George Dunlap @ 2024-03-11 16:04 UTC (permalink / raw)
  To: Jan Beulich
  Cc: xen-devel, Andrew Cooper, Wei Liu, Roger Pau Monné, George Dunlap


On Tue, Jan 4, 2022 at 10:58 AM Jan Beulich <jbeulich@suse.com> wrote:

> When not holding the PoD lock across the entire region covering P2M
> update and stats update, the entry count should indicate too large a
> value in preference to a too small one, to avoid functions bailing early
> when they find the count is zero. Hence increments should happen ahead
> of P2M updates, while decrements should happen only after. Deal with the
> one place where this hasn't been the case yet.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v2: Add comments.
> ---
> While it might be possible to hold the PoD lock over the entire
> operation, I didn't want to chance introducing a lock order violation on
> a perhaps rarely taken code path.
>

[No idea how I missed this 2 years ago, sorry for the delay]

Actually I think just holding the lock is probably the right thing to do.
We already grab gfn_lock() over the entire operation, and p2m_set_entry()
ASSERTs gfn_locked_by_me(); and all we have to do to trigger the check is
boot any guest in PoD mode at all; surely we have osstest tests for that?

 -George



* Re: [PATCH v2] x86/PoD: move increment of entry count
  2024-03-11 16:04   ` George Dunlap
@ 2024-03-12 13:17     ` Jan Beulich
  0 siblings, 0 replies; 24+ messages in thread
From: Jan Beulich @ 2024-03-12 13:17 UTC (permalink / raw)
  To: George Dunlap
  Cc: xen-devel, Andrew Cooper, Wei Liu, Roger Pau Monné, George Dunlap

On 11.03.2024 17:04, George Dunlap wrote:
> On Tue, Jan 4, 2022 at 10:58 AM Jan Beulich <jbeulich@suse.com> wrote:
> 
>> When not holding the PoD lock across the entire region covering P2M
>> update and stats update, the entry count should indicate too large a
>> value in preference to a too small one, to avoid functions bailing early
>> when they find the count is zero. Hence increments should happen ahead
>> of P2M updates, while decrements should happen only after. Deal with the
>> one place where this hasn't been the case yet.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> v2: Add comments.
>> ---
>> While it might be possible to hold the PoD lock over the entire
>> operation, I didn't want to chance introducing a lock order violation on
>> a perhaps rarely taken code path.
>>
> 
> [No idea how I missed this 2 years ago, sorry for the delay]
> 
> Actually I think just holding the lock is probably the right thing to do.
> We already grab gfn_lock() over the entire operation, and p2m_set_entry()
> ASSERTs gfn_locked_by_me(); and all we have to do to trigger the check is
> boot any guest in PoD mode at all; surely we have osstest tests for that?

The gfn (aka p2m) lock isn't of interest here. It's the locks whose lock
level is between p2m and pod which are. Then again there are obviously
other calls to p2m_set_entry() with the PoD lock held. So if there was a
problem (e.g. with ept_set_entry() calling p2m_altp2m_propagate_change()),
I wouldn't make things meaningfully worse by holding the PoD lock for
longer here. So yes, let me switch to that and then hope ...

Jan
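
For context on the "lock level" remark: Xen's MM locks are meant to be
acquired in a fixed order, enforced at run time by recording the highest
lock level currently held on a CPU and checking that any newly taken
lock has a higher level. A simplified sketch of that discipline follows;
the real machinery lives in xen/arch/x86/mm/mm-locks.h, differs in
detail, and for instance permits re-acquisition at the same level.

```c
/* Simplified sketch of level-ordered locking; not the mm-locks.h code. */
static DEFINE_PER_CPU(int, mm_lock_level);

static void ordered_lock(spinlock_t *l, int level)
{
    /* Nesting must go from lower levels (e.g. p2m) to higher (e.g. PoD). */
    ASSERT(this_cpu(mm_lock_level) < level);
    spin_lock(l);
    this_cpu(mm_lock_level) = level;
}
```

Holding the PoD lock across p2m_set_entry() is therefore only safe if
nothing reached from there tries to take a lock whose level lies between
the p2m and PoD levels - which is exactly the risk being weighed above.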



* [PATCH v3] x86/PoD: tie together P2M update and increment of entry count
  2021-12-01 11:00 [PATCH 0/2] x86/mm: XSA-389 follow-up Jan Beulich
                   ` (3 preceding siblings ...)
  2022-01-04 10:57 ` [PATCH v2] " Jan Beulich
@ 2024-03-12 15:22 ` Jan Beulich
  2024-03-13 10:58   ` George Dunlap
  2024-03-13 14:00 ` [PATCH v4] " Jan Beulich
  2024-03-19 13:22 ` [PATCH v5] " Jan Beulich
  6 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-03-12 15:22 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, George Dunlap

When not holding the PoD lock across the entire region covering P2M
update and stats update, the entry count - if to be incorrect at all -
should indicate too large a value in preference to a too small one, to
avoid functions bailing early when they find the count is zero. However,
instead of moving the increment ahead (and adjust back upon failure),
extend the PoD-locked region.

Fixes: 99af3cd40b6e ("x86/mm: Rework locking in the PoD layer")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
The locked region could be shrunk again, by having multiple unlock
calls. But I think both ioreq_request_mapcache_invalidate() and
domain_crash() are fair enough to call with the lock still held?
---
v3: Extend locked region instead. Add Fixes: tag.
v2: Add comments.

--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -1348,16 +1348,22 @@ mark_populate_on_demand(struct domain *d
         }
     }
 
+    /*
+     * P2M update and stats increment need to collectively be under PoD lock,
+     * to prevent code elsewhere observing PoD entry count being zero despite
+     * there actually still being PoD entries (created by the p2m_set_entry()
+     * invocation below).
+     */
+    pod_lock(p2m);
+
     /* Now, actually do the two-way mapping */
     rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
                        p2m_populate_on_demand, p2m->default_access);
     if ( rc == 0 )
     {
-        pod_lock(p2m);
         p2m->pod.entry_count += 1UL << order;
         p2m->pod.entry_count -= pod_count;
         BUG_ON(p2m->pod.entry_count < 0);
-        pod_unlock(p2m);
 
         ioreq_request_mapcache_invalidate(d);
     }
@@ -1373,6 +1379,8 @@ mark_populate_on_demand(struct domain *d
         domain_crash(d);
     }
 
+    pod_unlock(p2m);
+
 out:
     gfn_unlock(p2m, gfn, order);
 



* Re: [PATCH v3] x86/PoD: tie together P2M update and increment of entry count
  2024-03-12 15:22 ` [PATCH v3] x86/PoD: tie together P2M update and " Jan Beulich
@ 2024-03-13 10:58   ` George Dunlap
  2024-03-13 12:19     ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: George Dunlap @ 2024-03-13 10:58 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Andrew Cooper, Wei Liu, Roger Pau Monné

On Tue, Mar 12, 2024 at 3:22 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> When not holding the PoD lock across the entire region covering P2M
> update and stats update, the entry count - if to be incorrect at all -
> should indicate too large a value in preference to a too small one, to
> avoid functions bailing early when they find the count is zero. However,
> instead of moving the increment ahead (and adjust back upon failure),
> extend the PoD-locked region.
>
> Fixes: 99af3cd40b6e ("x86/mm: Rework locking in the PoD layer")
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> The locked region could be shrunk again, by having multiple unlock
> calls. But I think both ioreq_request_mapcache_invalidate() and
> domain_crash() are fair enough to call with the lock still held?
> ---
> v3: Extend locked region instead. Add Fixes: tag.
> v2: Add comments.
>
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -1348,16 +1348,22 @@ mark_populate_on_demand(struct domain *d
>          }
>      }
>
> +    /*
> +     * P2M update and stats increment need to collectively be under PoD lock,
> +     * to prevent code elsewhere observing PoD entry count being zero despite
> +     * there actually still being PoD entries (created by the p2m_set_entry()
> +     * invocation below).
> +     */
> +    pod_lock(p2m);
> +
>      /* Now, actually do the two-way mapping */
>      rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
>                         p2m_populate_on_demand, p2m->default_access);
>      if ( rc == 0 )
>      {
> -        pod_lock(p2m);
>          p2m->pod.entry_count += 1UL << order;
>          p2m->pod.entry_count -= pod_count;
>          BUG_ON(p2m->pod.entry_count < 0);
> -        pod_unlock(p2m);
>
>          ioreq_request_mapcache_invalidate(d);
>      }
> @@ -1373,6 +1379,8 @@ mark_populate_on_demand(struct domain *d
>          domain_crash(d);
>      }
>
> +    pod_unlock(p2m);

We're confident that neither domain_crash() nor
ioreq_request_mapcache_invalidate() will grab any of the p2m locks?

If so,

Reviewed-by: George Dunlap <george.dunlap@cloud.com>



* Re: [PATCH v3] x86/PoD: tie together P2M update and increment of entry count
  2024-03-13 10:58   ` George Dunlap
@ 2024-03-13 12:19     ` Jan Beulich
  2024-03-13 12:25       ` George Dunlap
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-03-13 12:19 UTC (permalink / raw)
  To: George Dunlap; +Cc: xen-devel, Andrew Cooper, Wei Liu, Roger Pau Monné

On 13.03.2024 11:58, George Dunlap wrote:
> On Tue, Mar 12, 2024 at 3:22 PM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> When not holding the PoD lock across the entire region covering P2M
>> update and stats update, the entry count - if to be incorrect at all -
>> should indicate too large a value in preference to a too small one, to
>> avoid functions bailing early when they find the count is zero. However,
>> instead of moving the increment ahead (and adjust back upon failure),
>> extend the PoD-locked region.
>>
>> Fixes: 99af3cd40b6e ("x86/mm: Rework locking in the PoD layer")
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> The locked region could be shrunk again, by having multiple unlock
>> calls. But I think both ioreq_request_mapcache_invalidate() and
>> domain_crash() are fair enough to call with the lock still held?
>> ---
>> v3: Extend locked region instead. Add Fixes: tag.
>> v2: Add comments.
>>
>> --- a/xen/arch/x86/mm/p2m-pod.c
>> +++ b/xen/arch/x86/mm/p2m-pod.c
>> @@ -1348,16 +1348,22 @@ mark_populate_on_demand(struct domain *d
>>          }
>>      }
>>
>> +    /*
>> +     * P2M update and stats increment need to collectively be under PoD lock,
>> +     * to prevent code elsewhere observing PoD entry count being zero despite
>> +     * there actually still being PoD entries (created by the p2m_set_entry()
>> +     * invocation below).
>> +     */
>> +    pod_lock(p2m);
>> +
>>      /* Now, actually do the two-way mapping */
>>      rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
>>                         p2m_populate_on_demand, p2m->default_access);
>>      if ( rc == 0 )
>>      {
>> -        pod_lock(p2m);
>>          p2m->pod.entry_count += 1UL << order;
>>          p2m->pod.entry_count -= pod_count;
>>          BUG_ON(p2m->pod.entry_count < 0);
>> -        pod_unlock(p2m);
>>
>>          ioreq_request_mapcache_invalidate(d);
>>      }
>> @@ -1373,6 +1379,8 @@ mark_populate_on_demand(struct domain *d
>>          domain_crash(d);
>>      }
>>
>> +    pod_unlock(p2m);
> 
> We're confident that neither domain_crash() nor
> ioreq_request_mapcache_invalidate() will grab any of the p2m locks?

There's no doubt about ioreq_request_mapcache_invalidate(). domain_crash(),
otoh, invokes show_execution_state(), which in principle would be nice to
dump the guest stack among other things. My patch doing so was reverted, so
right now there's no issue there. Plus any attempt to do so would need to
be careful anyway regarding locks. But as you see it is not a clear cut no,
so ...

> If so,
> 
> Reviewed-by: George Dunlap <george.dunlap@cloud.com>

... rather than taking this (thanks), maybe I indeed better follow the
alternative outlined in the post-commit-message remark?

Jan



* Re: [PATCH v3] x86/PoD: tie together P2M update and increment of entry count
  2024-03-13 12:19     ` Jan Beulich
@ 2024-03-13 12:25       ` George Dunlap
  2024-03-13 12:25         ` George Dunlap
  0 siblings, 1 reply; 24+ messages in thread
From: George Dunlap @ 2024-03-13 12:25 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Andrew Cooper, Wei Liu, Roger Pau Monné

On Wed, Mar 13, 2024 at 12:19 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 13.03.2024 11:58, George Dunlap wrote:
> > On Tue, Mar 12, 2024 at 3:22 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> When not holding the PoD lock across the entire region covering P2M
> >> update and stats update, the entry count - if to be incorrect at all -
> >> should indicate too large a value in preference to a too small one, to
> >> avoid functions bailing early when they find the count is zero. However,
> >> instead of moving the increment ahead (and adjust back upon failure),
> >> extend the PoD-locked region.
> >>
> >> Fixes: 99af3cd40b6e ("x86/mm: Rework locking in the PoD layer")
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> ---
> >> The locked region could be shrunk again, by having multiple unlock
> >> calls. But I think both ioreq_request_mapcache_invalidate() and
> >> domain_crash() are fair enough to call with the lock still held?
> >> ---
> >> v3: Extend locked region instead. Add Fixes: tag.
> >> v2: Add comments.
> >>
> >> --- a/xen/arch/x86/mm/p2m-pod.c
> >> +++ b/xen/arch/x86/mm/p2m-pod.c
> >> @@ -1348,16 +1348,22 @@ mark_populate_on_demand(struct domain *d
> >>          }
> >>      }
> >>
> >> +    /*
> >> +     * P2M update and stats increment need to collectively be under PoD lock,
> >> +     * to prevent code elsewhere observing PoD entry count being zero despite
> >> +     * there actually still being PoD entries (created by the p2m_set_entry()
> >> +     * invocation below).
> >> +     */
> >> +    pod_lock(p2m);
> >> +
> >>      /* Now, actually do the two-way mapping */
> >>      rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
> >>                         p2m_populate_on_demand, p2m->default_access);
> >>      if ( rc == 0 )
> >>      {
> >> -        pod_lock(p2m);
> >>          p2m->pod.entry_count += 1UL << order;
> >>          p2m->pod.entry_count -= pod_count;
> >>          BUG_ON(p2m->pod.entry_count < 0);
> >> -        pod_unlock(p2m);
> >>
> >>          ioreq_request_mapcache_invalidate(d);
> >>      }
> >> @@ -1373,6 +1379,8 @@ mark_populate_on_demand(struct domain *d
> >>          domain_crash(d);
> >>      }
> >>
> >> +    pod_unlock(p2m);
> >
> > We're confident that neither domain_crash() nor
> > ioreq_request_mapcache_invalidate() will grab any of the p2m locks?
>
> There's no doubt about ioreq_request_mapcache_invalidate(). domain_crash(),
> otoh, invokes show_execution_state(), which in principle would be nice to
> dump the guest stack among other things. My patch doing so was reverted, so
> right now there's no issue there. Plus any attempt to do so would need to
> be careful anyway regarding locks. But as you see it is not a clear cut no,
> so ...
>
> > If so,
> >
> > Reviewed-by: George Dunlap <george.dunlap@cloud.com>
>
> ... rather than taking this (thanks), maybe I indeed better follow the
> alternative outlined in the post-commit-message remark?

I keep missing your post-commit-message remarks due to the way I'm
applying your series.  Yes, that had occurred to me as well -- I don't
think this is a hot path, and I do think it would be good to avoid
laying a trap for future people wanting to change domain_crash(); in
particular as that would change a domain crash into either a host
crash or a potential deadlock.

I think I would go with multiple if statements, rather than multiple
unlock calls though.

 -George



* Re: [PATCH v3] x86/PoD: tie together P2M update and increment of entry count
  2024-03-13 12:25       ` George Dunlap
@ 2024-03-13 12:25         ` George Dunlap
  0 siblings, 0 replies; 24+ messages in thread
From: George Dunlap @ 2024-03-13 12:25 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Andrew Cooper, Wei Liu, Roger Pau Monné

On Wed, Mar 13, 2024 at 12:25 PM George Dunlap <george.dunlap@cloud.com> wrote:
> I keep missing your post-commit-message remarks due to the way I'm
> applying your series.

Er, just to be clear, this is a problem with my workflow, not with
your patches...

 -George



* [PATCH v4] x86/PoD: tie together P2M update and increment of entry count
  2021-12-01 11:00 [PATCH 0/2] x86/mm: XSA-389 follow-up Jan Beulich
                   ` (4 preceding siblings ...)
  2024-03-12 15:22 ` [PATCH v3] x86/PoD: tie together P2M update and " Jan Beulich
@ 2024-03-13 14:00 ` Jan Beulich
  2024-03-13 16:31   ` George Dunlap
  2024-03-19 13:22 ` [PATCH v5] " Jan Beulich
  6 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-03-13 14:00 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, George Dunlap

When not holding the PoD lock across the entire region covering P2M
update and stats update, the entry count - if to be incorrect at all -
should indicate too large a value in preference to a too small one, to
avoid functions bailing early when they find the count is zero. However,
instead of moving the increment ahead (and adjust back upon failure),
extend the PoD-locked region.

Fixes: 99af3cd40b6e ("x86/mm: Rework locking in the PoD layer")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: Shrink locked region a little again, where possible.
v3: Extend locked region instead. Add Fixes: tag.
v2: Add comments.

--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -1348,12 +1348,19 @@ mark_populate_on_demand(struct domain *d
         }
     }
 
+    /*
+     * P2M update and stats increment need to collectively be under PoD lock,
+     * to prevent code elsewhere observing PoD entry count being zero despite
+     * there actually still being PoD entries (created by the p2m_set_entry()
+     * invocation below).
+     */
+    pod_lock(p2m);
+
     /* Now, actually do the two-way mapping */
     rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
                        p2m_populate_on_demand, p2m->default_access);
     if ( rc == 0 )
     {
-        pod_lock(p2m);
         p2m->pod.entry_count += 1UL << order;
         p2m->pod.entry_count -= pod_count;
         BUG_ON(p2m->pod.entry_count < 0);
@@ -1363,6 +1370,8 @@ mark_populate_on_demand(struct domain *d
     }
     else if ( order )
     {
+        pod_unlock(p2m);
+
         /*
          * If this failed, we can't tell how much of the range was changed.
          * Best to crash the domain.
@@ -1372,6 +1381,8 @@ mark_populate_on_demand(struct domain *d
                d, gfn_l, order, rc);
         domain_crash(d);
     }
+    else
+        pod_unlock(p2m);
 
 out:
     gfn_unlock(p2m, gfn, order);



* Re: [PATCH v4] x86/PoD: tie together P2M update and increment of entry count
  2024-03-13 14:00 ` [PATCH v4] " Jan Beulich
@ 2024-03-13 16:31   ` George Dunlap
  2024-03-14  6:59     ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: George Dunlap @ 2024-03-13 16:31 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Andrew Cooper, Wei Liu, Roger Pau Monné

On Wed, Mar 13, 2024 at 2:00 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> When not holding the PoD lock across the entire region covering P2M
> update and stats update, the entry count - if to be incorrect at all -
> should indicate too large a value in preference to a too small one, to
> avoid functions bailing early when they find the count is zero. However,
> instead of moving the increment ahead (and adjust back upon failure),
> extend the PoD-locked region.
>
> Fixes: 99af3cd40b6e ("x86/mm: Rework locking in the PoD layer")
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Would you mind commenting on why you went with multiple unlocks,
rather than multiple if statements?

e.g.,

```
rc = p2m_set_entry(...);

/* Do the pod entry adjustment while holding the lock on success */
if ( rc == 0 ) {
 /* adjust pod entries */
}

pod_unlock(p2m);

/* Do the rest of the clean-up and error handling */
if (rc == 0 ) {
```

Just right now the multiple unlocks makes me worry that we may forget
one at some point.

 -George



* Re: [PATCH v4] x86/PoD: tie together P2M update and increment of entry count
  2024-03-13 16:31   ` George Dunlap
@ 2024-03-14  6:59     ` Jan Beulich
  0 siblings, 0 replies; 24+ messages in thread
From: Jan Beulich @ 2024-03-14  6:59 UTC (permalink / raw)
  To: George Dunlap; +Cc: xen-devel, Andrew Cooper, Wei Liu, Roger Pau Monné

On 13.03.2024 17:31, George Dunlap wrote:
> On Wed, Mar 13, 2024 at 2:00 PM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> When not holding the PoD lock across the entire region covering P2M
>> update and stats update, the entry count - if to be incorrect at all -
>> should indicate too large a value in preference to a too small one, to
>> avoid functions bailing early when they find the count is zero. However,
>> instead of moving the increment ahead (and adjust back upon failure),
>> extend the PoD-locked region.
>>
>> Fixes: 99af3cd40b6e ("x86/mm: Rework locking in the PoD layer")
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> Would you mind commenting on why you went with multiple unlocks,
> rather than multiple if statements?

Simply because what I did I view as more logical a code structure
than ...

> e.g.,
> 
> ```
> rc = p2m_set_entry(...);
> 
> /* Do the pod entry adjustment while holding the lock on success */
> if ( rc == 0 ) {
>  /* adjust pod entries */
> }
> 
> pod_unlock(p2m);
> 
> /* Do the rest of the clean-up and error handling */
> if (rc == 0 ) {

... this, ...

> Just right now the multiple unlocks makes me worry that we may forget
> one at some point.

... despite this possible concern. But well, if going the other route
is what it takes to finally get this in, so be it.

Jan



* [PATCH v5] x86/PoD: tie together P2M update and increment of entry count
  2021-12-01 11:00 [PATCH 0/2] x86/mm: XSA-389 follow-up Jan Beulich
                   ` (5 preceding siblings ...)
  2024-03-13 14:00 ` [PATCH v4] " Jan Beulich
@ 2024-03-19 13:22 ` Jan Beulich
  2024-03-20  9:50   ` George Dunlap
  6 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2024-03-19 13:22 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, George Dunlap

When not holding the PoD lock across the entire region covering P2M
update and stats update, the entry count - if to be incorrect at all -
should indicate too large a value in preference to a too small one, to
avoid functions bailing early when they find the count is zero. However,
instead of moving the increment ahead (and adjust back upon failure),
extend the PoD-locked region.

Fixes: 99af3cd40b6e ("x86/mm: Rework locking in the PoD layer")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: Re-arrange conditionals to have just a single unlock.
v4: Shrink locked region a little again, where possible.
v3: Extend locked region instead. Add Fixes: tag.
v2: Add comments.

--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -1348,19 +1348,28 @@ mark_populate_on_demand(struct domain *d
         }
     }
 
+    /*
+     * P2M update and stats increment need to collectively be under PoD lock,
+     * to prevent code elsewhere observing PoD entry count being zero despite
+     * there actually still being PoD entries (created by the p2m_set_entry()
+     * invocation below).
+     */
+    pod_lock(p2m);
+
     /* Now, actually do the two-way mapping */
     rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
                        p2m_populate_on_demand, p2m->default_access);
     if ( rc == 0 )
     {
-        pod_lock(p2m);
         p2m->pod.entry_count += 1UL << order;
         p2m->pod.entry_count -= pod_count;
         BUG_ON(p2m->pod.entry_count < 0);
-        pod_unlock(p2m);
+    }
+
+    pod_unlock(p2m);
 
+    if ( rc == 0 )
         ioreq_request_mapcache_invalidate(d);
-    }
     else if ( order )
     {
         /*



* Re: [PATCH v5] x86/PoD: tie together P2M update and increment of entry count
  2024-03-19 13:22 ` [PATCH v5] " Jan Beulich
@ 2024-03-20  9:50   ` George Dunlap
  0 siblings, 0 replies; 24+ messages in thread
From: George Dunlap @ 2024-03-20  9:50 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Andrew Cooper, Wei Liu, Roger Pau Monné

On Tue, Mar 19, 2024 at 1:22 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> When not holding the PoD lock across the entire region covering P2M
> update and stats update, the entry count - if to be incorrect at all -
> should indicate too large a value in preference to a too small one, to
> avoid functions bailing early when they find the count is zero. However,
> instead of moving the increment ahead (and adjust back upon failure),
> extend the PoD-locked region.
>
> Fixes: 99af3cd40b6e ("x86/mm: Rework locking in the PoD layer")
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Oh!  Thanks for doing this -- I hadn't responded because I wasn't sure
whether I was bikeshedding, and then it sort of fell off my radar.  At
any rate:

Reviewed-by: George Dunlap <george.dunlap@cloud.com>



Thread overview: 24+ messages
2021-12-01 11:00 [PATCH 0/2] x86/mm: XSA-389 follow-up Jan Beulich
2021-12-01 11:01 ` [PATCH 1/2] x86/mm: don't open-code p2m_is_pod() Jan Beulich
2021-12-01 11:22   ` Andrew Cooper
2021-12-02 19:10   ` Tim Deegan
2021-12-01 11:02 ` [PATCH 2/2] x86/PoD: move increment of entry count Jan Beulich
2021-12-01 11:27   ` Andrew Cooper
2021-12-01 11:50     ` Jan Beulich
2021-12-01 11:52 ` Jan Beulich
2021-12-01 15:40   ` Andrew Cooper
2022-01-04 10:57 ` [PATCH v2] " Jan Beulich
2022-01-27 15:04   ` Ping: " Jan Beulich
2022-02-24 13:04     ` Ping²: " Jan Beulich
2024-03-11 16:04   ` George Dunlap
2024-03-12 13:17     ` Jan Beulich
2024-03-12 15:22 ` [PATCH v3] x86/PoD: tie together P2M update and " Jan Beulich
2024-03-13 10:58   ` George Dunlap
2024-03-13 12:19     ` Jan Beulich
2024-03-13 12:25       ` George Dunlap
2024-03-13 12:25         ` George Dunlap
2024-03-13 14:00 ` [PATCH v4] " Jan Beulich
2024-03-13 16:31   ` George Dunlap
2024-03-14  6:59     ` Jan Beulich
2024-03-19 13:22 ` [PATCH v5] " Jan Beulich
2024-03-20  9:50   ` George Dunlap
