xen-devel.lists.xenproject.org archive mirror
* [PATCH] x86/vmce: Dispatch vmce_{rd,wr}msr() from guest_{rd,wr}msr()
@ 2020-07-22 10:18 Andrew Cooper
  2020-07-23 10:07 ` Roger Pau Monné
  2020-07-23 10:37 ` Jan Beulich
  0 siblings, 2 replies; 7+ messages in thread
From: Andrew Cooper @ 2020-07-22 10:18 UTC
  To: Xen-devel; +Cc: Andrew Cooper, Wei Liu, Jan Beulich, Roger Pau Monné

... rather than from the default clauses of the PV and HVM MSR handlers.

This means that we no longer take the vmce lock for any unknown MSR, and
accesses to architectural MCE banks outside of the subset implemented for the
guest no longer fall further through the unknown MSR path.

With the vmce calls removed, the hvm alternative_call()'s expression can be
simplified substantially.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wl@xen.org>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/hvm/hvm.c         | 16 ++--------------
 xen/arch/x86/msr.c             | 16 ++++++++++++++++
 xen/arch/x86/pv/emul-priv-op.c | 15 ---------------
 3 files changed, 18 insertions(+), 29 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 5bb47583b3..a9d1685549 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3560,13 +3560,7 @@ int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
          break;
 
     default:
-        if ( (ret = vmce_rdmsr(msr, msr_content)) < 0 )
-            goto gp_fault;
-        /* If ret == 0 then this is not an MCE MSR, see other MSRs. */
-        ret = ((ret == 0)
-               ? alternative_call(hvm_funcs.msr_read_intercept,
-                                  msr, msr_content)
-               : X86EMUL_OKAY);
+        ret = alternative_call(hvm_funcs.msr_read_intercept, msr, msr_content);
         break;
     }
 
@@ -3696,13 +3690,7 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
         break;
 
     default:
-        if ( (ret = vmce_wrmsr(msr, msr_content)) < 0 )
-            goto gp_fault;
-        /* If ret == 0 then this is not an MCE MSR, see other MSRs. */
-        ret = ((ret == 0)
-               ? alternative_call(hvm_funcs.msr_write_intercept,
-                                  msr, msr_content)
-               : X86EMUL_OKAY);
+        ret = alternative_call(hvm_funcs.msr_write_intercept, msr, msr_content);
         break;
     }
 
diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
index 22f921cc71..ca4307e19f 100644
--- a/xen/arch/x86/msr.c
+++ b/xen/arch/x86/msr.c
@@ -227,6 +227,14 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
         *val = msrs->misc_features_enables.raw;
         break;
 
+    case MSR_IA32_MCG_CAP     ... MSR_IA32_MCG_CTL:      /* 0x179 -> 0x17b */
+    case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
+    case MSR_IA32_MCx_CTL(0)  ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
+    case MSR_IA32_MCG_EXT_CTL:                           /* 0x4d0 */
+        if ( vmce_rdmsr(msr, val) < 0 )
+            goto gp_fault;
+        break;
+
     case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
         if ( !is_hvm_domain(d) || v != curr )
             goto gp_fault;
@@ -436,6 +444,14 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
         break;
     }
 
+    case MSR_IA32_MCG_CAP     ... MSR_IA32_MCG_CTL:      /* 0x179 -> 0x17b */
+    case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
+    case MSR_IA32_MCx_CTL(0)  ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
+    case MSR_IA32_MCG_EXT_CTL:                           /* 0x4d0 */
+        if ( vmce_wrmsr(msr, val) < 0 )
+            goto gp_fault;
+        break;
+
     case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
         if ( !is_hvm_domain(d) || v != curr )
             goto gp_fault;
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 254da2b849..f14552cb4b 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -855,8 +855,6 @@ static int read_msr(unsigned int reg, uint64_t *val,
 
     switch ( reg )
     {
-        int rc;
-
     case MSR_FS_BASE:
         if ( is_pv_32bit_domain(currd) )
             break;
@@ -955,12 +953,6 @@ static int read_msr(unsigned int reg, uint64_t *val,
         }
         /* fall through */
     default:
-        rc = vmce_rdmsr(reg, val);
-        if ( rc < 0 )
-            break;
-        if ( rc )
-            return X86EMUL_OKAY;
-        /* fall through */
     normal:
         /* Everyone can read the MSR space. */
         /* gdprintk(XENLOG_WARNING, "Domain attempted RDMSR %08x\n", reg); */
@@ -991,7 +983,6 @@ static int write_msr(unsigned int reg, uint64_t val,
     switch ( reg )
     {
         uint64_t temp;
-        int rc;
 
     case MSR_FS_BASE:
         if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
@@ -1122,12 +1113,6 @@ static int write_msr(unsigned int reg, uint64_t val,
         }
         /* fall through */
     default:
-        rc = vmce_wrmsr(reg, val);
-        if ( rc < 0 )
-            break;
-        if ( rc )
-            return X86EMUL_OKAY;
-
         if ( (rdmsr_safe(reg, temp) != 0) || (val != temp) )
     invalid:
             gdprintk(XENLOG_WARNING,
-- 
2.11.0




* Re: [PATCH] x86/vmce: Dispatch vmce_{rd,wr}msr() from guest_{rd,wr}msr()
  2020-07-22 10:18 [PATCH] x86/vmce: Dispatch vmce_{rd,wr}msr() from guest_{rd,wr}msr() Andrew Cooper
@ 2020-07-23 10:07 ` Roger Pau Monné
  2020-07-23 11:00   ` Andrew Cooper
  2020-07-23 10:37 ` Jan Beulich
  1 sibling, 1 reply; 7+ messages in thread
From: Roger Pau Monné @ 2020-07-23 10:07 UTC
  To: Andrew Cooper; +Cc: Xen-devel, Wei Liu, Jan Beulich

On Wed, Jul 22, 2020 at 11:18:09AM +0100, Andrew Cooper wrote:
> ... rather than from the default clauses of the PV and HVM MSR handlers.
> 
> This means that we no longer take the vmce lock for any unknown MSR, and
> accesses to architectural MCE banks outside of the subset implemented for the
> guest no longer fall further through the unknown MSR path.
> 
> With the vmce calls removed, the hvm alternative_call()'s expression can be
> simplified substantially.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

LGTM, I just have one question below regarding the ranges.

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Wei Liu <wl@xen.org>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> ---
>  xen/arch/x86/hvm/hvm.c         | 16 ++--------------
>  xen/arch/x86/msr.c             | 16 ++++++++++++++++
>  xen/arch/x86/pv/emul-priv-op.c | 15 ---------------
>  3 files changed, 18 insertions(+), 29 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 5bb47583b3..a9d1685549 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3560,13 +3560,7 @@ int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
>           break;
>  
>      default:
> -        if ( (ret = vmce_rdmsr(msr, msr_content)) < 0 )
> -            goto gp_fault;
> -        /* If ret == 0 then this is not an MCE MSR, see other MSRs. */
> -        ret = ((ret == 0)
> -               ? alternative_call(hvm_funcs.msr_read_intercept,
> -                                  msr, msr_content)
> -               : X86EMUL_OKAY);
> +        ret = alternative_call(hvm_funcs.msr_read_intercept, msr, msr_content);
>          break;
>      }
>  
> @@ -3696,13 +3690,7 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
>          break;
>  
>      default:
> -        if ( (ret = vmce_wrmsr(msr, msr_content)) < 0 )
> -            goto gp_fault;
> -        /* If ret == 0 then this is not an MCE MSR, see other MSRs. */
> -        ret = ((ret == 0)
> -               ? alternative_call(hvm_funcs.msr_write_intercept,
> -                                  msr, msr_content)
> -               : X86EMUL_OKAY);
> +        ret = alternative_call(hvm_funcs.msr_write_intercept, msr, msr_content);
>          break;
>      }
>  
> diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
> index 22f921cc71..ca4307e19f 100644
> --- a/xen/arch/x86/msr.c
> +++ b/xen/arch/x86/msr.c
> @@ -227,6 +227,14 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>          *val = msrs->misc_features_enables.raw;
>          break;
>  
> +    case MSR_IA32_MCG_CAP     ... MSR_IA32_MCG_CTL:      /* 0x179 -> 0x17b */
> +    case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
> +    case MSR_IA32_MCx_CTL(0)  ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */

Where do you get the ranges from 0 to 31? It seems like the count
field in the CAP register is 8 bits, which could allow for up to 256
banks?

I'm quite sure this would then overlap with other MSRs?

> +    case MSR_IA32_MCG_EXT_CTL:                           /* 0x4d0 */
> +        if ( vmce_rdmsr(msr, val) < 0 )
> +            goto gp_fault;
> +        break;
> +
>      case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
>          if ( !is_hvm_domain(d) || v != curr )
>              goto gp_fault;
> @@ -436,6 +444,14 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
>          break;
>      }
>  
> +    case MSR_IA32_MCG_CAP     ... MSR_IA32_MCG_CTL:      /* 0x179 -> 0x17b */
> +    case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
> +    case MSR_IA32_MCx_CTL(0)  ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
> +    case MSR_IA32_MCG_EXT_CTL:                           /* 0x4d0 */
> +        if ( vmce_wrmsr(msr, val) < 0 )
> +            goto gp_fault;
> +        break;
> +
>      case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
>          if ( !is_hvm_domain(d) || v != curr )
>              goto gp_fault;
> diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
> index 254da2b849..f14552cb4b 100644
> --- a/xen/arch/x86/pv/emul-priv-op.c
> +++ b/xen/arch/x86/pv/emul-priv-op.c
> @@ -855,8 +855,6 @@ static int read_msr(unsigned int reg, uint64_t *val,
>  
>      switch ( reg )
>      {
> -        int rc;
> -
>      case MSR_FS_BASE:
>          if ( is_pv_32bit_domain(currd) )
>              break;
> @@ -955,12 +953,6 @@ static int read_msr(unsigned int reg, uint64_t *val,
>          }
>          /* fall through */
>      default:
> -        rc = vmce_rdmsr(reg, val);
> -        if ( rc < 0 )
> -            break;
> -        if ( rc )
> -            return X86EMUL_OKAY;
> -        /* fall through */
>      normal:

We could remove the 'normal' label and just use the default one
instead.

Thanks, Roger.



* Re: [PATCH] x86/vmce: Dispatch vmce_{rd,wr}msr() from guest_{rd,wr}msr()
  2020-07-22 10:18 [PATCH] x86/vmce: Dispatch vmce_{rd,wr}msr() from guest_{rd,wr}msr() Andrew Cooper
  2020-07-23 10:07 ` Roger Pau Monné
@ 2020-07-23 10:37 ` Jan Beulich
  2020-07-23 11:17   ` Andrew Cooper
  1 sibling, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2020-07-23 10:37 UTC
  To: Andrew Cooper; +Cc: Xen-devel, Wei Liu, Roger Pau Monné

On 22.07.2020 12:18, Andrew Cooper wrote:
> --- a/xen/arch/x86/msr.c
> +++ b/xen/arch/x86/msr.c
> @@ -227,6 +227,14 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>          *val = msrs->misc_features_enables.raw;
>          break;
>  
> +    case MSR_IA32_MCG_CAP     ... MSR_IA32_MCG_CTL:      /* 0x179 -> 0x17b */
> +    case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
> +    case MSR_IA32_MCx_CTL(0)  ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
> +    case MSR_IA32_MCG_EXT_CTL:                           /* 0x4d0 */
> +        if ( vmce_rdmsr(msr, val) < 0 )
> +            goto gp_fault;
> +        break;
> +
>      case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
>          if ( !is_hvm_domain(d) || v != curr )
>              goto gp_fault;
> @@ -436,6 +444,14 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
>          break;
>      }
>  
> +    case MSR_IA32_MCG_CAP     ... MSR_IA32_MCG_CTL:      /* 0x179 -> 0x17b */
> +    case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
> +    case MSR_IA32_MCx_CTL(0)  ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
> +    case MSR_IA32_MCG_EXT_CTL:                           /* 0x4d0 */
> +        if ( vmce_wrmsr(msr, val) < 0 )
> +            goto gp_fault;
> +        break;
> +
>      case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
>          if ( !is_hvm_domain(d) || v != curr )
>              goto gp_fault;

With this, the two functions' ability to also return 0 or 1 becomes
meaningless. Do you think you could make them return bool on this
occasion, or would you prefer to leave this to whenever someone
gets to clean up the resulting anomaly? (I'm fine either way, but
would prefer not to see the then-meaningless tristate return values
left in place.)
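
Roughly something of this shape, i.e. true for success and false to
raise #GP (only a sketch of the idea, not insisting on the details):

    /* Hypothetical bool-returning prototypes; not part of this patch. */
    bool vmce_rdmsr(uint32_t msr, uint64_t *val);
    bool vmce_wrmsr(uint32_t msr, uint64_t val);

    /* The new call sites in guest_{rd,wr}msr() would then read e.g.: */
    if ( !vmce_rdmsr(msr, val) )
        goto gp_fault;
    break;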

Jan



* Re: [PATCH] x86/vmce: Dispatch vmce_{rd,wr}msr() from guest_{rd,wr}msr()
  2020-07-23 10:07 ` Roger Pau Monné
@ 2020-07-23 11:00   ` Andrew Cooper
  2020-07-23 11:30     ` Roger Pau Monné
  0 siblings, 1 reply; 7+ messages in thread
From: Andrew Cooper @ 2020-07-23 11:00 UTC
  To: Roger Pau Monné; +Cc: Xen-devel, Wei Liu, Jan Beulich

On 23/07/2020 11:07, Roger Pau Monné wrote:
> On Wed, Jul 22, 2020 at 11:18:09AM +0100, Andrew Cooper wrote:
>> ... rather than from the default clauses of the PV and HVM MSR handlers.
>>
>> This means that we no longer take the vmce lock for any unknown MSR, and
>> accesses to architectural MCE banks outside of the subset implemented for the
>> guest no longer fall further through the unknown MSR path.
>>
>> With the vmce calls removed, the hvm alternative_call()'s expression can be
>> simplified substantially.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> LGTM, I just have one question below regarding the ranges.
>
> Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
>
>> ---
>> CC: Jan Beulich <JBeulich@suse.com>
>> CC: Wei Liu <wl@xen.org>
>> CC: Roger Pau Monné <roger.pau@citrix.com>
>> ---
>>  xen/arch/x86/hvm/hvm.c         | 16 ++--------------
>>  xen/arch/x86/msr.c             | 16 ++++++++++++++++
>>  xen/arch/x86/pv/emul-priv-op.c | 15 ---------------
>>  3 files changed, 18 insertions(+), 29 deletions(-)
>>
>> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
>> index 5bb47583b3..a9d1685549 100644
>> --- a/xen/arch/x86/hvm/hvm.c
>> +++ b/xen/arch/x86/hvm/hvm.c
>> @@ -3560,13 +3560,7 @@ int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
>>           break;
>>  
>>      default:
>> -        if ( (ret = vmce_rdmsr(msr, msr_content)) < 0 )
>> -            goto gp_fault;
>> -        /* If ret == 0 then this is not an MCE MSR, see other MSRs. */
>> -        ret = ((ret == 0)
>> -               ? alternative_call(hvm_funcs.msr_read_intercept,
>> -                                  msr, msr_content)
>> -               : X86EMUL_OKAY);
>> +        ret = alternative_call(hvm_funcs.msr_read_intercept, msr, msr_content);
>>          break;
>>      }
>>  
>> @@ -3696,13 +3690,7 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
>>          break;
>>  
>>      default:
>> -        if ( (ret = vmce_wrmsr(msr, msr_content)) < 0 )
>> -            goto gp_fault;
>> -        /* If ret == 0 then this is not an MCE MSR, see other MSRs. */
>> -        ret = ((ret == 0)
>> -               ? alternative_call(hvm_funcs.msr_write_intercept,
>> -                                  msr, msr_content)
>> -               : X86EMUL_OKAY);
>> +        ret = alternative_call(hvm_funcs.msr_write_intercept, msr, msr_content);
>>          break;
>>      }
>>  
>> diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
>> index 22f921cc71..ca4307e19f 100644
>> --- a/xen/arch/x86/msr.c
>> +++ b/xen/arch/x86/msr.c
>> @@ -227,6 +227,14 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>>          *val = msrs->misc_features_enables.raw;
>>          break;
>>  
>> +    case MSR_IA32_MCG_CAP     ... MSR_IA32_MCG_CTL:      /* 0x179 -> 0x17b */
>> +    case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
>> +    case MSR_IA32_MCx_CTL(0)  ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
> Where do you get the ranges from 0 to 31? It seems like the count
> field in the CAP register is 8 bits, which could allow for up to 256
> banks?
>
> I'm quite sure this would then overlap with other MSRs?

Irritatingly, nothing I can find actually states an upper architectural
limit.

SDM Vol4, Table 2-2 which enumerates the Architectural MSRs.

0x280 thru 0x29f are explicitly reserved for MCx_CTL2, which is a limit of
32 banks.  There are gaps after this in the architectural table, but
IceLake has PRMRR_BASE_0 at 0x2a0.

The main bank registers MCx_{CTL,STATUS,ADDR,MISC} start at 0x400 and are
listed in the table up to 0x473, which is a limit of 29 banks.  The
Model specific table for SandyBridge fills in the remaining 3 banks up
to MSR 0x47f, which is the previous limit of 32 banks.  (These MSRs have
package scope rather than core/thread scope, but they are still
enumerated architecturally so I'm not sure why they are in the model
specific tables.)

More importantly however, the VMX MSR range starts at 0x480, immediately
above bank 31, which puts an architectural hard limit on the number of
banks.
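
To spell the arithmetic out (an illustrative sketch; the names below are
made up rather than Xen's real ones, and it assumes the one-CTL2-register
and four-registers-per-bank layout which the range comments in the patch
reflect):

    #define MC_CTL2(bank)  (0x280 + (bank))          /* MC_CTL2(31) == 0x29f */
    #define MC_MISC(bank)  (0x400 + 4 * (bank) + 3)  /* MC_MISC(31) == 0x47f */
    #define VMX_FIRST_MSR  0x480                     /* IA32_VMX_BASIC       */

    /* A 33rd bank's registers would land on the VMX MSRs. */
    _Static_assert(MC_MISC(31) + 1 == VMX_FIRST_MSR,
                   "bank 32 would overlap IA32_VMX_BASIC");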

>> +    case MSR_IA32_MCG_EXT_CTL:                           /* 0x4d0 */
>> +        if ( vmce_rdmsr(msr, val) < 0 )
>> +            goto gp_fault;
>> +        break;
>> +
>>      case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
>>          if ( !is_hvm_domain(d) || v != curr )
>>              goto gp_fault;
>> @@ -436,6 +444,14 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
>>          break;
>>      }
>>  
>> +    case MSR_IA32_MCG_CAP     ... MSR_IA32_MCG_CTL:      /* 0x179 -> 0x17b */
>> +    case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
>> +    case MSR_IA32_MCx_CTL(0)  ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
>> +    case MSR_IA32_MCG_EXT_CTL:                           /* 0x4d0 */
>> +        if ( vmce_wrmsr(msr, val) < 0 )
>> +            goto gp_fault;
>> +        break;
>> +
>>      case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
>>          if ( !is_hvm_domain(d) || v != curr )
>>              goto gp_fault;
>> diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
>> index 254da2b849..f14552cb4b 100644
>> --- a/xen/arch/x86/pv/emul-priv-op.c
>> +++ b/xen/arch/x86/pv/emul-priv-op.c
>> @@ -855,8 +855,6 @@ static int read_msr(unsigned int reg, uint64_t *val,
>>  
>>      switch ( reg )
>>      {
>> -        int rc;
>> -
>>      case MSR_FS_BASE:
>>          if ( is_pv_32bit_domain(currd) )
>>              break;
>> @@ -955,12 +953,6 @@ static int read_msr(unsigned int reg, uint64_t *val,
>>          }
>>          /* fall through */
>>      default:
>> -        rc = vmce_rdmsr(reg, val);
>> -        if ( rc < 0 )
>> -            break;
>> -        if ( rc )
>> -            return X86EMUL_OKAY;
>> -        /* fall through */
>>      normal:
> We could remove the 'normal' label and just use the default one
> instead.

You can't "goto default;" which is what a number of paths between these
two hunks do.
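
To illustrate with a made-up fragment (not the real function body): a
case label isn't a valid goto target, whereas an ordinary label is, so
the label has to stay for as long as those paths exist.

    static int demo(unsigned int reg, unsigned long *val)
    {
        switch ( reg )
        {
        case 1:
            if ( *val )
                goto normal;   /* fine: 'normal' is an ordinary label */
            return 0;

        default:               /* "goto default;" would not compile */
        normal:
            *val = reg;
            return 1;
        }
    }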

I do however have a plan to clean this up in due course.

~Andrew



* Re: [PATCH] x86/vmce: Dispatch vmce_{rd,wr}msr() from guest_{rd,wr}msr()
  2020-07-23 10:37 ` Jan Beulich
@ 2020-07-23 11:17   ` Andrew Cooper
  0 siblings, 0 replies; 7+ messages in thread
From: Andrew Cooper @ 2020-07-23 11:17 UTC
  To: Jan Beulich; +Cc: Xen-devel, Wei Liu, Roger Pau Monné

On 23/07/2020 11:37, Jan Beulich wrote:
> On 22.07.2020 12:18, Andrew Cooper wrote:
>> --- a/xen/arch/x86/msr.c
>> +++ b/xen/arch/x86/msr.c
>> @@ -227,6 +227,14 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>>          *val = msrs->misc_features_enables.raw;
>>          break;
>>  
>> +    case MSR_IA32_MCG_CAP     ... MSR_IA32_MCG_CTL:      /* 0x179 -> 0x17b */
>> +    case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
>> +    case MSR_IA32_MCx_CTL(0)  ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
>> +    case MSR_IA32_MCG_EXT_CTL:                           /* 0x4d0 */
>> +        if ( vmce_rdmsr(msr, val) < 0 )
>> +            goto gp_fault;
>> +        break;
>> +
>>      case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
>>          if ( !is_hvm_domain(d) || v != curr )
>>              goto gp_fault;
>> @@ -436,6 +444,14 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
>>          break;
>>      }
>>  
>> +    case MSR_IA32_MCG_CAP     ... MSR_IA32_MCG_CTL:      /* 0x179 -> 0x17b */
>> +    case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
>> +    case MSR_IA32_MCx_CTL(0)  ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
>> +    case MSR_IA32_MCG_EXT_CTL:                           /* 0x4d0 */
>> +        if ( vmce_wrmsr(msr, val) < 0 )
>> +            goto gp_fault;
>> +        break;
>> +
>>      case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
>>          if ( !is_hvm_domain(d) || v != curr )
>>              goto gp_fault;
> With this, the two functions' ability to also return 0 or 1 becomes
> meaningless. Do you think you could make them return bool on this
> occasion, or would you prefer to leave this to whenever someone
> gets to clean up the resulting anomaly? (I'm fine either way, but
> would prefer not to see the then-meaningless tristate return values
> left in place.)

The entire internals of vmce_{wr,rd}msr() need an overhaul.

I tried switching them to use X86EMUL_* (at which point they would match
all the other subsystems we hand off MSR blocks to), but that quickly
turned into a larger mess than I have time for right now.  I've still
got the partial work, and will finish it at some point.

~Andrew



* Re: [PATCH] x86/vmce: Dispatch vmce_{rd,wr}msr() from guest_{rd,wr}msr()
  2020-07-23 11:00   ` Andrew Cooper
@ 2020-07-23 11:30     ` Roger Pau Monné
  2020-07-23 13:27       ` Andrew Cooper
  0 siblings, 1 reply; 7+ messages in thread
From: Roger Pau Monné @ 2020-07-23 11:30 UTC
  To: Andrew Cooper; +Cc: Xen-devel, Wei Liu, Jan Beulich

On Thu, Jul 23, 2020 at 12:00:53PM +0100, Andrew Cooper wrote:
> On 23/07/2020 11:07, Roger Pau Monné wrote:
> > On Wed, Jul 22, 2020 at 11:18:09AM +0100, Andrew Cooper wrote:
> >> +    case MSR_IA32_MCG_CAP     ... MSR_IA32_MCG_CTL:      /* 0x179 -> 0x17b */
> >> +    case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
> >> +    case MSR_IA32_MCx_CTL(0)  ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
> > Where do you get the ranges from 0 to 31? It seems like the count
> > field in the CAP register is 8 bits, which could allow for up to 256
> > banks?
> >
> > I'm quite sure this would then overlap with other MSRs?
> 
> Irritatingly, nothing I can find actually states an upper architectural
> limit.
> 
> SDM Vol4, Table 2-2 which enumerates the Architectural MSRs.
> 
> 0x280 thru 0x29f are explicitly reserved for MCx_CTL2, which is a limit of
> 32 banks.  There are gaps after this in the architectural table, but
> IceLake has PRMRR_BASE_0 at 0x2a0.
> 
> The main bank registers MCx_{CTL,STATUS,ADDR,MISC} start at 0x400 and are
> listed in the table up to 0x473, which is a limit of 29 banks.  The
> Model specific table for SandyBridge fills in the remaining 3 banks up
> to MSR 0x47f, which is the previous limit of 32 banks.  (These MSRs have
> package scope rather than core/thread scope, but they are still
> enumerated architecturally so I'm not sure why they are in the model
> specific tables.)
> 
> More importantly however, the VMX MSR range starts at 0x480, immediately
> above bank 31, which puts an architectural hard limit on the number of
> banks.

Yes, I realized the VMX MSRs start at 0x480, which limits the
number of banks. Maybe add a small comment about the fact that although
the count in the CAP register could go up to 256, 32 is the actual
limit due to how the MSRs are arranged?

Note there's also GUEST_MC_BANK_NUM, which is the actual implementation
limit in Xen AFAICT; maybe using it here would be clearer (and would
limit the ranges forwarded to vmce_rdmsr)?
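
Something like this, just as a sketch of what I mean (assuming
GUEST_MC_BANK_NUM is usable in a case range here, and reusing the bank
macros from the patch):

    /* Sketch only: bound the range by the implementation limit rather
     * than the architectural 32. */
    case MSR_IA32_MCx_CTL(0) ... MSR_IA32_MCx_MISC(GUEST_MC_BANK_NUM - 1):
        if ( vmce_rdmsr(msr, val) < 0 )
            goto gp_fault;
        break;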

Thanks, Roger.



* Re: [PATCH] x86/vmce: Dispatch vmce_{rd,wr}msr() from guest_{rd,wr}msr()
  2020-07-23 11:30     ` Roger Pau Monné
@ 2020-07-23 13:27       ` Andrew Cooper
  0 siblings, 0 replies; 7+ messages in thread
From: Andrew Cooper @ 2020-07-23 13:27 UTC
  To: Roger Pau Monné; +Cc: Xen-devel, Wei Liu, Jan Beulich

On 23/07/2020 12:30, Roger Pau Monné wrote:
> On Thu, Jul 23, 2020 at 12:00:53PM +0100, Andrew Cooper wrote:
>> On 23/07/2020 11:07, Roger Pau Monné wrote:
>>> On Wed, Jul 22, 2020 at 11:18:09AM +0100, Andrew Cooper wrote:
>>>> +    case MSR_IA32_MCG_CAP     ... MSR_IA32_MCG_CTL:      /* 0x179 -> 0x17b */
>>>> +    case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
>>>> +    case MSR_IA32_MCx_CTL(0)  ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
>>> Where do you get the ranges from 0 to 31? It seems like the count
>>> field in the CAP register is 8 bits, which could allow for up to 256
>>> banks?
>>>
>>> I'm quite sure this would then overlap with other MSRs?
>> Irritatingly, nothing I can find actually states an upper architectural
>> limit.
>>
>> SDM Vol4, Table 2-2 which enumerates the Architectural MSRs.
>>
>> 0x280 thru 0x29f are explicitly reserved for MCx_CTL2, which is a limit of
>> 32 banks.  There are gaps after this in the architectural table, but
>> IceLake has PRMRR_BASE_0 at 0x2a0.
>>
>> The main bank registers MCx_{CTL,STATUS,ADDR,MISC} start at 0x400 and are
>> listed in the table up to 0x473, which is a limit of 29 banks.  The
>> Model specific table for SandyBridge fills in the remaining 3 banks up
>> to MSR 0x47f, which is the previous limit of 32 banks.  (These MSRs have
>> package scope rather than core/thread scope, but they are still
>> enumerated architecturally so I'm not sure why they are in the model
>> specific tables.)
>>
>> More importantly however, the VMX MSR range starts at 0x480, immediately
>> above bank 31, which puts an architectural hard limit on the number of
>> banks.
> Yes, I realized the VMX MSRs start at 0x480, which limits the
> number of banks. Maybe add a small comment about the fact that although
> the count in the CAP register could go up to 256, 32 is the actual
> limit due to how the MSRs are arranged?

Ok.  I've added:

The bank limit of 32 isn't stated anywhere I can locate, but is a
consequence of the MSR layout described in SDM Volume 4.

as another paragraph to the commit message.

> Note there's also GUEST_MC_BANK_NUM, which is the actual implementation
> limit in Xen AFAICT; maybe using it here would be clearer (and would
> limit the ranges forwarded to vmce_rdmsr)?

First, there is a note saying that older versions of Xen advertised more
than 2 banks to guests (and therefore we might see such a guest migrated
in), and second, not capturing the full architectural range is a bug I'm
specifically fixing with this change, as called out in the commit message.

These MSRs, even beyond the banks implemented for the guest, are still
MCE banks, and need handling appropriately as "out of range banks",
which isn't necessarily the same as falling into the general default MSR
path.

~Andrew




Thread overview: 7 messages
2020-07-22 10:18 [PATCH] x86/vmce: Dispatch vmce_{rd,wr}msr() from guest_{rd,wr}msr() Andrew Cooper
2020-07-23 10:07 ` Roger Pau Monné
2020-07-23 11:00   ` Andrew Cooper
2020-07-23 11:30     ` Roger Pau Monné
2020-07-23 13:27       ` Andrew Cooper
2020-07-23 10:37 ` Jan Beulich
2020-07-23 11:17   ` Andrew Cooper
