qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/4] FWNMI follow up patches
@ 2020-03-25 14:29 Nicholas Piggin
  2020-03-25 14:29 ` [PATCH v2 1/4] ppc/spapr: KVM FWNMI should not be enabled until guest requests it Nicholas Piggin
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Nicholas Piggin @ 2020-03-25 14:29 UTC (permalink / raw)
  To: qemu-ppc
  Cc: Aravinda Prasad, Alexey Kardashevskiy, Mahesh Salgaonkar,
	qemu-devel, Nicholas Piggin, Greg Kurz, Cédric Le Goater,
	Ganesh Goudar, David Gibson

Since v1, I addressed Greg's review comments, and took the mce
injection patch out of this series.

Thanks,
Nick

Nicholas Piggin (4):
  ppc/spapr: KVM FWNMI should not be enabled until guest requests it
  ppc/spapr: Improve FWNMI machine check delivery corner case comments
  ppc/spapr: Add FWNMI machine check delivery warnings
  ppc/spapr: Don't kill the guest if a recovered FWNMI machine check
    delivery fails

 hw/ppc/spapr_caps.c   |  7 ++++---
 hw/ppc/spapr_events.c | 46 ++++++++++++++++++++++++++++++++-----------
 hw/ppc/spapr_rtas.c   | 10 ++++++++++
 target/ppc/kvm.c      |  7 +++++++
 target/ppc/kvm_ppc.h  |  6 ++++++
 5 files changed, 62 insertions(+), 14 deletions(-)

-- 
2.23.0



^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v2 1/4] ppc/spapr: KVM FWNMI should not be enabled until guest requests it
  2020-03-25 14:29 [PATCH v2 0/4] FWNMI follow up patches Nicholas Piggin
@ 2020-03-25 14:29 ` Nicholas Piggin
  2020-03-25 17:17   ` Greg Kurz
  2020-03-26  0:17   ` David Gibson
  2020-03-25 14:29 ` [PATCH v2 2/4] ppc/spapr: Improve FWNMI machine check delivery corner case comments Nicholas Piggin
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 13+ messages in thread
From: Nicholas Piggin @ 2020-03-25 14:29 UTC (permalink / raw)
  To: qemu-ppc
  Cc: Aravinda Prasad, Alexey Kardashevskiy, Mahesh Salgaonkar,
	qemu-devel, Nicholas Piggin, Greg Kurz, Cédric Le Goater,
	Ganesh Goudar, David Gibson

The KVM FWNMI capability should be enabled with the "ibm,nmi-register"
rtas call. Although MCEs from KVM will be delivered as architected
interrupts to the guest before "ibm,nmi-register" is called, KVM has
different behaviour depending on whether the guest has enabled FWNMI
(it attempts to do more recovery on behalf of a non-FWNMI guest).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 hw/ppc/spapr_caps.c  | 7 ++++---
 hw/ppc/spapr_rtas.c  | 7 +++++++
 target/ppc/kvm.c     | 7 +++++++
 target/ppc/kvm_ppc.h | 6 ++++++
 4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 679ae7959f..eb54f94227 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -517,9 +517,10 @@ static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
     }
 
     if (kvm_enabled()) {
-        if (kvmppc_set_fwnmi() < 0) {
-            error_setg(errp, "Firmware Assisted Non-Maskable Interrupts(FWNMI) "
-                             "not supported by KVM");
+        if (!kvmppc_get_fwnmi()) {
+            error_setg(errp,
+"Firmware Assisted Non-Maskable Interrupts(FWNMI) not supported by KVM.");
+            error_append_hint(errp, "Try appending -machine cap-fwnmi=off\n");
         }
     }
 }
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 9fb8c8632a..29abe66d01 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -437,6 +437,13 @@ static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
         return;
     }
 
+    if (kvm_enabled()) {
+        if (kvmppc_set_fwnmi() < 0) {
+            rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+            return;
+        }
+    }
+
     spapr->fwnmi_system_reset_addr = sreset_addr;
     spapr->fwnmi_machine_check_addr = mce_addr;
 
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 597f72be1b..03d0667e8f 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -88,6 +88,7 @@ static int cap_ppc_safe_indirect_branch;
 static int cap_ppc_count_cache_flush_assist;
 static int cap_ppc_nested_kvm_hv;
 static int cap_large_decr;
+static int cap_fwnmi;
 
 static uint32_t debug_inst_opcode;
 
@@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     kvmppc_get_cpu_characteristics(s);
     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
     cap_large_decr = kvmppc_get_dec_bits();
+    cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI);
     /*
      * Note: setting it to false because there is not such capability
      * in KVM at this moment.
@@ -2064,6 +2066,11 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
     }
 }
 
+bool kvmppc_get_fwnmi(void)
+{
+    return cap_fwnmi;
+}
+
 int kvmppc_set_fwnmi(void)
 {
     PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 332fa0aa1c..fcaf745516 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -27,6 +27,7 @@ void kvmppc_enable_h_page_init(void);
 void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
+bool kvmppc_get_fwnmi(void);
 int kvmppc_set_fwnmi(void);
 int kvmppc_smt_threads(void);
 void kvmppc_error_append_smt_possible_hint(Error *const *errp);
@@ -163,6 +164,11 @@ static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
 {
 }
 
+static inline bool kvmppc_get_fwnmi(void)
+{
+    return false;
+}
+
 static inline int kvmppc_set_fwnmi(void)
 {
     return -1;
-- 
2.23.0



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 2/4] ppc/spapr: Improve FWNMI machine check delivery corner case comments
  2020-03-25 14:29 [PATCH v2 0/4] FWNMI follow up patches Nicholas Piggin
  2020-03-25 14:29 ` [PATCH v2 1/4] ppc/spapr: KVM FWNMI should not be enabled until guest requests it Nicholas Piggin
@ 2020-03-25 14:29 ` Nicholas Piggin
  2020-03-26  0:18   ` David Gibson
  2020-03-25 14:29 ` [PATCH v2 3/4] ppc/spapr: Add FWNMI machine check delivery warnings Nicholas Piggin
  2020-03-25 14:29 ` [PATCH v2 4/4] ppc/spapr: Don't kill the guest if a recovered FWNMI machine check delivery fails Nicholas Piggin
  3 siblings, 1 reply; 13+ messages in thread
From: Nicholas Piggin @ 2020-03-25 14:29 UTC (permalink / raw)
  To: qemu-ppc
  Cc: Aravinda Prasad, Alexey Kardashevskiy, Mahesh Salgaonkar,
	qemu-devel, Nicholas Piggin, Greg Kurz, Cédric Le Goater,
	Ganesh Goudar, David Gibson

Some of the conditions are not as clearly documented as they could be.
Also the non-FWNMI case does not need a large comment.

Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 hw/ppc/spapr_events.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index a4a540f43d..a908c5d0e9 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -860,17 +860,13 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
     Error *local_err = NULL;
 
     if (spapr->fwnmi_machine_check_addr == -1) {
-        /*
-         * This implies that we have hit a machine check either when the
-         * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
-         * called) or between system reset and "ibm,nmi-register".
-         * Fall back to the old machine check behavior in such cases.
-         */
+        /* Non-FWNMI case, deliver it like an architected CPU interrupt. */
         cs->exception_index = POWERPC_EXCP_MCHECK;
         ppc_cpu_do_interrupt(cs);
         return;
     }
 
+    /* Wait for FWNMI interlock. */
     while (spapr->fwnmi_machine_check_interlock != -1) {
         /*
          * Check whether the same CPU got machine check error
@@ -882,8 +878,13 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
             return;
         }
         qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond);
-        /* Meanwhile if the system is reset, then just return */
         if (spapr->fwnmi_machine_check_addr == -1) {
+            /*
+             * If the machine was reset while waiting for the interlock,
+             * abort the delivery. The machine check applies to a context
+             * that no longer exists, so it wouldn't make sense to deliver
+             * it now.
+             */
             return;
         }
     }
@@ -894,7 +895,9 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
          * We don't want to abort so we let the migration to continue.
          * In a rare case, the machine check handler will run on the target.
          * Though this is not preferable, it is better than aborting
-         * the migration or killing the VM.
+         * the migration or killing the VM. It is okay to call
+         * migrate_del_blocker on a blocker that was not added (which the
+         * nmi-interlock handler would do when it's called after this).
          */
         warn_report("Received a fwnmi while migration was in progress");
     }
-- 
2.23.0



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 3/4] ppc/spapr: Add FWNMI machine check delivery warnings
  2020-03-25 14:29 [PATCH v2 0/4] FWNMI follow up patches Nicholas Piggin
  2020-03-25 14:29 ` [PATCH v2 1/4] ppc/spapr: KVM FWNMI should not be enabled until guest requests it Nicholas Piggin
  2020-03-25 14:29 ` [PATCH v2 2/4] ppc/spapr: Improve FWNMI machine check delivery corner case comments Nicholas Piggin
@ 2020-03-25 14:29 ` Nicholas Piggin
  2020-03-25 17:24   ` Greg Kurz
  2020-03-26  0:19   ` David Gibson
  2020-03-25 14:29 ` [PATCH v2 4/4] ppc/spapr: Don't kill the guest if a recovered FWNMI machine check delivery fails Nicholas Piggin
  3 siblings, 2 replies; 13+ messages in thread
From: Nicholas Piggin @ 2020-03-25 14:29 UTC (permalink / raw)
  To: qemu-ppc
  Cc: Aravinda Prasad, Alexey Kardashevskiy, Mahesh Salgaonkar,
	qemu-devel, Nicholas Piggin, Greg Kurz, Cédric Le Goater,
	Ganesh Goudar, David Gibson

Add some messages which explain problems and guest misbehaviour that
may be difficult to diagnose in rare cases of machine checks.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 hw/ppc/spapr_events.c | 4 ++++
 hw/ppc/spapr_rtas.c   | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index a908c5d0e9..c8964eb25d 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -833,6 +833,8 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
     /* get rtas addr from fdt */
     rtas_addr = spapr_get_rtas_addr();
     if (!rtas_addr) {
+        error_report(
+"FWNMI: Unable to deliver machine check to guest: rtas_addr not found.");
         qemu_system_guest_panicked(NULL);
         g_free(ext_elog);
         return;
@@ -874,6 +876,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
          * that CPU called "ibm,nmi-interlock")
          */
         if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) {
+            error_report(
+"FWNMI: Unable to deliver machine check to guest: nested machine check.");
             qemu_system_guest_panicked(NULL);
             return;
         }
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 29abe66d01..bcac0d00e7 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -462,6 +462,9 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
     }
 
     if (spapr->fwnmi_machine_check_addr == -1) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+"FWNMI: ibm,nmi-interlock RTAS called with FWNMI not registered.\n");
+
         /* NMI register not called */
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
-- 
2.23.0



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 4/4] ppc/spapr: Don't kill the guest if a recovered FWNMI machine check delivery fails
  2020-03-25 14:29 [PATCH v2 0/4] FWNMI follow up patches Nicholas Piggin
                   ` (2 preceding siblings ...)
  2020-03-25 14:29 ` [PATCH v2 3/4] ppc/spapr: Add FWNMI machine check delivery warnings Nicholas Piggin
@ 2020-03-25 14:29 ` Nicholas Piggin
  2020-03-25 18:13   ` Greg Kurz
  2020-03-26  0:30   ` David Gibson
  3 siblings, 2 replies; 13+ messages in thread
From: Nicholas Piggin @ 2020-03-25 14:29 UTC (permalink / raw)
  To: qemu-ppc
  Cc: Aravinda Prasad, Alexey Kardashevskiy, Mahesh Salgaonkar,
	qemu-devel, Nicholas Piggin, Greg Kurz, Cédric Le Goater,
	Ganesh Goudar, David Gibson

Try to be tolerant of FWNMI delivery errors if the machine check had been
recovered by the host.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 hw/ppc/spapr_events.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index c8964eb25d..b90ecb8afe 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -833,13 +833,25 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
     /* get rtas addr from fdt */
     rtas_addr = spapr_get_rtas_addr();
     if (!rtas_addr) {
-        error_report(
+        if (!recovered) {
+            error_report(
 "FWNMI: Unable to deliver machine check to guest: rtas_addr not found.");
-        qemu_system_guest_panicked(NULL);
+            qemu_system_guest_panicked(NULL);
+        } else {
+            warn_report(
+"FWNMI: Unable to deliver machine check to guest: rtas_addr not found. "
+"Machine check recovered.");
+        }
         g_free(ext_elog);
         return;
     }
 
+    /*
+     * Must not set interlock if the MCE does not get delivered to the guest
+     * in the error case above.
+     */
+    spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
+
     stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET,
                 env->gpr[3]);
     cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
@@ -876,9 +888,15 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
          * that CPU called "ibm,nmi-interlock")
          */
         if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) {
-            error_report(
+            if (!recovered) {
+                error_report(
 "FWNMI: Unable to deliver machine check to guest: nested machine check.");
-            qemu_system_guest_panicked(NULL);
+                qemu_system_guest_panicked(NULL);
+            } else {
+                warn_report(
+"FWNMI: Unable to deliver machine check to guest: nested machine check. "
+"Machine check recovered.");
+            }
             return;
         }
         qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond);
@@ -906,7 +924,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
         warn_report("Received a fwnmi while migration was in progress");
     }
 
-    spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
     spapr_mce_dispatch_elog(cpu, recovered);
 }
 
-- 
2.23.0



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/4] ppc/spapr: KVM FWNMI should not be enabled until guest requests it
  2020-03-25 14:29 ` [PATCH v2 1/4] ppc/spapr: KVM FWNMI should not be enabled until guest requests it Nicholas Piggin
@ 2020-03-25 17:17   ` Greg Kurz
  2020-03-26  0:17   ` David Gibson
  1 sibling, 0 replies; 13+ messages in thread
From: Greg Kurz @ 2020-03-25 17:17 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: Aravinda Prasad, Mahesh Salgaonkar, qemu-devel,
	Cédric Le Goater, qemu-ppc, Ganesh Goudar, David Gibson

On Thu, 26 Mar 2020 00:29:03 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:

> The KVM FWNMI capability should be enabled with the "ibm,nmi-register"
> rtas call. Although MCEs from KVM will be delivered as architected
> interrupts to the guest before "ibm,nmi-register" is called, KVM has
> different behaviour depending on whether the guest has enabled FWNMI
> (it attempts to do more recovery on behalf of a non-FWNMI guest).
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---

Reviewed-by: Greg Kurz <groug@kaod.org>

>  hw/ppc/spapr_caps.c  | 7 ++++---
>  hw/ppc/spapr_rtas.c  | 7 +++++++
>  target/ppc/kvm.c     | 7 +++++++
>  target/ppc/kvm_ppc.h | 6 ++++++
>  4 files changed, 24 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index 679ae7959f..eb54f94227 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -517,9 +517,10 @@ static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
>      }
>  
>      if (kvm_enabled()) {
> -        if (kvmppc_set_fwnmi() < 0) {
> -            error_setg(errp, "Firmware Assisted Non-Maskable Interrupts(FWNMI) "
> -                             "not supported by KVM");
> +        if (!kvmppc_get_fwnmi()) {
> +            error_setg(errp,
> +"Firmware Assisted Non-Maskable Interrupts(FWNMI) not supported by KVM.");
> +            error_append_hint(errp, "Try appending -machine cap-fwnmi=off\n");
>          }
>      }
>  }
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index 9fb8c8632a..29abe66d01 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -437,6 +437,13 @@ static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>          return;
>      }
>  
> +    if (kvm_enabled()) {
> +        if (kvmppc_set_fwnmi() < 0) {
> +            rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> +            return;
> +        }
> +    }
> +
>      spapr->fwnmi_system_reset_addr = sreset_addr;
>      spapr->fwnmi_machine_check_addr = mce_addr;
>  
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 597f72be1b..03d0667e8f 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -88,6 +88,7 @@ static int cap_ppc_safe_indirect_branch;
>  static int cap_ppc_count_cache_flush_assist;
>  static int cap_ppc_nested_kvm_hv;
>  static int cap_large_decr;
> +static int cap_fwnmi;
>  
>  static uint32_t debug_inst_opcode;
>  
> @@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>      kvmppc_get_cpu_characteristics(s);
>      cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
>      cap_large_decr = kvmppc_get_dec_bits();
> +    cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI);
>      /*
>       * Note: setting it to false because there is not such capability
>       * in KVM at this moment.
> @@ -2064,6 +2066,11 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>      }
>  }
>  
> +bool kvmppc_get_fwnmi(void)
> +{
> +    return cap_fwnmi;
> +}
> +
>  int kvmppc_set_fwnmi(void)
>  {
>      PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> index 332fa0aa1c..fcaf745516 100644
> --- a/target/ppc/kvm_ppc.h
> +++ b/target/ppc/kvm_ppc.h
> @@ -27,6 +27,7 @@ void kvmppc_enable_h_page_init(void);
>  void kvmppc_set_papr(PowerPCCPU *cpu);
>  int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
>  void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
> +bool kvmppc_get_fwnmi(void);
>  int kvmppc_set_fwnmi(void);
>  int kvmppc_smt_threads(void);
>  void kvmppc_error_append_smt_possible_hint(Error *const *errp);
> @@ -163,6 +164,11 @@ static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>  {
>  }
>  
> +static inline bool kvmppc_get_fwnmi(void)
> +{
> +    return false;
> +}
> +
>  static inline int kvmppc_set_fwnmi(void)
>  {
>      return -1;



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 3/4] ppc/spapr: Add FWNMI machine check delivery warnings
  2020-03-25 14:29 ` [PATCH v2 3/4] ppc/spapr: Add FWNMI machine check delivery warnings Nicholas Piggin
@ 2020-03-25 17:24   ` Greg Kurz
  2020-03-26  0:19   ` David Gibson
  1 sibling, 0 replies; 13+ messages in thread
From: Greg Kurz @ 2020-03-25 17:24 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: Aravinda Prasad, Alexey Kardashevskiy, Mahesh Salgaonkar,
	qemu-devel, Cédric Le Goater, Ganesh Goudar, qemu-ppc,
	David Gibson

On Thu, 26 Mar 2020 00:29:05 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:

> Add some messages which explain problems and guest misbehaviour that
> may be difficult to diagnose in rare cases of machine checks.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---

Reviewed-by: Greg Kurz <groug@kaod.org>

>  hw/ppc/spapr_events.c | 4 ++++
>  hw/ppc/spapr_rtas.c   | 3 +++
>  2 files changed, 7 insertions(+)
> 
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index a908c5d0e9..c8964eb25d 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -833,6 +833,8 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
>      /* get rtas addr from fdt */
>      rtas_addr = spapr_get_rtas_addr();
>      if (!rtas_addr) {
> +        error_report(
> +"FWNMI: Unable to deliver machine check to guest: rtas_addr not found.");
>          qemu_system_guest_panicked(NULL);
>          g_free(ext_elog);
>          return;
> @@ -874,6 +876,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>           * that CPU called "ibm,nmi-interlock")
>           */
>          if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) {
> +            error_report(
> +"FWNMI: Unable to deliver machine check to guest: nested machine check.");
>              qemu_system_guest_panicked(NULL);
>              return;
>          }
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index 29abe66d01..bcac0d00e7 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -462,6 +462,9 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>      }
>  
>      if (spapr->fwnmi_machine_check_addr == -1) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +"FWNMI: ibm,nmi-interlock RTAS called with FWNMI not registered.\n");
> +
>          /* NMI register not called */
>          rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>          return;



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 4/4] ppc/spapr: Don't kill the guest if a recovered FWNMI machine check delivery fails
  2020-03-25 14:29 ` [PATCH v2 4/4] ppc/spapr: Don't kill the guest if a recovered FWNMI machine check delivery fails Nicholas Piggin
@ 2020-03-25 18:13   ` Greg Kurz
  2020-03-26  0:30     ` David Gibson
  2020-03-26  0:30   ` David Gibson
  1 sibling, 1 reply; 13+ messages in thread
From: Greg Kurz @ 2020-03-25 18:13 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: Aravinda Prasad, Alexey Kardashevskiy, Mahesh Salgaonkar,
	qemu-devel, Cédric Le Goater, Ganesh Goudar, qemu-ppc,
	David Gibson

On Thu, 26 Mar 2020 00:29:06 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:

> Try to be tolerant of FWNMI delivery errors if the machine check had been
> recovered by the host.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  hw/ppc/spapr_events.c | 27 ++++++++++++++++++++++-----
>  1 file changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index c8964eb25d..b90ecb8afe 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -833,13 +833,25 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
>      /* get rtas addr from fdt */
>      rtas_addr = spapr_get_rtas_addr();
>      if (!rtas_addr) {
> -        error_report(
> +        if (!recovered) {
> +            error_report(
>  "FWNMI: Unable to deliver machine check to guest: rtas_addr not found.");
> -        qemu_system_guest_panicked(NULL);
> +            qemu_system_guest_panicked(NULL);
> +        } else {
> +            warn_report(
> +"FWNMI: Unable to deliver machine check to guest: rtas_addr not found. "
> +"Machine check recovered.");
> +        }
>          g_free(ext_elog);
>          return;
>      }
>  
> +    /*
> +     * Must not set interlock if the MCE does not get delivered to the guest
> +     * in the error case above.
> +     */

It is a bit confusing to read "must not set interlock" and to see the
interlock being set the line below IM-non-native-speaker-HO... also
a small clarification of the outcome of taking the interlock without
delivering the MCE could help people who aren't familiar with FWNMI
to avoid doing bad things.

What about something like the following ?

    /*
     * By taking the interlock, we assume that the MCE will be
     * delivered to the guest. CAUTION: don't add anything that
     * could prevent the MCE to be delivered after this line,
     * otherwise the guest won't be able to release the interlock
     * and ultimately hang/crash?
     */

> +    spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
> +

For improved paranoia, this could even be done just before calling
ppc_cpu_do_fwnmi_machine_check().

Anyway, the change is good enough so:

Reviewed-by: Greg Kurz <groug@kaod.org>

>      stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET,
>                  env->gpr[3]);
>      cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
> @@ -876,9 +888,15 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>           * that CPU called "ibm,nmi-interlock")
>           */
>          if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) {
> -            error_report(
> +            if (!recovered) {
> +                error_report(
>  "FWNMI: Unable to deliver machine check to guest: nested machine check.");
> -            qemu_system_guest_panicked(NULL);
> +                qemu_system_guest_panicked(NULL);
> +            } else {
> +                warn_report(
> +"FWNMI: Unable to deliver machine check to guest: nested machine check. "
> +"Machine check recovered.");
> +            }
>              return;
>          }
>          qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond);
> @@ -906,7 +924,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>          warn_report("Received a fwnmi while migration was in progress");
>      }
>  
> -    spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
>      spapr_mce_dispatch_elog(cpu, recovered);
>  }
>  



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/4] ppc/spapr: KVM FWNMI should not be enabled until guest requests it
  2020-03-25 14:29 ` [PATCH v2 1/4] ppc/spapr: KVM FWNMI should not be enabled until guest requests it Nicholas Piggin
  2020-03-25 17:17   ` Greg Kurz
@ 2020-03-26  0:17   ` David Gibson
  1 sibling, 0 replies; 13+ messages in thread
From: David Gibson @ 2020-03-26  0:17 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: Aravinda Prasad, Alexey Kardashevskiy, Mahesh Salgaonkar,
	qemu-devel, Greg Kurz, Cédric Le Goater, Ganesh Goudar,
	qemu-ppc

[-- Attachment #1: Type: text/plain, Size: 4150 bytes --]

On Thu, Mar 26, 2020 at 12:29:03AM +1000, Nicholas Piggin wrote:
65;5803;1c> The KVM FWNMI capability should be enabled with the "ibm,nmi-register"
> rtas call. Although MCEs from KVM will be delivered as architected
> interrupts to the guest before "ibm,nmi-register" is called, KVM has
> different behaviour depending on whether the guest has enabled FWNMI
> (it attempts to do more recovery on behalf of a non-FWNMI guest).
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Applied to ppc-for-5.0.

> ---
>  hw/ppc/spapr_caps.c  | 7 ++++---
>  hw/ppc/spapr_rtas.c  | 7 +++++++
>  target/ppc/kvm.c     | 7 +++++++
>  target/ppc/kvm_ppc.h | 6 ++++++
>  4 files changed, 24 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index 679ae7959f..eb54f94227 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -517,9 +517,10 @@ static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
>      }
>  
>      if (kvm_enabled()) {
> -        if (kvmppc_set_fwnmi() < 0) {
> -            error_setg(errp, "Firmware Assisted Non-Maskable Interrupts(FWNMI) "
> -                             "not supported by KVM");
> +        if (!kvmppc_get_fwnmi()) {
> +            error_setg(errp,
> +"Firmware Assisted Non-Maskable Interrupts(FWNMI) not supported by KVM.");
> +            error_append_hint(errp, "Try appending -machine cap-fwnmi=off\n");
>          }
>      }
>  }
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index 9fb8c8632a..29abe66d01 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -437,6 +437,13 @@ static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>          return;
>      }
>  
> +    if (kvm_enabled()) {
> +        if (kvmppc_set_fwnmi() < 0) {
> +            rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> +            return;
> +        }
> +    }
> +
>      spapr->fwnmi_system_reset_addr = sreset_addr;
>      spapr->fwnmi_machine_check_addr = mce_addr;
>  
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 597f72be1b..03d0667e8f 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -88,6 +88,7 @@ static int cap_ppc_safe_indirect_branch;
>  static int cap_ppc_count_cache_flush_assist;
>  static int cap_ppc_nested_kvm_hv;
>  static int cap_large_decr;
> +static int cap_fwnmi;
>  
>  static uint32_t debug_inst_opcode;
>  
> @@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>      kvmppc_get_cpu_characteristics(s);
>      cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
>      cap_large_decr = kvmppc_get_dec_bits();
> +    cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI);
>      /*
>       * Note: setting it to false because there is not such capability
>       * in KVM at this moment.
> @@ -2064,6 +2066,11 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>      }
>  }
>  
> +bool kvmppc_get_fwnmi(void)
> +{
> +    return cap_fwnmi;
> +}
> +
>  int kvmppc_set_fwnmi(void)
>  {
>      PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> index 332fa0aa1c..fcaf745516 100644
> --- a/target/ppc/kvm_ppc.h
> +++ b/target/ppc/kvm_ppc.h
> @@ -27,6 +27,7 @@ void kvmppc_enable_h_page_init(void);
>  void kvmppc_set_papr(PowerPCCPU *cpu);
>  int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
>  void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
> +bool kvmppc_get_fwnmi(void);
>  int kvmppc_set_fwnmi(void);
>  int kvmppc_smt_threads(void);
>  void kvmppc_error_append_smt_possible_hint(Error *const *errp);
> @@ -163,6 +164,11 @@ static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>  {
>  }
>  
> +static inline bool kvmppc_get_fwnmi(void)
> +{
> +    return false;
> +}
> +
>  static inline int kvmppc_set_fwnmi(void)
>  {
>      return -1;

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 2/4] ppc/spapr: Improve FWNMI machine check delivery corner case comments
  2020-03-25 14:29 ` [PATCH v2 2/4] ppc/spapr: Improve FWNMI machine check delivery corner case comments Nicholas Piggin
@ 2020-03-26  0:18   ` David Gibson
  0 siblings, 0 replies; 13+ messages in thread
From: David Gibson @ 2020-03-26  0:18 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: Aravinda Prasad, Alexey Kardashevskiy, Mahesh Salgaonkar,
	qemu-devel, Greg Kurz, Cédric Le Goater, Ganesh Goudar,
	qemu-ppc

[-- Attachment #1: Type: text/plain, Size: 3052 bytes --]

On Thu, Mar 26, 2020 at 12:29:04AM +1000, Nicholas Piggin wrote:
> Some of the conditions are not as clearly documented as they could be.
> Also the non-FWNMI case does not need a large comment.
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Applied to ppc-for-5.0.

> ---
>  hw/ppc/spapr_events.c | 19 +++++++++++--------
>  1 file changed, 11 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index a4a540f43d..a908c5d0e9 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -860,17 +860,13 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>      Error *local_err = NULL;
>  
>      if (spapr->fwnmi_machine_check_addr == -1) {
> -        /*
> -         * This implies that we have hit a machine check either when the
> -         * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
> -         * called) or between system reset and "ibm,nmi-register".
> -         * Fall back to the old machine check behavior in such cases.
> -         */
> +        /* Non-FWNMI case, deliver it like an architected CPU interrupt. */
>          cs->exception_index = POWERPC_EXCP_MCHECK;
>          ppc_cpu_do_interrupt(cs);
>          return;
>      }
>  
> +    /* Wait for FWNMI interlock. */
>      while (spapr->fwnmi_machine_check_interlock != -1) {
>          /*
>           * Check whether the same CPU got machine check error
> @@ -882,8 +878,13 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>              return;
>          }
>          qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond);
> -        /* Meanwhile if the system is reset, then just return */
>          if (spapr->fwnmi_machine_check_addr == -1) {
> +            /*
> +             * If the machine was reset while waiting for the interlock,
> +             * abort the delivery. The machine check applies to a context
> +             * that no longer exists, so it wouldn't make sense to deliver
> +             * it now.
> +             */
>              return;
>          }
>      }
> @@ -894,7 +895,9 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>           * We don't want to abort so we let the migration to continue.
>           * In a rare case, the machine check handler will run on the target.
>           * Though this is not preferable, it is better than aborting
> -         * the migration or killing the VM.
> +         * the migration or killing the VM. It is okay to call
> +         * migrate_del_blocker on a blocker that was not added (which the
> +         * nmi-interlock handler would do when it's called after this).
>           */
>          warn_report("Received a fwnmi while migration was in progress");
>      }

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 3/4] ppc/spapr: Add FWNMI machine check delivery warnings
  2020-03-25 14:29 ` [PATCH v2 3/4] ppc/spapr: Add FWNMI machine check delivery warnings Nicholas Piggin
  2020-03-25 17:24   ` Greg Kurz
@ 2020-03-26  0:19   ` David Gibson
  1 sibling, 0 replies; 13+ messages in thread
From: David Gibson @ 2020-03-26  0:19 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: Aravinda Prasad, Alexey Kardashevskiy, Mahesh Salgaonkar,
	qemu-devel, Greg Kurz, Cédric Le Goater, Ganesh Goudar,
	qemu-ppc

[-- Attachment #1: Type: text/plain, Size: 2116 bytes --]

On Thu, Mar 26, 2020 at 12:29:05AM +1000, Nicholas Piggin wrote:
> Add some messages which explain problems and guest misbehaviour that
> may be difficult to diagnose in rare cases of machine checks.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Applied to ppc-for-5.0.

> ---
>  hw/ppc/spapr_events.c | 4 ++++
>  hw/ppc/spapr_rtas.c   | 3 +++
>  2 files changed, 7 insertions(+)
> 
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index a908c5d0e9..c8964eb25d 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -833,6 +833,8 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
>      /* get rtas addr from fdt */
>      rtas_addr = spapr_get_rtas_addr();
>      if (!rtas_addr) {
> +        error_report(
> +"FWNMI: Unable to deliver machine check to guest: rtas_addr not found.");
>          qemu_system_guest_panicked(NULL);
>          g_free(ext_elog);
>          return;
> @@ -874,6 +876,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>           * that CPU called "ibm,nmi-interlock")
>           */
>          if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) {
> +            error_report(
> +"FWNMI: Unable to deliver machine check to guest: nested machine check.");
>              qemu_system_guest_panicked(NULL);
>              return;
>          }
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index 29abe66d01..bcac0d00e7 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -462,6 +462,9 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>      }
>  
>      if (spapr->fwnmi_machine_check_addr == -1) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +"FWNMI: ibm,nmi-interlock RTAS called with FWNMI not registered.\n");
> +
>          /* NMI register not called */
>          rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>          return;

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 4/4] ppc/spapr: Don't kill the guest if a recovered FWNMI machine check delivery fails
  2020-03-25 18:13   ` Greg Kurz
@ 2020-03-26  0:30     ` David Gibson
  0 siblings, 0 replies; 13+ messages in thread
From: David Gibson @ 2020-03-26  0:30 UTC (permalink / raw)
  To: Greg Kurz
  Cc: Aravinda Prasad, Alexey Kardashevskiy, Mahesh Salgaonkar,
	qemu-devel, Nicholas Piggin, Cédric Le Goater,
	Ganesh Goudar, qemu-ppc

[-- Attachment #1: Type: text/plain, Size: 4339 bytes --]

On Wed, Mar 25, 2020 at 07:13:32PM +0100, Greg Kurz wrote:
> On Thu, 26 Mar 2020 00:29:06 +1000
> Nicholas Piggin <npiggin@gmail.com> wrote:
> 
> > Try to be tolerant of FWNMI delivery errors if the machine check had been
> > recovered by the host.
> > 
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  hw/ppc/spapr_events.c | 27 ++++++++++++++++++++++-----
> >  1 file changed, 22 insertions(+), 5 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> > index c8964eb25d..b90ecb8afe 100644
> > --- a/hw/ppc/spapr_events.c
> > +++ b/hw/ppc/spapr_events.c
> > @@ -833,13 +833,25 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
> >      /* get rtas addr from fdt */
> >      rtas_addr = spapr_get_rtas_addr();
> >      if (!rtas_addr) {
> > -        error_report(
> > +        if (!recovered) {
> > +            error_report(
> >  "FWNMI: Unable to deliver machine check to guest: rtas_addr not found.");
> > -        qemu_system_guest_panicked(NULL);
> > +            qemu_system_guest_panicked(NULL);
> > +        } else {
> > +            warn_report(
> > +"FWNMI: Unable to deliver machine check to guest: rtas_addr not found. "
> > +"Machine check recovered.");
> > +        }
> >          g_free(ext_elog);
> >          return;
> >      }
> >  
> > +    /*
> > +     * Must not set interlock if the MCE does not get delivered to the guest
> > +     * in the error case above.
> > +     */
> 
> It is a bit confusing to read "must not set interlock" and to see the
> interlock being set the line below IM-non-native-speaker-HO... also
> a small clarification of the outcome of taking the interlock without
> delivering the MCE could help people who aren't familiar with FWNMI
> to avoid doing bad things.

That's a good point.  That comment made sense to me (and presumably to
Nick) because it directly answers a question I had on an earlier draft
(which moved this without comment).  But someone without that context
(say, future me) could well find that confusing.

> 
> What about something like the following ?
> 
>     /*
>      * By taking the interlock, we assume that the MCE will be
>      * delivered to the guest. CAUTION: don't add anything that
>      * could prevent the MCE to be delivered after this line,
>      * otherwise the guest won't be able to release the interlock
>      * and ultimately hang/crash?
>      */

I've substituted your comment inline.

> 
> > +    spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
> > +
> 
> For improved paranoia, this could even be done just before calling
> ppc_cpu_do_fwnmi_machine_check().
> 
> Anyway, the change is good enough so:
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>
> 
> >      stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET,
> >                  env->gpr[3]);
> >      cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
> > @@ -876,9 +888,15 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> >           * that CPU called "ibm,nmi-interlock")
> >           */
> >          if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) {
> > -            error_report(
> > +            if (!recovered) {
> > +                error_report(
> >  "FWNMI: Unable to deliver machine check to guest: nested machine check.");
> > -            qemu_system_guest_panicked(NULL);
> > +                qemu_system_guest_panicked(NULL);
> > +            } else {
> > +                warn_report(
> > +"FWNMI: Unable to deliver machine check to guest: nested machine check. "
> > +"Machine check recovered.");
> > +            }
> >              return;
> >          }
> >          qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond);
> > @@ -906,7 +924,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> >          warn_report("Received a fwnmi while migration was in progress");
> >      }
> >  
> > -    spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
> >      spapr_mce_dispatch_elog(cpu, recovered);
> >  }
> >  
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 4/4] ppc/spapr: Don't kill the guest if a recovered FWNMI machine check delivery fails
  2020-03-25 14:29 ` [PATCH v2 4/4] ppc/spapr: Don't kill the guest if a recovered FWNMI machine check delivery fails Nicholas Piggin
  2020-03-25 18:13   ` Greg Kurz
@ 2020-03-26  0:30   ` David Gibson
  1 sibling, 0 replies; 13+ messages in thread
From: David Gibson @ 2020-03-26  0:30 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: Aravinda Prasad, Alexey Kardashevskiy, Mahesh Salgaonkar,
	qemu-devel, Greg Kurz, Cédric Le Goater, Ganesh Goudar,
	qemu-ppc

[-- Attachment #1: Type: text/plain, Size: 2928 bytes --]

On Thu, Mar 26, 2020 at 12:29:06AM +1000, Nicholas Piggin wrote:
> Try to be tolerant of FWNMI delivery errors if the machine check had been
> recovered by the host.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Applied to ppc-for-5.0.

> ---
>  hw/ppc/spapr_events.c | 27 ++++++++++++++++++++++-----
>  1 file changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index c8964eb25d..b90ecb8afe 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -833,13 +833,25 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
>      /* get rtas addr from fdt */
>      rtas_addr = spapr_get_rtas_addr();
>      if (!rtas_addr) {
> -        error_report(
> +        if (!recovered) {
> +            error_report(
>  "FWNMI: Unable to deliver machine check to guest: rtas_addr not found.");
> -        qemu_system_guest_panicked(NULL);
> +            qemu_system_guest_panicked(NULL);
> +        } else {
> +            warn_report(
> +"FWNMI: Unable to deliver machine check to guest: rtas_addr not found. "
> +"Machine check recovered.");
> +        }
>          g_free(ext_elog);
>          return;
>      }
>  
> +    /*
> +     * Must not set interlock if the MCE does not get delivered to the guest
> +     * in the error case above.
> +     */
> +    spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
> +
>      stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET,
>                  env->gpr[3]);
>      cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
> @@ -876,9 +888,15 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>           * that CPU called "ibm,nmi-interlock")
>           */
>          if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) {
> -            error_report(
> +            if (!recovered) {
> +                error_report(
>  "FWNMI: Unable to deliver machine check to guest: nested machine check.");
> -            qemu_system_guest_panicked(NULL);
> +                qemu_system_guest_panicked(NULL);
> +            } else {
> +                warn_report(
> +"FWNMI: Unable to deliver machine check to guest: nested machine check. "
> +"Machine check recovered.");
> +            }
>              return;
>          }
>          qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond);
> @@ -906,7 +924,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>          warn_report("Received a fwnmi while migration was in progress");
>      }
>  
> -    spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
>      spapr_mce_dispatch_elog(cpu, recovered);
>  }
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2020-03-26  0:43 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-25 14:29 [PATCH v2 0/4] FWNMI follow up patches Nicholas Piggin
2020-03-25 14:29 ` [PATCH v2 1/4] ppc/spapr: KVM FWNMI should not be enabled until guest requests it Nicholas Piggin
2020-03-25 17:17   ` Greg Kurz
2020-03-26  0:17   ` David Gibson
2020-03-25 14:29 ` [PATCH v2 2/4] ppc/spapr: Improve FWNMI machine check delivery corner case comments Nicholas Piggin
2020-03-26  0:18   ` David Gibson
2020-03-25 14:29 ` [PATCH v2 3/4] ppc/spapr: Add FWNMI machine check delivery warnings Nicholas Piggin
2020-03-25 17:24   ` Greg Kurz
2020-03-26  0:19   ` David Gibson
2020-03-25 14:29 ` [PATCH v2 4/4] ppc/spapr: Don't kill the guest if a recovered FWNMI machine check delivery fails Nicholas Piggin
2020-03-25 18:13   ` Greg Kurz
2020-03-26  0:30     ` David Gibson
2020-03-26  0:30   ` David Gibson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).