Re: [Qemu-devel] [Qemu-ppc] [PATCH v2 3/4] target-ppc: Build error log

From: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
To: Alexander Graf <agraf@suse.de>
Cc: benh@au1.ibm.com, aik@au1.ibm.com, qemu-devel@nongnu.org,
	qemu-ppc@nongnu.org, paulus@samba.org
Subject: Re: [Qemu-devel] [Qemu-ppc] [PATCH v2 3/4] target-ppc: Build error log
Date: Fri, 05 Sep 2014 13:58:08 +0530	[thread overview]
Message-ID: <54097418.6040101@linux.vnet.ibm.com> (raw)
In-Reply-To: <54096EA8.30600@suse.de>

On Friday 05 September 2014 01:34 PM, Alexander Graf wrote:
> 
> 
> On 04.09.14 13:13, Aravinda Prasad wrote:
>> Whenever there is a physical memory error due to bit
>> flips, which cannot be corrected by hardware, the error
>> is passed on to the kernel. If the memory address in
>> error belongs to guest address space then guest kernel
>> is responsible to take action. Hence the error is passed
>> on to guest via KVM by invoking 0x200 NMI vector.
>>
>> However, guest OS, as per PAPR, expects an error log
>> upon such error. This patch registers a new hcall
>> which is issued from 0x200 interrupt vector and builds
>> the error log, copies the error log to rtas space and
>> passes the address of the error log to guest
>>
>> Enhancement to KVM to perform above functionality is
>> already in upstream kernel.
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>> ---
>>  hw/ppc/spapr_hcall.c   |  154 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/ppc/spapr.h |    4 +
>>  2 files changed, 157 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
>> index 01650ba..c3aa448 100644
>> --- a/hw/ppc/spapr_hcall.c
>> +++ b/hw/ppc/spapr_hcall.c
>> @@ -14,6 +14,88 @@ struct SPRSyncState {
>>      target_ulong mask;
>>  };
>>  
>> +/* Offset from rtas-base where error log is placed */
>> +#define RTAS_ERROR_OFFSET       (TARGET_PAGE_SIZE)
> 
> You can't assume this. Please compute the value at the same place you
> compute the rtas-size.

sure

> 
>> +
>> +#define RTAS_ELOG_SEVERITY_SHIFT         0x5
>> +#define RTAS_ELOG_DISPOSITION_SHIFT      0x3
>> +#define RTAS_ELOG_INITIATOR_SHIFT        0x4
>> +
>> +/*
>> + * Only required RTAS event severity, disposition, initiator
>> + * target and type are copied from arch/powerpc/include/asm/rtas.h
>> + */
>> +
>> +/* RTAS event severity */
>> +#define RTAS_SEVERITY_ERROR_SYNC    0x3
>> +
>> +/* RTAS event disposition */
>> +#define RTAS_DISP_NOT_RECOVERED     0x2
>> +
>> +/* RTAS event initiator */
>> +#define RTAS_INITIATOR_MEMORY       0x4
>> +
>> +/* RTAS event target */
>> +#define RTAS_TARGET_MEMORY          0x4
>> +
>> +/* RTAS event type */
>> +#define RTAS_TYPE_ECC_UNCORR        0x09
>> +
>> +/*
>> + * Currently KVM only passes on the uncorrected machine
>> + * check memory error to guest. Other machine check errors
>> + * such as SLB multi-hit and TLB multi-hit are recovered
>> + * in KVM and are not passed on to guest.
>> + *
>> + * DSISR Bit for uncorrected machine check error. Based
>> + * on arch/powerpc/include/asm/mce.h
> 
> Please don't include Linux code. This file is GPLv2+ licensed and I
> don't want to taint it as GPLv2 only just for the sake of mce.

Hmm.. ok.  In that case I should not copy rtas_error_log structure below
from kernel source as well.

> 
>> + */
>> +#define PPC_BIT(bit)                (0x8000000000000000ULL >> bit)
>> +#define P7_DSISR_MC_UE              (PPC_BIT(48))  /* P8 too */
>> +
>> +/* Adopted from kernel source arch/powerpc/include/asm/rtas.h */
>> +struct rtas_error_log {
>> +    /* Byte 0 */
>> +    uint8_t     byte0;          /* Architectural version */
>> +
>> +    /* Byte 1 */
>> +    uint8_t     byte1;
>> +    /* XXXXXXXX
>> +     * XXX      3: Severity level of error
>> +     *    XX    2: Degree of recovery
>> +     *      X   1: Extended log present?
>> +     *       XX 2: Reserved
>> +     */
>> +
>> +    /* Byte 2 */
>> +    uint8_t     byte2;
>> +    /* XXXXXXXX
>> +     * XXXX     4: Initiator of event
>> +     *     XXXX 4: Target of failed operation
>> +     */
>> +    uint8_t     byte3;          /* General event or error*/
>> +};
>> +
>> +/*
>> + * Data format in RTAS-Blob
>> + *
>> + * This structure contains error information related to Machine
>> + * Check exception. This is filled up and copied to rtas-blob
>> + * upon machine check exception.
>> + */
>> +struct rtas_mc_log {
>> +    target_ulong srr0;
>> +    target_ulong srr1;
>> +    /*
>> +     * Beginning of error log address. This is properly
>> +     * populated and passed on to OS registered machine
>> +     * check notification routine upon machine check
>> +     * exception
>> +     */
>> +    target_ulong r3;
>> +    struct rtas_error_log err_log;
>> +};
>> +
>>  static void do_spr_sync(void *arg)
>>  {
>>      struct SPRSyncState *s = arg;
>> @@ -586,6 +668,77 @@ static target_ulong h_rtas_update(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>>      return 0;
>>  }
>>  
>> +static target_ulong h_report_mc_err(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>> +                                 target_ulong opcode, target_ulong *args)
>> +{
>> +    struct rtas_mc_log mc_log;
>> +    CPUPPCState *env = &cpu->env;
>> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
>> +
>> +    /*
>> +     * We save the original r3 register in SPRG2 in 0x200 vector,
>> +     * which is patched during call to ibm.nmi-register. Original
>> +     * r3 is required to be included in error log
>> +     */
>> +    mc_log.r3 = env->spr[SPR_SPRG2];
> 
> Don't you have to call cpu_synchronize_registers() before you access
> SPRG2? Otherwise the value may be stale.

Yes true. Will include.

> 
>> +
>> +    /*
>> +     * SRR0 and SRR1, containing nip and msr at the time of exception,
>> +     * are clobbered when we return from this hcall. Hence they
>> +     * need to be properly saved and restored. We save srr0
>> +     * and srr1 in rtas blob and restore it in 0x200 vector
>> +     * before branching to OS registered machine check handler
>> +     */
>> +    mc_log.srr0 = env->spr[SPR_SRR0];
>> +    mc_log.srr1 = env->spr[SPR_SRR1];
>> +
>> +    /* Set error log fields */
>> +    mc_log.err_log.byte0 = 0x00;
>> +    mc_log.err_log.byte1 =
>> +        (RTAS_SEVERITY_ERROR_SYNC << RTAS_ELOG_SEVERITY_SHIFT);
>> +    mc_log.err_log.byte1 |=
>> +        (RTAS_DISP_NOT_RECOVERED << RTAS_ELOG_DISPOSITION_SHIFT);
>> +    mc_log.err_log.byte2 =
>> +        (RTAS_INITIATOR_MEMORY << RTAS_ELOG_INITIATOR_SHIFT);
>> +    mc_log.err_log.byte2 |= RTAS_TARGET_MEMORY;
>> +
>> +    if (env->spr[SPR_DSISR] & P7_DSISR_MC_UE) {
>> +        mc_log.err_log.byte3 = RTAS_TYPE_ECC_UNCORR;
>> +    } else {
>> +        mc_log.err_log.byte3 = 0x0;
>> +    }
>> +
>> +    /* Handle all Host/Guest LE/BE combinations */
>> +    if ((*pcc->interrupts_big_endian)(cpu)) {
> 
> This should check MSR.LE, not ILE.

I did not quite get you. Did you mean to say I should check LE bit in
MSR instead of invoking pcc->interrupts_big_endian?

If so I should also change that check in 4/4 where I patch 0x200.

Regards,
Aravinda

> 
> 
> Alex
> 

-- 
Regards,
Aravinda