* [RFC PATCH v4 01/12] powerpc/book3s: Split the common exception prolog logic into two section.
2013-09-16 12:40 [RFC PATCH v4 00/12] Machine check handling in linux host Mahesh J Salgaonkar
@ 2013-09-16 12:41 ` Mahesh J Salgaonkar
2013-09-16 12:41 ` [RFC PATCH v4 02/12] powerpc/book3s: Introduce exclusive emergency stack for machine check exception Mahesh J Salgaonkar
` (10 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Mahesh J Salgaonkar @ 2013-09-16 12:41 UTC (permalink / raw)
To: linuxppc-dev, Paul Mackerras, Benjamin Herrenschmidt
Cc: Jeremy Kerr, Anton Blanchard
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
This patch splits the common exception prolog logic into three parts to
facilitate reuse of existing code in the next patch. This patch also
re-arranges few instructions in such a way that the second part now deals
with saving register values from paca save area to stack frame, and
the third part deals with saving current register values to stack frame.
The second and third part will be reused in the machine check exception
routine in the subsequent patch.
Please note that this patch does not introduce or change existing code
logic. Instead it is just a code movement and instruction re-ordering.
Patch Acked-by Paul. But made some minor modification (explained above) to
address Paul's comment in the later patch(3).
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
---
arch/powerpc/include/asm/exception-64s.h | 21 ++++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 07ca627..94ab2d2 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -281,9 +281,12 @@ do_kvm_##n: \
beq 4f; /* if from kernel mode */ \
ACCOUNT_CPU_USER_ENTRY(r9, r10); \
SAVE_PPR(area, r9, r10); \
-4: std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
+4: EXCEPTION_PROLOG_COMMON_2(area) \
+ EXCEPTION_PROLOG_COMMON_3(n) \
+ ACCOUNT_STOLEN_TIME
+
+/* Save original regs values from save area to stack frame. */
+#define EXCEPTION_PROLOG_COMMON_2(area) \
ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \
ld r10,area+EX_R10(r13); \
std r9,GPR9(r1); \
@@ -298,9 +301,14 @@ do_kvm_##n: \
ld r10,area+EX_CFAR(r13); \
std r10,ORIG_GPR3(r1); \
END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
- GET_LR(r9,area); /* Get LR, later save to stack */ \
+ GET_LR(r9,area); /* Save LR to stack */ \
+ std r9,_LINK(r1);
+
+#define EXCEPTION_PROLOG_COMMON_3(n) \
+ std r2,GPR2(r1); /* save r2 in stackframe */ \
+ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
+ SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
- std r9,_LINK(r1); \
mfctr r10; /* save CTR in stackframe */ \
std r10,_CTR(r1); \
lbz r10,PACASOFTIRQEN(r13); \
@@ -312,8 +320,7 @@ do_kvm_##n: \
li r10,0; \
ld r11,exception_marker@toc(r2); \
std r10,RESULT(r1); /* clear regs->result */ \
- std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \
- ACCOUNT_STOLEN_TIME
+ std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */
/*
* Exception vectors.
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [RFC PATCH v4 02/12] powerpc/book3s: Introduce exclusive emergency stack for machine check exception.
2013-09-16 12:40 [RFC PATCH v4 00/12] Machine check handling in linux host Mahesh J Salgaonkar
2013-09-16 12:41 ` [RFC PATCH v4 01/12] powerpc/book3s: Split the common exception prolog logic into two section Mahesh J Salgaonkar
@ 2013-09-16 12:41 ` Mahesh J Salgaonkar
2013-09-16 12:41 ` [RFC PATCH v4 03/12] powerpc/book3s: handle machine check in Linux host Mahesh J Salgaonkar
` (9 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Mahesh J Salgaonkar @ 2013-09-16 12:41 UTC (permalink / raw)
To: linuxppc-dev, Paul Mackerras, Benjamin Herrenschmidt
Cc: Jeremy Kerr, Anton Blanchard
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
This patch introduces exclusive emergency stack for machine check exception.
We use emergency stack to handle machine check exception so that we can save
MCE information (srr1, srr0, dar and dsisr) before turning on ME bit and be
ready for re-entrancy. This helps us to prevent clobbering of MCE information
in case of nested machine checks.
The reason for using emergency stack over normal kernel stack is that the
machine check might occur in the middle of setting up a stack frame which may
result into improper use of kernel stack.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
---
arch/powerpc/include/asm/paca.h | 9 +++++++++
arch/powerpc/kernel/setup_64.c | 10 +++++++++-
arch/powerpc/xmon/xmon.c | 4 ++++
3 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 77c91e7..b4ca4e9 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -147,6 +147,15 @@ struct paca_struct {
*/
struct opal_machine_check_event *opal_mc_evt;
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Exclusive emergency stack pointer for machine check exception. */
+ void *mc_emergency_sp;
+ /*
+ * Flag to check whether we are in machine check early handler
+ * and already using emergency stack.
+ */
+ u16 in_mce;
+#endif
/* Stuff for accurate time accounting */
u64 user_time; /* accumulated usermode TB ticks */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 389fb807..6f96af0 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -529,7 +529,8 @@ static void __init exc_lvl_early_init(void)
/*
* Stack space used when we detect a bad kernel stack pointer, and
- * early in SMP boots before relocation is enabled.
+ * early in SMP boots before relocation is enabled. Exclusive emergency
+ * stack for machine checks.
*/
static void __init emergency_stack_init(void)
{
@@ -552,6 +553,13 @@ static void __init emergency_stack_init(void)
sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
sp += THREAD_SIZE;
paca[i].emergency_sp = __va(sp);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* emergency stack for machine check exception handling. */
+ sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+ sp += THREAD_SIZE;
+ paca[i].mc_emergency_sp = __va(sp);
+#endif
}
}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 96bf5bd..5f17adb 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2044,6 +2044,10 @@ static void dump_one_paca(int cpu)
DUMP(p, stab_addr, "lx");
#endif
DUMP(p, emergency_sp, "p");
+#ifdef CONFIG_PPC_BOOK3S_64
+ DUMP(p, mc_emergency_sp, "p");
+ DUMP(p, in_mce, "x");
+#endif
DUMP(p, data_offset, "lx");
DUMP(p, hw_cpu_id, "x");
DUMP(p, cpu_start, "x");
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [RFC PATCH v4 03/12] powerpc/book3s: handle machine check in Linux host.
2013-09-16 12:40 [RFC PATCH v4 00/12] Machine check handling in linux host Mahesh J Salgaonkar
2013-09-16 12:41 ` [RFC PATCH v4 01/12] powerpc/book3s: Split the common exception prolog logic into two section Mahesh J Salgaonkar
2013-09-16 12:41 ` [RFC PATCH v4 02/12] powerpc/book3s: Introduce exclusive emergency stack for machine check exception Mahesh J Salgaonkar
@ 2013-09-16 12:41 ` Mahesh J Salgaonkar
2013-09-16 12:41 ` [RFC PATCH v4 04/12] powerpc/book3s: Return from interrupt if coming from evil context Mahesh J Salgaonkar
` (8 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Mahesh J Salgaonkar @ 2013-09-16 12:41 UTC (permalink / raw)
To: linuxppc-dev, Paul Mackerras, Benjamin Herrenschmidt
Cc: Jeremy Kerr, Anton Blanchard
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Move machine check entry point into Linux. So far we were dependent on
firmware to decode MCE error details and handover the high level info to OS.
This patch introduces early machine check routine that saves the MCE
information (srr1, srr0, dar and dsisr) to the emergency stack. We allocate
stack frame on emergency stack and set the r1 accordingly. This allows us to be
prepared to take another exception without loosing context. One thing to note
here that, if we get another machine check while ME bit is off then we risk a
checkstop. Hence we restrict ourselves to save only MCE information and
register saved on PACA_EXMC save are before we turn the ME bit on. We use
paca->in_mce flag to differentiate between first entry and nested machine check
entry which helps proper use of emergency stack. We increment paca->in_mce
every time we enter in early machine check handler and decrement it while
leaving. When we enter machine check early handler first time (paca->in_mce ==
0), we are sure nobody is using MC emergency stack and allocate a stack frame
at the start of the emergency stack. During subsequent entry (paca->in_mce >
0), we know that r1 points inside emergency stack and we allocate separate
stack frame accordingly. This prevents us from clobbering MCE information
during nested machine checks.
The early machine check handler changes are placed under CPU_FTR_HVMODE
section. This makes sure that the early machine check handler will get executed
only in hypervisor kernel.
This is the code flow:
Machine Check Interrupt
|
V
0x200 vector ME=0, IR=0, DR=0
|
V
+-----------------------------------------------+
|machine_check_pSeries_early: | ME=0, IR=0, DR=0
| Alloc frame on emergency stack |
| Save srr1, srr0, dar and dsisr on stack |
+-----------------------------------------------+
|
(ME=1, IR=0, DR=0, RFID)
|
V
machine_check_handle_early ME=1, IR=0, DR=0
|
V
+-----------------------------------------------+
| machine_check_early (r3=pt_regs) | ME=1, IR=0, DR=0
| Things to do: (in next patches) |
| Flush SLB for SLB errors |
| Flush TLB for TLB errors |
| Decode and save MCE info |
+-----------------------------------------------+
|
(Fall through existing exception handler routine.)
|
V
machine_check_pSerie ME=1, IR=0, DR=0
|
(ME=1, IR=1, DR=1, RFID)
|
V
machine_check_common ME=1, IR=1, DR=1
.
.
.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
arch/powerpc/kernel/asm-offsets.c | 4 +
arch/powerpc/kernel/exceptions-64s.S | 111 ++++++++++++++++++++++++++++++++++
arch/powerpc/kernel/traps.c | 12 ++++
3 files changed, 127 insertions(+)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8207459..e0e8ebb 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -238,6 +238,10 @@ int main(void)
DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
#endif /* CONFIG_PPC_STD_MMU_64 */
DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
+#ifdef CONFIG_PPC_BOOK3S_64
+ DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp));
+ DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
+#endif
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 902ca3c..87a70d2 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -156,7 +156,11 @@ machine_check_pSeries_1:
HMT_MEDIUM_PPR_DISCARD
SET_SCRATCH0(r13) /* save r13 */
EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+ b machine_check_pSeries_early
+FTR_SECTION_ELSE
b machine_check_pSeries_0
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
. = 0x300
.globl data_access_pSeries
@@ -404,6 +408,64 @@ denorm_exception_hv:
.align 7
/* moved from 0x200 */
+machine_check_pSeries_early:
+BEGIN_FTR_SECTION
+ EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
+ /*
+ * Register contents:
+ * R13 = PACA
+ * R9 = CR
+ * Original R9 to R13 is saved on PACA_EXMC
+ *
+ * Switch to mc_emergency stack and handle re-entrancy (though we
+ * currently don't test for overflow). Save MCE registers srr1,
+ * srr0, dar and dsisr and then set ME=1
+ *
+ * We use paca->in_mce to check whether this is the first entry or
+ * nested machine check. We increment paca->in_mce to track nested
+ * machine checks.
+ *
+ * If this is the first entry then set stack pointer to
+ * paca->mc_emergency_sp, otherwise r1 is already pointing to
+ * stack frame on mc_emergency stack.
+ *
+ * NOTE: We are here with MSR_ME=0 (off), which means we risk a
+ * checkstop if we get another machine check exception before we do
+ * rfid with MSR_ME=1.
+ */
+ mr r11,r1 /* Save r1 */
+ lhz r10,PACA_IN_MCE(r13)
+ cmpwi r10,0 /* Are we in nested machine check */
+ bne 0f /* Yes, we are. */
+ /* First machine check entry */
+ ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
+0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ addi r10,r10,1 /* increment paca->in_mce */
+ sth r10,PACA_IN_MCE(r13)
+ std r11,GPR1(r1) /* Save r1 on the stack. */
+ std r11,0(r1) /* make stack chain pointer */
+ mfspr r11,SPRN_SRR0 /* Save SRR0 */
+ std r11,_NIP(r1)
+ mfspr r11,SPRN_SRR1 /* Save SRR1 */
+ std r11,_MSR(r1)
+ mfspr r11,SPRN_DAR /* Save DAR */
+ std r11,_DAR(r1)
+ mfspr r11,SPRN_DSISR /* Save DSISR */
+ std r11,_DSISR(r1)
+ std r9,_CCR(r1) /* Save CR in stackframe */
+ /* Save r9 through r13 from EXMC save area to stack frame. */
+ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
+ mfmsr r11 /* get MSR value */
+ ori r11,r11,MSR_ME /* turn on ME bit */
+ ori r11,r11,MSR_RI /* turn on RI bit */
+ ld r12,PACAKBASE(r13) /* get high part of &label */
+ LOAD_HANDLER(r12, machine_check_handle_early)
+ mtspr SPRN_SRR0,r12
+ mtspr SPRN_SRR1,r11
+ rfid
+ b . /* prevent speculative execution */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
machine_check_pSeries:
.globl machine_check_fwnmi
machine_check_fwnmi:
@@ -681,6 +743,55 @@ machine_check_common:
bl .machine_check_exception
b .ret_from_except
+#define MACHINE_CHECK_HANDLER_WINDUP \
+ /* Clear MSR_RI before setting SRR0 and SRR1. */\
+ li r0,MSR_RI; \
+ mfmsr r9; /* get MSR value */ \
+ andc r9,r9,r0; \
+ mtmsrd r9,1; /* Clear MSR_RI */ \
+ /* Move original SRR0 and SRR1 into the respective regs */ \
+ ld r9,_MSR(r1); \
+ mtspr SPRN_SRR1,r9; \
+ ld r3,_NIP(r1); \
+ mtspr SPRN_SRR0,r3; \
+ ld r9,_CTR(r1); \
+ mtctr r9; \
+ ld r9,_XER(r1); \
+ mtxer r9; \
+ ld r9,_LINK(r1); \
+ mtlr r9; \
+ REST_GPR(0, r1); \
+ REST_8GPRS(2, r1); \
+ REST_GPR(10, r1); \
+ ld r11,_CCR(r1); \
+ mtcr r11; \
+ /* Decrement paca->in_mce. */ \
+ lhz r12,PACA_IN_MCE(r13); \
+ subi r12,r12,1; \
+ sth r12,PACA_IN_MCE(r13); \
+ REST_GPR(11, r1); \
+ REST_2GPRS(12, r1); \
+ /* restore original r1. */ \
+ ld r1,GPR1(r1)
+
+ /*
+ * Handle machine check early in real mode. We come here with
+ * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
+ */
+ .align 7
+ .globl machine_check_handle_early
+machine_check_handle_early:
+BEGIN_FTR_SECTION
+ std r0,GPR0(r1) /* Save r0 */
+ EXCEPTION_PROLOG_COMMON_3(0x200)
+ bl .save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .machine_check_early
+ /* Deliver the machine check to host kernel in V mode. */
+ MACHINE_CHECK_HANDLER_WINDUP
+ b machine_check_pSeries
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt)
STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index e435bc0..e8d6bf1 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -284,6 +284,18 @@ void system_reset_exception(struct pt_regs *regs)
/* What should we do here? We could issue a shutdown or hard reset. */
}
+
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contains srr0 and ssr1.
+ */
+long machine_check_early(struct pt_regs *regs)
+{
+ /* TODO: handle/decode machine check reason */
+ return 0;
+}
+
#endif
/*
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [RFC PATCH v4 04/12] powerpc/book3s: Return from interrupt if coming from evil context.
2013-09-16 12:40 [RFC PATCH v4 00/12] Machine check handling in linux host Mahesh J Salgaonkar
` (2 preceding siblings ...)
2013-09-16 12:41 ` [RFC PATCH v4 03/12] powerpc/book3s: handle machine check in Linux host Mahesh J Salgaonkar
@ 2013-09-16 12:41 ` Mahesh J Salgaonkar
2013-09-16 12:41 ` [RFC PATCH v4 05/12] powerpc/book3s: Introduce a early machine check hook in cpu_spec Mahesh J Salgaonkar
` (7 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Mahesh J Salgaonkar @ 2013-09-16 12:41 UTC (permalink / raw)
To: linuxppc-dev, Paul Mackerras, Benjamin Herrenschmidt
Cc: Jeremy Kerr, Anton Blanchard
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
We can get machine checks from any context. We need to make sure that
we handle all of them correctly. If we are coming from hypervisor user-space,
we can continue in host kernel in virtual mode to deliver the MC event.
If we got woken up from power-saving mode then we may come in with one of
the following state:
a. No state loss
b. Supervisor state loss
c. Hypervisor state loss
For (a) and (b), we go back to nap again. State (c) is fatal, keep spinning.
For all other context which we not sure of queue up the MCE event and return
from the interrupt.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
arch/powerpc/kernel/exceptions-64s.S | 82 ++++++++++++++++++++++++++++++++++
arch/powerpc/kernel/idle_power7.S | 1
2 files changed, 83 insertions(+)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 87a70d2..0a92ba4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -155,6 +155,24 @@ machine_check_pSeries_1:
*/
HMT_MEDIUM_PPR_DISCARD
SET_SCRATCH0(r13) /* save r13 */
+#ifdef CONFIG_PPC_P7_NAP
+BEGIN_FTR_SECTION
+ /* Running native on arch 2.06 or later, check if we are
+ * waking up from nap. We only handle no state loss and
+ * supervisor state loss. We do -not- handle hypervisor
+ * state loss at this time.
+ */
+ mfspr r13,SPRN_SRR1
+ rlwinm. r13,r13,47-31,30,31
+ beq 9f
+
+ /* waking up from powersave (nap) state */
+ cmpwi cr1,r13,2
+ /* Total loss of HV state is fatal. let's just stay stuck here */
+ bgt cr1,.
+9:
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif /* CONFIG_PPC_P7_NAP */
EXCEPTION_PROLOG_0(PACA_EXMC)
BEGIN_FTR_SECTION
b machine_check_pSeries_early
@@ -787,6 +805,70 @@ BEGIN_FTR_SECTION
bl .save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl .machine_check_early
+ ld r12,_MSR(r1)
+#ifdef CONFIG_PPC_P7_NAP
+ /*
+ * Check if thread was in power saving mode. We come here when any
+ * of the following is true:
+ * a. thread wasn't in power saving mode
+ * b. thread was in power saving mode with no state loss or
+ * supervisor state loss
+ *
+ * Go back to nap again if (b) is true.
+ */
+ rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
+ beq 4f /* No, it wasn;t */
+ /* Thread was in power saving mode. Go back to nap again. */
+ cmpwi r11,2
+ bne 3f
+ /* Supervisor state loss */
+ li r0,1
+ stb r0,PACA_NAPSTATELOST(r13)
+3: MACHINE_CHECK_HANDLER_WINDUP
+ GET_PACA(r13)
+ ld r1,PACAR1(r13)
+ b .power7_enter_nap_mode
+4:
+#endif
+ /*
+ * Check if we are coming from hypervisor userspace. If yes then we
+ * continue in host kernel in V mode to deliver the MC event.
+ */
+ rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
+ beq 5f
+ andi. r11,r12,MSR_PR /* See if coming from user. */
+ bne 9f /* continue in V mode if we are. */
+
+5:
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ /*
+ * We are coming from kernel context. Check if we are coming from
+ * guest. if yes, then we can continue. We will fall through
+ * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
+ */
+ lbz r11,HSTATE_IN_GUEST(r13)
+ cmpwi r11,0 /* Check if coming from guest */
+ bne 9f /* continue if we are. */
+#endif
+ /*
+ * At this point we are not sure about what context we come from.
+ * Queue up the MCE event and return from the interrupt.
+ * But before that, check if this is an un-recoverable exception.
+ * If yes, then stay on emergency stack and panic.
+ */
+ andi. r11,r12,MSR_RI
+ bne 2f
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+2:
+ /*
+ * Return from MC interrupt.
+ * TODO: Queue up the MCE event so that we can log it later.
+ */
+ MACHINE_CHECK_HANDLER_WINDUP
+ rfid
+9:
/* Deliver the machine check to host kernel in V mode. */
MACHINE_CHECK_HANDLER_WINDUP
b machine_check_pSeries
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index e11863f..c4b384d 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -84,6 +84,7 @@ _GLOBAL(power7_nap)
std r9,_MSR(r1)
std r1,PACAR1(r13)
+_GLOBAL(power7_enter_nap_mode)
#ifdef CONFIG_KVM_BOOK3S_64_HV
/* Tell KVM we're napping */
li r4,KVM_HWTHREAD_IN_NAP
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [RFC PATCH v4 05/12] powerpc/book3s: Introduce a early machine check hook in cpu_spec.
2013-09-16 12:40 [RFC PATCH v4 00/12] Machine check handling in linux host Mahesh J Salgaonkar
` (3 preceding siblings ...)
2013-09-16 12:41 ` [RFC PATCH v4 04/12] powerpc/book3s: Return from interrupt if coming from evil context Mahesh J Salgaonkar
@ 2013-09-16 12:41 ` Mahesh J Salgaonkar
2013-09-16 12:41 ` [RFC PATCH v4 06/12] powerpc/book3s: Add flush_tlb operation " Mahesh J Salgaonkar
` (6 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Mahesh J Salgaonkar @ 2013-09-16 12:41 UTC (permalink / raw)
To: linuxppc-dev, Paul Mackerras, Benjamin Herrenschmidt
Cc: Jeremy Kerr, Anton Blanchard
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
This patch adds the early machine check function pointer in cputable for
CPU specific early machine check handling. The early machine handle routine
will be called in real mode to handle SLB and TLB errors. We can not reuse
the existing machine_check hook because it is always invoked in kernel
virtual mode and we would already be in trouble if we get SLB or TLB errors.
This patch just sets up a mechanism to invoke CPU specific handler. The
subsequent patches will populate the function pointer.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
---
arch/powerpc/include/asm/cputable.h | 7 +++++++
arch/powerpc/kernel/traps.c | 7 +++++--
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 6f3887d..d8c098e 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -90,6 +90,13 @@ struct cpu_spec {
* if the error is fatal, 1 if it was fully recovered and 0 to
* pass up (not CPU originated) */
int (*machine_check)(struct pt_regs *regs);
+
+ /*
+ * Processor specific early machine check handler which is
+ * called in real mode to handle SLB and TLB errors.
+ */
+ long (*machine_check_early)(struct pt_regs *regs);
+
};
extern struct cpu_spec *cur_cpu_spec;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index e8d6bf1..8b0a946 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -292,8 +292,11 @@ void system_reset_exception(struct pt_regs *regs)
*/
long machine_check_early(struct pt_regs *regs)
{
- /* TODO: handle/decode machine check reason */
- return 0;
+ long handled = 0;
+
+ if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+ handled = cur_cpu_spec->machine_check_early(regs);
+ return handled;
}
#endif
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [RFC PATCH v4 06/12] powerpc/book3s: Add flush_tlb operation in cpu_spec.
2013-09-16 12:40 [RFC PATCH v4 00/12] Machine check handling in linux host Mahesh J Salgaonkar
` (4 preceding siblings ...)
2013-09-16 12:41 ` [RFC PATCH v4 05/12] powerpc/book3s: Introduce a early machine check hook in cpu_spec Mahesh J Salgaonkar
@ 2013-09-16 12:41 ` Mahesh J Salgaonkar
2013-09-16 12:41 ` [RFC PATCH v4 07/12] powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power7 Mahesh J Salgaonkar
` (5 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Mahesh J Salgaonkar @ 2013-09-16 12:41 UTC (permalink / raw)
To: linuxppc-dev, Paul Mackerras, Benjamin Herrenschmidt
Cc: Jeremy Kerr, Anton Blanchard
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
This patch introduces flush_tlb operation in cpu_spec structure. This will
help us to invoke appropriate CPU-side flush tlb routine. This patch
adds the foundation to invoke CPU specific flush routine for respective
architectures. Currently this patch introduce flush_tlb for p7 and p8.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
---
arch/powerpc/include/asm/cputable.h | 5 ++++
arch/powerpc/kernel/cpu_setup_power.S | 38 +++++++++++++++++++++++----------
arch/powerpc/kernel/cputable.c | 8 +++++++
arch/powerpc/kvm/book3s_hv_ras.c | 18 +++-------------
4 files changed, 44 insertions(+), 25 deletions(-)
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index d8c098e..d76e47b 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -97,6 +97,11 @@ struct cpu_spec {
*/
long (*machine_check_early)(struct pt_regs *regs);
+ /*
+ * Processor specific routine to flush tlbs.
+ */
+ void (*flush_tlb)(unsigned long inval_selector);
+
};
extern struct cpu_spec *cur_cpu_spec;
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 18b5b9c..37d1bb0 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -29,7 +29,7 @@ _GLOBAL(__setup_cpu_power7)
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
bl __init_LPCR
- bl __init_TLB
+ bl __init_tlb_power7
mtlr r11
blr
@@ -42,7 +42,7 @@ _GLOBAL(__restore_cpu_power7)
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
bl __init_LPCR
- bl __init_TLB
+ bl __init_tlb_power7
mtlr r11
blr
@@ -59,7 +59,7 @@ _GLOBAL(__setup_cpu_power8)
oris r3, r3, LPCR_AIL_3@h
bl __init_LPCR
bl __init_HFSCR
- bl __init_TLB
+ bl __init_tlb_power8
bl __init_PMU_HV
mtlr r11
blr
@@ -78,7 +78,7 @@ _GLOBAL(__restore_cpu_power8)
oris r3, r3, LPCR_AIL_3@h
bl __init_LPCR
bl __init_HFSCR
- bl __init_TLB
+ bl __init_tlb_power8
bl __init_PMU_HV
mtlr r11
blr
@@ -134,15 +134,31 @@ __init_HFSCR:
mtspr SPRN_HFSCR,r3
blr
-__init_TLB:
- /*
- * Clear the TLB using the "IS 3" form of tlbiel instruction
- * (invalidate by congruence class). P7 has 128 CCs, P8 has 512
- * so we just always do 512
- */
+/*
+ * Clear the TLB using the specified IS form of tlbiel instruction
+ * (invalidate by congruence class). P7 has 128 CCs., P8 has 512.
+ *
+ * r3 = IS field
+ */
+__init_tlb_power7:
+ li r3,0xc00 /* IS field = 0b11 */
+_GLOBAL(__flush_tlb_power7)
+ li r6,128
+ mtctr r6
+ mr r7,r3 /* IS field */
+ ptesync
+2: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 2b
+ ptesync
+1: blr
+
+__init_tlb_power8:
+ li r3,0xc00 /* IS field = 0b11 */
+_GLOBAL(__flush_tlb_power8)
li r6,512
mtctr r6
- li r7,0xc00 /* IS field = 0b11 */
+ mr r7,r3 /* IS field */
ptesync
2: tlbiel r7
addi r7,r7,0x1000
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 22973a7..cdbe115 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -71,6 +71,8 @@ extern void __restore_cpu_power7(void);
extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power8(void);
extern void __restore_cpu_a2(void);
+extern void __flush_tlb_power7(unsigned long inval_selector);
+extern void __flush_tlb_power8(unsigned long inval_selector);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_E500)
extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -440,6 +442,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
+ .flush_tlb = __flush_tlb_power7,
.platform = "power7",
},
{ /* 2.07-compliant processor, i.e. Power8 "architected" mode */
@@ -456,6 +459,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
.platform = "power8",
},
{ /* Power7 */
@@ -474,6 +478,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
+ .flush_tlb = __flush_tlb_power7,
.platform = "power7",
},
{ /* Power7+ */
@@ -492,6 +497,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
+ .flush_tlb = __flush_tlb_power7,
.platform = "power7+",
},
{ /* Power8E */
@@ -510,6 +516,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
.platform = "power8",
},
{ /* Power8 */
@@ -528,6 +535,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
.platform = "power8",
},
{ /* Cell Broadband Engine */
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index a353c48..5c427b4 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -58,18 +58,6 @@ static void reload_slb(struct kvm_vcpu *vcpu)
}
}
-/* POWER7 TLB flush */
-static void flush_tlb_power7(struct kvm_vcpu *vcpu)
-{
- unsigned long i, rb;
-
- rb = TLBIEL_INVAL_SET_LPID;
- for (i = 0; i < POWER7_TLB_SETS; ++i) {
- asm volatile("tlbiel %0" : : "r" (rb));
- rb += 1 << TLBIEL_INVAL_SET_SHIFT;
- }
-}
-
/*
* On POWER7, see if we can handle a machine check that occurred inside
* the guest in real mode, without switching to the host partition.
@@ -96,7 +84,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
}
if (dsisr & DSISR_MC_TLB_MULTI) {
- flush_tlb_power7(vcpu);
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
dsisr &= ~DSISR_MC_TLB_MULTI;
}
/* Any other errors we don't understand? */
@@ -113,7 +102,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
reload_slb(vcpu);
break;
case SRR1_MC_IFETCH_TLBMULTI:
- flush_tlb_power7(vcpu);
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
break;
default:
handled = 0;
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [RFC PATCH v4 07/12] powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power7.
2013-09-16 12:40 [RFC PATCH v4 00/12] Machine check handling in linux host Mahesh J Salgaonkar
` (5 preceding siblings ...)
2013-09-16 12:41 ` [RFC PATCH v4 06/12] powerpc/book3s: Add flush_tlb operation " Mahesh J Salgaonkar
@ 2013-09-16 12:41 ` Mahesh J Salgaonkar
2013-09-16 12:41 ` [RFC PATCH v4 08/12] powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power8 Mahesh J Salgaonkar
` (4 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Mahesh J Salgaonkar @ 2013-09-16 12:41 UTC (permalink / raw)
To: linuxppc-dev, Paul Mackerras, Benjamin Herrenschmidt
Cc: Jeremy Kerr, Anton Blanchard
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
If we get a machine check exception due to SLB or TLB errors, then flush
SLBs/TLBs and reload SLBs to recover. We do this in real mode before turning
on MMU. Otherwise we would run into nested machine checks.
If we get a machine check when we are in guest, then just flush the
SLBs and continue. This patch handles errors for power7. The next
patch will handle errors for power8
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
arch/powerpc/include/asm/bitops.h | 5 +
arch/powerpc/include/asm/mce.h | 67 +++++++++++++++++
arch/powerpc/kernel/Makefile | 1
arch/powerpc/kernel/cputable.c | 4 +
arch/powerpc/kernel/mce_power.c | 150 +++++++++++++++++++++++++++++++++++++
5 files changed, 227 insertions(+)
create mode 100644 arch/powerpc/include/asm/mce.h
create mode 100644 arch/powerpc/kernel/mce_power.c
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 910194e..a5e9a7d 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -46,6 +46,11 @@
#include <asm/asm-compat.h>
#include <asm/synch.h>
+/* PPC bit number conversion */
+#define PPC_BITLSHIFT(be) (BITS_PER_LONG - 1 - (be))
+#define PPC_BIT(bit) (1UL << PPC_BITLSHIFT(bit))
+#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+
/*
* clear_bit doesn't imply a memory barrier
*/
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
new file mode 100644
index 0000000..8157d4e
--- /dev/null
+++ b/arch/powerpc/include/asm/mce.h
@@ -0,0 +1,67 @@
+/*
+ * Machine check exception header file.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_PPC64_MCE_H__
+#define __ASM_PPC64_MCE_H__
+
+#include <linux/bitops.h>
+
+/*
+ * Machine Check bits on power7 and power8
+ */
+#define P7_SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) /* P8 too */
+
+/* SRR1 bits for machine check (On Power7 and Power8) */
+#define P7_SRR1_MC_IFETCH(srr1) ((srr1) & PPC_BITMASK(43, 45)) /* P8 too */
+
+#define P7_SRR1_MC_IFETCH_UE (0x1 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_SLB_PARITY (0x2 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_SLB_MULTIHIT (0x3 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_SLB_BOTH (0x4 << PPC_BITLSHIFT(45))
+#define P7_SRR1_MC_IFETCH_TLB_MULTIHIT (0x5 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_UE_TLB_RELOAD (0x6 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL (0x7 << PPC_BITLSHIFT(45))
+
+/* SRR1 bits for machine check (On Power8) */
+#define P8_SRR1_MC_IFETCH_ERAT_MULTIHIT (0x4 << PPC_BITLSHIFT(45))
+
+/* DSISR bits for machine check (On Power7 and Power8) */
+#define P7_DSISR_MC_UE (PPC_BIT(48)) /* P8 too */
+#define P7_DSISR_MC_UE_TABLEWALK (PPC_BIT(49)) /* P8 too */
+#define P7_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52)) /* P8 too */
+#define P7_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53)) /* P8 too */
+#define P7_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55)) /* P8 too */
+#define P7_DSISR_MC_SLB_MULTIHIT (PPC_BIT(56)) /* P8 too */
+#define P7_DSISR_MC_SLB_MULTIHIT_PARITY (PPC_BIT(57)) /* P8 too */
+
+/*
+ * DSISR bits for machine check (Power8) in addition to above.
+ * Secondary DERAT Multihit
+ */
+#define P8_DSISR_MC_ERAT_MULTIHIT_SEC (PPC_BIT(54))
+
+/* SLB error bits */
+#define P7_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_ERAT_MULTIHIT | \
+ P7_DSISR_MC_SLB_PARITY_MFSLB | \
+ P7_DSISR_MC_SLB_MULTIHIT | \
+ P7_DSISR_MC_SLB_MULTIHIT_PARITY)
+
+#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index a8619bf..a1aba53 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64) += mce_power.o
obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index cdbe115..c28cc2c 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -73,6 +73,7 @@ extern void __restore_cpu_power8(void);
extern void __restore_cpu_a2(void);
extern void __flush_tlb_power7(unsigned long inval_selector);
extern void __flush_tlb_power8(unsigned long inval_selector);
+extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_E500)
extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -443,6 +444,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
.flush_tlb = __flush_tlb_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7",
},
{ /* 2.07-compliant processor, i.e. Power8 "architected" mode */
@@ -479,6 +481,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
.flush_tlb = __flush_tlb_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7",
},
{ /* Power7+ */
@@ -498,6 +501,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
.flush_tlb = __flush_tlb_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7+",
},
{ /* Power8E */
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
new file mode 100644
index 0000000..6905473
--- /dev/null
+++ b/arch/powerpc/kernel/mce_power.c
@@ -0,0 +1,150 @@
+/*
+ * Machine check exception handling CPU-side for power7 and power8
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce_power: " fmt
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <asm/mmu.h>
+#include <asm/mce.h>
+
+/* flush SLBs and reload */
+static void flush_and_reload_slb(void)
+{
+ struct slb_shadow *slb;
+ unsigned long i, n;
+
+ /* Invalidate all SLBs */
+ asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+ /*
+ * If machine check is hit when in guest or in transition, we will
+ * only flush the SLBs and continue.
+ */
+ if (get_paca()->kvm_hstate.in_guest)
+ return;
+#endif
+
+ /* For host kernel, reload the SLBs from shadow SLB buffer. */
+ slb = get_slb_shadow();
+ if (!slb)
+ return;
+
+ n = min_t(u32, slb->persistent, SLB_MIN_SIZE);
+
+ /* Load up the SLB entries from shadow SLB */
+ for (i = 0; i < n; i++) {
+ unsigned long rb = slb->save_area[i].esid;
+ unsigned long rs = slb->save_area[i].vsid;
+
+ rb = (rb & ~0xFFFul) | i;
+ asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
+ }
+}
+
+static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
+{
+ long handled = 1;
+
+ /*
+ * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
+ * reset the error bits whenever we handle them so that at the end
+ * we can check whether we handled all of them or not.
+ * */
+ if (dsisr & slb_error_bits) {
+ flush_and_reload_slb();
+ /* reset error bits */
+ dsisr &= ~(slb_error_bits);
+ }
+ if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
+ /* reset error bits */
+ dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
+ }
+ /* Any other errors we don't understand? */
+ if (dsisr & 0xffffffffUL)
+ handled = 0;
+
+ return handled;
+}
+
+static long mce_handle_derror_p7(uint64_t dsisr)
+{
+ return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
+}
+
+static long mce_handle_common_ierror(uint64_t srr1)
+{
+ long handled = 0;
+
+ switch (P7_SRR1_MC_IFETCH(srr1)) {
+ case 0:
+ break;
+ case P7_SRR1_MC_IFETCH_SLB_PARITY:
+ case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
+ /* flush and reload SLBs for SLB errors. */
+ flush_and_reload_slb();
+ handled = 1;
+ break;
+ case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
+ handled = 1;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return handled;
+}
+
+static long mce_handle_ierror_p7(uint64_t srr1)
+{
+ long handled = 0;
+
+ handled = mce_handle_common_ierror(srr1);
+
+ if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
+ flush_and_reload_slb();
+ handled = 1;
+ }
+ return handled;
+}
+
+long __machine_check_early_realmode_p7(struct pt_regs *regs)
+{
+ uint64_t srr1;
+ long handled = 1;
+
+ srr1 = regs->msr;
+
+ if (P7_SRR1_MC_LOADSTORE(srr1))
+ handled = mce_handle_derror_p7(regs->dsisr);
+ else
+ handled = mce_handle_ierror_p7(srr1);
+
+ /* TODO: Decode machine check reason. */
+ return handled;
+}
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [RFC PATCH v4 08/12] powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power8.
2013-09-16 12:40 [RFC PATCH v4 00/12] Machine check handling in linux host Mahesh J Salgaonkar
` (6 preceding siblings ...)
2013-09-16 12:41 ` [RFC PATCH v4 07/12] powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power7 Mahesh J Salgaonkar
@ 2013-09-16 12:41 ` Mahesh J Salgaonkar
2013-09-16 12:42 ` [RFC PATCH v4 09/12] powerpc/book3s: Decode and save machine check event Mahesh J Salgaonkar
` (3 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Mahesh J Salgaonkar @ 2013-09-16 12:41 UTC (permalink / raw)
To: linuxppc-dev, Paul Mackerras, Benjamin Herrenschmidt
Cc: Jeremy Kerr, Anton Blanchard
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
This patch handles the memory errors on power8. If we get a machine check
exception due to SLB or TLB errors, then flush SLBs/TLBs and reload SLBs to
recover.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
---
arch/powerpc/include/asm/mce.h | 3 +++
arch/powerpc/kernel/cputable.c | 4 ++++
arch/powerpc/kernel/mce_power.c | 34 ++++++++++++++++++++++++++++++++++
3 files changed, 41 insertions(+)
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 8157d4e..e3ffa82 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -64,4 +64,7 @@
P7_DSISR_MC_SLB_MULTIHIT | \
P7_DSISR_MC_SLB_MULTIHIT_PARITY)
+#define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \
+ P8_DSISR_MC_ERAT_MULTIHIT_SEC)
+
#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index c28cc2c..0195358 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -74,6 +74,7 @@ extern void __restore_cpu_a2(void);
extern void __flush_tlb_power7(unsigned long inval_selector);
extern void __flush_tlb_power8(unsigned long inval_selector);
extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_E500)
extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -462,6 +463,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
.flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
{ /* Power7 */
@@ -521,6 +523,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
.flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
{ /* Power8 */
@@ -540,6 +543,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
.flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
{ /* Cell Broadband Engine */
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 6905473..60a217f 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -148,3 +148,37 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
/* TODO: Decode machine check reason. */
return handled;
}
+
+static long mce_handle_ierror_p8(uint64_t srr1)
+{
+ long handled = 0;
+
+ handled = mce_handle_common_ierror(srr1);
+
+ if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
+ flush_and_reload_slb();
+ handled = 1;
+ }
+ return handled;
+}
+
+static long mce_handle_derror_p8(uint64_t dsisr)
+{
+ return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
+}
+
+long __machine_check_early_realmode_p8(struct pt_regs *regs)
+{
+ uint64_t srr1;
+ long handled = 1;
+
+ srr1 = regs->msr;
+
+ if (P7_SRR1_MC_LOADSTORE(srr1))
+ handled = mce_handle_derror_p8(regs->dsisr);
+ else
+ handled = mce_handle_ierror_p8(srr1);
+
+ /* TODO: Decode machine check reason. */
+ return handled;
+}
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [RFC PATCH v4 09/12] powerpc/book3s: Decode and save machine check event.
2013-09-16 12:40 [RFC PATCH v4 00/12] Machine check handling in linux host Mahesh J Salgaonkar
` (7 preceding siblings ...)
2013-09-16 12:41 ` [RFC PATCH v4 08/12] powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power8 Mahesh J Salgaonkar
@ 2013-09-16 12:42 ` Mahesh J Salgaonkar
2013-09-16 12:42 ` [RFC PATCH v4 10/12] powerpc/book3s: Queue up and process delayed MCE events Mahesh J Salgaonkar
` (2 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Mahesh J Salgaonkar @ 2013-09-16 12:42 UTC (permalink / raw)
To: linuxppc-dev, Paul Mackerras, Benjamin Herrenschmidt
Cc: Jeremy Kerr, Anton Blanchard
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Now that we handle machine check in linux, the MCE decoding should also
take place in linux host. This info is crucial to log before we go down
in case we can not handle the machine check errors. This patch decodes
and populates a machine check event which contain high level meaning full
MCE information.
We do this in real mode C code with ME bit on. The MCE information is still
available on emergency stack (in pt_regs structure format). Even if we take
another exception at this point the MCE early handler will allocate a new
stack frame on top of current one. So when we return back here we still have
our MCE information safe on current stack.
We use per cpu buffer to save high level MCE information. Each per cpu buffer
is an array of machine check event structure indexed by per cpu counter
mce_nest_count. The mce_nest_count is incremented every time we enter
machine check early handler in real mode to get the current free slot
(index = mce_nest_count - 1). The mce_nest_count is decremented once the
MCE info is consumed by virtual mode machine exception handler.
This patch provides save_mce_event(), get_mce_event() and release_mce_event()
generic routines that can be used by machine check handlers to populate and
retrieve the event. The routine release_mce_event() will free the event slot so
that it can be reused. Caller can invoke get_mce_event() with a release flag
either to release the event slot immediately OR keep it so that it can be
fetched again. The event slot can be also released anytime by invoking
release_mce_event().
This patch also updates kvm code to invoke get_mce_event to retrieve generic
mce event rather than paca->opal_mce_evt.
The KVM code always calls get_mce_event() with release flags set to false so
that event is available for linus host machine
If machine check occurs while we are in guest, KVM tries to handle the error.
If KVM is able to handle MC error successfully, it enters the guest and
delivers the machine check to guest. If KVM is not able to handle MC error, it
exists the guest and passes the control to linux host machine check handler
which then logs MC event and decides how to handle it in linux host. In failure
case, KVM needs to make sure that the MC event is available for linux host to
consume. Hence KVM always calls get_mce_event() with release flags set to false
and later it invokes release_mce_event() only if it succeeds to handle error.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/mce.h | 124 +++++++++++++++++++++++++
arch/powerpc/kernel/Makefile | 2
arch/powerpc/kernel/mce.c | 164 +++++++++++++++++++++++++++++++++
arch/powerpc/kernel/mce_power.c | 116 ++++++++++++++++++++++-
arch/powerpc/kvm/book3s_hv_ras.c | 32 ++++--
arch/powerpc/platforms/powernv/opal.c | 35 +++----
6 files changed, 434 insertions(+), 39 deletions(-)
create mode 100644 arch/powerpc/kernel/mce.c
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index e3ffa82..87cad2a 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -66,5 +66,129 @@
#define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \
P8_DSISR_MC_ERAT_MULTIHIT_SEC)
+enum MCE_Version {
+ MCE_V1 = 1,
+};
+
+enum MCE_Severity {
+ MCE_SEV_NO_ERROR = 0,
+ MCE_SEV_WARNING = 1,
+ MCE_SEV_ERROR_SYNC = 2,
+ MCE_SEV_FATAL = 3,
+};
+
+enum MCE_Disposition {
+ MCE_DISPOSITION_RECOVERED = 0,
+ MCE_DISPOSITION_NOT_RECOVERED = 1,
+};
+
+enum MCE_Initiator {
+ MCE_INITIATOR_UNKNOWN = 0,
+ MCE_INITIATOR_CPU = 1,
+};
+
+enum MCE_ErrorType {
+ MCE_ERROR_TYPE_UNKNOWN = 0,
+ MCE_ERROR_TYPE_UE = 1,
+ MCE_ERROR_TYPE_SLB = 2,
+ MCE_ERROR_TYPE_ERAT = 3,
+ MCE_ERROR_TYPE_TLB = 4,
+};
+
+enum MCE_UeErrorType {
+ MCE_UE_ERROR_INDETERMINATE = 0,
+ MCE_UE_ERROR_IFETCH = 1,
+ MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
+ MCE_UE_ERROR_LOAD_STORE = 3,
+ MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4,
+};
+
+enum MCE_SlbErrorType {
+ MCE_SLB_ERROR_INDETERMINATE = 0,
+ MCE_SLB_ERROR_PARITY = 1,
+ MCE_SLB_ERROR_MULTIHIT = 2,
+};
+
+enum MCE_EratErrorType {
+ MCE_ERAT_ERROR_INDETERMINATE = 0,
+ MCE_ERAT_ERROR_PARITY = 1,
+ MCE_ERAT_ERROR_MULTIHIT = 2,
+};
+
+enum MCE_TlbErrorType {
+ MCE_TLB_ERROR_INDETERMINATE = 0,
+ MCE_TLB_ERROR_PARITY = 1,
+ MCE_TLB_ERROR_MULTIHIT = 2,
+};
+
+struct machine_check_event {
+ enum MCE_Version version:8; /* 0x00 */
+ uint8_t in_use; /* 0x01 */
+ enum MCE_Severity severity:8; /* 0x02 */
+ enum MCE_Initiator initiator:8; /* 0x03 */
+ enum MCE_ErrorType error_type:8; /* 0x04 */
+ enum MCE_Disposition disposition:8; /* 0x05 */
+ uint8_t reserved_1[2]; /* 0x06 */
+ uint64_t gpr3; /* 0x08 */
+ uint64_t srr0; /* 0x10 */
+ uint64_t srr1; /* 0x18 */
+ union { /* 0x20 */
+ struct {
+ enum MCE_UeErrorType ue_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t physical_address_provided;
+ uint8_t reserved_1[5];
+ uint64_t effective_address;
+ uint64_t physical_address;
+ uint8_t reserved_2[8];
+ } ue_error;
+
+ struct {
+ enum MCE_SlbErrorType slb_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint8_t reserved_2[16];
+ } slb_error;
+
+ struct {
+ enum MCE_EratErrorType erat_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint8_t reserved_2[16];
+ } erat_error;
+
+ struct {
+ enum MCE_TlbErrorType tlb_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint8_t reserved_2[16];
+ } tlb_error;
+ } u;
+};
+
+struct mce_error_info {
+ enum MCE_ErrorType error_type:8;
+ union {
+ enum MCE_UeErrorType ue_error_type:8;
+ enum MCE_SlbErrorType slb_error_type:8;
+ enum MCE_EratErrorType erat_error_type:8;
+ enum MCE_TlbErrorType tlb_error_type:8;
+ } u;
+ uint8_t reserved[2];
+};
+
+#define MAX_MC_EVT 100
+
+/* Release flags for get_mce_event() */
+#define MCE_EVENT_RELEASE true
+#define MCE_EVENT_DONTRELEASE false
+
+extern void save_mce_event(struct pt_regs *regs, long handled,
+ struct mce_error_info *mce_err, uint64_t addr);
+extern int get_mce_event(struct machine_check_event *mce, bool release);
+extern void release_mce_event(void);
#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index a1aba53..c3ae108 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
-obj-$(CONFIG_PPC_BOOK3S_64) += mce_power.o
+obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
new file mode 100644
index 0000000..aeecdf1
--- /dev/null
+++ b/arch/powerpc/kernel/mce.c
@@ -0,0 +1,164 @@
+/*
+ * Machine check exception handling.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce: " fmt
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <asm/mce.h>
+
+static DEFINE_PER_CPU(int, mce_nest_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
+
+static void mce_set_error_info(struct machine_check_event *mce,
+ struct mce_error_info *mce_err)
+{
+ mce->error_type = mce_err->error_type;
+ switch (mce_err->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
+ break;
+ case MCE_ERROR_TYPE_UNKNOWN:
+ default:
+ break;
+ }
+}
+
+/*
+ * Decode and save high level MCE information into per cpu buffer which
+ * is an array of machine_check_event structure.
+ */
+void save_mce_event(struct pt_regs *regs, long handled,
+ struct mce_error_info *mce_err,
+ uint64_t addr)
+{
+ uint64_t srr1;
+ int index = __get_cpu_var(mce_nest_count)++;
+ struct machine_check_event *mce = &__get_cpu_var(mce_event[index]);
+
+ /*
+ * Return if we don't have enough space to log mce event.
+ * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
+ * the check below will stop buffer overrun.
+ */
+ if (index >= MAX_MC_EVT)
+ return;
+
+ /* Populate generic machine check info */
+ mce->version = MCE_V1;
+ mce->srr0 = regs->nip;
+ mce->srr1 = regs->msr;
+ mce->gpr3 = regs->gpr[3];
+ mce->in_use = 1;
+
+ mce->initiator = MCE_INITIATOR_CPU;
+ if (handled)
+ mce->disposition = MCE_DISPOSITION_RECOVERED;
+ else
+ mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
+ mce->severity = MCE_SEV_ERROR_SYNC;
+
+ srr1 = regs->msr;
+
+ /*
+ * Populate the mce error_type and type-specific error_type.
+ */
+ mce_set_error_info(mce, mce_err);
+
+ if (!addr)
+ return;
+
+ if (mce->error_type == MCE_ERROR_TYPE_TLB) {
+ mce->u.tlb_error.effective_address_provided = true;
+ mce->u.tlb_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
+ mce->u.slb_error.effective_address_provided = true;
+ mce->u.slb_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
+ mce->u.erat_error.effective_address_provided = true;
+ mce->u.erat_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
+ mce->u.ue_error.effective_address_provided = true;
+ mce->u.ue_error.effective_address = addr;
+ }
+ return;
+}
+
+/*
+ * get_mce_event:
+ * mce Pointer to machine_check_event structure to be filled.
+ * release Flag to indicate whether to free the event slot or not.
+ * 0 <= do not release the mce event. Caller will invoke
+ * release_mce_event() once event has been consumed.
+ * 1 <= release the slot.
+ *
+ * return 1 = success
+ * 0 = failure
+ *
+ * get_mce_event() will be called by platform specific machine check
+ * handle routine and in KVM.
+ * When we call get_mce_event(), we are still in interrupt context and
+ * preemption will not be scheduled until ret_from_expect() routine
+ * is called.
+ */
+int get_mce_event(struct machine_check_event *mce, bool release)
+{
+ int index = __get_cpu_var(mce_nest_count) - 1;
+ struct machine_check_event *mc_evt;
+ int ret = 0;
+
+ /* Sanity check */
+ if (index < 0)
+ return ret;
+
+ /* Check if we have MCE info to process. */
+ if (index < MAX_MC_EVT) {
+ mc_evt = &__get_cpu_var(mce_event[index]);
+ /* Copy the event structure and release the original */
+ if (mce)
+ *mce = *mc_evt;
+ if (release)
+ mc_evt->in_use = 0;
+ ret = 1;
+ }
+ /* Decrement the count to free the slot. */
+ if (release)
+ __get_cpu_var(mce_nest_count)--;
+
+ return ret;
+}
+
+void release_mce_event(void)
+{
+ get_mce_event(NULL, true);
+}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 60a217f..b36e777 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -133,22 +133,116 @@ static long mce_handle_ierror_p7(uint64_t srr1)
return handled;
}
+static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
+{
+ switch (P7_SRR1_MC_IFETCH(srr1)) {
+ case P7_SRR1_MC_IFETCH_SLB_PARITY:
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ break;
+ case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ break;
+ case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_TLB;
+ mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ break;
+ case P7_SRR1_MC_IFETCH_UE:
+ case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
+ break;
+ case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type =
+ MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+ break;
+ }
+}
+
+static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
+{
+ mce_get_common_ierror(mce_err, srr1);
+ if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ }
+}
+
+static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
+{
+ if (dsisr & P7_DSISR_MC_UE) {
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+ } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type =
+ MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+ } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_TLB;
+ mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ }
+}
+
long __machine_check_early_realmode_p7(struct pt_regs *regs)
{
- uint64_t srr1;
+ uint64_t srr1, addr;
long handled = 1;
+ struct mce_error_info mce_error_info = { 0 };
srr1 = regs->msr;
- if (P7_SRR1_MC_LOADSTORE(srr1))
+ /*
+ * Handle memory errors depending whether this was a load/store or
+ * ifetch exception. Also, populate the mce error_type and
+ * type-specific error_type from either SRR1 or DSISR, depending
+ * whether this was a load/store or ifetch exception
+ */
+ if (P7_SRR1_MC_LOADSTORE(srr1)) {
handled = mce_handle_derror_p7(regs->dsisr);
- else
+ mce_get_derror_p7(&mce_error_info, regs->dsisr);
+ addr = regs->dar;
+ } else {
handled = mce_handle_ierror_p7(srr1);
+ mce_get_ierror_p7(&mce_error_info, srr1);
+ addr = regs->nip;
+ }
- /* TODO: Decode machine check reason. */
+ save_mce_event(regs, handled, &mce_error_info, addr);
return handled;
}
+static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
+{
+ mce_get_common_ierror(mce_err, srr1);
+ if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ }
+}
+
+static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
+{
+ mce_get_derror_p7(mce_err, dsisr);
+ if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ }
+}
+
static long mce_handle_ierror_p8(uint64_t srr1)
{
long handled = 0;
@@ -169,16 +263,22 @@ static long mce_handle_derror_p8(uint64_t dsisr)
long __machine_check_early_realmode_p8(struct pt_regs *regs)
{
- uint64_t srr1;
+ uint64_t srr1, addr;
long handled = 1;
+ struct mce_error_info mce_error_info = { 0 };
srr1 = regs->msr;
- if (P7_SRR1_MC_LOADSTORE(srr1))
+ if (P7_SRR1_MC_LOADSTORE(srr1)) {
handled = mce_handle_derror_p8(regs->dsisr);
- else
+ mce_get_derror_p8(&mce_error_info, regs->dsisr);
+ addr = regs->dar;
+ } else {
handled = mce_handle_ierror_p8(srr1);
+ mce_get_ierror_p8(&mce_error_info, srr1);
+ addr = regs->nip;
+ }
- /* TODO: Decode machine check reason. */
+ save_mce_event(regs, handled, &mce_error_info, addr);
return handled;
}
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 5c427b4..768a9f9 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -12,6 +12,7 @@
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <asm/opal.h>
+#include <asm/mce.h>
/* SRR1 bits for machine check on POWER7 */
#define SRR1_MC_LDSTERR (1ul << (63-42))
@@ -67,9 +68,7 @@ static void reload_slb(struct kvm_vcpu *vcpu)
static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
{
unsigned long srr1 = vcpu->arch.shregs.msr;
-#ifdef CONFIG_PPC_POWERNV
- struct opal_machine_check_event *opal_evt;
-#endif
+ struct machine_check_event mce_evt;
long handled = 1;
if (srr1 & SRR1_MC_LDSTERR) {
@@ -109,22 +108,31 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
handled = 0;
}
-#ifdef CONFIG_PPC_POWERNV
/*
- * See if OPAL has already handled the condition.
- * We assume that if the condition is recovered then OPAL
+ * See if we have already handled the condition in the linux host.
+ * We assume that if the condition is recovered then linux host
* will have generated an error log event that we will pick
* up and log later.
+ * Don't release mce event now. In case if condition is not
+ * recovered we do guest exit and go back to linux host machine
+ * check handler. Hence we need make sure that current mce event
+ * is available for linux host to consume.
*/
- opal_evt = local_paca->opal_mc_evt;
- if (opal_evt->version == OpalMCE_V1 &&
- (opal_evt->severity == OpalMCE_SEV_NO_ERROR ||
- opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED))
+ if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
+ goto out;
+
+ if (mce_evt.version == MCE_V1 &&
+ (mce_evt.severity == MCE_SEV_NO_ERROR ||
+ mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
handled = 1;
+out:
+ /*
+ * If we have handled the error, then release the mce event because
+ * we will be delivering machine check to guest.
+ */
if (handled)
- opal_evt->in_use = 0;
-#endif
+ release_mce_event();
return handled;
}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 106301f..bcbbcdc 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -19,6 +19,7 @@
#include <linux/slab.h>
#include <asm/opal.h>
#include <asm/firmware.h>
+#include <asm/mce.h>
#include "powernv.h"
@@ -245,8 +246,7 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
int opal_machine_check(struct pt_regs *regs)
{
- struct opal_machine_check_event *opal_evt = get_paca()->opal_mc_evt;
- struct opal_machine_check_event evt;
+ struct machine_check_event evt;
const char *level, *sevstr, *subtype;
static const char *opal_mc_ue_types[] = {
"Indeterminate",
@@ -271,30 +271,29 @@ int opal_machine_check(struct pt_regs *regs)
"Multihit",
};
- /* Copy the event structure and release the original */
- evt = *opal_evt;
- opal_evt->in_use = 0;
+ if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+ return 0;
/* Print things out */
- if (evt.version != OpalMCE_V1) {
+ if (evt.version != MCE_V1) {
pr_err("Machine Check Exception, Unknown event version %d !\n",
evt.version);
return 0;
}
switch(evt.severity) {
- case OpalMCE_SEV_NO_ERROR:
+ case MCE_SEV_NO_ERROR:
level = KERN_INFO;
sevstr = "Harmless";
break;
- case OpalMCE_SEV_WARNING:
+ case MCE_SEV_WARNING:
level = KERN_WARNING;
sevstr = "";
break;
- case OpalMCE_SEV_ERROR_SYNC:
+ case MCE_SEV_ERROR_SYNC:
level = KERN_ERR;
sevstr = "Severe";
break;
- case OpalMCE_SEV_FATAL:
+ case MCE_SEV_FATAL:
default:
level = KERN_ERR;
sevstr = "Fatal";
@@ -302,12 +301,12 @@ int opal_machine_check(struct pt_regs *regs)
}
printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
- evt.disposition == OpalMCE_DISPOSITION_RECOVERED ?
+ evt.disposition == MCE_DISPOSITION_RECOVERED ?
"Recovered" : "[Not recovered");
printk("%s Initiator: %s\n", level,
- evt.initiator == OpalMCE_INITIATOR_CPU ? "CPU" : "Unknown");
+ evt.initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
switch(evt.error_type) {
- case OpalMCE_ERROR_TYPE_UE:
+ case MCE_ERROR_TYPE_UE:
subtype = evt.u.ue_error.ue_error_type <
ARRAY_SIZE(opal_mc_ue_types) ?
opal_mc_ue_types[evt.u.ue_error.ue_error_type]
@@ -320,7 +319,7 @@ int opal_machine_check(struct pt_regs *regs)
printk("%s Physial address: %016llx\n",
level, evt.u.ue_error.physical_address);
break;
- case OpalMCE_ERROR_TYPE_SLB:
+ case MCE_ERROR_TYPE_SLB:
subtype = evt.u.slb_error.slb_error_type <
ARRAY_SIZE(opal_mc_slb_types) ?
opal_mc_slb_types[evt.u.slb_error.slb_error_type]
@@ -330,7 +329,7 @@ int opal_machine_check(struct pt_regs *regs)
printk("%s Effective address: %016llx\n",
level, evt.u.slb_error.effective_address);
break;
- case OpalMCE_ERROR_TYPE_ERAT:
+ case MCE_ERROR_TYPE_ERAT:
subtype = evt.u.erat_error.erat_error_type <
ARRAY_SIZE(opal_mc_erat_types) ?
opal_mc_erat_types[evt.u.erat_error.erat_error_type]
@@ -340,7 +339,7 @@ int opal_machine_check(struct pt_regs *regs)
printk("%s Effective address: %016llx\n",
level, evt.u.erat_error.effective_address);
break;
- case OpalMCE_ERROR_TYPE_TLB:
+ case MCE_ERROR_TYPE_TLB:
subtype = evt.u.tlb_error.tlb_error_type <
ARRAY_SIZE(opal_mc_tlb_types) ?
opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type]
@@ -351,11 +350,11 @@ int opal_machine_check(struct pt_regs *regs)
level, evt.u.tlb_error.effective_address);
break;
default:
- case OpalMCE_ERROR_TYPE_UNKNOWN:
+ case MCE_ERROR_TYPE_UNKNOWN:
printk("%s Error type: Unknown\n", level);
break;
}
- return evt.severity == OpalMCE_SEV_FATAL ? 0 : 1;
+ return evt.severity == MCE_SEV_FATAL ? 0 : 1;
}
static irqreturn_t opal_interrupt(int irq, void *data)
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [RFC PATCH v4 10/12] powerpc/book3s: Queue up and process delayed MCE events.
2013-09-16 12:40 [RFC PATCH v4 00/12] Machine check handling in linux host Mahesh J Salgaonkar
` (8 preceding siblings ...)
2013-09-16 12:42 ` [RFC PATCH v4 09/12] powerpc/book3s: Decode and save machine check event Mahesh J Salgaonkar
@ 2013-09-16 12:42 ` Mahesh J Salgaonkar
2013-09-16 12:42 ` [RFC PATCH v4 11/12] powerpc/powernv: Remove machine check handling in OPAL Mahesh J Salgaonkar
2013-09-16 12:42 ` [RFC PATCH v4 12/12] powerpc/powernv: Machine check exception handling Mahesh J Salgaonkar
11 siblings, 0 replies; 13+ messages in thread
From: Mahesh J Salgaonkar @ 2013-09-16 12:42 UTC (permalink / raw)
To: linuxppc-dev, Paul Mackerras, Benjamin Herrenschmidt
Cc: Jeremy Kerr, Anton Blanchard
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
When machine check real mode handler can not continue into host kernel
in V mode, it returns from the interrupt and we loose MCE event which
never gets logged. In such a situation queue up the MCE event so that
we can log it later when we get back into host kernel with r1 pointing to
kernel stack e.g. during syscall exit.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/mce.h | 3 +
arch/powerpc/kernel/entry_64.S | 5 +
arch/powerpc/kernel/exceptions-64s.S | 7 +-
arch/powerpc/kernel/mce.c | 154 +++++++++++++++++++++++++++++++++
arch/powerpc/platforms/powernv/opal.c | 97 ---------------------
5 files changed, 168 insertions(+), 98 deletions(-)
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 87cad2a..3276b40 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -190,5 +190,8 @@ extern void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err, uint64_t addr);
extern int get_mce_event(struct machine_check_event *mce, bool release);
extern void release_mce_event(void);
+extern void machine_check_queue_event(void);
+extern void machine_check_process_queued_event(void);
+extern void machine_check_print_event_info(struct machine_check_event *evt);
#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2bd0b88..71bcd41 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -183,6 +183,11 @@ syscall_exit:
bl .do_show_syscall_exit
ld r3,RESULT(r1)
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FTR_SECTION
+ bl .machine_check_process_queued_event
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
CURRENT_THREAD_INFO(r12, r1)
ld r8,_MSR(r1)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 0a92ba4..ce57cec 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -824,7 +824,8 @@ BEGIN_FTR_SECTION
/* Supervisor state loss */
li r0,1
stb r0,PACA_NAPSTATELOST(r13)
-3: MACHINE_CHECK_HANDLER_WINDUP
+3: bl .machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
GET_PACA(r13)
ld r1,PACAR1(r13)
b .power7_enter_nap_mode
@@ -864,8 +865,10 @@ BEGIN_FTR_SECTION
2:
/*
* Return from MC interrupt.
- * TODO: Queue up the MCE event so that we can log it later.
+ * Queue up the MCE event so that we can log it later, while
+ * returning from kernel or opal call.
*/
+ bl .machine_check_queue_event
MACHINE_CHECK_HANDLER_WINDUP
rfid
9:
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index aeecdf1..1cca4b6 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -31,6 +31,10 @@
static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
+/* Queue for delayed MCE events. */
+static DEFINE_PER_CPU(int, mce_queue_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
+
static void mce_set_error_info(struct machine_check_event *mce,
struct mce_error_info *mce_err)
{
@@ -162,3 +166,153 @@ void release_mce_event(void)
{
get_mce_event(NULL, true);
}
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+void machine_check_queue_event(void)
+{
+ int index;
+ struct machine_check_event evt;
+
+ if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+ return;
+
+ index = __get_cpu_var(mce_queue_count)++;
+ /* If queue is full, just return for now. */
+ if (index >= MAX_MC_EVT) {
+ __get_cpu_var(mce_queue_count)--;
+ return;
+ }
+ __get_cpu_var(mce_event_queue[index]) = evt;
+}
+
+/*
+ * process pending MCE event from the mce event queue. This function will be
+ * called during syscall exit.
+ */
+void machine_check_process_queued_event(void)
+{
+ int index;
+
+ preempt_disable();
+ /*
+ * For now just print it to console.
+ * TODO: log this error event to FSP or nvram.
+ */
+ while (__get_cpu_var(mce_queue_count) > 0) {
+ index = __get_cpu_var(mce_queue_count) - 1;
+ machine_check_print_event_info(
+ &__get_cpu_var(mce_event_queue[index]));
+ __get_cpu_var(mce_queue_count)--;
+ }
+ preempt_enable();
+}
+
+void machine_check_print_event_info(struct machine_check_event *evt)
+{
+ const char *level, *sevstr, *subtype;
+ static const char *mc_ue_types[] = {
+ "Indeterminate",
+ "Instruction fetch",
+ "Page table walk ifetch",
+ "Load/Store",
+ "Page table walk Load/Store",
+ };
+ static const char *mc_slb_types[] = {
+ "Indeterminate",
+ "Parity",
+ "Multihit",
+ };
+ static const char *mc_erat_types[] = {
+ "Indeterminate",
+ "Parity",
+ "Multihit",
+ };
+ static const char *mc_tlb_types[] = {
+ "Indeterminate",
+ "Parity",
+ "Multihit",
+ };
+
+ /* Print things out */
+ if (evt->version != MCE_V1) {
+ pr_err("Machine Check Exception, Unknown event version %d !\n",
+ evt->version);
+ return;
+ }
+ switch(evt->severity) {
+ case MCE_SEV_NO_ERROR:
+ level = KERN_INFO;
+ sevstr = "Harmless";
+ break;
+ case MCE_SEV_WARNING:
+ level = KERN_WARNING;
+ sevstr = "";
+ break;
+ case MCE_SEV_ERROR_SYNC:
+ level = KERN_ERR;
+ sevstr = "Severe";
+ break;
+ case MCE_SEV_FATAL:
+ default:
+ level = KERN_ERR;
+ sevstr = "Fatal";
+ break;
+ }
+
+ printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
+ evt->disposition == MCE_DISPOSITION_RECOVERED ?
+ "Recovered" : "[Not recovered");
+ printk("%s Initiator: %s\n", level,
+ evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
+ switch(evt->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ subtype = evt->u.ue_error.ue_error_type <
+ ARRAY_SIZE(mc_ue_types) ?
+ mc_ue_types[evt->u.ue_error.ue_error_type]
+ : "Unknown";
+ printk("%s Error type: UE [%s]\n", level, subtype);
+ if (evt->u.ue_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.ue_error.effective_address);
+ if (evt->u.ue_error.physical_address_provided)
+ printk("%s Physial address: %016llx\n",
+ level, evt->u.ue_error.physical_address);
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ subtype = evt->u.slb_error.slb_error_type <
+ ARRAY_SIZE(mc_slb_types) ?
+ mc_slb_types[evt->u.slb_error.slb_error_type]
+ : "Unknown";
+ printk("%s Error type: SLB [%s]\n", level, subtype);
+ if (evt->u.slb_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.slb_error.effective_address);
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ subtype = evt->u.erat_error.erat_error_type <
+ ARRAY_SIZE(mc_erat_types) ?
+ mc_erat_types[evt->u.erat_error.erat_error_type]
+ : "Unknown";
+ printk("%s Error type: ERAT [%s]\n", level, subtype);
+ if (evt->u.erat_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.erat_error.effective_address);
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ subtype = evt->u.tlb_error.tlb_error_type <
+ ARRAY_SIZE(mc_tlb_types) ?
+ mc_tlb_types[evt->u.tlb_error.tlb_error_type]
+ : "Unknown";
+ printk("%s Error type: TLB [%s]\n", level, subtype);
+ if (evt->u.tlb_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.tlb_error.effective_address);
+ break;
+ default:
+ case MCE_ERROR_TYPE_UNKNOWN:
+ printk("%s Error type: Unknown\n", level);
+ break;
+ }
+}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index bcbbcdc..f789514 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -247,29 +247,6 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
int opal_machine_check(struct pt_regs *regs)
{
struct machine_check_event evt;
- const char *level, *sevstr, *subtype;
- static const char *opal_mc_ue_types[] = {
- "Indeterminate",
- "Instruction fetch",
- "Page table walk ifetch",
- "Load/Store",
- "Page table walk Load/Store",
- };
- static const char *opal_mc_slb_types[] = {
- "Indeterminate",
- "Parity",
- "Multihit",
- };
- static const char *opal_mc_erat_types[] = {
- "Indeterminate",
- "Parity",
- "Multihit",
- };
- static const char *opal_mc_tlb_types[] = {
- "Indeterminate",
- "Parity",
- "Multihit",
- };
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return 0;
@@ -280,80 +257,8 @@ int opal_machine_check(struct pt_regs *regs)
evt.version);
return 0;
}
- switch(evt.severity) {
- case MCE_SEV_NO_ERROR:
- level = KERN_INFO;
- sevstr = "Harmless";
- break;
- case MCE_SEV_WARNING:
- level = KERN_WARNING;
- sevstr = "";
- break;
- case MCE_SEV_ERROR_SYNC:
- level = KERN_ERR;
- sevstr = "Severe";
- break;
- case MCE_SEV_FATAL:
- default:
- level = KERN_ERR;
- sevstr = "Fatal";
- break;
- }
+ machine_check_print_event_info(&evt);
- printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
- evt.disposition == MCE_DISPOSITION_RECOVERED ?
- "Recovered" : "[Not recovered");
- printk("%s Initiator: %s\n", level,
- evt.initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
- switch(evt.error_type) {
- case MCE_ERROR_TYPE_UE:
- subtype = evt.u.ue_error.ue_error_type <
- ARRAY_SIZE(opal_mc_ue_types) ?
- opal_mc_ue_types[evt.u.ue_error.ue_error_type]
- : "Unknown";
- printk("%s Error type: UE [%s]\n", level, subtype);
- if (evt.u.ue_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt.u.ue_error.effective_address);
- if (evt.u.ue_error.physical_address_provided)
- printk("%s Physial address: %016llx\n",
- level, evt.u.ue_error.physical_address);
- break;
- case MCE_ERROR_TYPE_SLB:
- subtype = evt.u.slb_error.slb_error_type <
- ARRAY_SIZE(opal_mc_slb_types) ?
- opal_mc_slb_types[evt.u.slb_error.slb_error_type]
- : "Unknown";
- printk("%s Error type: SLB [%s]\n", level, subtype);
- if (evt.u.slb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt.u.slb_error.effective_address);
- break;
- case MCE_ERROR_TYPE_ERAT:
- subtype = evt.u.erat_error.erat_error_type <
- ARRAY_SIZE(opal_mc_erat_types) ?
- opal_mc_erat_types[evt.u.erat_error.erat_error_type]
- : "Unknown";
- printk("%s Error type: ERAT [%s]\n", level, subtype);
- if (evt.u.erat_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt.u.erat_error.effective_address);
- break;
- case MCE_ERROR_TYPE_TLB:
- subtype = evt.u.tlb_error.tlb_error_type <
- ARRAY_SIZE(opal_mc_tlb_types) ?
- opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type]
- : "Unknown";
- printk("%s Error type: TLB [%s]\n", level, subtype);
- if (evt.u.tlb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt.u.tlb_error.effective_address);
- break;
- default:
- case MCE_ERROR_TYPE_UNKNOWN:
- printk("%s Error type: Unknown\n", level);
- break;
- }
return evt.severity == MCE_SEV_FATAL ? 0 : 1;
}
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [RFC PATCH v4 11/12] powerpc/powernv: Remove machine check handling in OPAL.
2013-09-16 12:40 [RFC PATCH v4 00/12] Machine check handling in linux host Mahesh J Salgaonkar
` (9 preceding siblings ...)
2013-09-16 12:42 ` [RFC PATCH v4 10/12] powerpc/book3s: Queue up and process delayed MCE events Mahesh J Salgaonkar
@ 2013-09-16 12:42 ` Mahesh J Salgaonkar
2013-09-16 12:42 ` [RFC PATCH v4 12/12] powerpc/powernv: Machine check exception handling Mahesh J Salgaonkar
11 siblings, 0 replies; 13+ messages in thread
From: Mahesh J Salgaonkar @ 2013-09-16 12:42 UTC (permalink / raw)
To: linuxppc-dev, Paul Mackerras, Benjamin Herrenschmidt
Cc: Jeremy Kerr, Anton Blanchard
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Now that we are ready to handle machine check directly in linux, do not
register with firmware to handle machine check exception.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
arch/powerpc/platforms/powernv/opal.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index f789514..0170d19 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -83,14 +83,10 @@ static int __init opal_register_exception_handlers(void)
if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
return -ENODEV;
- /* Hookup some exception handlers. We use the fwnmi area at 0x7000
- * to provide the glue space to OPAL
+ /* Hookup some exception handlers except machine check. We use the
+ * fwnmi area at 0x7000 to provide the glue space to OPAL
*/
glue = 0x7000;
- opal_register_exception_handler(OPAL_MACHINE_CHECK_HANDLER,
- __pa(opal_mc_secondary_handler[0]),
- glue);
- glue += 128;
opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
0, glue);
glue += 128;
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [RFC PATCH v4 12/12] powerpc/powernv: Machine check exception handling.
2013-09-16 12:40 [RFC PATCH v4 00/12] Machine check handling in linux host Mahesh J Salgaonkar
` (10 preceding siblings ...)
2013-09-16 12:42 ` [RFC PATCH v4 11/12] powerpc/powernv: Remove machine check handling in OPAL Mahesh J Salgaonkar
@ 2013-09-16 12:42 ` Mahesh J Salgaonkar
11 siblings, 0 replies; 13+ messages in thread
From: Mahesh J Salgaonkar @ 2013-09-16 12:42 UTC (permalink / raw)
To: linuxppc-dev, Paul Mackerras, Benjamin Herrenschmidt
Cc: Jeremy Kerr, Anton Blanchard
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Add basic error handling in machine check exception handler.
- If MSR_RI isn't set, we can not recover.
- Check if disposition set to OpalMCE_DISPOSITION_RECOVERED.
- Check if address at fault is inside kernel address space, if not then send
SIGBUS to process if we hit exception when in userspace.
- If address at fault is not provided then and if we get a synchronous machine
check while in userspace then kill the task.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/mce.h | 1 +
arch/powerpc/kernel/mce.c | 27 +++++++++++++++++++++
arch/powerpc/platforms/powernv/opal.c | 43 ++++++++++++++++++++++++++++++++-
3 files changed, 70 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 3276b40..a2b8c7b 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -193,5 +193,6 @@ extern void release_mce_event(void);
extern void machine_check_queue_event(void);
extern void machine_check_process_queued_event(void);
extern void machine_check_print_event_info(struct machine_check_event *evt);
+extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 1cca4b6..3100509 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -316,3 +316,30 @@ void machine_check_print_event_info(struct machine_check_event *evt)
break;
}
}
+
+uint64_t get_mce_fault_addr(struct machine_check_event *evt)
+{
+ switch (evt->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ if (evt->u.ue_error.effective_address_provided)
+ return evt->u.ue_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ if (evt->u.slb_error.effective_address_provided)
+ return evt->u.slb_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ if (evt->u.erat_error.effective_address_provided)
+ return evt->u.erat_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ if (evt->u.tlb_error.effective_address_provided)
+ return evt->u.tlb_error.effective_address;
+ break;
+ default:
+ case MCE_ERROR_TYPE_UNKNOWN:
+ break;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(get_mce_fault_addr);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 0170d19..2070970 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -17,6 +17,7 @@
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>
@@ -240,6 +241,44 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
return written;
}
+static int opal_recover_mce(struct pt_regs *regs,
+ struct machine_check_event *evt)
+{
+ int recovered = 0;
+ uint64_t ea = get_mce_fault_addr(evt);
+
+ if (!(regs->msr & MSR_RI)) {
+ /* If MSR_RI isn't set, we cannot recover */
+ recovered = 0;
+ } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
+ /* Platform corrected itself */
+ recovered = 1;
+ } else if (ea && !is_kernel_addr(ea)) {
+ /*
+ * Faulting address is not in kernel text. We should be fine.
+ * We need to find which process uses this address.
+ * For now, kill the task if we have received exception when
+ * in userspace.
+ *
+ * TODO: Queue up this address for hwpoisioning later.
+ */
+ if (user_mode(regs) && !is_global_init(current)) {
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ } else
+ recovered = 0;
+ } else if (user_mode(regs) && !is_global_init(current) &&
+ evt->severity == MCE_SEV_ERROR_SYNC) {
+ /*
+ * If we have received a synchronous error when in userspace
+ * kill the task.
+ */
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ }
+ return recovered;
+}
+
int opal_machine_check(struct pt_regs *regs)
{
struct machine_check_event evt;
@@ -255,7 +294,9 @@ int opal_machine_check(struct pt_regs *regs)
}
machine_check_print_event_info(&evt);
- return evt.severity == MCE_SEV_FATAL ? 0 : 1;
+ if (opal_recover_mce(regs, &evt))
+ return 1;
+ return 0;
}
static irqreturn_t opal_interrupt(int irq, void *data)
^ permalink raw reply related [flat|nested] 13+ messages in thread