* [PATCH 0/4] Fixes for SLB to C series
From: Nicholas Piggin @ 2018-09-28 16:00 UTC
  To: linuxppc-dev; +Cc: Aneesh Kumar K . V, Nicholas Piggin

These are some fixes I've got so far to solve hangs and SLB
multi-hits, seen particularly on P8 with 256MB segments (but they can
also be reproduced on P9).

I'm not yet sure these solve all the problems, and they need some
good review and testing. So far they have been solid for me.

Thanks,
Nick

Nicholas Piggin (4):
  powerpc/64: add struct int_regs to save additional registers on stack
  powerpc/64: interrupts save PPR on stack rather than thread_struct
  powerpc/64s/hash: Fix preloading of SLB entries
  powerpc/64s/hash: add more barriers for slb preloading

 arch/powerpc/include/asm/exception-64s.h |  9 ++--
 arch/powerpc/include/asm/processor.h     | 12 +++---
 arch/powerpc/include/asm/ptrace.h        | 18 +++++---
 arch/powerpc/kernel/asm-offsets.c        | 23 ++++++----
 arch/powerpc/kernel/entry_64.S           | 15 +++----
 arch/powerpc/kernel/process.c            | 54 ++++++++++++------------
 arch/powerpc/kernel/ptrace.c             |  4 +-
 arch/powerpc/kernel/stacktrace.c         |  2 +-
 arch/powerpc/mm/slb.c                    | 48 ++++++++++++++++++---
 9 files changed, 116 insertions(+), 69 deletions(-)

-- 
2.18.0



* [PATCH 1/4] powerpc/64: add struct int_regs to save additional registers on stack
From: Nicholas Piggin @ 2018-09-28 16:00 UTC
  To: linuxppc-dev; +Cc: Aneesh Kumar K . V, Nicholas Piggin

struct pt_regs is part of the user ABI and is also the fundamental
structure for saving registers at interrupt entry.

The generic kernel code makes it difficult to completely decouple
these two uses, but it's easy enough to add the additional space
required to save more registers. Create a struct int_regs with
struct pt_regs at offset 0.

This is required by a following fix that saves the PPR SPR on the
stack.
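
The offset-0 requirement can be expressed as a compile-time check; a
minimal sketch (the placement of the check is illustrative, not part
of this patch):

	/* pt_regs at offset 0 means r1 + STACK_FRAME_OVERHEAD still
	 * points at a valid pt_regs even though the frame grew: */
	BUILD_BUG_ON(offsetof(struct int_regs, pt_regs) != 0);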

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/ptrace.h | 17 +++++++---
 arch/powerpc/kernel/asm-offsets.c | 21 ++++++++-----
 arch/powerpc/kernel/process.c     | 52 ++++++++++++++++---------------
 arch/powerpc/kernel/stacktrace.c  |  2 +-
 4 files changed, 53 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 447cbd1bee99..1a98cd8c49f6 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -26,7 +26,6 @@
 #include <uapi/asm/ptrace.h>
 #include <asm/asm-const.h>
 
-
 #ifdef __powerpc64__
 
 /*
@@ -44,7 +43,7 @@
 #define STACK_FRAME_OVERHEAD	112	/* size of minimum stack frame */
 #define STACK_FRAME_LR_SAVE	2	/* Location of LR in stack frame */
 #define STACK_FRAME_REGS_MARKER	ASM_CONST(0x7265677368657265)
-#define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + \
+#define STACK_INT_FRAME_SIZE	(sizeof(struct int_regs) + \
 				 STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE)
 #define STACK_FRAME_MARKER	12
 
@@ -76,6 +75,11 @@
 
 #ifndef __ASSEMBLY__
 
+struct int_regs {
+	/* pt_regs must be offset 0 so r1 + STACK_FRAME_OVERHEAD points to it */
+	struct pt_regs pt_regs;
+};
+
 #define GET_IP(regs)		((regs)->nip)
 #define GET_USP(regs)		((regs)->gpr[1])
 #define GET_FP(regs)		(0)
@@ -119,8 +123,11 @@ extern int ptrace_get_reg(struct task_struct *task, int regno,
 extern int ptrace_put_reg(struct task_struct *task, int regno,
 			  unsigned long data);
 
-#define current_pt_regs() \
-	((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
+#define current_int_regs() \
+ ((struct int_regs *)((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
+
+#define current_pt_regs() (&current_int_regs()->pt_regs)
+
 /*
  * We use the least-significant bit of the trap field to indicate
  * whether we have saved the full set of registers, or only a
@@ -137,7 +144,7 @@ extern int ptrace_put_reg(struct task_struct *task, int regno,
 #define TRAP(regs)		((regs)->trap & ~0xF)
 #ifdef __powerpc64__
 #define NV_REG_POISON		0xdeadbeefdeadbeefUL
-#define CHECK_FULL_REGS(regs)	BUG_ON(regs->trap & 1)
+#define CHECK_FULL_REGS(regs)	BUG_ON((regs)->trap & 1)
 #else
 #define NV_REG_POISON		0xdeadbeef
 #define CHECK_FULL_REGS(regs)						      \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index ba9d0fc98730..8db740a3a8c7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -72,8 +72,13 @@
 #include <asm/fixmap.h>
 #endif
 
-#define STACK_PT_REGS_OFFSET(sym, val)	\
-	DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val))
+#define STACK_INT_REGS_OFFSET(sym, val)				\
+	DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct int_regs, val))
+
+#define STACK_PT_REGS_OFFSET(sym, val)				\
+	DEFINE(sym, STACK_FRAME_OVERHEAD +			\
+			offsetof(struct int_regs, pt_regs) +	\
+			offsetof(struct pt_regs, val))
 
 int main(void)
 {
@@ -150,7 +155,7 @@ int main(void)
 	OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr);
 	/* Local pt_regs on stack for Transactional Memory funcs. */
 	DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
-	       sizeof(struct pt_regs) + 16);
+	       sizeof(struct int_regs) + 16);
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
 	OFFSET(TI_FLAGS, thread_info, flags);
@@ -264,11 +269,11 @@ int main(void)
 
 	/* Interrupt register frame */
 	DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
-	DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
+	DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct int_regs));
 #ifdef CONFIG_PPC64
 	/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
-	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
-	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
+	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct int_regs) + 16);
+	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct int_regs) + 16);
 #endif /* CONFIG_PPC64 */
 	STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
 	STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
@@ -315,8 +320,8 @@ int main(void)
 	STACK_PT_REGS_OFFSET(SOFTE, softe);
 
 	/* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
-	DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
-	DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
+	DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct int_regs));
+	DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct int_regs)+8);
 #endif /* CONFIG_PPC64 */
 
 #if defined(CONFIG_PPC32)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 03c2e1f134bc..532e9a83e526 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1627,7 +1627,7 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
 int copy_thread(unsigned long clone_flags, unsigned long usp,
 		unsigned long kthread_arg, struct task_struct *p)
 {
-	struct pt_regs *childregs, *kregs;
+	struct int_regs *childregs, *kregs;
 	extern void ret_from_fork(void);
 	extern void ret_from_kernel_thread(void);
 	void (*f)(void);
@@ -1637,44 +1637,44 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	klp_init_thread_info(ti);
 
 	/* Copy registers */
-	sp -= sizeof(struct pt_regs);
-	childregs = (struct pt_regs *) sp;
+	sp -= sizeof(struct int_regs);
+	childregs = (struct int_regs *) sp;
 	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
-		memset(childregs, 0, sizeof(struct pt_regs));
-		childregs->gpr[1] = sp + sizeof(struct pt_regs);
+		memset(childregs, 0, sizeof(struct int_regs));
+		childregs->pt_regs.gpr[1] = sp + sizeof(struct int_regs);
 		/* function */
 		if (usp)
-			childregs->gpr[14] = ppc_function_entry((void *)usp);
+			childregs->pt_regs.gpr[14] = ppc_function_entry((void *)usp);
 #ifdef CONFIG_PPC64
 		clear_tsk_thread_flag(p, TIF_32BIT);
-		childregs->softe = IRQS_ENABLED;
+		childregs->pt_regs.softe = IRQS_ENABLED;
 #endif
-		childregs->gpr[15] = kthread_arg;
+		childregs->pt_regs.gpr[15] = kthread_arg;
 		p->thread.regs = NULL;	/* no user register state */
 		ti->flags |= _TIF_RESTOREALL;
 		f = ret_from_kernel_thread;
 	} else {
 		/* user thread */
-		struct pt_regs *regs = current_pt_regs();
-		CHECK_FULL_REGS(regs);
+		struct int_regs *regs = current_int_regs();
+		CHECK_FULL_REGS(&regs->pt_regs);
 		*childregs = *regs;
 		if (usp)
-			childregs->gpr[1] = usp;
-		p->thread.regs = childregs;
-		childregs->gpr[3] = 0;  /* Result from fork() */
+			childregs->pt_regs.gpr[1] = usp;
+		p->thread.regs = &childregs->pt_regs;
+		childregs->pt_regs.gpr[3] = 0;  /* Result from fork() */
 		if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_PPC64
 			if (!is_32bit_task())
-				childregs->gpr[13] = childregs->gpr[6];
+				childregs->pt_regs.gpr[13] = childregs->pt_regs.gpr[6];
 			else
 #endif
-				childregs->gpr[2] = childregs->gpr[6];
+				childregs->pt_regs.gpr[2] = childregs->pt_regs.gpr[6];
 		}
 
 		f = ret_from_fork;
 	}
-	childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
+	childregs->pt_regs.msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
 	sp -= STACK_FRAME_OVERHEAD;
 
 	/*
@@ -1686,8 +1686,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	 * system call, using the stack frame created above.
 	 */
 	((unsigned long *)sp)[0] = 0;
-	sp -= sizeof(struct pt_regs);
-	kregs = (struct pt_regs *) sp;
+	sp -= sizeof(struct int_regs);
+	kregs = (struct int_regs *) sp;
 	sp -= STACK_FRAME_OVERHEAD;
 	p->thread.ksp = sp;
 #ifdef CONFIG_PPC32
@@ -1715,7 +1715,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 
 	p->thread.tidr = 0;
 #endif
-	kregs->nip = ppc_function_entry(f);
+	kregs->pt_regs.nip = ppc_function_entry(f);
 	return 0;
 }
 
@@ -1739,8 +1739,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 	 * set.  Do it now.
 	 */
 	if (!current->thread.regs) {
-		struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE;
-		current->thread.regs = regs - 1;
+		struct int_regs *iregs = task_stack_page(current) + THREAD_SIZE;
+		current->thread.regs = &(iregs - 1)->pt_regs;
 	}
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -2106,11 +2106,13 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
 		 */
 		if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE)
 		    && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
-			struct pt_regs *regs = (struct pt_regs *)
-				(sp + STACK_FRAME_OVERHEAD);
-			lr = regs->link;
+			struct int_regs *regs;
+			regs = (struct int_regs *)(sp + STACK_FRAME_OVERHEAD);
+			lr = regs->pt_regs.link;
 			printk("--- interrupt: %lx at %pS\n    LR = %pS\n",
-			       regs->trap, (void *)regs->nip, (void *)lr);
+			       regs->pt_regs.trap,
+			       (void *)regs->pt_regs.nip,
+			       (void *)lr);
 			firstframe = 1;
 		}
 
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index e2c50b55138f..463dedc4d664 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -120,7 +120,7 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk,
 		 * an unreliable stack trace until it's been
 		 * _switch()'ed to for the first time.
 		 */
-		stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+		stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct int_regs);
 	} else {
 		/*
 		 * idle tasks have a custom stack layout,
-- 
2.18.0



* [PATCH 2/4] powerpc/64: interrupts save PPR on stack rather than thread_struct
From: Nicholas Piggin @ 2018-09-28 16:00 UTC
  To: linuxppc-dev; +Cc: Aneesh Kumar K . V, Nicholas Piggin

PPR is the odd register out when it comes to interrupt handling: it
is saved in current->thread.ppr while all other registers are saved
on the stack.

The difficulty with this is that accessing thread.ppr can itself
cause an SLB fault, but the conversion of the SLB fault handlers to C
assumed that the normal exception entry handlers would not cause SLB
faults.

Fix this by allocating room in the interrupt stack frame to save PPR.
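
The asm-visible offset of the new slot then falls directly out of the
layout; expanding the STACK_INT_REGS_OFFSET(_PPR, ppr) definition
added below gives roughly:

	#define _PPR (STACK_FRAME_OVERHEAD + offsetof(struct int_regs, ppr))

so the entry code can save with "std rN,_PPR(r1)". r1 points at the
kernel stack, which is guaranteed to be SLB-resident here, unlike
current->thread.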

Fixes: 5e46e29e6a97 ("powerpc/64s/hash: convert SLB miss handlers to C")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/exception-64s.h |  9 ++++-----
 arch/powerpc/include/asm/processor.h     | 12 +++++++-----
 arch/powerpc/include/asm/ptrace.h        |  1 +
 arch/powerpc/kernel/asm-offsets.c        |  2 +-
 arch/powerpc/kernel/entry_64.S           | 15 +++++----------
 arch/powerpc/kernel/process.c            |  2 +-
 arch/powerpc/kernel/ptrace.c             |  4 ++--
 7 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 47578b79f0fb..3b4767ed3ec5 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -228,11 +228,10 @@
  * PPR save/restore macros used in exceptions_64s.S  
  * Used for P7 or later processors
  */
-#define SAVE_PPR(area, ra, rb)						\
+#define SAVE_PPR(area, ra)						\
 BEGIN_FTR_SECTION_NESTED(940)						\
-	ld	ra,PACACURRENT(r13);					\
-	ld	rb,area+EX_PPR(r13);	/* Read PPR from paca */	\
-	std	rb,TASKTHREADPPR(ra);					\
+	ld	ra,area+EX_PPR(r13);	/* Read PPR from paca */	\
+	std	ra,_PPR(r1);						\
 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
 
 #define RESTORE_PPR_PACA(area, ra)					\
@@ -500,7 +499,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 3:	EXCEPTION_PROLOG_COMMON_1();					   \
 	beq	4f;			/* if from kernel mode		*/ \
 	ACCOUNT_CPU_USER_ENTRY(r13, r9, r10);				   \
-	SAVE_PPR(area, r9, r10);					   \
+	SAVE_PPR(area, r9);						   \
 4:	EXCEPTION_PROLOG_COMMON_2(area)					   \
 	EXCEPTION_PROLOG_COMMON_3(n)					   \
 	ACCOUNT_STOLEN_TIME
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 350c584ca179..07251598056c 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -32,9 +32,9 @@
 /* Default SMT priority is set to 3. Use 11- 13bits to save priority. */
 #define PPR_PRIORITY 3
 #ifdef __ASSEMBLY__
-#define INIT_PPR (PPR_PRIORITY << 50)
+#define DEFAULT_PPR (PPR_PRIORITY << 50)
 #else
-#define INIT_PPR ((u64)PPR_PRIORITY << 50)
+#define DEFAULT_PPR ((u64)PPR_PRIORITY << 50)
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_PPC64 */
 
@@ -247,7 +247,11 @@ struct thread_struct {
 #ifdef CONFIG_PPC64
 	unsigned long	ksp_vsid;
 #endif
-	struct pt_regs	*regs;		/* Pointer to saved register state */
+	union {
+		struct int_regs	*iregs;	/* Pointer to saved register state */
+		struct pt_regs	*regs;	/* Pointer to saved register state */
+	};
+
 	mm_segment_t	addr_limit;	/* for get_fs() validation */
 #ifdef CONFIG_BOOKE
 	/* BookE base exception scratch space; align on cacheline */
@@ -342,7 +346,6 @@ struct thread_struct {
 	 * onwards.
 	 */
 	int		dscr_inherit;
-	unsigned long	ppr;	/* used to save/restore SMT priority */
 	unsigned long	tidr;
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -390,7 +393,6 @@ struct thread_struct {
 	.regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
 	.addr_limit = KERNEL_DS, \
 	.fpexc_mode = 0, \
-	.ppr = INIT_PPR, \
 	.fscr = FSCR_TAR | FSCR_EBB \
 }
 #endif
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 1a98cd8c49f6..9a5a1cc85bd0 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -78,6 +78,7 @@
 struct int_regs {
 	/* pt_regs must be offset 0 so r1 + STACK_FRAME_OVERHEAD points to it */
 	struct pt_regs pt_regs;
+	unsigned long ppr;
 };
 
 #define GET_IP(regs)		((regs)->nip)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8db740a3a8c7..32908a08908b 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -88,7 +88,6 @@ int main(void)
 #ifdef CONFIG_PPC64
 	DEFINE(SIGSEGV, SIGSEGV);
 	DEFINE(NMI_MASK, NMI_MASK);
-	OFFSET(TASKTHREADPPR, task_struct, thread.ppr);
 #else
 	OFFSET(THREAD_INFO, task_struct, stack);
 	DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
@@ -274,6 +273,7 @@ int main(void)
 	/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
 	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct int_regs) + 16);
 	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct int_regs) + 16);
+	STACK_INT_REGS_OFFSET(_PPR, ppr);
 #endif /* CONFIG_PPC64 */
 	STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
 	STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 77a888bfcb53..ce448f753d2e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -386,10 +386,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 4:	/* Anything else left to do? */
 BEGIN_FTR_SECTION
-	lis	r3,INIT_PPR@highest	/* Set thread.ppr = 3 */
-	ld	r10,PACACURRENT(r13)
+	lis	r3,DEFAULT_PPR@highest	/* Set default PPR */
 	sldi	r3,r3,32	/* bits 11-13 are used for ppr */
-	std	r3,TASKTHREADPPR(r10)
+	std	r3,_PPR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	andi.	r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
@@ -938,12 +937,6 @@ fast_exception_return:
 	andi.	r0,r3,MSR_RI
 	beq-	.Lunrecov_restore
 
-	/* Load PPR from thread struct before we clear MSR:RI */
-BEGIN_FTR_SECTION
-	ld	r2,PACACURRENT(r13)
-	ld	r2,TASKTHREADPPR(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
 	/*
 	 * Clear RI before restoring r13.  If we are returning to
 	 * userspace and we take an exception after restoring r13,
@@ -964,7 +957,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	andi.	r0,r3,MSR_PR
 	beq	1f
 BEGIN_FTR_SECTION
-	mtspr	SPRN_PPR,r2	/* Restore PPR */
+	/* Restore PPR */
+	ld	r2,_PPR(r1)
+	mtspr	SPRN_PPR,r2
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
 	REST_GPR(13, r1)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 532e9a83e526..18a14fb8f759 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1711,7 +1711,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		p->thread.dscr = mfspr(SPRN_DSCR);
 	}
 	if (cpu_has_feature(CPU_FTR_HAS_PPR))
-		p->thread.ppr = INIT_PPR;
+		childregs->ppr = DEFAULT_PPR;
 
 	p->thread.tidr = 0;
 #endif
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 9667666eb18e..57c04176f686 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1609,7 +1609,7 @@ static int ppr_get(struct task_struct *target,
 		      void *kbuf, void __user *ubuf)
 {
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   &target->thread.ppr, 0, sizeof(u64));
+				   &target->thread.iregs->ppr, 0, sizeof(u64));
 }
 
 static int ppr_set(struct task_struct *target,
@@ -1618,7 +1618,7 @@ static int ppr_set(struct task_struct *target,
 		      const void *kbuf, const void __user *ubuf)
 {
 	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.ppr, 0, sizeof(u64));
+				  &target->thread.iregs->ppr, 0, sizeof(u64));
 }
 
 static int dscr_get(struct task_struct *target,
-- 
2.18.0



* [PATCH 3/4] powerpc/64s/hash: Fix preloading of SLB entries
From: Nicholas Piggin @ 2018-09-28 16:00 UTC
  To: linuxppc-dev; +Cc: Aneesh Kumar K . V, Nicholas Piggin

slb_setup_new_exec and preload_new_slb_context assumed that if an
address missed the preload cache, it would not be in the SLB and so
could be added. This is wrong once the preload cache has started to
overflow, and it can cause SLB multi-hits on user addresses.

That assumption came from an earlier version of the patch, which
cleared the preload cache when copying the task. Even that was
technically wrong, because some user accesses occur before these
preloads run, and the preloads themselves could overflow the cache
depending on its size.
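
A minimal model of the invariant being restored (illustrative C, not
the kernel code): a miss in the cache only proves an address is not
in the SLB while the cache has never wrapped, so a preloader that
would push it past capacity must bail out up front:

	#define SLB_PRELOAD_NR	16	/* capacity; value illustrative */

	struct preload_cache {
		int nr;			/* entries added so far */
		unsigned long esid[SLB_PRELOAD_NR];
	};

	/* Safe to treat a cache miss as "not in the SLB" only if
	 * adding 'want' more entries cannot overflow the cache: */
	static int preload_space_for(struct preload_cache *c, int want)
	{
		return c->nr + want <= SLB_PRELOAD_NR;
	}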

Fixes: 89ca4e126a3f ("powerpc/64s/hash: Add a SLB preload cache")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/mm/slb.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index b438220c4336..c1425853af5d 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -311,6 +311,13 @@ void slb_setup_new_exec(void)
 	struct mm_struct *mm = current->mm;
 	unsigned long exec = 0x10000000;
 
+	/*
+	 * preload cache can only be used to determine whether a SLB
+	 * entry exists if it does not start to overflow.
+	 */
+	if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
+		return;
+
 	/*
 	 * We have no good place to clear the slb preload cache on exec,
 	 * flush_thread is about the earliest arch hook but that happens
@@ -345,6 +352,10 @@ void preload_new_slb_context(unsigned long start, unsigned long sp)
 	struct mm_struct *mm = current->mm;
 	unsigned long heap = mm->start_brk;
 
+	/* see above */
+	if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
+		return;
+
 	/* Userspace entry address. */
 	if (!is_kernel_addr(start)) {
 		if (preload_add(ti, start))
-- 
2.18.0



* [PATCH 4/4] powerpc/64s/hash: add more barriers for slb preloading
From: Nicholas Piggin @ 2018-09-28 16:00 UTC
  To: linuxppc-dev; +Cc: Aneesh Kumar K . V, Nicholas Piggin

In several places, more care has to be taken to prevent the compiler
or the CPU from reordering memory accesses into critical sections
that must not take SLB faults.
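
The pattern is sketched below (assuming GCC-style inline asm on
powerpc, as the kernel uses): barrier() stops the compiler moving
accesses across it, and an isync with a "memory" clobber orders the
slbmte against subsequent loads and stores:

	barrier();		/* no accesses reordered in... */
	asm volatile("slbmte %0, %1"
		     : : "r" (vsid_data), "r" (esid_data));
	barrier();		/* ...or hoisted out past here */

	/* after all preloads, before the kernel may touch user
	 * memory again: */
	asm volatile("isync" : : : "memory");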

Fixes: 5e46e29e6a97 ("powerpc/64s/hash: convert SLB miss handlers to C")
Fixes: 89ca4e126a3f ("powerpc/64s/hash: Add a SLB preload cache")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/mm/slb.c | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index c1425853af5d..f93ed8afbac6 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -344,6 +344,9 @@ void slb_setup_new_exec(void)
 		if (preload_add(ti, mm->mmap_base))
 			slb_allocate_user(mm, mm->mmap_base);
 	}
+
+	/* see switch_slb */
+	asm volatile("isync" : : : "memory");
 }
 
 void preload_new_slb_context(unsigned long start, unsigned long sp)
@@ -373,6 +376,9 @@ void preload_new_slb_context(unsigned long start, unsigned long sp)
 		if (preload_add(ti, heap))
 			slb_allocate_user(mm, heap);
 	}
+
+	/* see switch_slb */
+	asm volatile("isync" : : : "memory");
 }
 
 
@@ -389,6 +395,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 	 * which would update the slb_cache/slb_cache_ptr fields in the PACA.
 	 */
 	hard_irq_disable();
+	asm volatile("isync" : : : "memory");
 	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		/*
 		 * SLBIA IH=3 invalidates all Class=1 SLBEs and their
@@ -396,7 +403,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 		 * switch_slb wants. So ARCH_300 does not use the slb
 		 * cache.
 		 */
-		asm volatile("isync ; " PPC_SLBIA(3)" ; isync");
+		asm volatile(PPC_SLBIA(3));
 	} else {
 		unsigned long offset = get_paca()->slb_cache_ptr;
 
@@ -404,7 +411,6 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 		    offset <= SLB_CACHE_ENTRIES) {
 			unsigned long slbie_data = 0;
 
-			asm volatile("isync" : : : "memory");
 			for (i = 0; i < offset; i++) {
 				/* EA */
 				slbie_data = (unsigned long)
@@ -419,7 +425,6 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 			if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
 				asm volatile("slbie %0" : : "r" (slbie_data));
 
-			asm volatile("isync" : : : "memory");
 		} else {
 			struct slb_shadow *p = get_slb_shadow();
 			unsigned long ksp_esid_data =
@@ -427,8 +432,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 			unsigned long ksp_vsid_data =
 				be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
 
-			asm volatile("isync\n"
-				     PPC_SLBIA(1) "\n"
+			asm volatile(PPC_SLBIA(1) "\n"
 				     "slbmte	%0,%1\n"
 				     "isync"
 				     :: "r"(ksp_vsid_data),
@@ -464,6 +468,13 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 
 		slb_allocate_user(mm, ea);
 	}
+
+	/*
+	 * Synchronize slbmte preloads with possible subsequent user memory
+	 * address accesses by the kernel (user mode won't happen until
+	 * rfid, which is safe).
+	 */
+	asm volatile("isync" : : : "memory");
 }
 
 void slb_set_size(u16 size)
@@ -625,6 +636,17 @@ static long slb_insert_entry(unsigned long ea, unsigned long context,
 	if (!vsid)
 		return -EFAULT;
 
+	/*
+	 * There must not be a kernel SLB fault in alloc_slb_index or before
+	 * slbmte here or the allocation bitmaps could get out of whack with
+	 * the SLB.
+	 *
+	 * User SLB faults or preloads take this path which might get inlined
+	 * into the caller, so add compiler barriers here to ensure unsafe
+	 * memory accesses do not come in between.
+	 */
+	barrier();
+
 	index = alloc_slb_index(kernel);
 
 	vsid_data = __mk_vsid_data(vsid, ssize, flags);
@@ -633,10 +655,13 @@ static long slb_insert_entry(unsigned long ea, unsigned long context,
 	/*
 	 * No need for an isync before or after this slbmte. The exception
 	 * we enter with and the rfid we exit with are context synchronizing.
-	 * Also we only handle user segments here.
+	 * User preloads should add isync afterwards in case the kernel
+	 * accesses user memory before it returns to userspace with rfid.
 	 */
 	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
 
+	barrier();
+
 	if (!kernel)
 		slb_cache_update(esid_data);
 
-- 
2.18.0


