linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4.9 backport 1/3] powerpc/64s: Fix conversion of slb_miss_common to use RFI_TO_USER/KERNEL
       [not found] <20180222065738.GB2428@kroah.com>
@ 2018-02-22 12:35 ` Michael Ellerman
  2018-02-22 12:35   ` [PATCH v4.4 backport 2/3] powerpc/64s: Simple RFI macro conversions Michael Ellerman
  2018-02-22 12:35   ` [PATCH v4.4 backport 3/3] powerpc/64s: Improve RFI L1-D cache flush fallback Michael Ellerman
  0 siblings, 2 replies; 7+ messages in thread
From: Michael Ellerman @ 2018-02-22 12:35 UTC (permalink / raw)
  To: stable, greg; +Cc: npiggin, linuxppc-dev, linux, corsac

The back port of commit c7305645eb0c ("powerpc/64s: Convert
slb_miss_common to use RFI_TO_USER/KERNEL") missed a hunk needed to
restore cr6.

Fixes: 48cc95d4e4d6 ("powerpc/64s: Convert slb_miss_common to use RFI_TO_USER/KERNEL")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/exceptions-64s.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 96db6c3adebe..d0be752ea86c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -680,6 +680,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 .machine	push
 .machine	"power4"
 	mtcrf	0x80,r9
+	mtcrf	0x02,r9		/* I/D indication is in cr6 */
 	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
 .machine	pop
 
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v4.4 backport 2/3] powerpc/64s: Simple RFI macro conversions
  2018-02-22 12:35 ` [PATCH v4.9 backport 1/3] powerpc/64s: Fix conversion of slb_miss_common to use RFI_TO_USER/KERNEL Michael Ellerman
@ 2018-02-22 12:35   ` Michael Ellerman
  2018-02-23 12:11     ` Michael Ellerman
  2018-02-22 12:35   ` [PATCH v4.4 backport 3/3] powerpc/64s: Improve RFI L1-D cache flush fallback Michael Ellerman
  1 sibling, 1 reply; 7+ messages in thread
From: Michael Ellerman @ 2018-02-22 12:35 UTC (permalink / raw)
  To: stable, greg; +Cc: npiggin, linuxppc-dev, linux, corsac

From: Nicholas Piggin <npiggin@gmail.com>

commit 222f20f140623ef6033491d0103ee0875fe87d35 upstream.

This commit does simple conversions of rfi/rfid to the new macros that
include the expected destination context. By simple we mean cases
where there is a single well known destination context, and it's
simply a matter of substituting the instruction for the appropriate
macro.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Backport to 4.9, use RFI_TO_KERNEL in idle_book3s.S]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/exception-64s.h |  2 +-
 arch/powerpc/kernel/entry_64.S           | 14 +++++++++-----
 arch/powerpc/kernel/exceptions-64s.S     | 22 +++++++++++-----------
 arch/powerpc/kernel/idle_book3s.S        |  7 ++++---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  |  7 +++----
 arch/powerpc/kvm/book3s_rmhandlers.S     |  7 +++++--
 arch/powerpc/kvm/book3s_segment.S        |  4 ++--
 7 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index cab6d2a46c41..903e76a9f158 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -242,7 +242,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	mtspr	SPRN_##h##SRR0,r12;					\
 	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
 	mtspr	SPRN_##h##SRR1,r10;					\
-	h##rfid;							\
+	h##RFI_TO_KERNEL;						\
 	b	.	/* prevent speculative execution */
 #define EXCEPTION_PROLOG_PSERIES_1(label, h)				\
 	__EXCEPTION_PROLOG_PSERIES_1(label, h)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index c33b69d10919..2dc52e6d2af4 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -39,6 +39,11 @@
 #include <asm/tm.h>
 #include <asm/ppc-opcode.h>
 #include <asm/export.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
 
 /*
  * System calls.
@@ -396,8 +401,7 @@ tabort_syscall:
 	mtmsrd	r10, 1
 	mtspr	SPRN_SRR0, r11
 	mtspr	SPRN_SRR1, r12
-
-	rfid
+	RFI_TO_USER
 	b	.	/* prevent speculative execution */
 #endif
 
@@ -1073,7 +1077,7 @@ _GLOBAL(enter_rtas)
 	
 	mtspr	SPRN_SRR0,r5
 	mtspr	SPRN_SRR1,r6
-	rfid
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 
 rtas_return_loc:
@@ -1098,7 +1102,7 @@ rtas_return_loc:
 
 	mtspr	SPRN_SRR0,r3
 	mtspr	SPRN_SRR1,r4
-	rfid
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 
 	.align	3
@@ -1169,7 +1173,7 @@ _GLOBAL(enter_prom)
 	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
 	andc	r11,r11,r12
 	mtsrr1	r11
-	rfid
+	RFI_TO_KERNEL
 #endif /* CONFIG_PPC_BOOK3E */
 
 1:	/* Return from OF */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index d0be752ea86c..29892500e646 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -244,7 +244,7 @@ BEGIN_FTR_SECTION
 	LOAD_HANDLER(r12, machine_check_handle_early)
 1:	mtspr	SPRN_SRR0,r12
 	mtspr	SPRN_SRR1,r11
-	rfid
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 2:
 	/* Stack overflow. Stay on emergency stack and panic.
@@ -280,7 +280,7 @@ machine_check_pSeries_0:
 	mtspr	SPRN_SRR0,r12
 	mfspr	r12,SPRN_SRR1
 	mtspr	SPRN_SRR1,r10
-	rfid
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 
 TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
@@ -446,7 +446,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	li	r3,MSR_ME
 	andc	r10,r10,r3		/* Turn off MSR_ME */
 	mtspr	SPRN_SRR1,r10
-	rfid
+	RFI_TO_KERNEL
 	b	.
 2:
 	/*
@@ -464,7 +464,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 */
 	bl	machine_check_queue_event
 	MACHINE_CHECK_HANDLER_WINDUP
-	rfid
+	RFI_TO_USER_OR_KERNEL
 9:
 	/* Deliver the machine check to host kernel in V mode. */
 	MACHINE_CHECK_HANDLER_WINDUP
@@ -706,7 +706,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 	mtspr	SPRN_SRR0,r10
 	ld	r10,PACAKMSR(r13)
 	mtspr	SPRN_SRR1,r10
-	rfid
+	RFI_TO_KERNEL
 	b	.
 
 EXC_COMMON_BEGIN(unrecov_slb)
@@ -893,7 +893,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)				\
 	mtspr	SPRN_SRR0,r10 ; 				\
 	ld	r10,PACAKMSR(r13) ;				\
 	mtspr	SPRN_SRR1,r10 ; 				\
-	rfid ; 							\
+	RFI_TO_KERNEL ;						\
 	b	. ;	/* prevent speculative execution */
 
 #define SYSCALL_PSERIES_3					\
@@ -901,7 +901,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)				\
 1:	mfspr	r12,SPRN_SRR1 ;					\
 	xori	r12,r12,MSR_LE ;				\
 	mtspr	SPRN_SRR1,r12 ;					\
-	rfid ;		/* return to userspace */		\
+	RFI_TO_USER ;	/* return to userspace */		\
 	b	. ;	/* prevent speculative execution */
 
 #if defined(CONFIG_RELOCATABLE)
@@ -1276,7 +1276,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	ld	r11,PACA_EXGEN+EX_R11(r13)
 	ld	r12,PACA_EXGEN+EX_R12(r13)
 	ld	r13,PACA_EXGEN+EX_R13(r13)
-	HRFID
+	HRFI_TO_UNKNOWN
 	b	.
 #endif
 
@@ -1350,7 +1350,7 @@ masked_##_H##interrupt:					\
 	ld	r10,PACA_EXGEN+EX_R10(r13);		\
 	ld	r11,PACA_EXGEN+EX_R11(r13);		\
 	GET_SCRATCH0(r13);				\
-	##_H##rfid;					\
+	##_H##RFI_TO_KERNEL;				\
 	b	.
 
 /*
@@ -1372,7 +1372,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
 	addi	r13, r13, 4
 	mtspr	SPRN_SRR0, r13
 	GET_SCRATCH0(r13)
-	rfid
+	RFI_TO_KERNEL
 	b	.
 
 TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
@@ -1384,7 +1384,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
 	addi	r13, r13, 4
 	mtspr	SPRN_HSRR0, r13
 	GET_SCRATCH0(r13)
-	hrfid
+	HRFI_TO_KERNEL
 	b	.
 #endif
 
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index b350ac5e3111..d92c95333435 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -9,6 +9,7 @@
  */
 
 #include <linux/threads.h>
+#include <asm/exception-64s.h>
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/cputable.h>
@@ -178,7 +179,7 @@ _GLOBAL(pnv_powersave_common)
 	mtmsrd	r6, 1		/* clear RI before setting SRR0/1 */
 	mtspr	SPRN_SRR0, r5
 	mtspr	SPRN_SRR1, r7
-	rfid
+	RFI_TO_KERNEL
 
 	.globl pnv_enter_arch207_idle_mode
 pnv_enter_arch207_idle_mode:
@@ -668,7 +669,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	mtcr	r6
 	mtspr	SPRN_SRR1,r4
 	mtspr	SPRN_SRR0,r5
-	rfid
+	RFI_TO_KERNEL
 
 /*
  * R3 here contains the value that will be returned to the caller
@@ -689,4 +690,4 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	mtcr	r6
 	mtspr	SPRN_SRR1,r4
 	mtspr	SPRN_SRR0,r5
-	rfid
+	RFI_TO_KERNEL
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 0447a22a4df6..55fbc0c78721 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -65,7 +65,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
 	mtmsrd	r0,1		/* clear RI in MSR */
 	mtsrr0	r5
 	mtsrr1	r6
-	RFI
+	RFI_TO_KERNEL
 
 kvmppc_call_hv_entry:
 	ld	r4, HSTATE_KVM_VCPU(r13)
@@ -171,7 +171,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mtsrr0	r8
 	mtsrr1	r7
 	beq	cr1, 13f		/* machine check */
-	RFI
+	RFI_TO_KERNEL
 
 	/* On POWER7, we have external interrupts set to use HSRR0/1 */
 11:	mtspr	SPRN_HSRR0, r8
@@ -1018,8 +1018,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ld	r0, VCPU_GPR(R0)(r4)
 	ld	r4, VCPU_GPR(R4)(r4)
-
-	hrfid
+	HRFI_TO_GUEST
 	b	.
 
 secondary_too_late:
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 42a4b237df5f..34a5adeff084 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -46,6 +46,9 @@
 
 #define FUNC(name)		name
 
+#define RFI_TO_KERNEL	RFI
+#define RFI_TO_GUEST	RFI
+
 .macro INTERRUPT_TRAMPOLINE intno
 
 .global kvmppc_trampoline_\intno
@@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:
 	GET_SCRATCH0(r13)
 
 	/* And get back into the code */
-	RFI
+	RFI_TO_KERNEL
 #endif
 
 /*
@@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
 	ori	r5, r5, MSR_EE
 	mtsrr0	r7
 	mtsrr1	r6
-	RFI
+	RFI_TO_KERNEL
 
 #include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index ca8f174289bb..7c982956d709 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -156,7 +156,7 @@ no_dcbz32_on:
 	PPC_LL	r9, SVCPU_R9(r3)
 	PPC_LL	r3, (SVCPU_R3)(r3)
 
-	RFI
+	RFI_TO_GUEST
 kvmppc_handler_trampoline_enter_end:
 
 
@@ -389,5 +389,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	cmpwi	r12, BOOK3S_INTERRUPT_DOORBELL
 	beqa	BOOK3S_INTERRUPT_DOORBELL
 
-	RFI
+	RFI_TO_KERNEL
 kvmppc_handler_trampoline_exit_end:
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v4.4 backport 3/3] powerpc/64s: Improve RFI L1-D cache flush fallback
  2018-02-22 12:35 ` [PATCH v4.9 backport 1/3] powerpc/64s: Fix conversion of slb_miss_common to use RFI_TO_USER/KERNEL Michael Ellerman
  2018-02-22 12:35   ` [PATCH v4.4 backport 2/3] powerpc/64s: Simple RFI macro conversions Michael Ellerman
@ 2018-02-22 12:35   ` Michael Ellerman
  2018-02-22 14:26     ` Nicholas Piggin
  1 sibling, 1 reply; 7+ messages in thread
From: Michael Ellerman @ 2018-02-22 12:35 UTC (permalink / raw)
  To: stable, greg; +Cc: npiggin, linuxppc-dev, linux, corsac

From: Nicholas Piggin <npiggin@gmail.com>

commit bdcb1aefc5b3f7d0f1dc8b02673602bca2ff7a4b upstream.

The fallback RFI flush is used when firmware does not provide a way
to flush the cache. It's a "displacement flush" that evicts useful
data by displacing it with an uninteresting buffer.

The flush has to take care to work with implementation specific cache
replacment policies, so the recipe has been in flux. The initial
slow but conservative approach is to touch all lines of a congruence
class, with dependencies between each load. It has since been
determined that a linear pattern of loads without dependencies is
sufficient, and is significantly faster.

Measuring the speed of a null syscall with RFI fallback flush enabled
gives the relative improvement:

P8 - 1.83x
P9 - 1.75x

The flush also becomes simpler and more adaptable to different cache
geometries.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Backport to 4.9]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/paca.h      |  3 +-
 arch/powerpc/kernel/asm-offsets.c    |  3 +-
 arch/powerpc/kernel/exceptions-64s.S | 76 +++++++++++++++++-------------------
 arch/powerpc/kernel/setup_64.c       | 13 +-----
 4 files changed, 39 insertions(+), 56 deletions(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index ea43897183fd..c75ee2d886fc 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -212,8 +212,7 @@ struct paca_struct {
 	 */
 	u64 exrfi[13] __aligned(0x80);
 	void *rfi_flush_fallback_area;
-	u64 l1d_flush_congruence;
-	u64 l1d_flush_sets;
+	u64 l1d_flush_size;
 #endif
 };
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 64bcbd580495..14fbbd9035ca 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -242,8 +242,7 @@ int main(void)
 	DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
 	DEFINE(PACA_RFI_FLUSH_FALLBACK_AREA, offsetof(struct paca_struct, rfi_flush_fallback_area));
 	DEFINE(PACA_EXRFI, offsetof(struct paca_struct, exrfi));
-	DEFINE(PACA_L1D_FLUSH_CONGRUENCE, offsetof(struct paca_struct, l1d_flush_congruence));
-	DEFINE(PACA_L1D_FLUSH_SETS, offsetof(struct paca_struct, l1d_flush_sets));
+	DEFINE(PACA_L1D_FLUSH_SIZE, offsetof(struct paca_struct, l1d_flush_size));
 #endif
 	DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
 	DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 29892500e646..7614d1dd2c0b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1602,39 +1602,37 @@ rfi_flush_fallback:
 	std	r9,PACA_EXRFI+EX_R9(r13)
 	std	r10,PACA_EXRFI+EX_R10(r13)
 	std	r11,PACA_EXRFI+EX_R11(r13)
-	std	r12,PACA_EXRFI+EX_R12(r13)
-	std	r8,PACA_EXRFI+EX_R13(r13)
 	mfctr	r9
 	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
-	ld	r11,PACA_L1D_FLUSH_SETS(r13)
-	ld	r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
-	/*
-	 * The load adresses are at staggered offsets within cachelines,
-	 * which suits some pipelines better (on others it should not
-	 * hurt).
-	 */
-	addi	r12,r12,8
+	ld	r11,PACA_L1D_FLUSH_SIZE(r13)
+	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
 	mtctr	r11
 	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
 
 	/* order ld/st prior to dcbt stop all streams with flushing */
 	sync
-1:	li	r8,0
-	.rept	8 /* 8-way set associative */
-	ldx	r11,r10,r8
-	add	r8,r8,r12
-	xor	r11,r11,r11	// Ensure r11 is 0 even if fallback area is not
-	add	r8,r8,r11	// Add 0, this creates a dependency on the ldx
-	.endr
-	addi	r10,r10,128 /* 128 byte cache line */
+
+	/*
+	 * The load adresses are at staggered offsets within cachelines,
+	 * which suits some pipelines better (on others it should not
+	 * hurt).
+	 */
+1:
+	ld	r11,(0x80 + 8)*0(r10)
+	ld	r11,(0x80 + 8)*1(r10)
+	ld	r11,(0x80 + 8)*2(r10)
+	ld	r11,(0x80 + 8)*3(r10)
+	ld	r11,(0x80 + 8)*4(r10)
+	ld	r11,(0x80 + 8)*5(r10)
+	ld	r11,(0x80 + 8)*6(r10)
+	ld	r11,(0x80 + 8)*7(r10)
+	addi	r10,r10,0x80*8
 	bdnz	1b
 
 	mtctr	r9
 	ld	r9,PACA_EXRFI+EX_R9(r13)
 	ld	r10,PACA_EXRFI+EX_R10(r13)
 	ld	r11,PACA_EXRFI+EX_R11(r13)
-	ld	r12,PACA_EXRFI+EX_R12(r13)
-	ld	r8,PACA_EXRFI+EX_R13(r13)
 	GET_SCRATCH0(r13);
 	rfid
 
@@ -1645,39 +1643,37 @@ hrfi_flush_fallback:
 	std	r9,PACA_EXRFI+EX_R9(r13)
 	std	r10,PACA_EXRFI+EX_R10(r13)
 	std	r11,PACA_EXRFI+EX_R11(r13)
-	std	r12,PACA_EXRFI+EX_R12(r13)
-	std	r8,PACA_EXRFI+EX_R13(r13)
 	mfctr	r9
 	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
-	ld	r11,PACA_L1D_FLUSH_SETS(r13)
-	ld	r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
-	/*
-	 * The load adresses are at staggered offsets within cachelines,
-	 * which suits some pipelines better (on others it should not
-	 * hurt).
-	 */
-	addi	r12,r12,8
+	ld	r11,PACA_L1D_FLUSH_SIZE(r13)
+	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
 	mtctr	r11
 	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
 
 	/* order ld/st prior to dcbt stop all streams with flushing */
 	sync
-1:	li	r8,0
-	.rept	8 /* 8-way set associative */
-	ldx	r11,r10,r8
-	add	r8,r8,r12
-	xor	r11,r11,r11	// Ensure r11 is 0 even if fallback area is not
-	add	r8,r8,r11	// Add 0, this creates a dependency on the ldx
-	.endr
-	addi	r10,r10,128 /* 128 byte cache line */
+
+	/*
+	 * The load adresses are at staggered offsets within cachelines,
+	 * which suits some pipelines better (on others it should not
+	 * hurt).
+	 */
+1:
+	ld	r11,(0x80 + 8)*0(r10)
+	ld	r11,(0x80 + 8)*1(r10)
+	ld	r11,(0x80 + 8)*2(r10)
+	ld	r11,(0x80 + 8)*3(r10)
+	ld	r11,(0x80 + 8)*4(r10)
+	ld	r11,(0x80 + 8)*5(r10)
+	ld	r11,(0x80 + 8)*6(r10)
+	ld	r11,(0x80 + 8)*7(r10)
+	addi	r10,r10,0x80*8
 	bdnz	1b
 
 	mtctr	r9
 	ld	r9,PACA_EXRFI+EX_R9(r13)
 	ld	r10,PACA_EXRFI+EX_R10(r13)
 	ld	r11,PACA_EXRFI+EX_R11(r13)
-	ld	r12,PACA_EXRFI+EX_R12(r13)
-	ld	r8,PACA_EXRFI+EX_R13(r13)
 	GET_SCRATCH0(r13);
 	hrfid
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 7c30a91c1f86..5243501d95ef 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -745,19 +745,8 @@ static void init_fallback_flush(void)
 	memset(l1d_flush_fallback_area, 0, l1d_size * 2);
 
 	for_each_possible_cpu(cpu) {
-		/*
-		 * The fallback flush is currently coded for 8-way
-		 * associativity. Different associativity is possible, but it
-		 * will be treated as 8-way and may not evict the lines as
-		 * effectively.
-		 *
-		 * 128 byte lines are mandatory.
-		 */
-		u64 c = l1d_size / 8;
-
 		paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
-		paca[cpu].l1d_flush_congruence = c;
-		paca[cpu].l1d_flush_sets = c / 128;
+		paca[cpu].l1d_flush_size = l1d_size;
 	}
 }
 
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v4.4 backport 3/3] powerpc/64s: Improve RFI L1-D cache flush fallback
  2018-02-22 12:35   ` [PATCH v4.4 backport 3/3] powerpc/64s: Improve RFI L1-D cache flush fallback Michael Ellerman
@ 2018-02-22 14:26     ` Nicholas Piggin
  0 siblings, 0 replies; 7+ messages in thread
From: Nicholas Piggin @ 2018-02-22 14:26 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: stable, greg, linuxppc-dev, linux, corsac

On Thu, 22 Feb 2018 23:35:45 +1100
Michael Ellerman <mpe@ellerman.id.au> wrote:

> From: Nicholas Piggin <npiggin@gmail.com>
> 
> commit bdcb1aefc5b3f7d0f1dc8b02673602bca2ff7a4b upstream.
> 
> The fallback RFI flush is used when firmware does not provide a way
> to flush the cache. It's a "displacement flush" that evicts useful
> data by displacing it with an uninteresting buffer.
> 
> The flush has to take care to work with implementation specific cache
> replacment policies, so the recipe has been in flux. The initial
> slow but conservative approach is to touch all lines of a congruence
> class, with dependencies between each load. It has since been
> determined that a linear pattern of loads without dependencies is
> sufficient, and is significantly faster.
> 
> Measuring the speed of a null syscall with RFI fallback flush enabled
> gives the relative improvement:
> 
> P8 - 1.83x
> P9 - 1.75x
> 
> The flush also becomes simpler and more adaptable to different cache
> geometries.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> [mpe: Backport to 4.9]
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

Thanks for doing these. They all look okay to me.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4.4 backport 2/3] powerpc/64s: Simple RFI macro conversions
  2018-02-22 12:35   ` [PATCH v4.4 backport 2/3] powerpc/64s: Simple RFI macro conversions Michael Ellerman
@ 2018-02-23 12:11     ` Michael Ellerman
  2018-02-23 12:58       ` Greg KH
  0 siblings, 1 reply; 7+ messages in thread
From: Michael Ellerman @ 2018-02-23 12:11 UTC (permalink / raw)
  To: stable, greg; +Cc: npiggin, linuxppc-dev, linux, corsac

Michael Ellerman <mpe@ellerman.id.au> writes:
> Subject: [PATCH v4.4 backport 2/3] powerpc/64s: Simple RFI macro conversions
                     ^
                   4.9

Gah, sorry just realised these last two have "v4.4" in the subject, but
it should be "4.9".

cheers

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4.4 backport 2/3] powerpc/64s: Simple RFI macro conversions
  2018-02-23 12:11     ` Michael Ellerman
@ 2018-02-23 12:58       ` Greg KH
  2018-02-24  2:51         ` Michael Ellerman
  0 siblings, 1 reply; 7+ messages in thread
From: Greg KH @ 2018-02-23 12:58 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: stable, npiggin, linuxppc-dev, linux, corsac

On Fri, Feb 23, 2018 at 11:11:12PM +1100, Michael Ellerman wrote:
> Michael Ellerman <mpe@ellerman.id.au> writes:
> > Subject: [PATCH v4.4 backport 2/3] powerpc/64s: Simple RFI macro conversions
>                      ^
>                    4.9
> 
> Gah, sorry just realised these last two have "v4.4" in the subject, but
> it should be "4.9".

Ah, good, I was wondering about that :)

I'll apply these after lunch...

greg k-h

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4.4 backport 2/3] powerpc/64s: Simple RFI macro conversions
  2018-02-23 12:58       ` Greg KH
@ 2018-02-24  2:51         ` Michael Ellerman
  0 siblings, 0 replies; 7+ messages in thread
From: Michael Ellerman @ 2018-02-24  2:51 UTC (permalink / raw)
  To: Greg KH; +Cc: stable, npiggin, linuxppc-dev, linux, corsac

Greg KH <greg@kroah.com> writes:

> On Fri, Feb 23, 2018 at 11:11:12PM +1100, Michael Ellerman wrote:
>> Michael Ellerman <mpe@ellerman.id.au> writes:
>> > Subject: [PATCH v4.4 backport 2/3] powerpc/64s: Simple RFI macro conversions
>>                      ^
>>                    4.9
>> 
>> Gah, sorry just realised these last two have "v4.4" in the subject, but
>> it should be "4.9".
>
> Ah, good, I was wondering about that :)

Sorry for the confusion, I need to automate generation of the patch
subjects obviously :)

> I'll apply these after lunch...

Thanks.

cheers

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-02-24  2:52 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20180222065738.GB2428@kroah.com>
2018-02-22 12:35 ` [PATCH v4.9 backport 1/3] powerpc/64s: Fix conversion of slb_miss_common to use RFI_TO_USER/KERNEL Michael Ellerman
2018-02-22 12:35   ` [PATCH v4.4 backport 2/3] powerpc/64s: Simple RFI macro conversions Michael Ellerman
2018-02-23 12:11     ` Michael Ellerman
2018-02-23 12:58       ` Greg KH
2018-02-24  2:51         ` Michael Ellerman
2018-02-22 12:35   ` [PATCH v4.4 backport 3/3] powerpc/64s: Improve RFI L1-D cache flush fallback Michael Ellerman
2018-02-22 14:26     ` Nicholas Piggin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).