All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
@ 2012-01-18 14:35 Laurentiu Tudor
  2012-01-18 21:10 ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 14+ messages in thread
From: Laurentiu Tudor @ 2012-01-18 14:35 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Laurentiu Tudor

This patch adds a menuconfig option that allows controlling
the lazy interrupt disabling feature implemented by this
commit:

commit d04c56f73c30a5e593202ecfcf25ed43d42363a2
Author: Paul Mackerras
Date:   Wed Oct 4 16:47:49 2006 +1000

    [POWERPC] Lazy interrupt disabling for 64-bit machines

The code in 'powerpc/include/asm/hw_irq.h' was rearranged and
cleaned-up a bit in order to reduce the number of needed #ifdef's.

Signed-off-by: Laurentiu Tudor <Laurentiu.Tudor@freescale.com>
---
Patch is against
git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git next

 arch/powerpc/Kconfig                 |   16 ++++++
 arch/powerpc/include/asm/hw_irq.h    |   86 ++++++++++++++--------------------
 arch/powerpc/include/asm/irqflags.h  |   22 +++++++++
 arch/powerpc/include/asm/paca.h      |    4 ++
 arch/powerpc/kernel/asm-offsets.c    |    3 +
 arch/powerpc/kernel/entry_64.S       |    4 ++
 arch/powerpc/kernel/exceptions-64e.S |   36 +++++++++++---
 arch/powerpc/kernel/head_64.S        |   10 ++++
 arch/powerpc/kernel/idle_book3e.S    |    7 +++
 arch/powerpc/kernel/irq.c            |    5 ++
 arch/powerpc/kernel/setup_64.c       |    2 +
 11 files changed, 138 insertions(+), 57 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ce5e045..2792278 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -609,6 +609,22 @@ config SECCOMP
 
 	  If unsure, say Y. Only embedded should say N here.
 
+config PPC_LAZY_EE
+	bool
+	prompt "Lazy interrupt disabling" if PPC_BOOK3E_64
+	default y if PPC_BOOK3S_64
+	help
+	  Local interrupt disabling functions don't disable
+	  interrupts right away and instead just clear an
+	  internal 'interrupts are enabled' flag. If an
+	  interrupt is triggered during this time, the
+	  interrupt handling code checks the flag, and if
+	  interrupts are supposed to be off, disables them
+	  for real and returns, skipping interrupt handling.
+	  When interrupts are enabled back, the interrupt
+	  fires again and this time gets handled.
+
+	  If unsure, say N.
 endmenu
 
 config ISA_DMA_API
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index bb712c9..6f32593 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -13,7 +13,20 @@
 
 extern void timer_interrupt(struct pt_regs *);
 
+#ifdef CONFIG_BOOKE
+#define __hard_irq_enable()	asm volatile("wrteei 1" : : : "memory");
+#define __hard_irq_disable()	asm volatile("wrteei 0" : : : "memory");
+#else
 #ifdef CONFIG_PPC64
+#define __hard_irq_enable()	__mtmsrd(mfmsr() | MSR_EE, 1)
+#define __hard_irq_disable()	__mtmsrd(mfmsr() & ~MSR_EE, 1)
+#else
+#define __hard_irq_enable()	mtmsr(mfmsr() | MSR_EE)
+#define __hard_irq_disable()	mtmsr(mfmsr() & ~MSR_EE)
+#endif
+#endif /* CONFIG_BOOKE */
+
+#if defined(CONFIG_PPC64) && defined(CONFIG_PPC_LAZY_EE)
 #include <asm/paca.h>
 
 static inline unsigned long arch_local_save_flags(void)
@@ -49,9 +62,11 @@ static inline void arch_local_irq_enable(void)
 	arch_local_irq_restore(1);
 }
 
-static inline unsigned long arch_local_irq_save(void)
+static inline void hard_irq_disable(void)
 {
-	return arch_local_irq_disable();
+	__hard_irq_disable();
+	get_paca()->soft_enabled = 0;
+	get_paca()->hard_enabled = 0;
 }
 
 static inline bool arch_irqs_disabled_flags(unsigned long flags)
@@ -59,29 +74,7 @@ static inline bool arch_irqs_disabled_flags(unsigned long flags)
 	return flags == 0;
 }
 
-static inline bool arch_irqs_disabled(void)
-{
-	return arch_irqs_disabled_flags(arch_local_save_flags());
-}
-
-#ifdef CONFIG_PPC_BOOK3E
-#define __hard_irq_enable()	asm volatile("wrteei 1" : : : "memory");
-#define __hard_irq_disable()	asm volatile("wrteei 0" : : : "memory");
-#else
-#define __hard_irq_enable()	__mtmsrd(mfmsr() | MSR_EE, 1)
-#define __hard_irq_disable()	__mtmsrd(mfmsr() & ~MSR_EE, 1)
-#endif
-
-#define  hard_irq_disable()			\
-	do {					\
-		__hard_irq_disable();		\
-		get_paca()->soft_enabled = 0;	\
-		get_paca()->hard_enabled = 0;	\
-	} while(0)
-
-#else /* CONFIG_PPC64 */
-
-#define SET_MSR_EE(x)	mtmsr(x)
+#else /* CONFIG_PPC64 && CONFIG_PPC_LAZY_EE */
 
 static inline unsigned long arch_local_save_flags(void)
 {
@@ -97,34 +90,24 @@ static inline void arch_local_irq_restore(unsigned long flags)
 #endif
 }
 
-static inline unsigned long arch_local_irq_save(void)
+static inline void arch_local_irq_enable(void)
 {
-	unsigned long flags = arch_local_save_flags();
-#ifdef CONFIG_BOOKE
-	asm volatile("wrteei 0" : : : "memory");
-#else
-	SET_MSR_EE(flags & ~MSR_EE);
-#endif
-	return flags;
+	__hard_irq_enable();
 }
 
-static inline void arch_local_irq_disable(void)
+static inline unsigned long arch_local_irq_disable(void)
 {
-#ifdef CONFIG_BOOKE
-	asm volatile("wrteei 0" : : : "memory");
-#else
-	arch_local_irq_save();
-#endif
+	unsigned long flags;
+
+	flags = arch_local_save_flags();
+	__hard_irq_disable();
+
+	return flags;
 }
 
-static inline void arch_local_irq_enable(void)
+static inline void hard_irq_disable(void)
 {
-#ifdef CONFIG_BOOKE
-	asm volatile("wrteei 1" : : : "memory");
-#else
-	unsigned long msr = mfmsr();
-	SET_MSR_EE(msr | MSR_EE);
-#endif
+	__hard_irq_disable();
 }
 
 static inline bool arch_irqs_disabled_flags(unsigned long flags)
@@ -132,15 +115,18 @@ static inline bool arch_irqs_disabled_flags(unsigned long flags)
 	return (flags & MSR_EE) == 0;
 }
 
+#endif /* CONFIG_PPC64 && CONFIG_PPC_LAZY_EE */
+
+static inline unsigned long arch_local_irq_save(void)
+{
+	return arch_local_irq_disable();
+}
+
 static inline bool arch_irqs_disabled(void)
 {
 	return arch_irqs_disabled_flags(arch_local_save_flags());
 }
 
-#define hard_irq_disable()		arch_local_irq_disable()
-
-#endif /* CONFIG_PPC64 */
-
 #define ARCH_IRQ_INIT_FLAGS	IRQ_NOREQUEST
 
 /*
diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
index b0b06d8..9685b75c 100644
--- a/arch/powerpc/include/asm/irqflags.h
+++ b/arch/powerpc/include/asm/irqflags.h
@@ -39,6 +39,8 @@
 #define TRACE_ENABLE_INTS	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on)
 #define TRACE_DISABLE_INTS	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off)
 
+#ifdef CONFIG_PPC_LAZY_EE
+
 #define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip)		\
 	cmpdi	en,0;					\
 	bne	95f;					\
@@ -51,12 +53,32 @@
 	TRACE_AND_RESTORE_IRQ_PARTIAL(en,96f);	\
 	stb	en,PACASOFTIRQEN(r13);		\
 96:
+
+#else /* CONFIG_PPC_LAZY_EE */
+
+#define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip)		\
+	cmpdi	en,0;					\
+	bne	95f;					\
+	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off)	\
+	b	skip;					\
+95:	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on)	\
+	li	en,1;
+#define TRACE_AND_RESTORE_IRQ(en)		\
+	TRACE_AND_RESTORE_IRQ_PARTIAL(en,96f);	\
+96:
+
+#endif /* CONFIG_PPC_LAZY_EE */
+
 #else
 #define TRACE_ENABLE_INTS
 #define TRACE_DISABLE_INTS
 #define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip)
+#ifdef CONFIG_PPC_LAZY_EE
 #define TRACE_AND_RESTORE_IRQ(en)		\
 	stb	en,PACASOFTIRQEN(r13)
+#else
+#define TRACE_AND_RESTORE_IRQ(en)
+#endif
 #endif
 #endif
 
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 269c05a..b74c2b1 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -131,8 +131,12 @@ struct paca_struct {
 	u64 saved_r1;			/* r1 save for RTAS calls or PM */
 	u64 saved_msr;			/* MSR saved here by enter_rtas */
 	u16 trap_save;			/* Used when bad stack is encountered */
+
+#ifdef CONFIG_PPC_LAZY_EE
 	u8 soft_enabled;		/* irq soft-enable flag */
 	u8 hard_enabled;		/* set if irqs are enabled in MSR */
+#endif
+
 	u8 io_sync;			/* writel() needs spin_unlock sync */
 	u8 irq_work_pending;		/* IRQ_WORK interrupt while soft-disable */
 	u8 nap_state_lost;		/* NV GPR values lost in power7_idle */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 04caee7..5b1fac9 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -146,8 +146,11 @@ int main(void)
 	DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
 	DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase));
 	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
+#ifdef CONFIG_PPC_LAZY_EE
 	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
 	DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
+#endif
+
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
 #ifdef CONFIG_PPC_MM_SLICES
 	DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d834425..ce7620b 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -124,8 +124,10 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 	ld	r12,_MSR(r1)
 #endif /* CONFIG_TRACE_IRQFLAGS */
 	li	r10,1
+#ifdef CONFIG_PPC_LAZY_EE
 	stb	r10,PACASOFTIRQEN(r13)
 	stb	r10,PACAHARDIRQEN(r13)
+#endif
 	std	r10,SOFTE(r1)
 #ifdef CONFIG_PPC_ISERIES
 BEGIN_FW_FTR_SECTION
@@ -602,10 +604,12 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 2:
 	TRACE_AND_RESTORE_IRQ(r5);
 
+#ifdef CONFIG_PPC_LAZY_EE
 	/* extract EE bit and use it to restore paca->hard_enabled */
 	ld	r3,_MSR(r1)
 	rldicl	r4,r3,49,63		/* r0 = (r3 >> 15) & 1 */
 	stb	r4,PACAHARDIRQEN(r13)
+#endif
 
 #ifdef CONFIG_PPC_BOOK3E
 	b	.exception_return_book3e
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 429983c..d17abb3 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -96,11 +96,6 @@
 #define PROLOG_ADDITION_NONE_DBG
 #define PROLOG_ADDITION_NONE_MC
 
-#define PROLOG_ADDITION_MASKABLE_GEN					    \
-	lbz	r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */	    \
-	cmpwi	cr0,r11,0;		/* yes -> go out of line */	    \
-	beq	masked_interrupt_book3e;
-
 #define PROLOG_ADDITION_2REGS_GEN					    \
 	std	r14,PACA_EXGEN+EX_R14(r13);				    \
 	std	r15,PACA_EXGEN+EX_R15(r13)
@@ -120,11 +115,29 @@
 	std	r14,PACA_EXMC+EX_R14(r13);				    \
 	std	r15,PACA_EXMC+EX_R15(r13)
 
+#ifdef CONFIG_PPC_LAZY_EE
+#define PROLOG_ADDITION_MASKABLE_GEN					    \
+	lbz	r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */	    \
+	cmpwi	cr0,r11,0;		/* yes -> go out of line */	    \
+	beq	masked_interrupt_book3e;
+
 #define PROLOG_ADDITION_DOORBELL_GEN					    \
 	lbz	r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */	    \
 	cmpwi	cr0,r11,0;		/* yes -> go out of line */	    \
 	beq	masked_doorbell_book3e
-
+#else /* CONFIG_PPC_LAZY_EE */
+#define PROLOG_ADDITION_MASKABLE_GEN
+#define PROLOG_ADDITION_DOORBELL_GEN
+#endif /* CONFIG_PPC_LAZY_EE */
+
+#ifdef CONFIG_PPC_LAZY_EE
+#define GET_IRQ_SOFT_ENABLED						    \
+	lbz	r11,PACASOFTIRQEN(r13)
+#else
+#define GET_IRQ_SOFT_ENABLED						    \
+	mfmsr	r11;							    \
+	rlwimi	r11,r11,0,16,16
+#endif
 
 /* Core exception code for all exceptions except TLB misses.
  * XXX: Needs to make SPRN_SPRG_GEN depend on exception type
@@ -148,7 +161,7 @@
 	mfspr	r8,SPRN_XER;		/* save XER in stackframe */	    \
 	ld	r9,excf+EX_R1(r13);	/* load orig r1 back from PACA */   \
 	lwz	r10,excf+EX_CR(r13);	/* load orig CR back from PACA	*/  \
-	lbz	r11,PACASOFTIRQEN(r13);	/* get current IRQ softe */	    \
+	GET_IRQ_SOFT_ENABLED;		/* get current IRQ softe */	    \
 	ld	r12,exception_marker@toc(r2);				    \
 	li	r0,0;							    \
 	std	r3,GPR10(r1);		/* save r10 to stackframe */	    \
@@ -169,11 +182,18 @@
 
 /* Variants for the "ints" argument */
 #define INTS_KEEP
+
+#ifdef CONFIG_PPC_LAZY_EE
 #define INTS_DISABLE_SOFT						    \
 	stb	r0,PACASOFTIRQEN(r13);	/* mark interrupts soft-disabled */ \
 	TRACE_DISABLE_INTS;
 #define INTS_DISABLE_HARD						    \
 	stb	r0,PACAHARDIRQEN(r13); /* and hard disabled */
+#else
+#define INTS_DISABLE_SOFT
+#define INTS_DISABLE_HARD
+#endif
+
 #define INTS_DISABLE_ALL						    \
 	INTS_DISABLE_SOFT						    \
 	INTS_DISABLE_HARD
@@ -553,6 +573,7 @@ kernel_dbg_exc:
 	MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE)
 
 
+#ifdef CONFIG_PPC_LAZY_EE
 /*
  * An interrupt came in while soft-disabled; clear EE in SRR1,
  * clear paca->hard_enabled and return.
@@ -577,6 +598,7 @@ masked_interrupt_book3e_common:
 	mfspr	r13,SPRN_SPRG_GEN_SCRATCH;
 	rfi
 	b	.
+#endif /* CONFIG_PPC_LAZY_EE */
 
 /*
  * This is called from 0x300 and 0x400 handlers after the prologs with
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 06c7251..017d669 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -559,12 +559,14 @@ _GLOBAL(pmac_secondary_start)
 	add	r13,r13,r4		/* for this processor.		*/
 	SET_PACA(r13)			/* Save vaddr of paca in an SPRG*/
 
+#ifdef CONFIG_PPC_LAZY_EE
 	/* Mark interrupts soft and hard disabled (they might be enabled
 	 * in the PACA when doing hotplug)
 	 */
 	li	r0,0
 	stb	r0,PACASOFTIRQEN(r13)
 	stb	r0,PACAHARDIRQEN(r13)
+#endif
 
 	/* Create a temp kernel stack for use before relocation is on.	*/
 	ld	r1,PACAEMERGSP(r13)
@@ -621,14 +623,18 @@ __secondary_start:
 #ifdef CONFIG_PPC_ISERIES
 BEGIN_FW_FTR_SECTION
 	ori	r4,r4,MSR_EE
+#ifdef CONFIG_PPC_LAZY_EE
 	li	r8,1
 	stb	r8,PACAHARDIRQEN(r13)
+#endif
 END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
 #endif
+#ifdef CONFIG_PPC_LAZY_EE
 BEGIN_FW_FTR_SECTION
 	stb	r7,PACAHARDIRQEN(r13)
 END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
 	stb	r7,PACASOFTIRQEN(r13)
+#endif
 
 	mtspr	SPRN_SRR0,r3
 	mtspr	SPRN_SRR1,r4
@@ -775,8 +781,10 @@ _INIT_GLOBAL(start_here_common)
 
 	/* Load up the kernel context */
 5:
+#ifdef CONFIG_PPC_LAZY_EE
 	li	r5,0
 	stb	r5,PACASOFTIRQEN(r13)	/* Soft Disabled */
+#endif
 #ifdef CONFIG_PPC_ISERIES
 BEGIN_FW_FTR_SECTION
 	mfmsr	r5
@@ -785,7 +793,9 @@ BEGIN_FW_FTR_SECTION
 	li	r5,1
 END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
 #endif
+#ifdef CONFIG_PPC_LAZY_EE
 	stb	r5,PACAHARDIRQEN(r13)	/* Hard Disabled on others */
+#endif
 
 	bl	.start_kernel
 
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index 16c002d..34b09e0 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -28,6 +28,7 @@ _GLOBAL(book3e_idle)
 	/* Hard disable interrupts */
 	wrteei	0
 
+#ifdef CONFIG_PPC_LAZY_EE
 	/* Now check if an interrupt came in while we were soft disabled
 	 * since we may otherwise lose it (doorbells etc...). We know
 	 * that since PACAHARDIRQEN will have been cleared in that case.
@@ -35,6 +36,7 @@ _GLOBAL(book3e_idle)
 	lbz	r3,PACAHARDIRQEN(r13)
 	cmpwi	cr0,r3,0
 	beqlr
+#endif
 
 	/* Now we are going to mark ourselves as soft and hard enables in
 	 * order to be able to take interrupts while asleep. We inform lockdep
@@ -44,9 +46,12 @@ _GLOBAL(book3e_idle)
 	stdu    r1,-128(r1)
 	bl	.trace_hardirqs_on
 #endif
+
+#ifdef CONFIG_PPC_LAZY_EE
 	li	r0,1
 	stb	r0,PACASOFTIRQEN(r13)
 	stb	r0,PACAHARDIRQEN(r13)
+#endif
 	
 	/* Interrupts will make use return to LR, so get something we want
 	 * in there
@@ -56,10 +61,12 @@ _GLOBAL(book3e_idle)
 	/* Hard disable interrupts again */
 	wrteei	0
 
+#ifdef CONFIG_PPC_LAZY_EE
 	/* Mark them off again in the PACA as well */
 	li	r0,0
 	stb	r0,PACASOFTIRQEN(r13)
 	stb	r0,PACAHARDIRQEN(r13)
+#endif
 
 	/* Tell lockdep about it */
 #ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 701d4ac..e2d1784 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -99,6 +99,8 @@ EXPORT_SYMBOL(irq_desc);
 
 int distribute_irqs = 1;
 
+#ifdef CONFIG_PPC_LAZY_EE
+
 static inline notrace unsigned long get_hard_enabled(void)
 {
 	unsigned long enabled;
@@ -193,6 +195,9 @@ notrace void arch_local_irq_restore(unsigned long en)
 	__hard_irq_enable();
 }
 EXPORT_SYMBOL(arch_local_irq_restore);
+
+#endif /* CONFIG_PPC_LAZY_EE */
+
 #endif /* CONFIG_PPC64 */
 
 int arch_show_interrupts(struct seq_file *p, int prec)
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4cb8f1e..06f5b19 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -233,8 +233,10 @@ void __init early_setup(unsigned long dt_ptr)
 #ifdef CONFIG_SMP
 void early_setup_secondary(void)
 {
+#ifdef CONFIG_PPC_LAZY_EE
 	/* Mark interrupts enabled in PACA */
 	get_paca()->soft_enabled = 0;
+#endif
 
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu_secondary();
-- 
1.7.5.1

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-18 14:35 [PATCH] powerpc/booke64: Configurable lazy interrupt disabling Laurentiu Tudor
@ 2012-01-18 21:10 ` Benjamin Herrenschmidt
  2012-01-19 13:18   ` Tudor Laurentiu
  2012-01-19 19:21   ` Stuart Yoder
  0 siblings, 2 replies; 14+ messages in thread
From: Benjamin Herrenschmidt @ 2012-01-18 21:10 UTC (permalink / raw)
  To: Laurentiu Tudor; +Cc: linuxppc-dev

On Wed, 2012-01-18 at 16:35 +0200, Laurentiu Tudor wrote:
> This patch adds a menuconfig option that allows controlling
> the lazy interrupt disabling feature implemented by this
> commit:
> 
> commit d04c56f73c30a5e593202ecfcf25ed43d42363a2
> Author: Paul Mackerras
> Date:   Wed Oct 4 16:47:49 2006 +1000
> 
>     [POWERPC] Lazy interrupt disabling for 64-bit machines
> 
> The code in 'powerpc/include/asm/hw_irq.h' was rearranged and
> cleaned-up a bit in order to reduce the number of needed #ifdef's.

It's still nasty. Do you have numbers showing that it's worth disabling
on BookE ?

Cheers,
Ben.

> Signed-off-by: Laurentiu Tudor <Laurentiu.Tudor@freescale.com>
> ---
> Patch is against
> git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git next
> 
>  arch/powerpc/Kconfig                 |   16 ++++++
>  arch/powerpc/include/asm/hw_irq.h    |   86 ++++++++++++++--------------------
>  arch/powerpc/include/asm/irqflags.h  |   22 +++++++++
>  arch/powerpc/include/asm/paca.h      |    4 ++
>  arch/powerpc/kernel/asm-offsets.c    |    3 +
>  arch/powerpc/kernel/entry_64.S       |    4 ++
>  arch/powerpc/kernel/exceptions-64e.S |   36 +++++++++++---
>  arch/powerpc/kernel/head_64.S        |   10 ++++
>  arch/powerpc/kernel/idle_book3e.S    |    7 +++
>  arch/powerpc/kernel/irq.c            |    5 ++
>  arch/powerpc/kernel/setup_64.c       |    2 +
>  11 files changed, 138 insertions(+), 57 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index ce5e045..2792278 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -609,6 +609,22 @@ config SECCOMP
>  
>  	  If unsure, say Y. Only embedded should say N here.
>  
> +config PPC_LAZY_EE
> +	bool
> +	prompt "Lazy interrupt disabling" if PPC_BOOK3E_64
> +	default y if PPC_BOOK3S_64
> +	help
> +	  Local interrupt disabling functions don't disable
> +	  interrupts right away and instead just clear an
> +	  internal 'interrupts are enabled' flag. If an
> +	  interrupt is triggered during this time, the
> +	  interrupt handling code checks the flag, and if
> +	  interrupts are supposed to be off, disables them
> +	  for real and returns, skipping interrupt handling.
> +	  When interrupts are enabled back, the interrupt
> +	  fires again and this time gets handled.
> +
> +	  If unsure, say N.
>  endmenu
>  
>  config ISA_DMA_API
> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
> index bb712c9..6f32593 100644
> --- a/arch/powerpc/include/asm/hw_irq.h
> +++ b/arch/powerpc/include/asm/hw_irq.h
> @@ -13,7 +13,20 @@
>  
>  extern void timer_interrupt(struct pt_regs *);
>  
> +#ifdef CONFIG_BOOKE
> +#define __hard_irq_enable()	asm volatile("wrteei 1" : : : "memory");
> +#define __hard_irq_disable()	asm volatile("wrteei 0" : : : "memory");
> +#else
>  #ifdef CONFIG_PPC64
> +#define __hard_irq_enable()	__mtmsrd(mfmsr() | MSR_EE, 1)
> +#define __hard_irq_disable()	__mtmsrd(mfmsr() & ~MSR_EE, 1)
> +#else
> +#define __hard_irq_enable()	mtmsr(mfmsr() | MSR_EE)
> +#define __hard_irq_disable()	mtmsr(mfmsr() & ~MSR_EE)
> +#endif
> +#endif /* CONFIG_BOOKE */
> +
> +#if defined(CONFIG_PPC64) && defined(CONFIG_PPC_LAZY_EE)
>  #include <asm/paca.h>
>  
>  static inline unsigned long arch_local_save_flags(void)
> @@ -49,9 +62,11 @@ static inline void arch_local_irq_enable(void)
>  	arch_local_irq_restore(1);
>  }
>  
> -static inline unsigned long arch_local_irq_save(void)
> +static inline void hard_irq_disable(void)
>  {
> -	return arch_local_irq_disable();
> +	__hard_irq_disable();
> +	get_paca()->soft_enabled = 0;
> +	get_paca()->hard_enabled = 0;
>  }
>  
>  static inline bool arch_irqs_disabled_flags(unsigned long flags)
> @@ -59,29 +74,7 @@ static inline bool arch_irqs_disabled_flags(unsigned long flags)
>  	return flags == 0;
>  }
>  
> -static inline bool arch_irqs_disabled(void)
> -{
> -	return arch_irqs_disabled_flags(arch_local_save_flags());
> -}
> -
> -#ifdef CONFIG_PPC_BOOK3E
> -#define __hard_irq_enable()	asm volatile("wrteei 1" : : : "memory");
> -#define __hard_irq_disable()	asm volatile("wrteei 0" : : : "memory");
> -#else
> -#define __hard_irq_enable()	__mtmsrd(mfmsr() | MSR_EE, 1)
> -#define __hard_irq_disable()	__mtmsrd(mfmsr() & ~MSR_EE, 1)
> -#endif
> -
> -#define  hard_irq_disable()			\
> -	do {					\
> -		__hard_irq_disable();		\
> -		get_paca()->soft_enabled = 0;	\
> -		get_paca()->hard_enabled = 0;	\
> -	} while(0)
> -
> -#else /* CONFIG_PPC64 */
> -
> -#define SET_MSR_EE(x)	mtmsr(x)
> +#else /* CONFIG_PPC64 && CONFIG_PPC_LAZY_EE */
>  
>  static inline unsigned long arch_local_save_flags(void)
>  {
> @@ -97,34 +90,24 @@ static inline void arch_local_irq_restore(unsigned long flags)
>  #endif
>  }
>  
> -static inline unsigned long arch_local_irq_save(void)
> +static inline void arch_local_irq_enable(void)
>  {
> -	unsigned long flags = arch_local_save_flags();
> -#ifdef CONFIG_BOOKE
> -	asm volatile("wrteei 0" : : : "memory");
> -#else
> -	SET_MSR_EE(flags & ~MSR_EE);
> -#endif
> -	return flags;
> +	__hard_irq_enable();
>  }
>  
> -static inline void arch_local_irq_disable(void)
> +static inline unsigned long arch_local_irq_disable(void)
>  {
> -#ifdef CONFIG_BOOKE
> -	asm volatile("wrteei 0" : : : "memory");
> -#else
> -	arch_local_irq_save();
> -#endif
> +	unsigned long flags;
> +
> +	flags = arch_local_save_flags();
> +	__hard_irq_disable();
> +
> +	return flags;
>  }
>  
> -static inline void arch_local_irq_enable(void)
> +static inline void hard_irq_disable(void)
>  {
> -#ifdef CONFIG_BOOKE
> -	asm volatile("wrteei 1" : : : "memory");
> -#else
> -	unsigned long msr = mfmsr();
> -	SET_MSR_EE(msr | MSR_EE);
> -#endif
> +	__hard_irq_disable();
>  }
>  
>  static inline bool arch_irqs_disabled_flags(unsigned long flags)
> @@ -132,15 +115,18 @@ static inline bool arch_irqs_disabled_flags(unsigned long flags)
>  	return (flags & MSR_EE) == 0;
>  }
>  
> +#endif /* CONFIG_PPC64 && CONFIG_PPC_LAZY_EE */
> +
> +static inline unsigned long arch_local_irq_save(void)
> +{
> +	return arch_local_irq_disable();
> +}
> +
>  static inline bool arch_irqs_disabled(void)
>  {
>  	return arch_irqs_disabled_flags(arch_local_save_flags());
>  }
>  
> -#define hard_irq_disable()		arch_local_irq_disable()
> -
> -#endif /* CONFIG_PPC64 */
> -
>  #define ARCH_IRQ_INIT_FLAGS	IRQ_NOREQUEST
>  
>  /*
> diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
> index b0b06d8..9685b75c 100644
> --- a/arch/powerpc/include/asm/irqflags.h
> +++ b/arch/powerpc/include/asm/irqflags.h
> @@ -39,6 +39,8 @@
>  #define TRACE_ENABLE_INTS	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on)
>  #define TRACE_DISABLE_INTS	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off)
>  
> +#ifdef CONFIG_PPC_LAZY_EE
> +
>  #define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip)		\
>  	cmpdi	en,0;					\
>  	bne	95f;					\
> @@ -51,12 +53,32 @@
>  	TRACE_AND_RESTORE_IRQ_PARTIAL(en,96f);	\
>  	stb	en,PACASOFTIRQEN(r13);		\
>  96:
> +
> +#else /* CONFIG_PPC_LAZY_EE */
> +
> +#define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip)		\
> +	cmpdi	en,0;					\
> +	bne	95f;					\
> +	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off)	\
> +	b	skip;					\
> +95:	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on)	\
> +	li	en,1;
> +#define TRACE_AND_RESTORE_IRQ(en)		\
> +	TRACE_AND_RESTORE_IRQ_PARTIAL(en,96f);	\
> +96:
> +
> +#endif /* CONFIG_PPC_LAZY_EE */
> +
>  #else
>  #define TRACE_ENABLE_INTS
>  #define TRACE_DISABLE_INTS
>  #define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip)
> +#ifdef CONFIG_PPC_LAZY_EE
>  #define TRACE_AND_RESTORE_IRQ(en)		\
>  	stb	en,PACASOFTIRQEN(r13)
> +#else
> +#define TRACE_AND_RESTORE_IRQ(en)
> +#endif
>  #endif
>  #endif
>  
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index 269c05a..b74c2b1 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -131,8 +131,12 @@ struct paca_struct {
>  	u64 saved_r1;			/* r1 save for RTAS calls or PM */
>  	u64 saved_msr;			/* MSR saved here by enter_rtas */
>  	u16 trap_save;			/* Used when bad stack is encountered */
> +
> +#ifdef CONFIG_PPC_LAZY_EE
>  	u8 soft_enabled;		/* irq soft-enable flag */
>  	u8 hard_enabled;		/* set if irqs are enabled in MSR */
> +#endif
> +
>  	u8 io_sync;			/* writel() needs spin_unlock sync */
>  	u8 irq_work_pending;		/* IRQ_WORK interrupt while soft-disable */
>  	u8 nap_state_lost;		/* NV GPR values lost in power7_idle */
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index 04caee7..5b1fac9 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -146,8 +146,11 @@ int main(void)
>  	DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
>  	DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase));
>  	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
> +#ifdef CONFIG_PPC_LAZY_EE
>  	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
>  	DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
> +#endif
> +
>  	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
>  #ifdef CONFIG_PPC_MM_SLICES
>  	DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index d834425..ce7620b 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -124,8 +124,10 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
>  	ld	r12,_MSR(r1)
>  #endif /* CONFIG_TRACE_IRQFLAGS */
>  	li	r10,1
> +#ifdef CONFIG_PPC_LAZY_EE
>  	stb	r10,PACASOFTIRQEN(r13)
>  	stb	r10,PACAHARDIRQEN(r13)
> +#endif
>  	std	r10,SOFTE(r1)
>  #ifdef CONFIG_PPC_ISERIES
>  BEGIN_FW_FTR_SECTION
> @@ -602,10 +604,12 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
>  2:
>  	TRACE_AND_RESTORE_IRQ(r5);
>  
> +#ifdef CONFIG_PPC_LAZY_EE
>  	/* extract EE bit and use it to restore paca->hard_enabled */
>  	ld	r3,_MSR(r1)
>  	rldicl	r4,r3,49,63		/* r0 = (r3 >> 15) & 1 */
>  	stb	r4,PACAHARDIRQEN(r13)
> +#endif
>  
>  #ifdef CONFIG_PPC_BOOK3E
>  	b	.exception_return_book3e
> diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
> index 429983c..d17abb3 100644
> --- a/arch/powerpc/kernel/exceptions-64e.S
> +++ b/arch/powerpc/kernel/exceptions-64e.S
> @@ -96,11 +96,6 @@
>  #define PROLOG_ADDITION_NONE_DBG
>  #define PROLOG_ADDITION_NONE_MC
>  
> -#define PROLOG_ADDITION_MASKABLE_GEN					    \
> -	lbz	r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */	    \
> -	cmpwi	cr0,r11,0;		/* yes -> go out of line */	    \
> -	beq	masked_interrupt_book3e;
> -
>  #define PROLOG_ADDITION_2REGS_GEN					    \
>  	std	r14,PACA_EXGEN+EX_R14(r13);				    \
>  	std	r15,PACA_EXGEN+EX_R15(r13)
> @@ -120,11 +115,29 @@
>  	std	r14,PACA_EXMC+EX_R14(r13);				    \
>  	std	r15,PACA_EXMC+EX_R15(r13)
>  
> +#ifdef CONFIG_PPC_LAZY_EE
> +#define PROLOG_ADDITION_MASKABLE_GEN					    \
> +	lbz	r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */	    \
> +	cmpwi	cr0,r11,0;		/* yes -> go out of line */	    \
> +	beq	masked_interrupt_book3e;
> +
>  #define PROLOG_ADDITION_DOORBELL_GEN					    \
>  	lbz	r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */	    \
>  	cmpwi	cr0,r11,0;		/* yes -> go out of line */	    \
>  	beq	masked_doorbell_book3e
> -
> +#else /* CONFIG_PPC_LAZY_EE */
> +#define PROLOG_ADDITION_MASKABLE_GEN
> +#define PROLOG_ADDITION_DOORBELL_GEN
> +#endif /* CONFIG_PPC_LAZY_EE */
> +
> +#ifdef CONFIG_PPC_LAZY_EE
> +#define GET_IRQ_SOFT_ENABLED						    \
> +	lbz	r11,PACASOFTIRQEN(r13)
> +#else
> +#define GET_IRQ_SOFT_ENABLED						    \
> +	mfmsr	r11;							    \
> +	rlwimi	r11,r11,0,16,16
> +#endif
>  
>  /* Core exception code for all exceptions except TLB misses.
>   * XXX: Needs to make SPRN_SPRG_GEN depend on exception type
> @@ -148,7 +161,7 @@
>  	mfspr	r8,SPRN_XER;		/* save XER in stackframe */	    \
>  	ld	r9,excf+EX_R1(r13);	/* load orig r1 back from PACA */   \
>  	lwz	r10,excf+EX_CR(r13);	/* load orig CR back from PACA	*/  \
> -	lbz	r11,PACASOFTIRQEN(r13);	/* get current IRQ softe */	    \
> +	GET_IRQ_SOFT_ENABLED;		/* get current IRQ softe */	    \
>  	ld	r12,exception_marker@toc(r2);				    \
>  	li	r0,0;							    \
>  	std	r3,GPR10(r1);		/* save r10 to stackframe */	    \
> @@ -169,11 +182,18 @@
>  
>  /* Variants for the "ints" argument */
>  #define INTS_KEEP
> +
> +#ifdef CONFIG_PPC_LAZY_EE
>  #define INTS_DISABLE_SOFT						    \
>  	stb	r0,PACASOFTIRQEN(r13);	/* mark interrupts soft-disabled */ \
>  	TRACE_DISABLE_INTS;
>  #define INTS_DISABLE_HARD						    \
>  	stb	r0,PACAHARDIRQEN(r13); /* and hard disabled */
> +#else
> +#define INTS_DISABLE_SOFT
> +#define INTS_DISABLE_HARD
> +#endif
> +
>  #define INTS_DISABLE_ALL						    \
>  	INTS_DISABLE_SOFT						    \
>  	INTS_DISABLE_HARD
> @@ -553,6 +573,7 @@ kernel_dbg_exc:
>  	MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE)
>  
> 
> +#ifdef CONFIG_PPC_LAZY_EE
>  /*
>   * An interrupt came in while soft-disabled; clear EE in SRR1,
>   * clear paca->hard_enabled and return.
> @@ -577,6 +598,7 @@ masked_interrupt_book3e_common:
>  	mfspr	r13,SPRN_SPRG_GEN_SCRATCH;
>  	rfi
>  	b	.
> +#endif /* CONFIG_PPC_LAZY_EE */
>  
>  /*
>   * This is called from 0x300 and 0x400 handlers after the prologs with
> diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
> index 06c7251..017d669 100644
> --- a/arch/powerpc/kernel/head_64.S
> +++ b/arch/powerpc/kernel/head_64.S
> @@ -559,12 +559,14 @@ _GLOBAL(pmac_secondary_start)
>  	add	r13,r13,r4		/* for this processor.		*/
>  	SET_PACA(r13)			/* Save vaddr of paca in an SPRG*/
>  
> +#ifdef CONFIG_PPC_LAZY_EE
>  	/* Mark interrupts soft and hard disabled (they might be enabled
>  	 * in the PACA when doing hotplug)
>  	 */
>  	li	r0,0
>  	stb	r0,PACASOFTIRQEN(r13)
>  	stb	r0,PACAHARDIRQEN(r13)
> +#endif
>  
>  	/* Create a temp kernel stack for use before relocation is on.	*/
>  	ld	r1,PACAEMERGSP(r13)
> @@ -621,14 +623,18 @@ __secondary_start:
>  #ifdef CONFIG_PPC_ISERIES
>  BEGIN_FW_FTR_SECTION
>  	ori	r4,r4,MSR_EE
> +#ifdef CONFIG_PPC_LAZY_EE
>  	li	r8,1
>  	stb	r8,PACAHARDIRQEN(r13)
> +#endif
>  END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
>  #endif
> +#ifdef CONFIG_PPC_LAZY_EE
>  BEGIN_FW_FTR_SECTION
>  	stb	r7,PACAHARDIRQEN(r13)
>  END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
>  	stb	r7,PACASOFTIRQEN(r13)
> +#endif
>  
>  	mtspr	SPRN_SRR0,r3
>  	mtspr	SPRN_SRR1,r4
> @@ -775,8 +781,10 @@ _INIT_GLOBAL(start_here_common)
>  
>  	/* Load up the kernel context */
>  5:
> +#ifdef CONFIG_PPC_LAZY_EE
>  	li	r5,0
>  	stb	r5,PACASOFTIRQEN(r13)	/* Soft Disabled */
> +#endif
>  #ifdef CONFIG_PPC_ISERIES
>  BEGIN_FW_FTR_SECTION
>  	mfmsr	r5
> @@ -785,7 +793,9 @@ BEGIN_FW_FTR_SECTION
>  	li	r5,1
>  END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
>  #endif
> +#ifdef CONFIG_PPC_LAZY_EE
>  	stb	r5,PACAHARDIRQEN(r13)	/* Hard Disabled on others */
> +#endif
>  
>  	bl	.start_kernel
>  
> diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
> index 16c002d..34b09e0 100644
> --- a/arch/powerpc/kernel/idle_book3e.S
> +++ b/arch/powerpc/kernel/idle_book3e.S
> @@ -28,6 +28,7 @@ _GLOBAL(book3e_idle)
>  	/* Hard disable interrupts */
>  	wrteei	0
>  
> +#ifdef CONFIG_PPC_LAZY_EE
>  	/* Now check if an interrupt came in while we were soft disabled
>  	 * since we may otherwise lose it (doorbells etc...). We know
>  	 * that since PACAHARDIRQEN will have been cleared in that case.
> @@ -35,6 +36,7 @@ _GLOBAL(book3e_idle)
>  	lbz	r3,PACAHARDIRQEN(r13)
>  	cmpwi	cr0,r3,0
>  	beqlr
> +#endif
>  
>  	/* Now we are going to mark ourselves as soft and hard enables in
>  	 * order to be able to take interrupts while asleep. We inform lockdep
> @@ -44,9 +46,12 @@ _GLOBAL(book3e_idle)
>  	stdu    r1,-128(r1)
>  	bl	.trace_hardirqs_on
>  #endif
> +
> +#ifdef CONFIG_PPC_LAZY_EE
>  	li	r0,1
>  	stb	r0,PACASOFTIRQEN(r13)
>  	stb	r0,PACAHARDIRQEN(r13)
> +#endif
>  	
>  	/* Interrupts will make use return to LR, so get something we want
>  	 * in there
> @@ -56,10 +61,12 @@ _GLOBAL(book3e_idle)
>  	/* Hard disable interrupts again */
>  	wrteei	0
>  
> +#ifdef CONFIG_PPC_LAZY_EE
>  	/* Mark them off again in the PACA as well */
>  	li	r0,0
>  	stb	r0,PACASOFTIRQEN(r13)
>  	stb	r0,PACAHARDIRQEN(r13)
> +#endif
>  
>  	/* Tell lockdep about it */
>  #ifdef CONFIG_TRACE_IRQFLAGS
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index 701d4ac..e2d1784 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -99,6 +99,8 @@ EXPORT_SYMBOL(irq_desc);
>  
>  int distribute_irqs = 1;
>  
> +#ifdef CONFIG_PPC_LAZY_EE
> +
>  static inline notrace unsigned long get_hard_enabled(void)
>  {
>  	unsigned long enabled;
> @@ -193,6 +195,9 @@ notrace void arch_local_irq_restore(unsigned long en)
>  	__hard_irq_enable();
>  }
>  EXPORT_SYMBOL(arch_local_irq_restore);
> +
> +#endif /* CONFIG_PPC_LAZY_EE */
> +
>  #endif /* CONFIG_PPC64 */
>  
>  int arch_show_interrupts(struct seq_file *p, int prec)
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 4cb8f1e..06f5b19 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -233,8 +233,10 @@ void __init early_setup(unsigned long dt_ptr)
>  #ifdef CONFIG_SMP
>  void early_setup_secondary(void)
>  {
> +#ifdef CONFIG_PPC_LAZY_EE
>  	/* Mark interrupts enabled in PACA */
>  	get_paca()->soft_enabled = 0;
> +#endif
>  
>  	/* Initialize the hash table or TLB handling */
>  	early_init_mmu_secondary();

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-18 21:10 ` Benjamin Herrenschmidt
@ 2012-01-19 13:18   ` Tudor Laurentiu
  2012-01-19 19:21   ` Stuart Yoder
  1 sibling, 0 replies; 14+ messages in thread
From: Tudor Laurentiu @ 2012-01-19 13:18 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev

Hi Ben,

On 01/18/2012 11:10 PM, Benjamin Herrenschmidt wrote:
> On Wed, 2012-01-18 at 16:35 +0200, Laurentiu Tudor wrote:
>> This patch adds a menuconfig option that allows controlling
>> the lazy interrupt disabling feature implemented by this
>> commit:
>>
>> commit d04c56f73c30a5e593202ecfcf25ed43d42363a2
>> Author: Paul Mackerras
>> Date:   Wed Oct 4 16:47:49 2006 +1000
>>
>>      [POWERPC] Lazy interrupt disabling for 64-bit machines
>>
>> The code in 'powerpc/include/asm/hw_irq.h' was rearranged and
>> cleaned-up a bit in order to reduce the number of needed #ifdef's.
>
> It's still nasty.

Yep, there is still room for improvement. Perhaps trimming some of those 
various interrupt enable/disable functions & macros. However I don't 
think I have enough experience to do this ... so, any suggestions are 
welcomed.

> Do you have numbers showing that it's worth disabling
> on BookE ?

On fsl's p5020 there should be some performance gains because disabling 
"lazy ee" allows enabling the "external proxy" feature.
The thing is I can't prove this because I have no idea what benchmark to 
run. Let me know if you guys know of any relevant benchmark I could try.

---
Best Regards, Laurentiu

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-18 21:10 ` Benjamin Herrenschmidt
  2012-01-19 13:18   ` Tudor Laurentiu
@ 2012-01-19 19:21   ` Stuart Yoder
  2012-01-19 19:29     ` Stuart Yoder
  2012-01-20 23:02     ` Benjamin Herrenschmidt
  1 sibling, 2 replies; 14+ messages in thread
From: Stuart Yoder @ 2012-01-19 19:21 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Laurentiu Tudor, linuxppc-dev

On Wed, Jan 18, 2012 at 3:10 PM, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
> On Wed, 2012-01-18 at 16:35 +0200, Laurentiu Tudor wrote:
>> This patch adds a menuconfig option that allows controlling
>> the lazy interrupt disabling feature implemented by this
>> commit:
>>
>> commit d04c56f73c30a5e593202ecfcf25ed43d42363a2
>> Author: Paul Mackerras
>> Date: =A0 Wed Oct 4 16:47:49 2006 +1000
>>
>> =A0 =A0 [POWERPC] Lazy interrupt disabling for 64-bit machines
>>
>> The code in 'powerpc/include/asm/hw_irq.h' was rearranged and
>> cleaned-up a bit in order to reduce the number of needed #ifdef's.
>
> It's still nasty. Do you have numbers showing that it's worth disabling
> on BookE ?

It's not just about bare metal performance-- some platforms such as
the Freescale
Topaz hypervisor don't provide legacy IACK type interrupt
acknowledgment, and expect
the guest to support the external proxy mechanism.

With Topaz, interrupts go directly to guests and we don't want to require a
trap/hcall to do an IACK, as that adds potentially thousands of cycles of
latency to every interrupt.

As you know, with external proxy interrupts are acknowledged by the
hardware and it becomes problematic to replay the interrupt in
the context of lazy EE when interrupts are re-enabled.   The interrupt
will not fire again when you enable EE.

That is currently the issue, as we can't run the 64-bit kernel on Topaz.
Our option are:
  1) to provide an option to disable lazy EE
  2) do some kind of hack to replay interrupts with lazy EE
  3) change Topaz to support legacy IACK, but this gets ugly for
      various reasons.

Providing a config option to disable lazy EE seemed to be a good
approach.

Stuart

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-19 19:21   ` Stuart Yoder
@ 2012-01-19 19:29     ` Stuart Yoder
  2012-01-20 23:05       ` Benjamin Herrenschmidt
  2012-01-20 23:02     ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 14+ messages in thread
From: Stuart Yoder @ 2012-01-19 19:29 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Laurentiu Tudor, linuxppc-dev

On Thu, Jan 19, 2012 at 1:21 PM, Stuart Yoder <b08248@gmail.com> wrote:
> On Wed, Jan 18, 2012 at 3:10 PM, Benjamin Herrenschmidt
> <benh@kernel.crashing.org> wrote:
>> On Wed, 2012-01-18 at 16:35 +0200, Laurentiu Tudor wrote:
>>> This patch adds a menuconfig option that allows controlling
>>> the lazy interrupt disabling feature implemented by this
>>> commit:
>>>
>>> commit d04c56f73c30a5e593202ecfcf25ed43d42363a2
>>> Author: Paul Mackerras
>>> Date: =A0 Wed Oct 4 16:47:49 2006 +1000
>>>
>>> =A0 =A0 [POWERPC] Lazy interrupt disabling for 64-bit machines
>>>
>>> The code in 'powerpc/include/asm/hw_irq.h' was rearranged and
>>> cleaned-up a bit in order to reduce the number of needed #ifdef's.
>>
>> It's still nasty. Do you have numbers showing that it's worth disabling
>> on BookE ?
>
> It's not just about bare metal performance-- some platforms such as
> the Freescale
> Topaz hypervisor don't provide legacy IACK type interrupt
> acknowledgment, and expect
> the guest to support the external proxy mechanism.
>
> With Topaz, interrupts go directly to guests and we don't want to require=
 a
> trap/hcall to do an IACK, as that adds potentially thousands of cycles of
> latency to every interrupt.
>
> As you know, with external proxy interrupts are acknowledged by the
> hardware and it becomes problematic to replay the interrupt in
> the context of lazy EE when interrupts are re-enabled. =A0 The interrupt
> will not fire again when you enable EE.
>
> That is currently the issue, as we can't run the 64-bit kernel on Topaz.
> Our option are:
> =A01) to provide an option to disable lazy EE
> =A02) do some kind of hack to replay interrupts with lazy EE
> =A03) change Topaz to support legacy IACK, but this gets ugly for
> =A0 =A0 =A0various reasons.
>
> Providing a config option to disable lazy EE seemed to be a good
> approach.

Also, Scott had posted an approach to do option #2 a while back,
but as I  recall there was some negative feedback about this.  See:
<http://lists.ozlabs.org/pipermail/linuxppc-dev/2011-August/092103.html>

Stuart

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-19 19:21   ` Stuart Yoder
  2012-01-19 19:29     ` Stuart Yoder
@ 2012-01-20 23:02     ` Benjamin Herrenschmidt
  2012-01-23 19:31       ` Scott Wood
  1 sibling, 1 reply; 14+ messages in thread
From: Benjamin Herrenschmidt @ 2012-01-20 23:02 UTC (permalink / raw)
  To: Stuart Yoder; +Cc: Laurentiu Tudor, linuxppc-dev


> With Topaz, interrupts go directly to guests and we don't want to require a
> trap/hcall to do an IACK, as that adds potentially thousands of cycles of
> latency to every interrupt.
> 
> As you know, with external proxy interrupts are acknowledged by the
> hardware and it becomes problematic to replay the interrupt in
> the context of lazy EE when interrupts are re-enabled.   The interrupt
> will not fire again when you enable EE.

So you have a broken HW and broken HV design to start with ... :-)

> That is currently the issue, as we can't run the 64-bit kernel on Topaz.
> Our option are:
>   1) to provide an option to disable lazy EE
>   2) do some kind of hack to replay interrupts with lazy EE
>   3) change Topaz to support legacy IACK, but this gets ugly for
>       various reasons.
> 
> Providing a config option to disable lazy EE seemed to be a good
> approach.

A config option is bad because it gets us yet another step toward
non-generic kernels.

Why don't you look into what would be needed to do a replay ? There are
many ways to achieve this, using the DEC is one that wouldn't involve
the hypervisor for example. But the case of an interrupt occurring while
masked is actually pretty low key so the overhead of an hcall at that
point wouldn't be too bad I beleive, so you could also implement some
kind of retry hcall which can internally be implemented using a guest
doorbell or something similar.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-19 19:29     ` Stuart Yoder
@ 2012-01-20 23:05       ` Benjamin Herrenschmidt
  2012-01-23 19:21         ` Scott Wood
  0 siblings, 1 reply; 14+ messages in thread
From: Benjamin Herrenschmidt @ 2012-01-20 23:05 UTC (permalink / raw)
  To: Stuart Yoder; +Cc: Laurentiu Tudor, linuxppc-dev

On Thu, 2012-01-19 at 13:29 -0600, Stuart Yoder wrote:
> Also, Scott had posted an approach to do option #2 a while back,
> but as I  recall there was some negative feedback about this.  See:
> <http://lists.ozlabs.org/pipermail/linuxppc-dev/2011-August/092103.html>

I see... the problems with doorbells are workable tho. A reject hcall
could also raise the CPU priority to avoid lower priority interrupts for
example. The decrementer option works too.

Another approach is to do an hcall into the interrupt re-enable path
when an interrupt occurred while masked, which is what we do on i or
ps3, which could then trigger a replay.

Traditionally EE's have always been "level sensitive" on PowerPC, the
way you changed that is arguably an utterly broken HW design and I am
not too keen on changing our core interrupt handling to deal with it via
ifdef's if we can find a less invasive solution.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-20 23:05       ` Benjamin Herrenschmidt
@ 2012-01-23 19:21         ` Scott Wood
  2012-01-23 20:50           ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 14+ messages in thread
From: Scott Wood @ 2012-01-23 19:21 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Laurentiu Tudor, linuxppc-dev, Stuart Yoder

On 01/20/2012 05:05 PM, Benjamin Herrenschmidt wrote:
> On Thu, 2012-01-19 at 13:29 -0600, Stuart Yoder wrote:
>> Also, Scott had posted an approach to do option #2 a while back,
>> but as I  recall there was some negative feedback about this.  See:
>> <http://lists.ozlabs.org/pipermail/linuxppc-dev/2011-August/092103.html>
> 
> I see... the problems with doorbells are workable tho. A reject hcall
> could also raise the CPU priority to avoid lower priority interrupts for
> example. The decrementer option works too.
> 
> Another approach is to do an hcall into the interrupt re-enable path
> when an interrupt occurred while masked, which is what we do on i or
> ps3, which could then trigger a replay.

Here's what I previously wrote to you about this:
> If we never hard-enable with a pending interrupt (go straight to the
> exception entry), we can leave the interrupt in EPR.  If we hard-enable
> even briefly, we can't guarantee that another (higher priority)
> interrupt won't come in before the doorbell, so we need to be able to
> queue up a number of interrupts equal to the number of priorities in use.
> 
> Care will need to be taken to dequeue in proper order, whether the
> interrupt comes in via extirq or doorbell (e.g. if we take an extirq on
> hard-enable, and then the doorbell fires when that interrupt
> hard-enables, we don't want to run the lower-priority queued interrupt
> until after the high prio handler is done).
> 
> BTW, for non-booke, when is DEC checked when interrupts are hard-enabled
> as part of exception return?  Likewise with the PS3 HV thing.  I only
> see the iseries check in the exception path.

Perhaps the issues with a higher priority interrupt intervening can be
addressed by messing around with current task priority at the MPIC (with
an hcall introduced for the hv case, since currently task priority is
not exposed to the guest).  I haven't had time to revisit this, and
don't expect to soon.  If someone else wants to try, fine.  In the
meantime, lazy EE is causing problems.

I'd still like to know the answer to that last question.  It's difficult
to determine how to correctly implement this stuff for our hardware if
it looks like there are holes in the scheme for hardware that this is
supposed to already work with.

> Traditionally EE's have always been "level sensitive" on PowerPC,

You mean besides the places you already have to work around, such as
doorbells and book3s decrementers and other hypervisors... and you force
all functions that enable interrupts to create a stack frame to deal
with this.

> the way you changed that is arguably an utterly broken HW design

Just because you don't like it, and it doesn't play well with your hack,
doesn't make it "utterly broken".

> and I am not too keen on changing our core interrupt handling to deal with it via
> ifdef's if we can find a less invasive solution.

Wheras having to significantly change the way interrupts work because
the register size doubled is so totally reasonable...

What is the compelling reason for forcing lazy EE on us?  Why is it tied
to 64-bit?

-Scott

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-20 23:02     ` Benjamin Herrenschmidt
@ 2012-01-23 19:31       ` Scott Wood
  0 siblings, 0 replies; 14+ messages in thread
From: Scott Wood @ 2012-01-23 19:31 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Laurentiu Tudor, linuxppc-dev, Stuart Yoder

On 01/20/2012 05:02 PM, Benjamin Herrenschmidt wrote:
> 
>> With Topaz, interrupts go directly to guests and we don't want to require a
>> trap/hcall to do an IACK, as that adds potentially thousands of cycles of
>> latency to every interrupt.
>>
>> As you know, with external proxy interrupts are acknowledged by the
>> hardware and it becomes problematic to replay the interrupt in
>> the context of lazy EE when interrupts are re-enabled.   The interrupt
>> will not fire again when you enable EE.
> 
> So you have a broken HW and broken HV design to start with ... :-)

That "breakage" significantly reduces the overhead of handling
interrupts in a guest, since we don't need to involve the hypervisor (at
least on the entry side -- there's a hack to avoid it on EOI as well,
though that involves exposing some MPIC registers directly; users can
decide whether it's worth enabling for their use case).

-Scott

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-23 19:21         ` Scott Wood
@ 2012-01-23 20:50           ` Benjamin Herrenschmidt
  2012-01-25 14:32             ` Tudor Laurentiu
  2012-01-30 21:47             ` Scott Wood
  0 siblings, 2 replies; 14+ messages in thread
From: Benjamin Herrenschmidt @ 2012-01-23 20:50 UTC (permalink / raw)
  To: Scott Wood; +Cc: Laurentiu Tudor, linuxppc-dev, Stuart Yoder

On Mon, 2012-01-23 at 13:21 -0600, Scott Wood wrote:

> > BTW, for non-booke, when is DEC checked when interrupts are hard-enabled
> > as part of exception return?  Likewise with the PS3 HV thing.  I only
> > see the iseries check in the exception path.

Exception return restores them to their previous state, so shouldn't be
a problem unless your exception takes long enough to overflow the DEC in
which case you have other problems. The PS/3 case might be broken.

> Perhaps the issues with a higher priority interrupt intervening can be
> addressed by messing around with current task priority at the MPIC (with
> an hcall introduced for the hv case, since currently task priority is
> not exposed to the guest).  I haven't had time to revisit this, and
> don't expect to soon.  If someone else wants to try, fine.  In the
> meantime, lazy EE is causing problems.

Or by storing pending interrupts in an array.

> I'd still like to know the answer to that last question.  It's difficult
> to determine how to correctly implement this stuff for our hardware if
> it looks like there are holes in the scheme for hardware that this is
> supposed to already work with.
> 
> > Traditionally EE's have always been "level sensitive" on PowerPC,
> 
> You mean besides the places you already have to work around, such as
> doorbells 

Which you introduced as well... this is especially ironic since BookE
had the decrementer done right in the first place :-)

> and book3s decrementers 

Book3s decrementer is level sensitive based on the sign bit of the
decrementer (a bit odd but heh....) at least on 64-bit processors.

> and other hypervisors... 

I wouldn't take the PS3 HV and legacy iseries HV as good design
examples :-) The later was working around limited HW functionality at
the time as well. 

> and you force
> all functions that enable interrupts to create a stack frame to deal
> with this.

Right, but overall this is still more efficient performance wise on most
processors than whacking the MSR.

However the main thing is that this significantly improves the quality
of the samples obtained from performance interrupts which can now act as
pseudo-NMI up to a certain point.

It also helps with Alex PR KVM but that's just a detail really.

> > the way you changed that is arguably an utterly broken HW design
> 
> Just because you don't like it, and it doesn't play well with your hack,
> doesn't make it "utterly broken".

Deal with it. The "hack" has been there for long enough and works well.
I don't want compile-time conditional to change that behaviour.

> > and I am not too keen on changing our core interrupt handling to deal with it via
> > ifdef's if we can find a less invasive solution.
> 
> Wheras having to significantly change the way interrupts work because
> the register size doubled is so totally reasonable...

We were very tempted at some point to do lazy EE for 32-bit as well,
eventually decided we didn't care enough :-)

> What is the compelling reason for forcing lazy EE on us?  Why is it tied
> to 64-bit?

Because that's where we historically implemented it and because iSeries
more/less required to begin with. And I don't want to have a split
scheme, especially not a compile time one.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-23 20:50           ` Benjamin Herrenschmidt
@ 2012-01-25 14:32             ` Tudor Laurentiu
  2012-01-30 21:47             ` Scott Wood
  1 sibling, 0 replies; 14+ messages in thread
From: Tudor Laurentiu @ 2012-01-25 14:32 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Scott Wood, linuxppc-dev, Stuart Yoder

On 01/23/2012 10:50 PM, Benjamin Herrenschmidt wrote:
> On Mon, 2012-01-23 at 13:21 -0600, Scott Wood wrote:
>
>>> BTW, for non-booke, when is DEC checked when interrupts are hard-enabled
>>> as part of exception return?  Likewise with the PS3 HV thing.  I only
>>> see the iseries check in the exception path.
>
> Exception return restores them to their previous state, so shouldn't be
> a problem unless your exception takes long enough to overflow the DEC in
> which case you have other problems. The PS/3 case might be broken.
>
>> Perhaps the issues with a higher priority interrupt intervening can be
>> addressed by messing around with current task priority at the MPIC (with
>> an hcall introduced for the hv case, since currently task priority is
>> not exposed to the guest).  I haven't had time to revisit this, and
>> don't expect to soon.  If someone else wants to try, fine.  In the
>> meantime, lazy EE is causing problems.
>
> Or by storing pending interrupts in an array.
>
>> I'd still like to know the answer to that last question.  It's difficult
>> to determine how to correctly implement this stuff for our hardware if
>> it looks like there are holes in the scheme for hardware that this is
>> supposed to already work with.
>>
>>> Traditionally EE's have always been "level sensitive" on PowerPC,
>>
>> You mean besides the places you already have to work around, such as
>> doorbells
>
> Which you introduced as well... this is especially ironic since BookE
> had the decrementer done right in the first place :-)
>
>> and book3s decrementers
>
> Book3s decrementer is level sensitive based on the sign bit of the
> decrementer (a bit odd but heh....) at least on 64-bit processors.
>
>> and other hypervisors...
>
> I wouldn't take the PS3 HV and legacy iseries HV as good design
> examples :-) The later was working around limited HW functionality at
> the time as well.
>
>> and you force
>> all functions that enable interrupts to create a stack frame to deal
>> with this.
>
> Right, but overall this is still more efficient performance wise on most
> processors than whacking the MSR.
>
> However the main thing is that this significantly improves the quality
> of the samples obtained from performance interrupts which can now act as
> pseudo-NMI up to a certain point.
>
> It also helps with Alex PR KVM but that's just a detail really.
>
>>> the way you changed that is arguably an utterly broken HW design
>>
>> Just because you don't like it, and it doesn't play well with your hack,
>> doesn't make it "utterly broken".
>
> Deal with it. The "hack" has been there for long enough and works well.
> I don't want compile-time conditional to change that behaviour.
>
>>> and I am not too keen on changing our core interrupt handling to deal with it via
>>> ifdef's if we can find a less invasive solution.
>>
>> Wheras having to significantly change the way interrupts work because
>> the register size doubled is so totally reasonable...
>
> We were very tempted at some point to do lazy EE for 32-bit as well,
> eventually decided we didn't care enough :-)
>
>> What is the compelling reason for forcing lazy EE on us?  Why is it tied
>> to 64-bit?
>
> Because that's where we historically implemented it and because iSeries
> more/less required to begin with. And I don't want to have a split
> scheme, especially not a compile time one.

I'm thinking that we could get rid of the compile time option by using 
the "feature fixup" mechanism.

---
Best Regards, Laurentiu

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-23 20:50           ` Benjamin Herrenschmidt
  2012-01-25 14:32             ` Tudor Laurentiu
@ 2012-01-30 21:47             ` Scott Wood
  2012-01-30 22:15               ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 14+ messages in thread
From: Scott Wood @ 2012-01-30 21:47 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Laurentiu Tudor, linuxppc-dev, Stuart Yoder

On 01/23/2012 02:50 PM, Benjamin Herrenschmidt wrote:
> On Mon, 2012-01-23 at 13:21 -0600, Scott Wood wrote:
>> Perhaps the issues with a higher priority interrupt intervening can be
>> addressed by messing around with current task priority at the MPIC (with
>> an hcall introduced for the hv case, since currently task priority is
>> not exposed to the guest).  I haven't had time to revisit this, and
>> don't expect to soon.  If someone else wants to try, fine.  In the
>> meantime, lazy EE is causing problems.
> 
> Or by storing pending interrupts in an array.

Only the first one will happen in a context where we want to store.  The
issue is if we get another higher priority interrupt when we enable, and
that enables interrupts and we get the doorbell that wants to run the
saved irq.  If we get priorities out of order we'll EOI the wrong interrupt.

IIRC we now never enable interrupts while servicing one (are individual
handlers banned from doing this too?), in which case it shouldn't be an
issue.  I'm a bit hesitant to rely on that, but oh well.  Beats having
to add CTPR support to the hypervisor just for this.  We could throw a
WARN_ONCE if we see a stored interrupt when we take an external
interrupt exception.

>> and book3s decrementers 
> 
> Book3s decrementer is level sensitive based on the sign bit of the
> decrementer (a bit odd but heh....) at least on 64-bit processors.

So what's up with "On server, re-trigger the decrementer if it went
negative since some processors only trigger on edge transitions of the
sign bit" in arch_local_irq_restore()?

>> and other hypervisors... 
> 
> I wouldn't take the PS3 HV and legacy iseries HV as good design
> examples :-) The later was working around limited HW functionality at
> the time as well. 

Just pointing out we're not the first. :-)

>> and you force
>> all functions that enable interrupts to create a stack frame to deal
>> with this.
> 
> Right, but overall this is still more efficient performance wise on most
> processors than whacking the MSR.

Laurentiu ran lmbench on e5500 with/without lazy EE and the results were
mixed.  No large differences either way, but probably at least as many
tests were slower with lazy EE as were faster with lazy EE.  Or possibly
there was no significant difference and it was just noise from one run
to another (I'm not sure how many times he ran it or what the variation
was).

He did claim a noticeable increase in networking performance with
external proxy enabled.

I guess hard-EE is worse on some other chips?

> However the main thing is that this significantly improves the quality
> of the samples obtained from performance interrupts which can now act as
> pseudo-NMI up to a certain point.

Which is compensation for the hardware not doing it right with a proper
critical interrupt or equivalent, but yeah, that's a benefit.

>> What is the compelling reason for forcing lazy EE on us?  Why is it tied
>> to 64-bit?
> 
> Because that's where we historically implemented it and because iSeries
> more/less required to begin with. And I don't want to have a split
> scheme, especially not a compile time one.

We can probably live with it in this case -- the patch to disable lazy
EE was largely an artifact of my not having time to try a new approach,
and other people here wanting some fix sooner.

In general, though, I hope that the history of previously having 64-bit
to yourself doesn't mean that our 64-bit chips are treated second class
citizens, having to live with design decisions oriented around the chips
that got there first, with a mandate that there be no special kernel
builds, even just for optimization[1].  No, I don't want to go back to
one kernel per board, but some build-time configuration is reasonable on
embedded IMHO, as long as the possibilities are limited.  We're already
running a different build from book3s.

If the issue is just that you think making this particular feature
configurable would be a mess, fine (though I think it would have been
managable).

-Scott

[1] The hypervisor's issues with guest IACK should be fixable with an
hv-internal CTPR hack if anyone cares enough, but there would be a
performance cost to not using external proxy.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-30 21:47             ` Scott Wood
@ 2012-01-30 22:15               ` Benjamin Herrenschmidt
  2012-01-30 23:13                 ` Scott Wood
  0 siblings, 1 reply; 14+ messages in thread
From: Benjamin Herrenschmidt @ 2012-01-30 22:15 UTC (permalink / raw)
  To: Scott Wood; +Cc: Laurentiu Tudor, linuxppc-dev, Stuart Yoder

On Mon, 2012-01-30 at 15:47 -0600, Scott Wood wrote:

> Only the first one will happen in a context where we want to store.  The
> issue is if we get another higher priority interrupt when we enable, and
> that enables interrupts and we get the doorbell that wants to run the
> saved irq.  If we get priorities out of order we'll EOI the wrong interrupt.

Hrm, ok, what about in handle_masked we just "save" it onto some kind of
PACA local stack ? Then on enable, before actually turning EE back, we
see if there's something there and we hit do_IRQ() if there is. Your
get_irq() would preferrably pop things out of that little stack.

Any hole in that scheme ?

I'm thinking about reworking the lazy EE code, so maybe leave me a
couple of days. You did find a real bug on PS3 I believe and potentially
with Anton's dec replay stuff as well, when the enable is implicit in
the exception return path.

I'm thinking about breaking down that into a lower level function
returning whether we need a DEC replay, IRQ replay or nothing, and call
it in two contexts:

 - From arch...restore(), if we need to replay, we then tail-call an asm
helper which will generate an irq stack frame and call do_IRQ() or
timer_interrupt().

 - From exception restore, if we need to replay, we re-use the existing
stack frame, change the TRAP value and move on to
do_IRQ/timer_interrupt.

I may not have time to do that this week (hint hint, if you have
time ... :-) but that's what's on my mind atm.

> IIRC we now never enable interrupts while servicing one (are individual
> handlers banned from doing this too?), 

No I think they still can.

> in which case it shouldn't be an issue. 

> I'm a bit hesitant to rely on that, but oh well.  Beats having
> to add CTPR support to the hypervisor just for this.  We could throw a
> WARN_ONCE if we see a stored interrupt when we take an external
> interrupt exception.
>
> >> and book3s decrementers 
> > 
> > Book3s decrementer is level sensitive based on the sign bit of the
> > decrementer (a bit odd but heh....) at least on 64-bit processors.
> 
> So what's up with "On server, re-trigger the decrementer if it went
> negative since some processors only trigger on edge transitions of the
> sign bit" in arch_local_irq_restore()?

Ask Anton or Paulus, at least on P7 it's level :-) I think maybe old
Power3 had it different.

> >> and other hypervisors... 
> > 
> > I wouldn't take the PS3 HV and legacy iseries HV as good design
> > examples :-) The later was working around limited HW functionality at
> > the time as well. 
> 
> Just pointing out we're not the first. :-)

Yeah yeah ok :-) I hope you do see my point however of not wanting to
get into an ifdef mess all over again... If we can get that stuff
reasonably efficiently NOP'ed out, that would do, tho I suppose one of
your concerns is the generation of a stack frame for re-enabling.

One possibility would be to inline the part that tests if the hw irq
happened, and use asm to branch out of line to something that will then
make up a stack frame separately. It's a bit gross but would remove the
cost of creating stack frames in callers.

> >> and you force
> >> all functions that enable interrupts to create a stack frame to deal
> >> with this.
> > 
> > Right, but overall this is still more efficient performance wise on most
> > processors than whacking the MSR.
> 
> Laurentiu ran lmbench on e5500 with/without lazy EE and the results were
> mixed.  No large differences either way, but probably at least as many
> tests were slower with lazy EE as were faster with lazy EE.  Or possibly
> there was no significant difference and it was just noise from one run
> to another (I'm not sure how many times he ran it or what the variation
> was).
> 
> He did claim a noticeable increase in networking performance with
> external proxy enabled.

Hrm, any decent networking HW should mitigate interrupts and mostly rely
on polling... you must have been doing something wrong :-)

> I guess hard-EE is worse on some other chips?

Hard EE is bad on server chips afaik, tho mtmsrd x,1 does mitigate the
damage.

> > However the main thing is that this significantly improves the quality
> > of the samples obtained from performance interrupts which can now act as
> > pseudo-NMI up to a certain point.
> 
> Which is compensation for the hardware not doing it right with a proper
> critical interrupt or equivalent, but yeah, that's a benefit.

Right, server has no concept really of critical interrupts.

> >> What is the compelling reason for forcing lazy EE on us?  Why is it tied
> >> to 64-bit?
> > 
> > Because that's where we historically implemented it and because iSeries
> > more/less required to begin with. And I don't want to have a split
> > scheme, especially not a compile time one.
> 
> We can probably live with it in this case -- the patch to disable lazy
> EE was largely an artifact of my not having time to try a new approach,
> and other people here wanting some fix sooner.
> 
> In general, though, I hope that the history of previously having 64-bit
> to yourself doesn't mean that our 64-bit chips are treated second class
> citizens, having to live with design decisions oriented around the chips
> that got there first, with a mandate that there be no special kernel
> builds, even just for optimization[1]. 

You can always do your own one-processor one-arch build of course, I
want to keep the -possiblity- of a common build as much as possible (tho
I do know that we can't do a common build with 64E and 64S today, I'm
still trying to keep that door open). Oh and we do have 64E chips too
btw :-)

So don't worry too much there, and if you really can't fix the problem
with Lazy EE in a satifactory way we can re-visit. But I dislike too
much conditional in those code path, it just makes things harder to
maintain.

>  No, I don't want to go back to
> one kernel per board, but some build-time configuration is reasonable on
> embedded IMHO, as long as the possibilities are limited.  We're already
> running a different build from book3s.
> 
> If the issue is just that you think making this particular feature
> configurable would be a mess, fine (though I think it would have been
> managable).
> 
> -Scott
> 
> [1] The hypervisor's issues with guest IACK should be fixable with an
> hv-internal CTPR hack if anyone cares enough, but there would be a
> performance cost to not using external proxy.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling
  2012-01-30 22:15               ` Benjamin Herrenschmidt
@ 2012-01-30 23:13                 ` Scott Wood
  0 siblings, 0 replies; 14+ messages in thread
From: Scott Wood @ 2012-01-30 23:13 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Laurentiu Tudor, linuxppc-dev, Stuart Yoder

On 01/30/2012 04:15 PM, Benjamin Herrenschmidt wrote:
> On Mon, 2012-01-30 at 15:47 -0600, Scott Wood wrote:
> 
>> Only the first one will happen in a context where we want to store.  The
>> issue is if we get another higher priority interrupt when we enable, and
>> that enables interrupts and we get the doorbell that wants to run the
>> saved irq.  If we get priorities out of order we'll EOI the wrong interrupt.
> 
> Hrm, ok, what about in handle_masked we just "save" it onto some kind of
> PACA local stack ? Then on enable, before actually turning EE back, we
> see if there's something there and we hit do_IRQ() if there is. Your
> get_irq() would preferrably pop things out of that little stack.
> 
> Any hole in that scheme ?

If we never enable EE, there's no need for a stack -- we disable EE on
the first interrupt and can leave it in EPR.  It's similar to my
original patch, but with the exception hack replaced with a call to
do_IRQ().  The quality of the regs you pass (if any) may suffer, which
is why I did the exception hack, but I can live with that if you can.

>> IIRC we now never enable interrupts while servicing one (are individual
>> handlers banned from doing this too?), 
> 
> No I think they still can.

OK.  Another option could be to use the doorbell and store EPR
somewhere, but make sure if we get a real interrupt and there's a
pending interrupt stored, we clear it out and process both in proper
order.  When the doorbell eventually fires it's a nop.  Testing this
would require some effort, though.  Better to stick with the simple
scheme where we never enable EE with a pending interrupt.

>>> However the main thing is that this significantly improves the quality
>>> of the samples obtained from performance interrupts which can now act as
>>> pseudo-NMI up to a certain point.
>>
>> Which is compensation for the hardware not doing it right with a proper
>> critical interrupt or equivalent, but yeah, that's a benefit.
> 
> Right, server has no concept really of critical interrupts.

Would be nice if the embedded version used critical, though (or could be
configured to do so).

-Scott

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2012-01-30 23:13 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-18 14:35 [PATCH] powerpc/booke64: Configurable lazy interrupt disabling Laurentiu Tudor
2012-01-18 21:10 ` Benjamin Herrenschmidt
2012-01-19 13:18   ` Tudor Laurentiu
2012-01-19 19:21   ` Stuart Yoder
2012-01-19 19:29     ` Stuart Yoder
2012-01-20 23:05       ` Benjamin Herrenschmidt
2012-01-23 19:21         ` Scott Wood
2012-01-23 20:50           ` Benjamin Herrenschmidt
2012-01-25 14:32             ` Tudor Laurentiu
2012-01-30 21:47             ` Scott Wood
2012-01-30 22:15               ` Benjamin Herrenschmidt
2012-01-30 23:13                 ` Scott Wood
2012-01-20 23:02     ` Benjamin Herrenschmidt
2012-01-23 19:31       ` Scott Wood

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.