* [PATCH 00/11] preempt_count rework -v3
@ 2013-09-17  9:10 Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 01/11] x86: Use asm goto to implement better modify_and_test() functions Peter Zijlstra
                   ` (12 more replies)
  0 siblings, 13 replies; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17  9:10 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Peter Zijlstra

These patches optimize preempt_enable(), first by folding the preempt and
need_resched tests into one -- this should work for all architectures -- and
second by providing per-arch preempt_count implementations, with x86 using a
per-cpu preempt_count for fastest access.

These patches have been boot tested with CONFIG_PREEMPT=y on x86_64 and
survive building an x86_64-defconfig kernel.

   text    data     bss     filename
11387014  1454776 1187840 defconfig-build/vmlinux.before
11352294  1454776 1187840 defconfig-build/vmlinux.after
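
At the C level the optimized preempt_enable() boils down to a single
decrement-and-test. Roughly -- a sketch of where the series ends up, using
the primitive names introduced by the later patches, not a literal quote:

#define preempt_enable() \
do { \
	barrier(); \
	if (unlikely(__preempt_count_dec_and_test())) \
		__preempt_schedule(); \
} while (0)

On x86 this compiles down to the single decl + je pair visible in the
"after" disassembly below.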

void kick_process(struct task_struct *p)
{
	int cpu;

	preempt_disable();
	cpu = task_cpu(p);
	if ((cpu != smp_processor_id()) && task_curr(p))
		smp_send_reschedule(cpu);
	preempt_enable();
}

The lines marked I/D are the preempt_count increment/decrement (and the
need_resched test) that this series targets.

before:

  ffffffff81073f00 <kick_process>:
  ffffffff81073f00:       55                      push   %rbp
I ffffffff81073f01:       65 48 8b 04 25 f0 b7    mov    %gs:0xb7f0,%rax
  ffffffff81073f08:       00 00 
I ffffffff81073f0a:       83 80 44 e0 ff ff 01    addl   $0x1,-0x1fbc(%rax)
  ffffffff81073f11:       48 89 e5                mov    %rsp,%rbp
  ffffffff81073f14:       48 8b 47 08             mov    0x8(%rdi),%rax
  ffffffff81073f18:       8b 50 18                mov    0x18(%rax),%edx
  ffffffff81073f1b:       65 8b 04 25 1c b0 00    mov    %gs:0xb01c,%eax
  ffffffff81073f22:       00 
  ffffffff81073f23:       39 c2                   cmp    %eax,%edx
  ffffffff81073f25:       74 1b                   je     ffffffff81073f42 <kick_process+0x42>
  ffffffff81073f27:       89 d1                   mov    %edx,%ecx
  ffffffff81073f29:       48 c7 c0 40 2c 01 00    mov    $0x12c40,%rax
  ffffffff81073f30:       48 8b 0c cd 00 25 cc    mov    -0x7e33db00(,%rcx,8),%rcx
  ffffffff81073f37:       81 
  ffffffff81073f38:       48 3b bc 08 00 08 00    cmp    0x800(%rax,%rcx,1),%rdi
  ffffffff81073f3f:       00 
  ffffffff81073f40:       74 2e                   je     ffffffff81073f70 <kick_process+0x70>
D ffffffff81073f42:       65 48 8b 04 25 f0 b7    mov    %gs:0xb7f0,%rax
  ffffffff81073f49:       00 00 
D ffffffff81073f4b:       83 a8 44 e0 ff ff 01    subl   $0x1,-0x1fbc(%rax)
D ffffffff81073f52:       48 8b 80 38 e0 ff ff    mov    -0x1fc8(%rax),%rax
D ffffffff81073f59:       a8 08                   test   $0x8,%al
D ffffffff81073f5b:       75 02                   jne    ffffffff81073f5f <kick_process+0x5f>
  ffffffff81073f5d:       5d                      pop    %rbp
  ffffffff81073f5e:       c3                      retq   
D ffffffff81073f5f:       e8 9c 6f 75 00          callq  ffffffff817caf00 <preempt_schedule>
  ffffffff81073f64:       5d                      pop    %rbp
  ffffffff81073f65:       c3                      retq   
  ffffffff81073f66:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
  ffffffff81073f6d:       00 00 00 
  ffffffff81073f70:       89 d7                   mov    %edx,%edi
  ffffffff81073f72:       ff 15 a8 99 ba 00       callq  *0xba99a8(%rip)        # ffffffff81c1d920 <smp_ops+0x20>
  ffffffff81073f78:       eb c8                   jmp    ffffffff81073f42 <kick_process+0x42>
  ffffffff81073f7a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
   
   
after:
   
  ffffffff8106f5e0 <kick_process>:
  ffffffff8106f5e0:       55                      push   %rbp
I ffffffff8106f5e1:       65 ff 04 25 e0 b7 00    incl   %gs:0xb7e0
  ffffffff8106f5e8:       00 
  ffffffff8106f5e9:       48 89 e5                mov    %rsp,%rbp
  ffffffff8106f5ec:       48 8b 47 08             mov    0x8(%rdi),%rax
  ffffffff8106f5f0:       8b 50 18                mov    0x18(%rax),%edx
  ffffffff8106f5f3:       65 8b 04 25 1c b0 00    mov    %gs:0xb01c,%eax
  ffffffff8106f5fa:       00 
  ffffffff8106f5fb:       39 c2                   cmp    %eax,%edx
  ffffffff8106f5fd:       74 1b                   je     ffffffff8106f61a <kick_process+0x3a>
  ffffffff8106f5ff:       89 d1                   mov    %edx,%ecx
  ffffffff8106f601:       48 c7 c0 40 2c 01 00    mov    $0x12c40,%rax
  ffffffff8106f608:       48 8b 0c cd 00 25 cc    mov    -0x7e33db00(,%rcx,8),%rcx
  ffffffff8106f60f:       81 
  ffffffff8106f610:       48 3b bc 08 00 08 00    cmp    0x800(%rax,%rcx,1),%rdi
  ffffffff8106f617:       00 
  ffffffff8106f618:       74 26                   je     ffffffff8106f640 <kick_process+0x60>
D ffffffff8106f61a:       65 ff 0c 25 e0 b7 00    decl   %gs:0xb7e0
  ffffffff8106f621:       00 
D ffffffff8106f622:       74 0c                   je     ffffffff8106f630 <kick_process+0x50>
  ffffffff8106f624:       5d                      pop    %rbp
  ffffffff8106f625:       c3                      retq   
  ffffffff8106f626:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
  ffffffff8106f62d:       00 00 00 
D ffffffff8106f630:       e8 fb b4 f9 ff          callq  ffffffff8100ab30 <___preempt_schedule>
  ffffffff8106f635:       5d                      pop    %rbp
  ffffffff8106f636:       c3                      retq   
  ffffffff8106f637:       66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
  ffffffff8106f63e:       00 00 
  ffffffff8106f640:       89 d7                   mov    %edx,%edi
  ffffffff8106f642:       ff 15 d8 e2 ba 00       callq  *0xbae2d8(%rip)        # ffffffff81c1d920 <smp_ops+0x20>
  ffffffff8106f648:       eb d0                   jmp    ffffffff8106f61a <kick_process+0x3a>
  ffffffff8106f64a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)



* [PATCH 01/11] x86: Use asm goto to implement better modify_and_test() functions
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
@ 2013-09-17  9:10 ` Peter Zijlstra
  2013-09-18 18:44   ` Linus Torvalds
  2013-09-17  9:10 ` [PATCH 02/11] sched, rcu: Make RCU use resched_cpu() Peter Zijlstra
                   ` (11 subsequent siblings)
  12 siblings, 1 reply; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17  9:10 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Peter Zijlstra

[-- Attachment #1: peterz-rmwcc.patch --]
[-- Type: text/plain, Size: 8130 bytes --]

Linus suggested using asm goto to get rid of the typical SETcc + TEST
instruction pair -- which also clobbers an extra register -- in our
modify_and_test() functions.

Because asm goto doesn't allow output operands, it has to include an
unconditional memory clobber whenever it changes a memory variable, to force
a reload.

Luckily all atomic ops already imply a compiler barrier to go along
with their memory barrier semantics.
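
As a rough illustration of the win (a sketch only -- the release() helper
and the refs field are made up, and exact codegen is up to the compiler),
a caller such as

	if (atomic_dec_and_test(&v->refs))
		release(v);

goes from materializing the flag in a register to branching on it directly:

	/* without asm goto:            with asm goto:
	 *   lock decl (%rdi)             lock decl (%rdi)
	 *   sete  %al                    je    .Lrelease
	 *   test  %al,%al
	 *   jne   .Lrelease
	 */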

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 arch/x86/include/asm/atomic.h      |   29 ++++----------------------
 arch/x86/include/asm/atomic64_64.h |   28 +++----------------------
 arch/x86/include/asm/bitops.h      |   24 +++------------------
 arch/x86/include/asm/local.h       |   28 +++----------------------
 arch/x86/include/asm/rmwcc.h       |   41 +++++++++++++++++++++++++++++++++++++
 5 files changed, 58 insertions(+), 92 deletions(-)

--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -6,6 +6,7 @@
 #include <asm/processor.h>
 #include <asm/alternative.h>
 #include <asm/cmpxchg.h>
+#include <asm/rmwcc.h>
 
 /*
  * Atomic operations that C can't guarantee us.  Useful for
@@ -76,12 +77,7 @@ static inline void atomic_sub(int i, ato
  */
 static inline int atomic_sub_and_test(int i, atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, i, "%0", "e");
 }
 
 /**
@@ -118,12 +114,7 @@ static inline void atomic_dec(atomic_t *
  */
 static inline int atomic_dec_and_test(atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "decl %0; sete %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
 }
 
 /**
@@ -136,12 +127,7 @@ static inline int atomic_dec_and_test(at
  */
 static inline int atomic_inc_and_test(atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "incl %0; sete %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
 }
 
 /**
@@ -155,12 +141,7 @@ static inline int atomic_inc_and_test(at
  */
 static inline int atomic_add_negative(int i, atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, i, "%0", "s");
 }
 
 /**
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -72,12 +72,7 @@ static inline void atomic64_sub(long i,
  */
 static inline int atomic64_sub_and_test(long i, atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "subq %2,%0; sete %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "er" (i), "m" (v->counter) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, i, "%0", "e");
 }
 
 /**
@@ -116,12 +111,7 @@ static inline void atomic64_dec(atomic64
  */
 static inline int atomic64_dec_and_test(atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "decq %0; sete %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "m" (v->counter) : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e");
 }
 
 /**
@@ -134,12 +124,7 @@ static inline int atomic64_dec_and_test(
  */
 static inline int atomic64_inc_and_test(atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "incq %0; sete %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "m" (v->counter) : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e");
 }
 
 /**
@@ -153,12 +138,7 @@ static inline int atomic64_inc_and_test(
  */
 static inline int atomic64_add_negative(long i, atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "addq %2,%0; sets %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "er" (i), "m" (v->counter) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, i, "%0", "s");
 }
 
 /**
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -14,6 +14,7 @@
 
 #include <linux/compiler.h>
 #include <asm/alternative.h>
+#include <asm/rmwcc.h>
 
 #if BITS_PER_LONG == 32
 # define _BITOPS_LONG_SHIFT 5
@@ -204,12 +205,7 @@ static inline void change_bit(long nr, v
  */
 static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
-
-	asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
-		     "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
-	return oldbit;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, nr, "%0", "c");
 }
 
 /**
@@ -255,13 +251,7 @@ static inline int __test_and_set_bit(lon
  */
 static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
-
-	asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
-		     "sbb %0,%0"
-		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
-	return oldbit;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, nr, "%0", "c");
 }
 
 /**
@@ -314,13 +304,7 @@ static inline int __test_and_change_bit(
  */
 static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
-
-	asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
-		     "sbb %0,%0"
-		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
-	return oldbit;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, nr, "%0", "c");
 }
 
 static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -52,12 +52,7 @@ static inline void local_sub(long i, loc
  */
 static inline int local_sub_and_test(long i, local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_SUB "%2,%0; sete %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, i, "%0", "e");
 }
 
 /**
@@ -70,12 +65,7 @@ static inline int local_sub_and_test(lon
  */
 static inline int local_dec_and_test(local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_DEC "%0; sete %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e");
 }
 
 /**
@@ -88,12 +78,7 @@ static inline int local_dec_and_test(loc
  */
 static inline int local_inc_and_test(local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_INC "%0; sete %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e");
 }
 
 /**
@@ -107,12 +92,7 @@ static inline int local_inc_and_test(loc
  */
 static inline int local_add_negative(long i, local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_ADD "%2,%0; sets %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, i, "%0", "s");
 }
 
 /**
--- /dev/null
+++ b/arch/x86/include/asm/rmwcc.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_X86_RMWcc
+#define _ASM_X86_RMWcc
+
+#ifdef CC_HAVE_ASM_GOTO
+
+#define __GEN_RMWcc(fullop, var, cc, ...)				\
+do {									\
+	asm volatile goto (fullop "; j" cc " %l[cc_label]"		\
+			: : "m" (var), ## __VA_ARGS__ 			\
+			: "memory" : cc_label);				\
+	return 0;							\
+cc_label:								\
+	return 1;							\
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc) 				\
+	__GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, val, arg0, cc)			\
+	__GEN_RMWcc(op " %1, " arg0, var, cc, "er" (val))
+
+#else /* !CC_HAVE_ASM_GOTO */
+
+#define __GEN_RMWcc(fullop, var, cc, ...)				\
+do {									\
+	char c;								\
+	asm volatile (fullop "; set" cc " %1"				\
+			: "+m" (var), "=qm" (c)				\
+			: __VA_ARGS__ : "memory");			\
+	return c != 0;							\
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc)				\
+	__GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, val, arg0, cc)			\
+	__GEN_RMWcc(op " %2, " arg0, var, cc, "er" (val))
+
+#endif /* CC_HAVE_ASM_GOTO */
+
+#endif /* _ASM_X86_RMWcc */




* [PATCH 02/11] sched, rcu: Make RCU use resched_cpu()
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 01/11] x86: Use asm goto to implement better modify_and_test() functions Peter Zijlstra
@ 2013-09-17  9:10 ` Peter Zijlstra
  2013-09-17 14:40   ` Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 03/11] sched: Remove {set,clear}_need_resched Peter Zijlstra
                   ` (10 subsequent siblings)
  12 siblings, 1 reply; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17  9:10 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Peter Zijlstra, Paul McKenney

[-- Attachment #1: peterz-rcu-resched_cpu.patch --]
[-- Type: text/plain, Size: 2110 bytes --]

We're going to deprecate and remove set_need_resched() because it will do
the wrong thing: it merely sets TIF_NEED_RESCHED, which is no longer
sufficient once the need_resched state is also folded into the preempt_count
(see patch 6). Make an exception for RCU and allow it to use resched_cpu(),
which will do the right thing.

Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 kernel/rcutree.c    |   15 ++++++++++++++-
 kernel/sched/core.c |    4 ++--
 2 files changed, 16 insertions(+), 3 deletions(-)

--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -898,6 +898,12 @@ static void print_other_cpu_stall(struct
 	force_quiescent_state(rsp);  /* Kick them all. */
 }
 
+/*
+ * This function really isn't for public consumption, but RCU is special in
+ * that context switches can allow the state machine to make progress.
+ */
+extern void resched_cpu(int cpu);
+
 static void print_cpu_stall(struct rcu_state *rsp)
 {
 	int cpu;
@@ -927,7 +933,14 @@ static void print_cpu_stall(struct rcu_s
 				     3 * rcu_jiffies_till_stall_check() + 3;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
-	set_need_resched();  /* kick ourselves to get things going. */
+	/*
+	 * Attempt to revive the RCU machinery by forcing a context switch.
+	 *
+	 * A context switch would normally allow the RCU state machine to make
+	 * progress and it could be we're stuck in kernel space without context
+	 * switches for an entirely unreasonable amount of time.
+	 */
+	resched_cpu(smp_processor_id());
 }
 
 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -513,12 +513,11 @@ static inline void init_hrtick(void)
  * might also involve a cross-CPU call to trigger the scheduler on
  * the target CPU.
  */
-#ifdef CONFIG_SMP
 void resched_task(struct task_struct *p)
 {
 	int cpu;
 
-	assert_raw_spin_locked(&task_rq(p)->lock);
+	lockdep_assert_held(&task_rq(p)->lock);
 
 	if (test_tsk_need_resched(p))
 		return;
@@ -546,6 +545,7 @@ void resched_cpu(int cpu)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
+#ifdef CONFIG_SMP
 #ifdef CONFIG_NO_HZ_COMMON
 /*
  * In the semi idle case, use the nearest busy cpu for migrating timers




* [PATCH 03/11] sched: Remove {set,clear}_need_resched
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 01/11] x86: Use asm goto to implement better modify_and_test() functions Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 02/11] sched, rcu: Make RCU use resched_cpu() Peter Zijlstra
@ 2013-09-17  9:10 ` Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 04/11] sched, idle: Fix the idle polling state logic Peter Zijlstra
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17  9:10 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Peter Zijlstra

[-- Attachment #1: peterz-tif_need_resched.patch --]
[-- Type: text/plain, Size: 1345 bytes --]

Preemption semantics are going to change, which mandates a change here.

All DRM usage sites are already broken and will not be affected (much)
by this change. DRM people are aware and will remove the last few
stragglers.

For now, leave an empty stub that generates a deprecation warning; once all
users are gone we can remove it.

Cc: airlied@linux.ie
Cc: daniel.vetter@ffwll.ch
Cc: paulmck@linux.vnet.ibm.com
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/thread_info.h |   15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -104,8 +104,19 @@ static inline int test_ti_thread_flag(st
 #define test_thread_flag(flag) \
 	test_ti_thread_flag(current_thread_info(), flag)
 
-#define set_need_resched()	set_thread_flag(TIF_NEED_RESCHED)
-#define clear_need_resched()	clear_thread_flag(TIF_NEED_RESCHED)
+static inline __deprecated void set_need_resched(void)
+{
+	/*
+	 * Use of this function is deprecated.
+	 *
+	 * As of this writing there are only a few users in the DRM tree left
+	 * all of which are wrong and can be removed without causing too much
+	 * grief.
+	 *
+	 * The DRM people are aware and are working on removing the last few
+	 * instances.
+	 */
+}
 
 #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
 /*




* [PATCH 04/11] sched, idle: Fix the idle polling state logic
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
                   ` (2 preceding siblings ...)
  2013-09-17  9:10 ` [PATCH 03/11] sched: Remove {set,clear}_need_resched Peter Zijlstra
@ 2013-09-17  9:10 ` Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 05/11] sched: Introduce preempt_count accessor functions Peter Zijlstra
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17  9:10 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Peter Zijlstra

[-- Attachment #1: peterz-idle-need_resched.patch --]
[-- Type: text/plain, Size: 8388 bytes --]

Mike reported that commit 7d1a9417 ("x86: Use generic idle loop")
regressed several workloads and caused excessive reschedule
interrupts.

The patch in question failed to notice that the x86 code had an
inverted sense of the polling state versus the new generic code (x86:
default polling, generic: default !polling).

Fix the two prominent x86 mwait-based idle drivers and introduce a few new
generic polling helpers (fixing the wrong smp_mb__after_clear_bit() usage
along the way).

Also switch the idle routines to using tif_need_resched(), which is an
immediate TIF_NEED_RESCHED test, as opposed to need_resched(), which will
end up being slightly different.
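
The resulting idiom for a hardware-polling idle driver looks roughly like
this (a sketch modelled on the intel_idle() hunk below; the __mwait()
arguments come from the C-state tables and are elided here):

	if (!current_set_polling_and_test()) {
		/*
		 * Polling is advertised: a remote CPU that sets
		 * TIF_NEED_RESCHED can skip the reschedule IPI, since
		 * the write to the flags word will wake the mwait.
		 */
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__mwait(eax, ecx);
	}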

Cc: lenb@kernel.org
Cc: tglx@linutronix.de
Reported-by: Mike Galbraith <bitbucket@online.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 arch/x86/kernel/process.c     |    6 +--
 drivers/acpi/processor_idle.c |   46 +++++-------------------
 drivers/idle/intel_idle.c     |    2 -
 include/linux/sched.h         |   78 ++++++++++++++++++++++++++++++++++++++----
 include/linux/thread_info.h   |    2 +
 kernel/cpu/idle.c             |    9 ++--
 6 files changed, 91 insertions(+), 52 deletions(-)

--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -391,9 +391,9 @@ static void amd_e400_idle(void)
 		 * The switch back from broadcast mode needs to be
 		 * called with interrupts disabled.
 		 */
-		 local_irq_disable();
-		 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
-		 local_irq_enable();
+		local_irq_disable();
+		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+		local_irq_enable();
 	} else
 		default_idle();
 }
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -119,17 +119,10 @@ static struct dmi_system_id processor_po
  */
 static void acpi_safe_halt(void)
 {
-	current_thread_info()->status &= ~TS_POLLING;
-	/*
-	 * TS_POLLING-cleared state must be visible before we
-	 * test NEED_RESCHED:
-	 */
-	smp_mb();
-	if (!need_resched()) {
+	if (!tif_need_resched()) {
 		safe_halt();
 		local_irq_disable();
 	}
-	current_thread_info()->status |= TS_POLLING;
 }
 
 #ifdef ARCH_APICTIMER_STOPS_ON_C3
@@ -737,6 +730,11 @@ static int acpi_idle_enter_c1(struct cpu
 	if (unlikely(!pr))
 		return -EINVAL;
 
+	if (cx->entry_method == ACPI_CSTATE_FFH) {
+		if (current_set_polling_and_test())
+			return -EINVAL;
+	}
+
 	lapic_timer_state_broadcast(pr, cx, 1);
 	acpi_idle_do_entry(cx);
 
@@ -790,18 +788,9 @@ static int acpi_idle_enter_simple(struct
 	if (unlikely(!pr))
 		return -EINVAL;
 
-	if (cx->entry_method != ACPI_CSTATE_FFH) {
-		current_thread_info()->status &= ~TS_POLLING;
-		/*
-		 * TS_POLLING-cleared state must be visible before we test
-		 * NEED_RESCHED:
-		 */
-		smp_mb();
-
-		if (unlikely(need_resched())) {
-			current_thread_info()->status |= TS_POLLING;
+	if (cx->entry_method == ACPI_CSTATE_FFH) {
+		if (current_set_polling_and_test())
 			return -EINVAL;
-		}
 	}
 
 	/*
@@ -819,9 +808,6 @@ static int acpi_idle_enter_simple(struct
 
 	sched_clock_idle_wakeup_event(0);
 
-	if (cx->entry_method != ACPI_CSTATE_FFH)
-		current_thread_info()->status |= TS_POLLING;
-
 	lapic_timer_state_broadcast(pr, cx, 0);
 	return index;
 }
@@ -858,18 +844,9 @@ static int acpi_idle_enter_bm(struct cpu
 		}
 	}
 
-	if (cx->entry_method != ACPI_CSTATE_FFH) {
-		current_thread_info()->status &= ~TS_POLLING;
-		/*
-		 * TS_POLLING-cleared state must be visible before we test
-		 * NEED_RESCHED:
-		 */
-		smp_mb();
-
-		if (unlikely(need_resched())) {
-			current_thread_info()->status |= TS_POLLING;
+	if (cx->entry_method == ACPI_CSTATE_FFH) {
+		if (current_set_polling_and_test())
 			return -EINVAL;
-		}
 	}
 
 	acpi_unlazy_tlb(smp_processor_id());
@@ -915,9 +892,6 @@ static int acpi_idle_enter_bm(struct cpu
 
 	sched_clock_idle_wakeup_event(0);
 
-	if (cx->entry_method != ACPI_CSTATE_FFH)
-		current_thread_info()->status |= TS_POLLING;
-
 	lapic_timer_state_broadcast(pr, cx, 0);
 	return index;
 }
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -359,7 +359,7 @@ static int intel_idle(struct cpuidle_dev
 	if (!(lapic_timer_reliable_states & (1 << (cstate))))
 		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
 
-	if (!need_resched()) {
+	if (!current_set_polling_and_test()) {
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2479,34 +2479,98 @@ static inline int tsk_is_polling(struct
 {
 	return task_thread_info(p)->status & TS_POLLING;
 }
-static inline void current_set_polling(void)
+static inline void __current_set_polling(void)
 {
 	current_thread_info()->status |= TS_POLLING;
 }
 
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
 {
 	current_thread_info()->status &= ~TS_POLLING;
-	smp_mb__after_clear_bit();
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb();
+
+	return unlikely(tif_need_resched());
 }
 #elif defined(TIF_POLLING_NRFLAG)
 static inline int tsk_is_polling(struct task_struct *p)
 {
 	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
 }
-static inline void current_set_polling(void)
+
+static inline void __current_set_polling(void)
 {
 	set_thread_flag(TIF_POLLING_NRFLAG);
 }
 
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 *
+	 * XXX: assumes set/clear bit are identical barrier wise.
+	 */
+	smp_mb__after_clear_bit();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
 {
 	clear_thread_flag(TIF_POLLING_NRFLAG);
 }
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb__after_clear_bit();
+
+	return unlikely(tif_need_resched());
+}
+
 #else
 static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void current_set_polling(void) { }
-static inline void current_clr_polling(void) { }
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
 #endif
 
 /*
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -118,6 +118,8 @@ static inline __deprecated void set_need
 	 */
 }
 
+#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+
 #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
 /*
  * An arch can define its own version of set_restore_sigmask() to get the
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -44,7 +44,7 @@ static inline int cpu_idle_poll(void)
 	rcu_idle_enter();
 	trace_cpu_idle_rcuidle(0, smp_processor_id());
 	local_irq_enable();
-	while (!need_resched())
+	while (!tif_need_resched())
 		cpu_relax();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 	rcu_idle_exit();
@@ -92,8 +92,7 @@ static void cpu_idle_loop(void)
 			if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
 				cpu_idle_poll();
 			} else {
-				current_clr_polling();
-				if (!need_resched()) {
+				if (!current_clr_polling_and_test()) {
 					stop_critical_timings();
 					rcu_idle_enter();
 					arch_cpu_idle();
@@ -103,7 +102,7 @@ static void cpu_idle_loop(void)
 				} else {
 					local_irq_enable();
 				}
-				current_set_polling();
+				__current_set_polling();
 			}
 			arch_cpu_idle_exit();
 		}
@@ -129,7 +128,7 @@ void cpu_startup_entry(enum cpuhp_state
 	 */
 	boot_init_stack_canary();
 #endif
-	current_set_polling();
+	__current_set_polling();
 	arch_cpu_idle_prepare();
 	cpu_idle_loop();
 }




* [PATCH 05/11] sched: Introduce preempt_count accessor functions
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
                   ` (3 preceding siblings ...)
  2013-09-17  9:10 ` [PATCH 04/11] sched, idle: Fix the idle polling state logic Peter Zijlstra
@ 2013-09-17  9:10 ` Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 06/11] sched: Add NEED_RESCHED to the preempt_count Peter Zijlstra
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17  9:10 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Peter Zijlstra

[-- Attachment #1: peterz-preempt_count-accessors.patch --]
[-- Type: text/plain, Size: 4950 bytes --]

Replace the single preempt_count() 'function' that's an lvalue with
two proper functions:

 preempt_count() - returns the preempt_count value as an rvalue
 preempt_count_ptr() - returns a pointer to the preempt_count

Then change all sites that modify the preempt count to use
preempt_count_ptr().
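
A modification site thus changes like this (the pattern from the hunks
below):

	/* before: preempt_count() was an lvalue */
	preempt_count() += val;

	/* after: reads stay preempt_count(); writes go through the pointer */
	*preempt_count_ptr() += val;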

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/preempt.h |   20 ++++++++++++++------
 init/main.c             |    2 +-
 kernel/sched/core.c     |    4 ++--
 kernel/softirq.c        |    4 ++--
 kernel/timer.c          |    8 ++++----
 lib/smp_processor_id.c  |    3 +--
 6 files changed, 24 insertions(+), 17 deletions(-)

--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -10,19 +10,27 @@
 #include <linux/linkage.h>
 #include <linux/list.h>
 
+static __always_inline int preempt_count(void)
+{
+	return current_thread_info()->preempt_count;
+}
+
+static __always_inline int *preempt_count_ptr(void)
+{
+	return &current_thread_info()->preempt_count;
+}
+
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);
   extern void sub_preempt_count(int val);
 #else
-# define add_preempt_count(val)	do { preempt_count() += (val); } while (0)
-# define sub_preempt_count(val)	do { preempt_count() -= (val); } while (0)
+# define add_preempt_count(val)	do { *preempt_count_ptr() += (val); } while (0)
+# define sub_preempt_count(val)	do { *preempt_count_ptr() -= (val); } while (0)
 #endif
 
 #define inc_preempt_count() add_preempt_count(1)
 #define dec_preempt_count() sub_preempt_count(1)
 
-#define preempt_count()	(current_thread_info()->preempt_count)
-
 #ifdef CONFIG_PREEMPT
 
 asmlinkage void preempt_schedule(void);
@@ -81,9 +89,9 @@ do { \
 
 /* For debugging and tracer internals only! */
 #define add_preempt_count_notrace(val)			\
-	do { preempt_count() += (val); } while (0)
+	do { *preempt_count_ptr() += (val); } while (0)
 #define sub_preempt_count_notrace(val)			\
-	do { preempt_count() -= (val); } while (0)
+	do { *preempt_count_ptr() -= (val); } while (0)
 #define inc_preempt_count_notrace() add_preempt_count_notrace(1)
 #define dec_preempt_count_notrace() sub_preempt_count_notrace(1)
 
--- a/init/main.c
+++ b/init/main.c
@@ -690,7 +690,7 @@ int __init_or_module do_one_initcall(ini
 
 	if (preempt_count() != count) {
 		sprintf(msgbuf, "preemption imbalance ");
-		preempt_count() = count;
+		*preempt_count_ptr() = count;
 	}
 	if (irqs_disabled()) {
 		strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2233,7 +2233,7 @@ void __kprobes add_preempt_count(int val
 	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
 		return;
 #endif
-	preempt_count() += val;
+	*preempt_count_ptr() += val;
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Spinlock count overflowing soon?
@@ -2264,7 +2264,7 @@ void __kprobes sub_preempt_count(int val
 
 	if (preempt_count() == val)
 		trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
-	preempt_count() -= val;
+	*preempt_count_ptr() -= val;
 }
 EXPORT_SYMBOL(sub_preempt_count);
 
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -106,7 +106,7 @@ static void __local_bh_disable(unsigned
 	 * We must manually increment preempt_count here and manually
 	 * call the trace_preempt_off later.
 	 */
-	preempt_count() += cnt;
+	*preempt_count_ptr() += cnt;
 	/*
 	 * Were softirqs turned off above:
 	 */
@@ -256,7 +256,7 @@ asmlinkage void __do_softirq(void)
 				       " exited with %08x?\n", vec_nr,
 				       softirq_to_name[vec_nr], h->action,
 				       prev_count, preempt_count());
-				preempt_count() = prev_count;
+				*preempt_count_ptr() = prev_count;
 			}
 
 			rcu_bh_qs(cpu);
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1092,7 +1092,7 @@ static int cascade(struct tvec_base *bas
 static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
 			  unsigned long data)
 {
-	int preempt_count = preempt_count();
+	int count = preempt_count();
 
 #ifdef CONFIG_LOCKDEP
 	/*
@@ -1119,16 +1119,16 @@ static void call_timer_fn(struct timer_l
 
 	lock_map_release(&lockdep_map);
 
-	if (preempt_count != preempt_count()) {
+	if (count != preempt_count()) {
 		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
-			  fn, preempt_count, preempt_count());
+			  fn, count, preempt_count());
 		/*
 		 * Restore the preempt count. That gives us a decent
 		 * chance to survive and extract information. If the
 		 * callback kept a lock held, bad luck, but not worse
 		 * than the BUG() we had.
 		 */
-		preempt_count() = preempt_count;
+		*preempt_count_ptr() = count;
 	}
 }
 
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -9,10 +9,9 @@
 
 notrace unsigned int debug_smp_processor_id(void)
 {
-	unsigned long preempt_count = preempt_count();
 	int this_cpu = raw_smp_processor_id();
 
-	if (likely(preempt_count))
+	if (likely(preempt_count()))
 		goto out;
 
 	if (irqs_disabled())




* [PATCH 06/11] sched: Add NEED_RESCHED to the preempt_count
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
                   ` (4 preceding siblings ...)
  2013-09-17  9:10 ` [PATCH 05/11] sched: Introduce preempt_count accessor functions Peter Zijlstra
@ 2013-09-17  9:10 ` Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 07/11] sched, arch: Create asm/preempt.h Peter Zijlstra
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17  9:10 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Peter Zijlstra

[-- Attachment #1: peterz-preempt_count-need_resched.patch --]
[-- Type: text/plain, Size: 6806 bytes --]

In order to combine the preemption and need_resched test we need to
fold the need_resched information into the preempt_count value.

Since the NEED_RESCHED flag is set across CPUs this would need to be an
atomic operation; however, we very much want to avoid making preempt_count
atomic. Therefore we keep the existing TIF_NEED_RESCHED infrastructure in
place, but test it at 3 sites and fold its value into preempt_count, namely:

 - resched_task() when setting TIF_NEED_RESCHED on the current task;
 - scheduler_ipi() -- when resched_task() sets TIF_NEED_RESCHED on a
                   remote task it follows up with a reschedule IPI,
                   and we can modify the CPU-local preempt_count from
                   there;
 - cpu_idle_loop() for when resched_task() found tsk_is_polling().

We use an inverted bitmask to indicate need_resched, so that a count of 0
means we both need to reschedule and may do so (we're not atomic).
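
To see why the inversion works, walk the fast path (a worked sketch;
PREEMPT_NEED_RESCHED is 0x80000000 per the hunk below, and the bit being
*set* means no reschedule is pending):

	/* preempt_disable()d once, no resched pending:
	 *   count == 0x80000001
	 * preempt_enable() decrements:
	 *   count == 0x80000000  -> non-zero, no scheduler call
	 *
	 * resched pending (bit cleared) and last disable undone:
	 *   count == 0x00000001 - 1 == 0
	 *   -> a single decrement-and-test catches both conditions
	 */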

Also remove the barrier() in preempt_enable() between
preempt_enable_no_resched() and preempt_check_resched(), to avoid having to
reload the preemption value and to allow the compiler to use the flags of
the previous decrement. I couldn't come up with any sane reason for this
barrier() to be there, as preempt_enable_no_resched() already has a
barrier() before doing the decrement.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/preempt.h   |   42 +++++++++++++++++++++++++++++++++++++-----
 include/linux/sched.h     |    2 +-
 kernel/context_tracking.c |    2 +-
 kernel/cpu/idle.c         |    7 +++++++
 kernel/sched/core.c       |   18 ++++++++++++++----
 5 files changed, 60 insertions(+), 11 deletions(-)

--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -10,9 +10,19 @@
 #include <linux/linkage.h>
 #include <linux/list.h>
 
+/*
+ * We use the MSB mostly because its available; see <linux/hardirq.h> for
+ * the other bits.
+ */
+#define PREEMPT_NEED_RESCHED	0x80000000
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
 static __always_inline int preempt_count(void)
 {
-	return current_thread_info()->preempt_count;
+	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
 }
 
 static __always_inline int *preempt_count_ptr(void)
@@ -20,6 +30,30 @@ static __always_inline int *preempt_coun
 	return &current_thread_info()->preempt_count;
 }
 
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
+}
+
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);
   extern void sub_preempt_count(int val);
@@ -37,7 +71,7 @@ asmlinkage void preempt_schedule(void);
 
 #define preempt_check_resched() \
 do { \
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
+	if (unlikely(!*preempt_count_ptr())) \
 		preempt_schedule(); \
 } while (0)
 
@@ -47,7 +81,7 @@ void preempt_schedule_context(void);
 
 #define preempt_check_resched_context() \
 do { \
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
+	if (unlikely(!*preempt_count_ptr())) \
 		preempt_schedule_context(); \
 } while (0)
 #else
@@ -83,7 +117,6 @@ do { \
 #define preempt_enable() \
 do { \
 	preempt_enable_no_resched(); \
-	barrier(); \
 	preempt_check_resched(); \
 } while (0)
 
@@ -111,7 +144,6 @@ do { \
 #define preempt_enable_notrace() \
 do { \
 	preempt_enable_no_resched_notrace(); \
-	barrier(); \
 	preempt_check_resched_context(); \
 } while (0)
 
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2408,7 +2408,7 @@ static inline int signal_pending_state(l
 
 static inline int need_resched(void)
 {
-	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
+	return unlikely(test_preempt_need_resched());
 }
 
 /*
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -115,7 +115,7 @@ void __sched notrace preempt_schedule_co
 {
 	enum ctx_state prev_ctx;
 
-	if (likely(!preemptible()))
+	if (likely(preempt_count() || irqs_disabled()))
 		return;
 
 	/*
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -105,6 +105,13 @@ static void cpu_idle_loop(void)
 				__current_set_polling();
 			}
 			arch_cpu_idle_exit();
+			/*
+			 * We need to test and propagate the TIF_NEED_RESCHED
+			 * bit here because we might not have sent the
+			 * reschedule IPI to idle tasks.
+			 */
+			if (tif_need_resched())
+				set_preempt_need_resched();
 		}
 		tick_nohz_idle_exit();
 		schedule_preempt_disabled();
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -525,8 +525,10 @@ void resched_task(struct task_struct *p)
 	set_tsk_need_resched(p);
 
 	cpu = task_cpu(p);
-	if (cpu == smp_processor_id())
+	if (cpu == smp_processor_id()) {
+		set_preempt_need_resched();
 		return;
+	}
 
 	/* NEED_RESCHED must be visible before we test polling */
 	smp_mb();
@@ -1397,6 +1399,14 @@ static void sched_ttwu_pending(void)
 
 void scheduler_ipi(void)
 {
+	/*
+	 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
+	 * TIF_NEED_RESCHED remotely (for the first time) will also send
+	 * this IPI.
+	 */
+	if (tif_need_resched())
+		set_preempt_need_resched();
+
 	if (llist_empty(&this_rq()->wake_list)
 			&& !tick_nohz_full_cpu(smp_processor_id())
 			&& !got_nohz_idle_kick())
@@ -2431,6 +2441,7 @@ static void __sched __schedule(void)
 	put_prev_task(rq, prev);
 	next = pick_next_task(rq);
 	clear_tsk_need_resched(prev);
+	clear_preempt_need_resched();
 	rq->skip_clock_update = 0;
 
 	if (likely(prev != next)) {
@@ -2517,7 +2528,7 @@ asmlinkage void __sched notrace preempt_
 	 * If there is a non-zero preempt_count or interrupts are disabled,
 	 * we do not want to preempt the current task. Just return..
 	 */
-	if (likely(!preemptible()))
+	if (likely(preempt_count() || irqs_disabled()))
 		return;
 
 	do {
@@ -2542,11 +2553,10 @@ EXPORT_SYMBOL(preempt_schedule);
  */
 asmlinkage void __sched preempt_schedule_irq(void)
 {
-	struct thread_info *ti = current_thread_info();
 	enum ctx_state prev_state;
 
 	/* Catch callers which need to be fixed */
-	BUG_ON(ti->preempt_count || !irqs_disabled());
+	BUG_ON(preempt_count() || !irqs_disabled());
 
 	prev_state = exception_enter();
 




* [PATCH 07/11] sched, arch: Create asm/preempt.h
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
                   ` (5 preceding siblings ...)
  2013-09-17  9:10 ` [PATCH 06/11] sched: Add NEED_RESCHED to the preempt_count Peter Zijlstra
@ 2013-09-17  9:10 ` Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 08/11] sched: Create more preempt_count accessors Peter Zijlstra
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17  9:10 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Peter Zijlstra

[-- Attachment #1: peterz-asm-preempt_count.patch --]
[-- Type: text/plain, Size: 10874 bytes --]

In order to prepare for per-arch implementations of preempt_count, move
the required bits into an asm-generic header and use it for all archs.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 arch/alpha/include/asm/Kbuild      |    1 
 arch/arc/include/asm/Kbuild        |    1 
 arch/arm/include/asm/Kbuild        |    1 
 arch/arm64/include/asm/Kbuild      |    1 
 arch/avr32/include/asm/Kbuild      |    1 
 arch/blackfin/include/asm/Kbuild   |    1 
 arch/c6x/include/asm/Kbuild        |    1 
 arch/cris/include/asm/Kbuild       |    1 
 arch/frv/include/asm/Kbuild        |    1 
 arch/h8300/include/asm/Kbuild      |    1 
 arch/hexagon/include/asm/Kbuild    |    1 
 arch/ia64/include/asm/Kbuild       |    1 
 arch/m32r/include/asm/Kbuild       |    1 
 arch/m68k/include/asm/Kbuild       |    1 
 arch/metag/include/asm/Kbuild      |    1 
 arch/microblaze/include/asm/Kbuild |    1 
 arch/mips/include/asm/Kbuild       |    1 
 arch/mn10300/include/asm/Kbuild    |    1 
 arch/openrisc/include/asm/Kbuild   |    1 
 arch/parisc/include/asm/Kbuild     |    1 
 arch/powerpc/include/asm/Kbuild    |    1 
 arch/s390/include/asm/Kbuild       |    1 
 arch/score/include/asm/Kbuild      |    1 
 arch/sh/include/asm/Kbuild         |    1 
 arch/sparc/include/asm/Kbuild      |    1 
 arch/tile/include/asm/Kbuild       |    1 
 arch/um/include/asm/Kbuild         |    1 
 arch/unicore32/include/asm/Kbuild  |    1 
 arch/x86/include/asm/Kbuild        |    1 
 arch/xtensa/include/asm/Kbuild     |    1 
 include/asm-generic/preempt.h      |   44 +++++++++++++++++++++++++++++++++++++
 include/linux/preempt.h            |   39 --------------------------------
 32 files changed, 75 insertions(+), 38 deletions(-)

--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
 
 generic-y += exec.h
 generic-y += trace_clock.h
+generic-y += preempt.h
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -46,3 +46,4 @@ generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -33,3 +33,4 @@ generic-y += timex.h
 generic-y += trace_clock.h
 generic-y += types.h
 generic-y += unaligned.h
+generic-y += preempt.h
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -50,3 +50,4 @@ generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y	+= clkdev.h
 generic-y	+= exec.h
 generic-y	+= trace_clock.h
 generic-y	+= param.h
+generic-y += preempt.h
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -44,3 +44,4 @@ generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -56,3 +56,4 @@ generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -11,3 +11,4 @@ generic-y += module.h
 generic-y += trace_clock.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -2,3 +2,4 @@
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
+generic-y += preempt.h
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -6,3 +6,4 @@ generic-y += mmu.h
 generic-y += module.h
 generic-y += trace_clock.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -53,3 +53,4 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -3,4 +3,5 @@ generic-y += clkdev.h
 generic-y += exec.h
 generic-y += kvm_para.h
 generic-y += trace_clock.h
+generic-y += preempt.h
 generic-y += vtime.h
\ No newline at end of file
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
 generic-y += exec.h
 generic-y += module.h
 generic-y += trace_clock.h
+generic-y += preempt.h
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -31,3 +31,4 @@ generic-y += trace_clock.h
 generic-y += types.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -52,3 +52,4 @@ generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
 generic-y += syscalls.h
+generic-y += preempt.h
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -11,5 +11,6 @@ generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
 generic-y += trace_clock.h
+generic-y += preempt.h
 generic-y += ucontext.h
 generic-y += xor.h
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -2,3 +2,4 @@
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
+generic-y += preempt.h
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -67,3 +67,4 @@ generic-y += ucontext.h
 generic-y += user.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -4,3 +4,4 @@ generic-y += word-at-a-time.h auxvec.h u
 	  div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \
 	  poll.h xor.h clkdev.h exec.h
 generic-y += trace_clock.h
+generic-y += preempt.h
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -2,4 +2,5 @@
 generic-y += clkdev.h
 generic-y += rwsem.h
 generic-y += trace_clock.h
+generic-y += preempt.h
 generic-y += vtime.h
\ No newline at end of file
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -2,3 +2,4 @@
 
 generic-y += clkdev.h
 generic-y += trace_clock.h
+generic-y += preempt.h
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -4,3 +4,4 @@ header-y +=
 generic-y += clkdev.h
 generic-y += trace_clock.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -34,3 +34,4 @@ generic-y += termios.h
 generic-y += trace_clock.h
 generic-y += ucontext.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -16,3 +16,4 @@ generic-y += serial.h
 generic-y += trace_clock.h
 generic-y += types.h
 generic-y += word-at-a-time.h
+generic-y += preempt.h
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -38,3 +38,4 @@ generic-y += termios.h
 generic-y += trace_clock.h
 generic-y += types.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += hw_irq.h irq_regs.h kdebug.
 generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h
 generic-y += switch_to.h clkdev.h
 generic-y += trace_clock.h
+generic-y += preempt.h
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -60,3 +60,4 @@ generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,3 +5,4 @@ genhdr-y += unistd_64.h
 genhdr-y += unistd_x32.h
 
 generic-y += clkdev.h
+generic-y += preempt.h
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -28,3 +28,4 @@ generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += xor.h
+generic-y += preempt.h
--- /dev/null
+++ b/include/asm-generic/preempt.h
@@ -0,0 +1,44 @@
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <linux/thread_info.h>
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
+static __always_inline int preempt_count(void)
+{
+	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline int *preempt_count_ptr(void)
+{
+	return &current_thread_info()->preempt_count;
+}
+
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
+}
+
+#endif /* __ASM_PREEMPT_H */
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -6,7 +6,6 @@
  * preempt_count (used for kernel preemption, interrupt count, etc.)
  */
 
-#include <linux/thread_info.h>
 #include <linux/linkage.h>
 #include <linux/list.h>
 
@@ -16,43 +15,7 @@
  */
 #define PREEMPT_NEED_RESCHED	0x80000000
 
-/*
- * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
- * that think a non-zero value indicates we cannot preempt.
- */
-static __always_inline int preempt_count(void)
-{
-	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
-}
-
-static __always_inline int *preempt_count_ptr(void)
-{
-	return &current_thread_info()->preempt_count;
-}
-
-/*
- * We fold the NEED_RESCHED bit into the preempt count such that
- * preempt_enable() can decrement and test for needing to reschedule with a
- * single instruction.
- *
- * We invert the actual bit, so that when the decrement hits 0 we know we both
- * need to resched (the bit is cleared) and can resched (no preempt count).
- */
-
-static __always_inline void set_preempt_need_resched(void)
-{
-	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
-}
-
-static __always_inline void clear_preempt_need_resched(void)
-{
-	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
-}
-
-static __always_inline bool test_preempt_need_resched(void)
-{
-	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
-}
+#include <asm/preempt.h>
 
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);




* [PATCH 08/11] sched: Create more preempt_count accessors
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
                   ` (6 preceding siblings ...)
  2013-09-17  9:10 ` [PATCH 07/11] sched, arch: Create asm/preempt.h Peter Zijlstra
@ 2013-09-17  9:10 ` Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 09/11] sched: Extract the basic add/sub preempt_count modifiers Peter Zijlstra
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17  9:10 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Peter Zijlstra

[-- Attachment #1: peterz-task_preempt_count.patch --]
[-- Type: text/plain, Size: 2762 bytes --]

We need a few special preempt_count accessors:
 - task_preempt_count() for when we're interested in the preemption
   count of another (non-running) task.
 - init_task_preempt_count() for properly initializing the preemption
   count.
 - init_idle_preempt_count() a special case of the above for the idle
   threads.

With these, no generic code ever touches thread_info::preempt_count
anymore, and architectures could choose to remove it.
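
Note what the initial value encodes (taken straight from the header hunk
below): a forked task starts with preemption disabled and, given the
inverted bit, with no reschedule pending:

	/* fork: count of 1 (preemption disabled) plus the inverted
	 * NEED_RESCHED bit set, i.e. "no reschedule pending" */
	task_thread_info(p)->preempt_count = 1 | PREEMPT_NEED_RESCHED;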

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 include/asm-generic/preempt.h |   14 ++++++++++++++
 include/trace/events/sched.h  |    2 +-
 kernel/sched/core.c           |    7 +++----
 3 files changed, 18 insertions(+), 5 deletions(-)

--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -18,6 +18,20 @@ static __always_inline int *preempt_coun
 }
 
 /*
+ * must be macros to avoid header recursion hell
+ */
+#define task_preempt_count(p) \
+	(task_thread_info(p)->preempt_count & ~PREEMPT_NEED_RESCHED)
+
+#define init_task_preempt_count(p) do { \
+	task_thread_info(p)->preempt_count = 1 | PREEMPT_NEED_RESCHED; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+	task_thread_info(p)->preempt_count = 0; \
+} while (0)
+
+/*
  * We fold the NEED_RESCHED bit into the preempt count such that
  * preempt_enable() can decrement and test for needing to reschedule with a
  * single instruction.
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -100,7 +100,7 @@ static inline long __trace_sched_switch_
 	/*
 	 * For all intents and purposes a preempted task is a running task.
 	 */
-	if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
+	if (task_preempt_count(p) & PREEMPT_ACTIVE)
 		state = TASK_RUNNING | TASK_STATE_MAX;
 #endif
 
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -996,7 +996,7 @@ void set_task_cpu(struct task_struct *p,
 	 * ttwu() will sort out the placement.
 	 */
 	WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
-			!(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+			!(task_preempt_count(p) & PREEMPT_ACTIVE));
 
 #ifdef CONFIG_LOCKDEP
 	/*
@@ -1743,8 +1743,7 @@ void sched_fork(struct task_struct *p)
 	p->on_cpu = 0;
 #endif
 #ifdef CONFIG_PREEMPT_COUNT
-	/* Want to start with kernel preemption disabled. */
-	task_thread_info(p)->preempt_count = 1;
+	init_task_preempt_count(p);
 #endif
 #ifdef CONFIG_SMP
 	plist_node_init(&p->pushable_tasks, MAX_PRIO);
@@ -4237,7 +4236,7 @@ void init_idle(struct task_struct *idle,
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
-	task_thread_info(idle)->preempt_count = 0;
+	init_idle_preempt_count(idle, cpu);
 
 	/*
 	 * The idle tasks have their own, simple scheduling class:



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 09/11] sched: Extract the basic add/sub preempt_count modifiers
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
                   ` (7 preceding siblings ...)
  2013-09-17  9:10 ` [PATCH 08/11] sched: Create more preempt_count accessors Peter Zijlstra
@ 2013-09-17  9:10 ` Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 10/11] sched, x86: Provide a per-cpu preempt_count implementation Peter Zijlstra
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17  9:10 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Peter Zijlstra

[-- Attachment #1: peterz-cleanup-preempt.patch --]
[-- Type: text/plain, Size: 14016 bytes --]

Rewrite the preempt_count macros in order to extract the 3 basic
preempt_count value modifiers:

  __preempt_count_add()
  __preempt_count_sub()

and the new:

  __preempt_count_dec_and_test()

And since we're at it anyway, replace the unconventional
$op_preempt_count names with the more conventional preempt_count_$op.

Since these basic operators are equivalent to the previous _notrace()
variants, do away with the _notrace() versions.
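
Note the distinction between the two predicates this exposes (see the
asm-generic/preempt.h hunk below): need_resched() looks only at the
inverted PREEMPT_NEED_RESCHED bit, while should_resched() requires
the whole word to be zero, i.e. a resched is both wanted and
possible. A user-space sketch of the two tests (illustrative values
only):

	#include <stdio.h>
	#include <stdbool.h>

	#define PREEMPT_NEED_RESCHED	0x80000000u

	static bool need_resched_sk(unsigned int pc)
	{
		return !(pc & PREEMPT_NEED_RESCHED); /* bit clear => wanted */
	}

	static bool should_resched_sk(unsigned int pc)
	{
		return pc == 0;	/* wanted and nothing holds preemption off */
	}

	int main(void)
	{
		unsigned int pc = 0x100; /* softirq count 1, bit clear */

		printf("need=%d should=%d\n",
		       need_resched_sk(pc), should_resched_sk(pc));
		return 0;
	}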

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 arch/mips/mm/init.c           |    5 -
 arch/x86/kernel/traps.c       |    4 -
 include/asm-generic/preempt.h |   35 +++++++++++++
 include/linux/hardirq.h       |    8 +--
 include/linux/preempt.h       |  106 +++++++++++++++++++-----------------------
 include/linux/sched.h         |    5 -
 include/linux/uaccess.h       |    8 ---
 kernel/context_tracking.c     |    2 
 kernel/sched/core.c           |   29 ++++-------
 kernel/softirq.c              |   12 ++--
 10 files changed, 112 insertions(+), 102 deletions(-)

--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -124,7 +124,7 @@ void *kmap_coherent(struct page *page, u
 
 	BUG_ON(Page_dcache_dirty(page));
 
-	inc_preempt_count();
+	pagefault_disable();
 	idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
 #ifdef CONFIG_MIPS_MT_SMTC
 	idx += FIX_N_COLOURS * smp_processor_id() +
@@ -193,8 +193,7 @@ void kunmap_coherent(void)
 	write_c0_entryhi(old_ctx);
 	EXIT_CRITICAL(flags);
 #endif
-	dec_preempt_count();
-	preempt_check_resched();
+	pagefault_enable();
 }
 
 void copy_user_highpage(struct page *to, struct page *from,
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -88,7 +88,7 @@ static inline void conditional_sti(struc
 
 static inline void preempt_conditional_sti(struct pt_regs *regs)
 {
-	inc_preempt_count();
+	preempt_count_inc();
 	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 }
@@ -103,7 +103,7 @@ static inline void preempt_conditional_c
 {
 	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_disable();
-	dec_preempt_count();
+	preempt_count_dec();
 }
 
 static int __kprobes
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -55,4 +55,39 @@ static __always_inline bool test_preempt
 	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
 }
 
+/*
+ * The various preempt_count add/sub methods
+ */
+
+static __always_inline void __preempt_count_add(int val)
+{
+	*preempt_count_ptr() += val;
+}
+
+static __always_inline void __preempt_count_sub(int val)
+{
+	*preempt_count_ptr() -= val;
+}
+
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+	return !--*preempt_count_ptr();
+}
+
+/*
+ * Returns true when we need to resched -- even if we can not.
+ */
+static __always_inline bool need_resched(void)
+{
+	return unlikely(test_preempt_need_resched());
+}
+
+/*
+ * Returns true when we need to resched and can (barring IRQ state).
+ */
+static __always_inline bool should_resched(void)
+{
+	return unlikely(!*preempt_count_ptr());
+}
+
 #endif /* __ASM_PREEMPT_H */
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -37,7 +37,7 @@ extern void rcu_nmi_exit(void);
 #define __irq_enter()					\
 	do {						\
 		account_irq_enter_time(current);	\
-		add_preempt_count(HARDIRQ_OFFSET);	\
+		preempt_count_add(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
 	} while (0)
 
@@ -53,7 +53,7 @@ extern void irq_enter(void);
 	do {						\
 		trace_hardirq_exit();			\
 		account_irq_exit_time(current);		\
-		sub_preempt_count(HARDIRQ_OFFSET);	\
+		preempt_count_sub(HARDIRQ_OFFSET);	\
 	} while (0)
 
 /*
@@ -66,7 +66,7 @@ extern void irq_exit(void);
 		lockdep_off();					\
 		ftrace_nmi_enter();				\
 		BUG_ON(in_nmi());				\
-		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
+		preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET);	\
 		rcu_nmi_enter();				\
 		trace_hardirq_enter();				\
 	} while (0)
@@ -76,7 +76,7 @@ extern void irq_exit(void);
 		trace_hardirq_exit();				\
 		rcu_nmi_exit();					\
 		BUG_ON(!in_nmi());				\
-		sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
+		preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET);	\
 		ftrace_nmi_exit();				\
 		lockdep_on();					\
 	} while (0)
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -18,97 +18,86 @@
 #include <asm/preempt.h>
 
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
-  extern void add_preempt_count(int val);
-  extern void sub_preempt_count(int val);
+extern void preempt_count_add(int val);
+extern void preempt_count_sub(int val);
+#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); })
 #else
-# define add_preempt_count(val)	do { *preempt_count_ptr() += (val); } while (0)
-# define sub_preempt_count(val)	do { *preempt_count_ptr() -= (val); } while (0)
+#define preempt_count_add(val)	__preempt_count_add(val)
+#define preempt_count_sub(val)	__preempt_count_sub(val)
+#define preempt_count_dec_and_test() __preempt_count_dec_and_test()
 #endif
 
-#define inc_preempt_count() add_preempt_count(1)
-#define dec_preempt_count() sub_preempt_count(1)
-
-#ifdef CONFIG_PREEMPT
-
-asmlinkage void preempt_schedule(void);
-
-#define preempt_check_resched() \
-do { \
-	if (unlikely(!*preempt_count_ptr())) \
-		preempt_schedule(); \
-} while (0)
-
-#ifdef CONFIG_CONTEXT_TRACKING
-
-void preempt_schedule_context(void);
-
-#define preempt_check_resched_context() \
-do { \
-	if (unlikely(!*preempt_count_ptr())) \
-		preempt_schedule_context(); \
-} while (0)
-#else
-
-#define preempt_check_resched_context() preempt_check_resched()
-
-#endif /* CONFIG_CONTEXT_TRACKING */
-
-#else /* !CONFIG_PREEMPT */
-
-#define preempt_check_resched()		do { } while (0)
-#define preempt_check_resched_context()	do { } while (0)
-
-#endif /* CONFIG_PREEMPT */
+#define __preempt_count_inc() __preempt_count_add(1)
+#define __preempt_count_dec() __preempt_count_sub(1)
 
+#define preempt_count_inc() preempt_count_add(1)
+#define preempt_count_dec() preempt_count_sub(1)
 
 #ifdef CONFIG_PREEMPT_COUNT
 
 #define preempt_disable() \
 do { \
-	inc_preempt_count(); \
+	preempt_count_inc(); \
 	barrier(); \
 } while (0)
 
 #define sched_preempt_enable_no_resched() \
 do { \
 	barrier(); \
-	dec_preempt_count(); \
+	preempt_count_dec(); \
 } while (0)
 
-#define preempt_enable_no_resched()	sched_preempt_enable_no_resched()
+#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
 
+#ifdef CONFIG_PREEMPT
+asmlinkage void preempt_schedule(void);
 #define preempt_enable() \
 do { \
-	preempt_enable_no_resched(); \
-	preempt_check_resched(); \
+	barrier(); \
+	if (unlikely(preempt_count_dec_and_test())) \
+		preempt_schedule(); \
 } while (0)
 
-/* For debugging and tracer internals only! */
-#define add_preempt_count_notrace(val)			\
-	do { *preempt_count_ptr() += (val); } while (0)
-#define sub_preempt_count_notrace(val)			\
-	do { *preempt_count_ptr() -= (val); } while (0)
-#define inc_preempt_count_notrace() add_preempt_count_notrace(1)
-#define dec_preempt_count_notrace() sub_preempt_count_notrace(1)
+#define preempt_check_resched() \
+do { \
+	if (should_resched()) \
+		preempt_schedule(); \
+} while (0)
+
+#else
+#define preempt_enable() preempt_enable_no_resched()
+#define preempt_check_resched() do { } while (0)
+#endif
 
 #define preempt_disable_notrace() \
 do { \
-	inc_preempt_count_notrace(); \
+	__preempt_count_inc(); \
 	barrier(); \
 } while (0)
 
 #define preempt_enable_no_resched_notrace() \
 do { \
 	barrier(); \
-	dec_preempt_count_notrace(); \
+	__preempt_count_dec(); \
 } while (0)
 
-/* preempt_check_resched is OK to trace */
+#ifdef CONFIG_PREEMPT
+
+#ifdef CONFIG_CONTEXT_TRACKING
+asmlinkage void preempt_schedule_context(void);
+#else
+#define preempt_schedule_context() preempt_schedule()
+#endif
+
 #define preempt_enable_notrace() \
 do { \
-	preempt_enable_no_resched_notrace(); \
-	preempt_check_resched_context(); \
+	barrier(); \
+	if (unlikely(__preempt_count_dec_and_test())) \
+		preempt_schedule_context(); \
 } while (0)
+#else
+#define preempt_enable_notrace() preempt_enable_no_resched_notrace()
+#endif
 
 #else /* !CONFIG_PREEMPT_COUNT */
 
@@ -118,10 +107,11 @@ do { \
  * that can cause faults and scheduling migrate into our preempt-protected
  * region.
  */
-#define preempt_disable()		barrier()
+#define preempt_disable()			barrier()
 #define sched_preempt_enable_no_resched()	barrier()
-#define preempt_enable_no_resched()	barrier()
-#define preempt_enable()		barrier()
+#define preempt_enable_no_resched()		barrier()
+#define preempt_enable()			barrier()
+#define preempt_check_resched()			do { } while (0)
 
 #define preempt_disable_notrace()		barrier()
 #define preempt_enable_no_resched_notrace()	barrier()
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2403,11 +2403,6 @@ static inline int signal_pending_state(l
 	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
 }
 
-static inline int need_resched(void)
-{
-	return unlikely(test_preempt_need_resched());
-}
-
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -15,7 +15,7 @@
  */
 static inline void pagefault_disable(void)
 {
-	inc_preempt_count();
+	preempt_count_inc();
 	/*
 	 * make sure to have issued the store before a pagefault
 	 * can hit.
@@ -30,11 +30,7 @@ static inline void pagefault_enable(void
 	 * the pagefault handler again.
 	 */
 	barrier();
-	dec_preempt_count();
-	/*
-	 * make sure we do..
-	 */
-	barrier();
+	preempt_count_dec();
 	preempt_check_resched();
 }
 
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -111,7 +111,7 @@ void context_tracking_user_enter(void)
  * instead of preempt_schedule() to exit user context if needed before
  * calling the scheduler.
  */
-void __sched notrace preempt_schedule_context(void)
+asmlinkage void __sched notrace preempt_schedule_context(void)
 {
 	enum ctx_state prev_ctx;
 
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2239,7 +2239,7 @@ notrace unsigned long get_parent_ip(unsi
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
 				defined(CONFIG_PREEMPT_TRACER))
 
-void __kprobes add_preempt_count(int val)
+void __kprobes preempt_count_add(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
@@ -2248,7 +2248,7 @@ void __kprobes add_preempt_count(int val
 	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
 		return;
 #endif
-	*preempt_count_ptr() += val;
+	__preempt_count_add(val);
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Spinlock count overflowing soon?
@@ -2259,9 +2259,9 @@ void __kprobes add_preempt_count(int val
 	if (preempt_count() == val)
 		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
 }
-EXPORT_SYMBOL(add_preempt_count);
+EXPORT_SYMBOL(preempt_count_add);
 
-void __kprobes sub_preempt_count(int val)
+void __kprobes preempt_count_sub(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
@@ -2279,9 +2279,9 @@ void __kprobes sub_preempt_count(int val
 
 	if (preempt_count() == val)
 		trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
-	*preempt_count_ptr() -= val;
+	__preempt_count_sub(val);
 }
-EXPORT_SYMBOL(sub_preempt_count);
+EXPORT_SYMBOL(preempt_count_sub);
 
 #endif
 
@@ -2545,9 +2545,9 @@ asmlinkage void __sched notrace preempt_
 		return;
 
 	do {
-		add_preempt_count_notrace(PREEMPT_ACTIVE);
+		__preempt_count_add(PREEMPT_ACTIVE);
 		__schedule();
-		sub_preempt_count_notrace(PREEMPT_ACTIVE);
+		__preempt_count_sub(PREEMPT_ACTIVE);
 
 		/*
 		 * Check again in case we missed a preemption opportunity
@@ -2574,11 +2574,11 @@ asmlinkage void __sched preempt_schedule
 	prev_state = exception_enter();
 
 	do {
-		add_preempt_count(PREEMPT_ACTIVE);
+		__preempt_count_add(PREEMPT_ACTIVE);
 		local_irq_enable();
 		__schedule();
 		local_irq_disable();
-		sub_preempt_count(PREEMPT_ACTIVE);
+		__preempt_count_sub(PREEMPT_ACTIVE);
 
 		/*
 		 * Check again in case we missed a preemption opportunity
@@ -3818,16 +3818,11 @@ SYSCALL_DEFINE0(sched_yield)
 	return 0;
 }
 
-static inline int should_resched(void)
-{
-	return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
-}
-
 static void __cond_resched(void)
 {
-	add_preempt_count(PREEMPT_ACTIVE);
+	preempt_count_add(PREEMPT_ACTIVE);
 	__schedule();
-	sub_preempt_count(PREEMPT_ACTIVE);
+	preempt_count_sub(PREEMPT_ACTIVE);
 }
 
 int __sched _cond_resched(void)
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -100,7 +100,7 @@ static void __local_bh_disable(unsigned
 
 	raw_local_irq_save(flags);
 	/*
-	 * The preempt tracer hooks into add_preempt_count and will break
+	 * The preempt tracer hooks into preempt_count_add and will break
 	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
 	 * is set and before current->softirq_enabled is cleared.
 	 * We must manually increment preempt_count here and manually
@@ -120,7 +120,7 @@ static void __local_bh_disable(unsigned
 #else /* !CONFIG_TRACE_IRQFLAGS */
 static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
 {
-	add_preempt_count(cnt);
+	preempt_count_add(cnt);
 	barrier();
 }
 #endif /* CONFIG_TRACE_IRQFLAGS */
@@ -139,7 +139,7 @@ static void __local_bh_enable(unsigned i
 
 	if (softirq_count() == cnt)
 		trace_softirqs_on(_RET_IP_);
-	sub_preempt_count(cnt);
+	preempt_count_sub(cnt);
 }
 
 /*
@@ -169,12 +169,12 @@ static inline void _local_bh_enable_ip(u
 	 * Keep preemption disabled until we are done with
 	 * softirq processing:
  	 */
-	sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);
+	preempt_count_sub(SOFTIRQ_DISABLE_OFFSET - 1);
 
 	if (unlikely(!in_interrupt() && local_softirq_pending()))
 		do_softirq();
 
-	dec_preempt_count();
+	preempt_count_dec();
 #ifdef CONFIG_TRACE_IRQFLAGS
 	local_irq_enable();
 #endif
@@ -360,7 +360,7 @@ void irq_exit(void)
 
 	account_irq_exit_time(current);
 	trace_hardirq_exit();
-	sub_preempt_count(HARDIRQ_OFFSET);
+	preempt_count_sub(HARDIRQ_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
 



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 10/11] sched, x86: Provide a per-cpu preempt_count implementation
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
                   ` (8 preceding siblings ...)
  2013-09-17  9:10 ` [PATCH 09/11] sched: Extract the basic add/sub preempt_count modifiers Peter Zijlstra
@ 2013-09-17  9:10 ` Peter Zijlstra
  2013-09-17  9:10 ` [PATCH 11/11] sched, x86: Optimize the preempt_schedule() call Peter Zijlstra
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17  9:10 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Peter Zijlstra

[-- Attachment #1: peterz-x86-per-cpu-preempt_count.patch --]
[-- Type: text/plain, Size: 8120 bytes --]

Convert x86 to use a per-cpu preemption count. The reason for doing so
is that accessing per-cpu variables is a lot cheaper than accessing
thread_info variables.

We still need to save/restore the actual preemption count due to
PREEMPT_ACTIVE so we place the per-cpu __preempt_count variable in the
same cache-line as the other hot __switch_to() variables such as
current_task.

Also rename thread_info::preempt_count to ensure nobody is
'accidentally' still poking at it.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 arch/x86/include/asm/Kbuild        |    1 
 arch/x86/include/asm/preempt.h     |   98 +++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/thread_info.h |    5 -
 arch/x86/kernel/asm-offsets.c      |    1 
 arch/x86/kernel/cpu/common.c       |    5 +
 arch/x86/kernel/entry_32.S         |    7 --
 arch/x86/kernel/entry_64.S         |    4 -
 arch/x86/kernel/process_32.c       |   10 +++
 arch/x86/kernel/process_64.c       |   10 +++
 9 files changed, 128 insertions(+), 13 deletions(-)

--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,4 +5,3 @@ genhdr-y += unistd_64.h
 genhdr-y += unistd_x32.h
 
 generic-y += clkdev.h
-generic-y += preempt.h
--- /dev/null
+++ b/arch/x86/include/asm/preempt.h
@@ -0,0 +1,98 @@
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <asm/rmwcc.h>
+#include <asm/percpu.h>
+#include <linux/thread_info.h>
+
+DECLARE_PER_CPU(int, __preempt_count);
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
+static __always_inline int preempt_count(void)
+{
+	return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline int *preempt_count_ptr(void)
+{
+	return &__raw_get_cpu_var(__preempt_count);
+}
+
+/*
+ * must be macros to avoid header recursion hell
+ */
+#define task_preempt_count(p) \
+	(task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED)
+
+#define init_task_preempt_count(p) do { \
+	task_thread_info(p)->saved_preempt_count = 1 | PREEMPT_NEED_RESCHED; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+	task_thread_info(p)->saved_preempt_count = 0; \
+	per_cpu(__preempt_count, (cpu)) = 0; \
+} while (0)
+
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+	__this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+	__this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+	return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
+}
+
+/*
+ * The various preempt_count add/sub methods
+ */
+
+static __always_inline void __preempt_count_add(int val)
+{
+	__this_cpu_add_4(__preempt_count, val);
+}
+
+static __always_inline void __preempt_count_sub(int val)
+{
+	__this_cpu_add_4(__preempt_count, -val);
+}
+
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+	GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
+}
+
+/*
+ * Returns true when we need to resched -- even if we can not.
+ */
+static __always_inline bool need_resched(void)
+{
+	return unlikely(test_preempt_need_resched());
+}
+
+/*
+ * Returns true when we need to resched and can (barring IRQ state).
+ */
+static __always_inline bool should_resched(void)
+{
+	return unlikely(!__this_cpu_read_4(__preempt_count));
+}
+
+#endif /* __ASM_PREEMPT_H */
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -28,8 +28,7 @@ struct thread_info {
 	__u32			flags;		/* low level flags */
 	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;		/* current CPU */
-	int			preempt_count;	/* 0 => preemptable,
-						   <0 => BUG */
+	int			saved_preempt_count;
 	mm_segment_t		addr_limit;
 	struct restart_block    restart_block;
 	void __user		*sysenter_return;
@@ -49,7 +48,7 @@ struct thread_info {
 	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
-	.preempt_count	= INIT_PREEMPT_COUNT,	\
+	.saved_preempt_count = INIT_PREEMPT_COUNT,	\
 	.addr_limit	= KERNEL_DS,		\
 	.restart_block = {			\
 		.fn = do_no_restart_syscall,	\
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -32,7 +32,6 @@ void common(void) {
 	OFFSET(TI_flags, thread_info, flags);
 	OFFSET(TI_status, thread_info, status);
 	OFFSET(TI_addr_limit, thread_info, addr_limit);
-	OFFSET(TI_preempt_count, thread_info, preempt_count);
 
 	BLANK();
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1095,6 +1095,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
 
 DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
+
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
 /*
@@ -1169,6 +1172,8 @@ void debug_stack_reset(void)
 
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -362,12 +362,9 @@ END(ret_from_exception)
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
 	DISABLE_INTERRUPTS(CLBR_ANY)
-	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
-	jnz restore_all
 need_resched:
-	movl TI_flags(%ebp), %ecx	# need_resched set ?
-	testb $_TIF_NEED_RESCHED, %cl
-	jz restore_all
+	cmpl $0,PER_CPU_VAR(__preempt_count)
+	jnz restore_all
 	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
 	jz restore_all
 	call preempt_schedule_irq
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1103,10 +1103,8 @@ ENTRY(native_iret)
 	/* Returning to kernel space. Check if we need preemption */
 	/* rcx:	 threadinfo. interrupts off. */
 ENTRY(retint_kernel)
-	cmpl $0,TI_preempt_count(%rcx)
+	cmpl $0,PER_CPU_VAR(__preempt_count)
 	jnz  retint_restore_args
-	bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
-	jnc  retint_restore_args
 	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
 	jnc  retint_restore_args
 	call preempt_schedule_irq
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -291,6 +291,16 @@ __switch_to(struct task_struct *prev_p,
 	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
 		set_iopl_mask(next->iopl);
 
+#ifdef CONFIG_PREEMPT_COUNT
+	/*
+	 * If it were not for PREEMPT_ACTIVE we could guarantee that the
+	 * preempt_count of all tasks was equal here and this would not be
+	 * needed.
+	 */
+	task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
+	this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
+#endif
+
 	/*
 	 * Now maybe handle debug registers and/or IO bitmaps
 	 */
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -363,6 +363,16 @@ __switch_to(struct task_struct *prev_p,
 	this_cpu_write(old_rsp, next->usersp);
 	this_cpu_write(current_task, next_p);
 
+#ifdef CONFIG_PREEMPT_COUNT
+	/*
+	 * If it were not for PREEMPT_ACTIVE we could guarantee that the
+	 * preempt_count of all tasks was equal here and this would not be
+	 * needed.
+	 */
+	task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
+	this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
+#endif
+
 	this_cpu_write(kernel_stack,
 		  (unsigned long)task_stack_page(next_p) +
 		  THREAD_SIZE - KERNEL_STACK_OFFSET);
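
The one non-obvious piece above is GEN_UNARY_RMWcc, which the asm
goto patch 01/11 makes usable here: it turns
__preempt_count_dec_and_test() into a single decl plus a conditional
branch. Roughly -- this is a sketch, not the macro's literal
expansion:

	static __always_inline bool __preempt_count_dec_and_test(void)
	{
		asm_volatile_goto("decl " __percpu_arg(0) "; je %l[hit_zero]"
				  : : "m" (__preempt_count)
				  : "memory" : hit_zero);
		return false;
	hit_zero:
		return true;
	}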



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 11/11] sched, x86: Optimize the preempt_schedule() call
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
                   ` (9 preceding siblings ...)
  2013-09-17  9:10 ` [PATCH 10/11] sched, x86: Provide a per-cpu preempt_count implementation Peter Zijlstra
@ 2013-09-17  9:10 ` Peter Zijlstra
  2013-09-17 20:23   ` Peter Zijlstra
  2013-09-17 10:53 ` [PATCH 00/11] preempt_count rework -v3 Ingo Molnar
  2013-09-17 18:53 ` [patch 0/6] Make all preempt_count related constants generic Thomas Gleixner
  12 siblings, 1 reply; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17  9:10 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Peter Zijlstra

[-- Attachment #1: peterz-x86-opt-call.patch --]
[-- Type: text/plain, Size: 4962 bytes --]

Remove the bloat of the C calling convention from the
preempt_enable() sites by creating an asm wrapper which allows us to
do an asm("call ___preempt_schedule") instead.

calling.h bits by Andi Kleen
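
Why the wrapper helps: a bare asm("call ...") declares no clobbers,
so the compiler keeps its registers live across the call, and the
thunk makes that legal by preserving everything itself before
entering C. A toy user-space demo of the pattern (x86-64 SysV;
__thunk and hook are made-up names, the real thing is
arch/x86/kernel/preempt.S below):

	#include <stdio.h>

	void hook(void) { puts("scheduled"); }

	asm(".globl __thunk\n"
	    "__thunk:\n\t"
	    "push %rax\n\t push %rcx\n\t push %rdx\n\t"
	    "push %rsi\n\t push %rdi\n\t"
	    "push %r8\n\t push %r9\n\t push %r10\n\t push %r11\n\t"
	    "call hook\n\t"
	    "pop %r11\n\t pop %r10\n\t pop %r9\n\t pop %r8\n\t"
	    "pop %rdi\n\t pop %rsi\n\t"
	    "pop %rdx\n\t pop %rcx\n\t pop %rax\n\t"
	    "ret\n");

	int main(void)
	{
		asm("call __thunk");	/* no clobber list needed */
		return 0;
	}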

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 arch/x86/include/asm/calling.h |   50 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/preempt.h |    8 ++++++
 arch/x86/kernel/Makefile       |    2 +
 arch/x86/kernel/preempt.S      |   25 ++++++++++++++++++++
 include/asm-generic/preempt.h  |   10 ++++++++
 include/linux/preempt.h        |   13 ++++------
 6 files changed, 100 insertions(+), 8 deletions(-)

--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -48,6 +48,8 @@ For 32-bit we have the following convent
 
 #include <asm/dwarf2.h>
 
+#ifdef CONFIG_X86_64
+
 /*
  * 64-bit system call stack frame layout defines and helpers,
  * for assembly code:
@@ -192,3 +194,51 @@ For 32-bit we have the following convent
 	.macro icebp
 	.byte 0xf1
 	.endm
+
+#else /* CONFIG_X86_64 */
+
+/*
+ * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
+ * are different from the entry_32.S versions in not changing the segment
+ * registers. So only suitable for in kernel use, not when transitioning
+ * from or to user space. The resulting stack frame is not a standard
+ * pt_regs frame. The main use case is calling C code from assembler
+ * when all the registers need to be preserved.
+ */
+
+	.macro SAVE_ALL
+	pushl_cfi %eax
+	CFI_REL_OFFSET eax, 0
+	pushl_cfi %ebp
+	CFI_REL_OFFSET ebp, 0
+	pushl_cfi %edi
+	CFI_REL_OFFSET edi, 0
+	pushl_cfi %esi
+	CFI_REL_OFFSET esi, 0
+	pushl_cfi %edx
+	CFI_REL_OFFSET edx, 0
+	pushl_cfi %ecx
+	CFI_REL_OFFSET ecx, 0
+	pushl_cfi %ebx
+	CFI_REL_OFFSET ebx, 0
+	.endm
+
+	.macro RESTORE_ALL
+	popl_cfi %ebx
+	CFI_RESTORE ebx
+	popl_cfi %ecx
+	CFI_RESTORE ecx
+	popl_cfi %edx
+	CFI_RESTORE edx
+	popl_cfi %esi
+	CFI_RESTORE esi
+	popl_cfi %edi
+	CFI_RESTORE edi
+	popl_cfi %ebp
+	CFI_RESTORE ebp
+	popl_cfi %eax
+	CFI_RESTORE eax
+	.endm
+
+#endif /* CONFIG_X86_64 */
+
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -99,4 +99,12 @@ static __always_inline bool should_resch
 	return unlikely(!__this_cpu_read_4(__preempt_count));
 }
 
+#ifdef CONFIG_PREEMPT
+#define __preempt_schedule() asm ("call ___preempt_schedule")
+
+#ifdef CONFIG_CONTEXT_TRACKING
+#define __preempt_schedule_context() asm ("call ___preempt_schedule_context")
+#endif
+#endif /* CONFIG_PREEMPT */
+
 #endif /* __ASM_PREEMPT_H */
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,6 +36,8 @@ obj-y			+= tsc.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
 
+obj-$(CONFIG_PREEMPT)	+= preempt.o
+
 obj-y				+= process.o
 obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
--- /dev/null
+++ b/arch/x86/kernel/preempt.S
@@ -0,0 +1,25 @@
+
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/asm.h>
+#include <asm/calling.h>
+
+ENTRY(___preempt_schedule)
+	CFI_STARTPROC
+	SAVE_ALL
+	call preempt_schedule
+	RESTORE_ALL
+	ret
+	CFI_ENDPROC
+
+#ifdef CONFIG_CONTEXT_TRACKING
+
+ENTRY(___preempt_schedule_context)
+	CFI_STARTPROC
+	SAVE_ALL
+	call preempt_schedule_context
+	RESTORE_ALL
+	ret
+	CFI_ENDPROC
+
+#endif
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -90,4 +90,14 @@ static __always_inline bool should_resch
 	return unlikely(!*preempt_count_ptr());
 }
 
+#ifdef CONFIG_PREEMPT
+extern asmlinkage void preempt_schedule(void);
+#define __preempt_schedule() preempt_schedule()
+
+#ifdef CONFIG_CONTEXT_TRACKING
+extern asmlinkage void preempt_schedule_context(void);
+#define __preempt_schedule_context() preempt_schedule_context()
+#endif
+#endif /* CONFIG_PREEMPT */
+
 #endif /* __ASM_PREEMPT_H */
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -50,18 +50,17 @@ do { \
 #define preempt_enable_no_resched() sched_preempt_enable_no_resched()
 
 #ifdef CONFIG_PREEMPT
-asmlinkage void preempt_schedule(void);
 #define preempt_enable() \
 do { \
 	barrier(); \
 	if (unlikely(preempt_count_dec_and_test())) \
-		preempt_schedule(); \
+		__preempt_schedule(); \
 } while (0)
 
 #define preempt_check_resched() \
 do { \
 	if (should_resched()) \
-		preempt_schedule(); \
+		__preempt_schedule(); \
 } while (0)
 
 #else
@@ -83,17 +82,15 @@ do { \
 
 #ifdef CONFIG_PREEMPT
 
-#ifdef CONFIG_CONTEXT_TRACKING
-asmlinkage void preempt_schedule_context(void);
-#else
-#define preempt_schedule_context() preempt_schedule()
+#ifndef CONFIG_CONTEXT_TRACKING
+#define __preempt_schedule_context() __preempt_schedule()
 #endif
 
 #define preempt_enable_notrace() \
 do { \
 	barrier(); \
 	if (unlikely(__preempt_count_dec_and_test())) \
-		preempt_schedule_context(); \
+		__preempt_schedule_context(); \
 } while (0)
 #else
 #define preempt_enable_notrace() preempt_enable_no_resched_notrace()



^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 00/11] preempt_count rework -v3
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
                   ` (10 preceding siblings ...)
  2013-09-17  9:10 ` [PATCH 11/11] sched, x86: Optimize the preempt_schedule() call Peter Zijlstra
@ 2013-09-17 10:53 ` Ingo Molnar
  2013-09-17 11:22   ` Peter Zijlstra
  2013-09-17 18:53 ` [patch 0/6] Make all preempt_count related constants generic Thomas Gleixner
  12 siblings, 1 reply; 80+ messages in thread
From: Ingo Molnar @ 2013-09-17 10:53 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Andi Kleen, Peter Anvin, Mike Galbraith,
	Thomas Gleixner, Arjan van de Ven, Frederic Weisbecker,
	linux-kernel, linux-arch


* Peter Zijlstra <peterz@infradead.org> wrote:

> These patches optimize preempt_enable by firstly folding the preempt and
> need_resched tests into one -- this should work for all architectures. And
> secondly by providing per-arch preempt_count implementations; with x86 using
> per-cpu preempt_count for fastest access.
> 
> These patches have been boot tested on CONFIG_PREEMPT=y x86_64 and survive
> building a x86_64-defconfig kernel.
> 
>    text    data     bss     filename
> 11387014  1454776 1187840 defconfig-build/vmlinux.before
> 11352294  1454776 1187840 defconfig-build/vmlinux.after

That's a 0.3% size improvement (and most of the improvement is in 
hotpaths), despite GCC being somewhat stupid about not allowing us to 
mark asm goto targets as cold paths, which causes some unnecessary 
register shuffling in some cases, right?

Not bad IMO.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 00/11] preempt_count rework -v3
  2013-09-17 10:53 ` [PATCH 00/11] preempt_count rework -v3 Ingo Molnar
@ 2013-09-17 11:22   ` Peter Zijlstra
  0 siblings, 0 replies; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17 11:22 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Andi Kleen, Peter Anvin, Mike Galbraith,
	Thomas Gleixner, Arjan van de Ven, Frederic Weisbecker,
	linux-kernel, linux-arch

On Tue, Sep 17, 2013 at 12:53:44PM +0200, Ingo Molnar wrote:
> 
> * Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > These patches optimize preempt_enable by firstly folding the preempt and
> > need_resched tests into one -- this should work for all architectures. And
> > secondly by providing per-arch preempt_count implementations; with x86 using
> > per-cpu preempt_count for fastest access.
> > 
> > These patches have been boot tested on CONFIG_PREEMPT=y x86_64 and survive
> > building a x86_64-defconfig kernel.
> > 
> >    text    data     bss     filename
> > 11387014  1454776 1187840 defconfig-build/vmlinux.before
> > 11352294  1454776 1187840 defconfig-build/vmlinux.after
> 
> That's a 0.3% size improvement (and most of the improvement is in 
> hotpaths), despite GCC is being somewhat stupid about not allowing us to 
> mark asm goto targets as cold paths and thus causes some unnecessary 
> register shuffling in some cases, right?

I'm not entirely sure where the bloat in 1/11 comes from; several
functions look like they avoid using stack variables in favour of more
registers, which creates more push/pop on entry/exit paths. For others
I'm not entirely sure what happens.

But it does look like the unlikely() thing still works, even with the
asm goto; you'll note that the call to preempt_schedule is out of line.

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 02/11] sched, rcu: Make RCU use resched_cpu()
  2013-09-17  9:10 ` [PATCH 02/11] sched, rcu: Make RCU use resched_cpu() Peter Zijlstra
@ 2013-09-17 14:40   ` Peter Zijlstra
  2013-09-23 16:55     ` Paul E. McKenney
  0 siblings, 1 reply; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17 14:40 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch,
	Paul McKenney, Thomas Meyer

Thomas Meyer reported a UP build failure; the version below should fix it.

---
Subject: sched, rcu: Make RCU use resched_cpu()
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue Sep 17 09:30:55 CEST 2013

We're going to deprecate and remove set_need_resched() for it will do
the wrong thing. Make an exception for RCU and allow it to use
resched_cpu() which will do the right thing.
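
The difference matters with the folded preempt_count:
set_need_resched() only sets the flag on the current task, with no
kick, while resched_cpu() goes through resched_task() which also
pokes the target CPU. For reference, an abridged sketch of the
existing kernel/sched/core.c function (not part of this patch):

	void resched_cpu(int cpu)
	{
		struct rq *rq = cpu_rq(cpu);
		unsigned long flags;

		if (!raw_spin_trylock_irqsave(&rq->lock, flags))
			return;
		resched_task(cpu_curr(cpu));
		raw_spin_unlock_irqrestore(&rq->lock, flags);
	}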

Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 kernel/rcutree.c    |   15 ++++++++++++++-
 kernel/sched/core.c |   10 ++--------
 2 files changed, 16 insertions(+), 9 deletions(-)

--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -898,6 +898,12 @@ static void print_other_cpu_stall(struct
 	force_quiescent_state(rsp);  /* Kick them all. */
 }
 
+/*
+ * This function really isn't for public consumption, but RCU is special in
+ * that context switches can allow the state machine to make progress.
+ */
+extern void resched_cpu(int cpu);
+
 static void print_cpu_stall(struct rcu_state *rsp)
 {
 	int cpu;
@@ -927,7 +933,14 @@ static void print_cpu_stall(struct rcu_s
 				     3 * rcu_jiffies_till_stall_check() + 3;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
-	set_need_resched();  /* kick ourselves to get things going. */
+	/*
+	 * Attempt to revive the RCU machinery by forcing a context switch.
+	 *
+	 * A context switch would normally allow the RCU state machine to make
+	 * progress and it could be we're stuck in kernel space without context
+	 * switches for an entirely unreasonable amount of time.
+	 */
+	resched_cpu(smp_processor_id());
 }
 
 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -513,12 +513,11 @@ static inline void init_hrtick(void)
  * might also involve a cross-CPU call to trigger the scheduler on
  * the target CPU.
  */
-#ifdef CONFIG_SMP
 void resched_task(struct task_struct *p)
 {
 	int cpu;
 
-	assert_raw_spin_locked(&task_rq(p)->lock);
+	lockdep_assert_held(&task_rq(p)->lock);
 
 	if (test_tsk_need_resched(p))
 		return;
@@ -546,6 +545,7 @@ void resched_cpu(int cpu)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
+#ifdef CONFIG_SMP
 #ifdef CONFIG_NO_HZ_COMMON
 /*
  * In the semi idle case, use the nearest busy cpu for migrating timers
@@ -693,12 +693,6 @@ void sched_avg_update(struct rq *rq)
 	}
 }
 
-#else /* !CONFIG_SMP */
-void resched_task(struct task_struct *p)
-{
-	assert_raw_spin_locked(&task_rq(p)->lock);
-	set_tsk_need_resched(p);
-}
 #endif /* CONFIG_SMP */
 
 #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [patch 0/6] Make all preempt_count related constants generic
  2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
                   ` (11 preceding siblings ...)
  2013-09-17 10:53 ` [PATCH 00/11] preempt_count rework -v3 Ingo Molnar
@ 2013-09-17 18:53 ` Thomas Gleixner
  2013-09-17 18:53   ` [patch 1/6] hardirq: Make hardirq bits generic Thomas Gleixner
                     ` (5 more replies)
  12 siblings, 6 replies; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-17 18:53 UTC (permalink / raw)
  To: LKML
  Cc: Peter Zijlstra, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker

There is no point in having PREEMPT_ACTIVE and HARDIRQ_BITS defined by
an architecture.

The PREEMPT_ACTIVE bit manipulation in the low level entry code is
historical and has been replaced by preempt_schedule_irq().
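
For reference, what the entry code gets by calling it -- a condensed
sketch of preempt_schedule_irq() (cf. the kernel/sched/core.c hunk in
Peter's 09/11), which does the PREEMPT_ACTIVE and irq-enable dance so
the low level code does not have to:

	asmlinkage void __sched preempt_schedule_irq(void)
	{
		do {
			__preempt_count_add(PREEMPT_ACTIVE);
			local_irq_enable();
			__schedule();
			local_irq_disable();
			__preempt_count_sub(PREEMPT_ACTIVE);

			/* check again; we may have missed a preemption */
		} while (need_resched());
	}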

The HARDIRQ_BITS constant could be 1 in theory, because we run all
interrupt handlers with interrupts disabled nowadays. We still have a
few palaeontological drivers (IDE) which re-enable interrupts in the
interrupt handler, though, so we need to allow at least some limited
nesting.

This series converts the remaining low level PREEMPT_ACTIVE users to
utilize preempt_schedule_irq() and moves the PREEMPT_ACTIVE and
HARDIRQ_BITS defines to the core code.

I stumbled over these historical leftovers while trying to implement a
lazy interrupt disable scheme (which still has some rough edges due to
completely braindamaged hardware).

Peter, it would be nice if you could pick that up into your
preempt/need_resched series.

Thanks,

	tglx
---
 arch/alpha/include/asm/thread_info.h      |    2 -
 arch/arc/include/asm/thread_info.h        |    2 -
 arch/arm/include/asm/thread_info.h        |    6 ----
 arch/arm64/include/asm/thread_info.h      |    6 ----
 arch/avr32/include/asm/thread_info.h      |    2 -
 arch/blackfin/include/asm/hardirq.h       |    3 --
 arch/blackfin/include/asm/thread_info.h   |    2 -
 arch/c6x/include/asm/thread_info.h        |    2 -
 arch/cris/include/asm/hardirq.h           |   12 --------
 arch/cris/include/asm/thread_info.h       |    2 -
 arch/frv/include/asm/thread_info.h        |    2 -
 arch/h8300/include/asm/hardirq.h          |   12 --------
 arch/h8300/include/asm/thread_info.h      |    2 -
 arch/h8300/kernel/entry.S                 |    6 ----
 arch/hexagon/include/asm/thread_info.h    |    4 --
 arch/ia64/include/asm/thread_info.h       |    3 --
 arch/ia64/kernel/entry.S                  |   15 ----------
 arch/m32r/include/asm/hardirq.h           |   16 -----------
 arch/m32r/include/asm/thread_info.h       |    2 -
 arch/m32r/kernel/entry.S                  |    8 -----
 arch/m68k/include/asm/hardirq.h           |   11 --------
 arch/m68k/include/asm/thread_info.h       |    2 -
 arch/metag/include/asm/thread_info.h      |    2 -
 arch/microblaze/include/asm/thread_info.h |    2 -
 arch/mips/include/asm/thread_info.h       |    2 -
 arch/mn10300/include/asm/thread_info.h    |    2 -
 arch/parisc/include/asm/thread_info.h     |    3 --
 arch/powerpc/include/asm/thread_info.h    |    2 -
 arch/s390/include/asm/hardirq.h           |    2 -
 arch/s390/include/asm/thread_info.h       |    2 -
 arch/score/include/asm/thread_info.h      |    2 -
 arch/sh/include/asm/thread_info.h         |    2 -
 arch/sh/kernel/entry-common.S             |    6 +---
 arch/sparc/include/asm/hardirq_32.h       |    1 
 arch/sparc/include/asm/hardirq_64.h       |    2 -
 arch/sparc/include/asm/thread_info_32.h   |    2 -
 arch/sparc/include/asm/thread_info_64.h   |    2 -
 arch/sparc/kernel/rtrap_64.S              |    5 ---
 arch/tile/include/asm/hardirq.h           |    2 -
 arch/tile/include/asm/thread_info.h       |    2 -
 arch/um/include/asm/thread_info.h         |    2 -
 arch/unicore32/include/asm/thread_info.h  |    6 ----
 arch/x86/include/asm/thread_info.h        |    2 -
 arch/xtensa/include/asm/thread_info.h     |    2 -
 include/linux/preempt_mask.h              |   41 ++++++++----------------------
 45 files changed, 17 insertions(+), 201 deletions(-)


^ permalink raw reply	[flat|nested] 80+ messages in thread

* [patch 1/6] hardirq: Make hardirq bits generic
  2013-09-17 18:53 ` [patch 0/6] Make all preempt_count related constants generic Thomas Gleixner
@ 2013-09-17 18:53   ` Thomas Gleixner
  2013-09-17 20:00     ` Geert Uytterhoeven
  2013-11-13 19:42     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
  2013-09-17 18:53   ` [patch 2/6] h8300: Use preempt_schedule_irq Thomas Gleixner
                     ` (4 subsequent siblings)
  5 siblings, 2 replies; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-17 18:53 UTC (permalink / raw)
  To: LKML
  Cc: Peter Zijlstra, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker

[-- Attachment #1: hardirqs-remove-arch-specific-HARDIRQ_BITS.patch --]
[-- Type: text/plain, Size: 6999 bytes --]

There is no reason for per arch hardirq bits. Make them all generic

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/blackfin/include/asm/hardirq.h |    3 ---
 arch/cris/include/asm/hardirq.h     |   12 ------------
 arch/h8300/include/asm/hardirq.h    |   12 ------------
 arch/m32r/include/asm/hardirq.h     |   16 ----------------
 arch/m68k/include/asm/hardirq.h     |   11 -----------
 arch/s390/include/asm/hardirq.h     |    2 --
 arch/sparc/include/asm/hardirq_32.h |    1 -
 arch/sparc/include/asm/hardirq_64.h |    2 --
 arch/tile/include/asm/hardirq.h     |    2 --
 include/linux/preempt_mask.h        |   30 ++++++++----------------------
 10 files changed, 8 insertions(+), 83 deletions(-)

Index: linux-2.6/arch/blackfin/include/asm/hardirq.h
===================================================================
--- linux-2.6.orig/arch/blackfin/include/asm/hardirq.h
+++ linux-2.6/arch/blackfin/include/asm/hardirq.h
@@ -12,9 +12,6 @@
 extern void ack_bad_irq(unsigned int irq);
 #define ack_bad_irq ack_bad_irq
 
-/* Define until common code gets sane defaults */
-#define HARDIRQ_BITS 9
-
 #include <asm-generic/hardirq.h>
 
 #endif
Index: linux-2.6/arch/cris/include/asm/hardirq.h
===================================================================
--- linux-2.6.orig/arch/cris/include/asm/hardirq.h
+++ linux-2.6/arch/cris/include/asm/hardirq.h
@@ -2,18 +2,6 @@
 #define __ASM_HARDIRQ_H
 
 #include <asm/irq.h>
-
-#define HARDIRQ_BITS	8
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #include <asm-generic/hardirq.h>
 
 #endif /* __ASM_HARDIRQ_H */
Index: linux-2.6/arch/h8300/include/asm/hardirq.h
===================================================================
--- linux-2.6.orig/arch/h8300/include/asm/hardirq.h
+++ linux-2.6/arch/h8300/include/asm/hardirq.h
@@ -2,18 +2,6 @@
 #define __H8300_HARDIRQ_H
 
 #include <asm/irq.h>
-
-#define HARDIRQ_BITS	8
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #include <asm-generic/hardirq.h>
 
 #endif
Index: linux-2.6/arch/m32r/include/asm/hardirq.h
===================================================================
--- linux-2.6.orig/arch/m32r/include/asm/hardirq.h
+++ linux-2.6/arch/m32r/include/asm/hardirq.h
@@ -3,22 +3,6 @@
 #define __ASM_HARDIRQ_H
 
 #include <asm/irq.h>
-
-#if NR_IRQS > 256
-#define HARDIRQ_BITS	9
-#else
-#define HARDIRQ_BITS	8
-#endif
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #include <asm-generic/hardirq.h>
 
 #endif /* __ASM_HARDIRQ_H */
Index: linux-2.6/arch/m68k/include/asm/hardirq.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/hardirq.h
+++ linux-2.6/arch/m68k/include/asm/hardirq.h
@@ -5,17 +5,6 @@
 #include <linux/cache.h>
 #include <asm/irq.h>
 
-#define HARDIRQ_BITS	8
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #ifdef CONFIG_MMU
 
 static inline void ack_bad_irq(unsigned int irq)
Index: linux-2.6/arch/s390/include/asm/hardirq.h
===================================================================
--- linux-2.6.orig/arch/s390/include/asm/hardirq.h
+++ linux-2.6/arch/s390/include/asm/hardirq.h
@@ -18,8 +18,6 @@
 #define __ARCH_HAS_DO_SOFTIRQ
 #define __ARCH_IRQ_EXIT_IRQS_DISABLED
 
-#define HARDIRQ_BITS	8
-
 static inline void ack_bad_irq(unsigned int irq)
 {
 	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
Index: linux-2.6/arch/sparc/include/asm/hardirq_32.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/hardirq_32.h
+++ linux-2.6/arch/sparc/include/asm/hardirq_32.h
@@ -7,7 +7,6 @@
 #ifndef __SPARC_HARDIRQ_H
 #define __SPARC_HARDIRQ_H
 
-#define HARDIRQ_BITS    8
 #include <asm-generic/hardirq.h>
 
 #endif /* __SPARC_HARDIRQ_H */
Index: linux-2.6/arch/sparc/include/asm/hardirq_64.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/hardirq_64.h
+++ linux-2.6/arch/sparc/include/asm/hardirq_64.h
@@ -14,6 +14,4 @@
 
 void ack_bad_irq(unsigned int irq);
 
-#define HARDIRQ_BITS	8
-
 #endif /* !(__SPARC64_HARDIRQ_H) */
Index: linux-2.6/arch/tile/include/asm/hardirq.h
===================================================================
--- linux-2.6.orig/arch/tile/include/asm/hardirq.h
+++ linux-2.6/arch/tile/include/asm/hardirq.h
@@ -42,6 +42,4 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat)
 
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
 
-#define HARDIRQ_BITS	8
-
 #endif /* _ASM_TILE_HARDIRQ_H */
Index: linux-2.6/include/linux/preempt_mask.h
===================================================================
--- linux-2.6.orig/include/linux/preempt_mask.h
+++ linux-2.6/include/linux/preempt_mask.h
@@ -11,36 +11,22 @@
  * - bits 0-7 are the preemption count (max preemption depth: 256)
  * - bits 8-15 are the softirq count (max # of softirqs: 256)
  *
- * The hardirq count can in theory reach the same as NR_IRQS.
- * In reality, the number of nested IRQS is limited to the stack
- * size as well. For archs with over 1000 IRQS it is not practical
- * to expect that they will all nest. We give a max of 10 bits for
- * hardirq nesting. An arch may choose to give less than 10 bits.
- * m68k expects it to be 8.
- *
- * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
- * - bit 26 is the NMI_MASK
- * - bit 27 is the PREEMPT_ACTIVE flag
+ * The hardirq count could in theory be the same as the number of
+ * interrupts in the system, but we run all interrupt handlers with
+ * interrupts disabled, so we cannot have nesting interrupts. Though
+ * there are a few palaeontologic drivers which reenable interrupts in
+ * the handler, so we need more than one bit here.
  *
  * PREEMPT_MASK: 0x000000ff
  * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x03ff0000
- *     NMI_MASK: 0x04000000
+ * HARDIRQ_MASK: 0x000f0000
+ *     NMI_MASK: 0x00100000
  */
 #define PREEMPT_BITS	8
 #define SOFTIRQ_BITS	8
+#define HARDIRQ_BITS	4
 #define NMI_BITS	1
 
-#define MAX_HARDIRQ_BITS 10
-
-#ifndef HARDIRQ_BITS
-# define HARDIRQ_BITS	MAX_HARDIRQ_BITS
-#endif
-
-#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
-#error HARDIRQ_BITS too high!
-#endif
-
 #define PREEMPT_SHIFT	0
 #define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
 #define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
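
A quick user-space sanity check of the new layout (the printed values
match the comment block above):

	#include <stdio.h>

	#define PREEMPT_BITS	8
	#define SOFTIRQ_BITS	8
	#define HARDIRQ_BITS	4
	#define NMI_BITS	1

	#define PREEMPT_SHIFT	0
	#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
	#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
	#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)

	#define MASK(bits, shift) (((1u << (bits)) - 1) << (shift))

	int main(void)
	{
		printf("PREEMPT_MASK: 0x%08x\n", MASK(PREEMPT_BITS, PREEMPT_SHIFT));
		printf("SOFTIRQ_MASK: 0x%08x\n", MASK(SOFTIRQ_BITS, SOFTIRQ_SHIFT));
		printf("HARDIRQ_MASK: 0x%08x\n", MASK(HARDIRQ_BITS, HARDIRQ_SHIFT));
		printf("    NMI_MASK: 0x%08x\n", MASK(NMI_BITS, NMI_SHIFT));
		return 0;
	}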



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [patch 2/6] h8300: Use preempt_schedule_irq
  2013-09-17 18:53 ` [patch 0/6] Make all preempt_count related constants generic Thomas Gleixner
  2013-09-17 18:53   ` [patch 1/6] hardirq: Make hardirq bits generic Thomas Gleixner
@ 2013-09-17 18:53   ` Thomas Gleixner
  2013-09-20 17:41     ` Guenter Roeck
  2013-09-17 18:53   ` [patch 3/6] m32r: Use preempt_schedule_irq Thomas Gleixner
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-17 18:53 UTC (permalink / raw)
  To: LKML
  Cc: Peter Zijlstra, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Yoshinori Sato, Geert Uytterhoeven

[-- Attachment #1: h8300-use-schedule-preempt-irq.patch --]
[-- Type: text/plain, Size: 889 bytes --]

Use the proper function instead of fiddling with PREEMPT_ACTIVE and
interrupt enable/disable in the low level code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>

---
 arch/h8300/kernel/entry.S |    6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

Index: linux-2.6/arch/h8300/kernel/entry.S
===================================================================
--- linux-2.6.orig/arch/h8300/kernel/entry.S
+++ linux-2.6/arch/h8300/kernel/entry.S
@@ -316,13 +316,9 @@ need_resched:
 	beq	restore_all:8
 	mov.b	@(LCCR+1:16,sp),r0l	/* Interrupt Enabled? */
 	bmi	restore_all:8
-	mov.l	#PREEMPT_ACTIVE,er0
-	mov.l	er0,@(TI_PRE_COUNT:16,er4)
-	andc	#0x7f,ccr
 	mov.l	sp,er0
 	jsr	@_set_esp0
-	jsr	@_schedule
-	orc	#0x80,ccr
+	jsr	@_preempt_schedule_irq
 	bra	need_resched:8
 #endif
 



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [patch 3/6] m32r: Use preempt_schedule_irq
  2013-09-17 18:53 ` [patch 0/6] Make all preempt_count related constants generic Thomas Gleixner
  2013-09-17 18:53   ` [patch 1/6] hardirq: Make hardirq bits generic Thomas Gleixner
  2013-09-17 18:53   ` [patch 2/6] h8300: Use preempt_schedule_irq Thomas Gleixner
@ 2013-09-17 18:53   ` Thomas Gleixner
  2013-11-13 19:42     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
  2013-09-17 18:53   ` [patch 5/6] sparc: " Thomas Gleixner
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-17 18:53 UTC (permalink / raw)
  To: LKML
  Cc: Peter Zijlstra, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Hirokazu Takata

[-- Attachment #1: m32r-use-preempt-schedule-irq.patch --]
[-- Type: text/plain, Size: 890 bytes --]

Use the proper core function instead of fiddling with preempt_active
and interrupt enable in the low level code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: linux-m32r-ja@ml.linux-m32r.org

---
 arch/m32r/kernel/entry.S |    8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

Index: linux-2.6/arch/m32r/kernel/entry.S
===================================================================
--- linux-2.6.orig/arch/m32r/kernel/entry.S
+++ linux-2.6/arch/m32r/kernel/entry.S
@@ -182,13 +182,7 @@ need_resched:
 	ld	r4, PSW(sp)		; interrupts off (exception path) ?
 	and3	r4, r4, #0x4000
 	beqz	r4, restore_all
-	LDIMM	(r4, PREEMPT_ACTIVE)
-	st	r4, @(TI_PRE_COUNT, r8)
-	ENABLE_INTERRUPTS(r4)
-	bl	schedule
-	ldi	r4, #0
-	st	r4, @(TI_PRE_COUNT, r8)
-	DISABLE_INTERRUPTS(r4)
+	bl	preempt_schedule_irq
 	bra	need_resched
 #endif
 



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [patch 4/6] ia64: Use preempt_schedule_irq
  2013-09-17 18:53 ` [patch 0/6] Make all preempt_count related constants generic Thomas Gleixner
                     ` (3 preceding siblings ...)
  2013-09-17 18:53   ` [patch 5/6] sparc: " Thomas Gleixner
@ 2013-09-17 18:53   ` Thomas Gleixner
  2013-11-13 19:43     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
  2013-11-20 19:59     ` [patch 4/6] " Tony Luck
  2013-09-17 18:53   ` [patch 6/6] preempt: Make PREEMPT_ACTIVE generic Thomas Gleixner
  5 siblings, 2 replies; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-17 18:53 UTC (permalink / raw)
  To: LKML
  Cc: Peter Zijlstra, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Tony Luck, Fenghua Yu

[-- Attachment #1: ia64-use-preempt-schedule-irq.patch --]
[-- Type: text/plain, Size: 1289 bytes --]

Use the proper core function instead of fiddling with PREEMPT_ACTIVE
and enable/disable interrupts in the low level code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: linux-ia64@vger.kernel.org

---
 arch/ia64/kernel/entry.S |   15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

Index: linux-2.6/arch/ia64/kernel/entry.S
===================================================================
--- linux-2.6.orig/arch/ia64/kernel/entry.S
+++ linux-2.6/arch/ia64/kernel/entry.S
@@ -1169,21 +1169,8 @@ skip_rbs_switch:
 .work_pending:
 	tbit.z p6,p0=r31,TIF_NEED_RESCHED	// is resched not needed?
 (p6)	br.cond.sptk.few .notify
-#ifdef CONFIG_PREEMPT
-(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
-	;;
-(pKStk) st4 [r20]=r21
-#endif
-	SSM_PSR_I(p0, p6, r2)	// enable interrupts
-	br.call.spnt.many rp=schedule
+	br.call.spnt.many rp=preempt_schedule_irq
 .ret9:	cmp.eq p6,p0=r0,r0	// p6 <- 1 (re-check)
-	RSM_PSR_I(p0, r2, r20)	// disable interrupts
-	;;
-#ifdef CONFIG_PREEMPT
-(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
-	;;
-(pKStk)	st4 [r20]=r0		// preempt_count() <- 0
-#endif
 (pLvSys)br.cond.sptk.few  __paravirt_pending_syscall_end
 	br.cond.sptk.many .work_processed_kernel
 



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [patch 5/6] sparc: Use preempt_schedule_irq
  2013-09-17 18:53 ` [patch 0/6] Make all preempt_count related constants generic Thomas Gleixner
                     ` (2 preceding siblings ...)
  2013-09-17 18:53   ` [patch 3/6] m32r: Use preempt_schedule_irq Thomas Gleixner
@ 2013-09-17 18:53   ` Thomas Gleixner
  2013-09-17 22:54     ` David Miller
  2013-11-13 19:43     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
  2013-09-17 18:53   ` [patch 4/6] ia64: " Thomas Gleixner
  2013-09-17 18:53   ` [patch 6/6] preempt: Make PREEMPT_ACTIVE generic Thomas Gleixner
  5 siblings, 2 replies; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-17 18:53 UTC (permalink / raw)
  To: LKML
  Cc: Peter Zijlstra, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, David S. Miller

[-- Attachment #1: sparc-use-preempt-schedule-irq.patch --]
[-- Type: text/plain, Size: 1038 bytes --]

The low-level preemption code fiddles with the PREEMPT_ACTIVE bit for
no reason and calls schedule() with interrupts disabled, which is
wrong to begin with. Remove the PREEMPT_ACTIVE fiddling and call the
proper preempt_schedule_irq() function.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: sparclinux@vger.kernel.org

---
 arch/sparc/kernel/rtrap_64.S |    5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

Index: linux-2.6/arch/sparc/kernel/rtrap_64.S
===================================================================
--- linux-2.6.orig/arch/sparc/kernel/rtrap_64.S
+++ linux-2.6/arch/sparc/kernel/rtrap_64.S
@@ -306,12 +306,9 @@ to_kernel:
 		 nop
 		cmp			%l4, 0
 		bne,pn			%xcc, kern_fpucheck
-		 sethi			%hi(PREEMPT_ACTIVE), %l6
-		stw			%l6, [%g6 + TI_PRE_COUNT]
-		call			schedule
+		call			preempt_schedule_irq
 		 nop
 		ba,pt			%xcc, rtrap
-		 stw			%g0, [%g6 + TI_PRE_COUNT]
 #endif
 kern_fpucheck:	ldub			[%g6 + TI_FPDEPTH], %l5
 		brz,pt			%l5, rt_continue



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [patch 6/6] preempt: Make PREEMPT_ACTIVE generic
  2013-09-17 18:53 ` [patch 0/6] Make all preempt_count related constants generic Thomas Gleixner
                     ` (4 preceding siblings ...)
  2013-09-17 18:53   ` [patch 4/6] ia64: " Thomas Gleixner
@ 2013-09-17 18:53   ` Thomas Gleixner
  2013-09-18 10:48     ` Peter Zijlstra
  2013-11-13 19:43     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
  5 siblings, 2 replies; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-17 18:53 UTC (permalink / raw)
  To: LKML
  Cc: Peter Zijlstra, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker

[-- Attachment #1: preempt-get-rid-of-preempt-active-arch-magic.patch --]
[-- Type: text/plain, Size: 17414 bytes --]

No point in having this bit defined per architecture.
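
For the record, with the generic layout below the value falls out as:
NMI_SHIFT = HARDIRQ_SHIFT + HARDIRQ_BITS = 16 + 4 = 20, so
PREEMPT_ACTIVE_SHIFT = NMI_SHIFT + NMI_BITS = 21 and
PREEMPT_ACTIVE = 1 << 21 = 0x00200000, matching the updated comment.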

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/alpha/include/asm/thread_info.h      |    2 --
 arch/arc/include/asm/thread_info.h        |    2 --
 arch/arm/include/asm/thread_info.h        |    6 ------
 arch/arm64/include/asm/thread_info.h      |    6 ------
 arch/avr32/include/asm/thread_info.h      |    2 --
 arch/blackfin/include/asm/thread_info.h   |    2 --
 arch/c6x/include/asm/thread_info.h        |    2 --
 arch/cris/include/asm/thread_info.h       |    2 --
 arch/frv/include/asm/thread_info.h        |    2 --
 arch/h8300/include/asm/thread_info.h      |    2 --
 arch/hexagon/include/asm/thread_info.h    |    4 ----
 arch/ia64/include/asm/thread_info.h       |    3 ---
 arch/m32r/include/asm/thread_info.h       |    2 --
 arch/m68k/include/asm/thread_info.h       |    2 --
 arch/metag/include/asm/thread_info.h      |    2 --
 arch/microblaze/include/asm/thread_info.h |    2 --
 arch/mips/include/asm/thread_info.h       |    2 --
 arch/mn10300/include/asm/thread_info.h    |    2 --
 arch/parisc/include/asm/thread_info.h     |    3 ---
 arch/powerpc/include/asm/thread_info.h    |    2 --
 arch/s390/include/asm/thread_info.h       |    2 --
 arch/score/include/asm/thread_info.h      |    2 --
 arch/sh/include/asm/thread_info.h         |    2 --
 arch/sh/kernel/entry-common.S             |    6 ++----
 arch/sparc/include/asm/thread_info_32.h   |    2 --
 arch/sparc/include/asm/thread_info_64.h   |    2 --
 arch/tile/include/asm/thread_info.h       |    2 --
 arch/um/include/asm/thread_info.h         |    2 --
 arch/unicore32/include/asm/thread_info.h  |    6 ------
 arch/x86/include/asm/thread_info.h        |    2 --
 arch/xtensa/include/asm/thread_info.h     |    2 --
 include/linux/preempt_mask.h              |   15 +++++----------
 32 files changed, 7 insertions(+), 90 deletions(-)

Index: linux-2.6/arch/alpha/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/alpha/include/asm/thread_info.h
+++ linux-2.6/arch/alpha/include/asm/thread_info.h
@@ -58,8 +58,6 @@ register struct thread_info *__current_t
 #define THREAD_SIZE_ORDER 1
 #define THREAD_SIZE (2*PAGE_SIZE)
 
-#define PREEMPT_ACTIVE		0x40000000
-
 /*
  * Thread information flags:
  * - these are process state flags and used from assembly
Index: linux-2.6/arch/arc/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/arc/include/asm/thread_info.h
+++ linux-2.6/arch/arc/include/asm/thread_info.h
@@ -80,8 +80,6 @@ static inline __attribute_const__ struct
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE      0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
Index: linux-2.6/arch/arm/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/arm/include/asm/thread_info.h
+++ linux-2.6/arch/arm/include/asm/thread_info.h
@@ -141,12 +141,6 @@ extern int vfp_restore_user_hwstate(stru
 #endif
 
 /*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring.  See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE	0x40000000
-
-/*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
  *  TIF_SYSCAL_AUDIT	- syscall auditing active
Index: linux-2.6/arch/arm64/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/arm64/include/asm/thread_info.h
+++ linux-2.6/arch/arm64/include/asm/thread_info.h
@@ -89,12 +89,6 @@ static inline struct thread_info *curren
 #endif
 
 /*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring.  See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE	0x40000000
-
-/*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
  *  TIF_SIGPENDING	- signal pending
Index: linux-2.6/arch/avr32/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/avr32/include/asm/thread_info.h
+++ linux-2.6/arch/avr32/include/asm/thread_info.h
@@ -66,8 +66,6 @@ static inline struct thread_info *curren
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x40000000
-
 /*
  * Thread information flags
  * - these are process state flags that various assembly files may need to access
Index: linux-2.6/arch/blackfin/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/blackfin/include/asm/thread_info.h
+++ linux-2.6/arch/blackfin/include/asm/thread_info.h
@@ -88,8 +88,6 @@ static inline struct thread_info *curren
 #define TI_CPU		12
 #define TI_PREEMPT	16
 
-#define	PREEMPT_ACTIVE	0x4000000
-
 /*
  * thread information flag bit numbers
  */
Index: linux-2.6/arch/c6x/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/c6x/include/asm/thread_info.h
+++ linux-2.6/arch/c6x/include/asm/thread_info.h
@@ -84,8 +84,6 @@ struct thread_info *current_thread_info(
 #define put_thread_info(ti)	put_task_struct((ti)->task)
 #endif /* __ASSEMBLY__ */
 
-#define	PREEMPT_ACTIVE	0x10000000
-
 /*
  * thread information flag bit numbers
  * - pending work-to-be-done flags are in LSW
Index: linux-2.6/arch/cris/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/cris/include/asm/thread_info.h
+++ linux-2.6/arch/cris/include/asm/thread_info.h
@@ -44,8 +44,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
Index: linux-2.6/arch/frv/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/frv/include/asm/thread_info.h
+++ linux-2.6/arch/frv/include/asm/thread_info.h
@@ -52,8 +52,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
Index: linux-2.6/arch/h8300/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/h8300/include/asm/thread_info.h
+++ linux-2.6/arch/h8300/include/asm/thread_info.h
@@ -77,8 +77,6 @@ static inline struct thread_info *curren
 #define TI_CPU		12
 #define TI_PRE_COUNT	16
 
-#define	PREEMPT_ACTIVE	0x4000000
-
 /*
  * thread information flag bit numbers
  */
Index: linux-2.6/arch/hexagon/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/hexagon/include/asm/thread_info.h
+++ linux-2.6/arch/hexagon/include/asm/thread_info.h
@@ -73,10 +73,6 @@ struct thread_info {
 
 #endif  /* __ASSEMBLY__  */
 
-/*  looks like "linux/hardirq.h" uses this.  */
-
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifndef __ASSEMBLY__
 
 #define INIT_THREAD_INFO(tsk)                   \
Index: linux-2.6/arch/ia64/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/ia64/include/asm/thread_info.h
+++ linux-2.6/arch/ia64/include/asm/thread_info.h
@@ -11,9 +11,6 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 
-#define PREEMPT_ACTIVE_BIT 30
-#define PREEMPT_ACTIVE	(1 << PREEMPT_ACTIVE_BIT)
-
 #ifndef __ASSEMBLY__
 
 /*
Index: linux-2.6/arch/m32r/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/m32r/include/asm/thread_info.h
+++ linux-2.6/arch/m32r/include/asm/thread_info.h
@@ -53,8 +53,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #define THREAD_SIZE		(PAGE_SIZE << 1)
 #define THREAD_SIZE_ORDER	1
 /*
Index: linux-2.6/arch/m68k/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/thread_info.h
+++ linux-2.6/arch/m68k/include/asm/thread_info.h
@@ -35,8 +35,6 @@ struct thread_info {
 };
 #endif /* __ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x4000000
-
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
Index: linux-2.6/arch/metag/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/metag/include/asm/thread_info.h
+++ linux-2.6/arch/metag/include/asm/thread_info.h
@@ -46,8 +46,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifdef CONFIG_4KSTACKS
 #define THREAD_SHIFT		12
 #else
Index: linux-2.6/arch/microblaze/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/microblaze/include/asm/thread_info.h
+++ linux-2.6/arch/microblaze/include/asm/thread_info.h
@@ -106,8 +106,6 @@ static inline struct thread_info *curren
 /* thread information allocation */
 #endif /* __ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may
Index: linux-2.6/arch/mips/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/mips/include/asm/thread_info.h
+++ linux-2.6/arch/mips/include/asm/thread_info.h
@@ -92,8 +92,6 @@ static inline struct thread_info *curren
 
 #define STACK_WARN	(THREAD_SIZE / 8)
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
Index: linux-2.6/arch/mn10300/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/mn10300/include/asm/thread_info.h
+++ linux-2.6/arch/mn10300/include/asm/thread_info.h
@@ -16,8 +16,6 @@
 
 #include <asm/page.h>
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifdef CONFIG_4KSTACKS
 #define THREAD_SIZE		(4096)
 #define THREAD_SIZE_ORDER	(0)
Index: linux-2.6/arch/parisc/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/parisc/include/asm/thread_info.h
+++ linux-2.6/arch/parisc/include/asm/thread_info.h
@@ -46,9 +46,6 @@ struct thread_info {
 #define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define THREAD_SHIFT            (PAGE_SHIFT + THREAD_SIZE_ORDER)
 
-#define PREEMPT_ACTIVE_BIT	28
-#define PREEMPT_ACTIVE		(1 << PREEMPT_ACTIVE_BIT)
-
 /*
  * thread information flags
  */
Index: linux-2.6/arch/powerpc/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/thread_info.h
+++ linux-2.6/arch/powerpc/include/asm/thread_info.h
@@ -82,8 +82,6 @@ static inline struct thread_info *curren
 
 #endif /* __ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flag bit numbers
  */
Index: linux-2.6/arch/s390/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/s390/include/asm/thread_info.h
+++ linux-2.6/arch/s390/include/asm/thread_info.h
@@ -111,6 +111,4 @@ static inline struct thread_info *curren
 #define is_32bit_task()		(1)
 #endif
 
-#define PREEMPT_ACTIVE		0x4000000
-
 #endif /* _ASM_THREAD_INFO_H */
Index: linux-2.6/arch/score/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/score/include/asm/thread_info.h
+++ linux-2.6/arch/score/include/asm/thread_info.h
@@ -72,8 +72,6 @@ register struct thread_info *__current_t
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
Index: linux-2.6/arch/sh/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/sh/include/asm/thread_info.h
+++ linux-2.6/arch/sh/include/asm/thread_info.h
@@ -41,8 +41,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #if defined(CONFIG_4KSTACKS)
 #define THREAD_SHIFT	12
 #else
Index: linux-2.6/arch/sh/kernel/entry-common.S
===================================================================
--- linux-2.6.orig/arch/sh/kernel/entry-common.S
+++ linux-2.6/arch/sh/kernel/entry-common.S
@@ -108,7 +108,7 @@ need_resched:
 	and	#(0xf0>>1), r0		! interrupts off (exception path)?
 	cmp/eq	#(0xf0>>1), r0
 	bt	noresched
-	mov.l	3f, r0
+	mov.l	1f, r0
 	jsr	@r0			! call preempt_schedule_irq
 	 nop
 	bra	need_resched
@@ -119,9 +119,7 @@ noresched:
 	 nop
 
 	.align 2
-1:	.long	PREEMPT_ACTIVE
-2:	.long	schedule
-3:	.long	preempt_schedule_irq
+1:	.long	preempt_schedule_irq
 #endif
 
 ENTRY(resume_userspace)
Index: linux-2.6/arch/sparc/include/asm/thread_info_32.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/thread_info_32.h
+++ linux-2.6/arch/sparc/include/asm/thread_info_32.h
@@ -105,8 +105,6 @@ register struct thread_info *current_thr
 #define TI_W_SAVED	0x250
 /* #define TI_RESTART_BLOCK 0x25n */ /* Nobody cares */
 
-#define PREEMPT_ACTIVE		0x4000000
-
 /*
  * thread information flag bit numbers
  */
Index: linux-2.6/arch/sparc/include/asm/thread_info_64.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/thread_info_64.h
+++ linux-2.6/arch/sparc/include/asm/thread_info_64.h
@@ -111,8 +111,6 @@ struct thread_info {
 #define THREAD_SHIFT PAGE_SHIFT
 #endif /* PAGE_SHIFT == 13 */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
Index: linux-2.6/arch/tile/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/tile/include/asm/thread_info.h
+++ linux-2.6/arch/tile/include/asm/thread_info.h
@@ -113,8 +113,6 @@ extern void _cpu_idle(void);
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * Thread information flags that various assembly files may need to access.
  * Keep flags accessed frequently in low bits, particular since it makes
Index: linux-2.6/arch/um/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/um/include/asm/thread_info.h
+++ linux-2.6/arch/um/include/asm/thread_info.h
@@ -60,8 +60,6 @@ static inline struct thread_info *curren
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_SIGPENDING		1	/* signal pending */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
Index: linux-2.6/arch/unicore32/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/unicore32/include/asm/thread_info.h
+++ linux-2.6/arch/unicore32/include/asm/thread_info.h
@@ -118,12 +118,6 @@ static inline struct thread_info *curren
 #endif
 
 /*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring.  See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE	0x40000000
-
-/*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
  *  TIF_SIGPENDING	- signal pending
Index: linux-2.6/arch/x86/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/thread_info.h
+++ linux-2.6/arch/x86/include/asm/thread_info.h
@@ -154,8 +154,6 @@ struct thread_info {
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifdef CONFIG_X86_32
 
 #define STACK_WARN	(THREAD_SIZE/8)
Index: linux-2.6/arch/xtensa/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/xtensa/include/asm/thread_info.h
+++ linux-2.6/arch/xtensa/include/asm/thread_info.h
@@ -76,8 +76,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
Index: linux-2.6/include/linux/preempt_mask.h
===================================================================
--- linux-2.6.orig/include/linux/preempt_mask.h
+++ linux-2.6/include/linux/preempt_mask.h
@@ -17,10 +17,11 @@
  * there are a few palaeontologic drivers which reenable interrupts in
  * the handler, so we need more than one bit here.
  *
- * PREEMPT_MASK: 0x000000ff
- * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x000f0000
- *     NMI_MASK: 0x00100000
+ * PREEMPT_MASK:	0x000000ff
+ * SOFTIRQ_MASK:	0x0000ff00
+ * HARDIRQ_MASK:	0x000f0000
+ *     NMI_MASK:	0x00100000
+ * PREEMPT_ACTIVE:	0x00200000
  */
 #define PREEMPT_BITS	8
 #define SOFTIRQ_BITS	8
@@ -46,15 +47,9 @@
 
 #define SOFTIRQ_DISABLE_OFFSET	(2 * SOFTIRQ_OFFSET)
 
-#ifndef PREEMPT_ACTIVE
 #define PREEMPT_ACTIVE_BITS	1
 #define PREEMPT_ACTIVE_SHIFT	(NMI_SHIFT + NMI_BITS)
 #define PREEMPT_ACTIVE	(__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
-#endif
-
-#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
-#error PREEMPT_ACTIVE is too low!
-#endif
 
 #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
 #define softirq_count()	(preempt_count() & SOFTIRQ_MASK)



^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-09-17 18:53   ` [patch 1/6] hardirq: Make hardirq bits generic Thomas Gleixner
@ 2013-09-17 20:00     ` Geert Uytterhoeven
  2013-09-17 21:24       ` Thomas Gleixner
  2013-11-13 19:42     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
  1 sibling, 1 reply; 80+ messages in thread
From: Geert Uytterhoeven @ 2013-09-17 20:00 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, Peter Zijlstra, Ingo Molnar, Linux-Arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Linux/m68k

On Tue, Sep 17, 2013 at 8:53 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> --- linux-2.6.orig/arch/m68k/include/asm/hardirq.h
> +++ linux-2.6/arch/m68k/include/asm/hardirq.h
> @@ -5,17 +5,6 @@
>  #include <linux/cache.h>
>  #include <asm/irq.h>
>
> -#define HARDIRQ_BITS   8

> --- linux-2.6.orig/include/linux/preempt_mask.h
> +++ linux-2.6/include/linux/preempt_mask.h
> @@ -11,36 +11,22 @@
>   * - bits 0-7 are the preemption count (max preemption depth: 256)
>   * - bits 8-15 are the softirq count (max # of softirqs: 256)
>   *
> - * The hardirq count can in theory reach the same as NR_IRQS.
> - * In reality, the number of nested IRQS is limited to the stack
> - * size as well. For archs with over 1000 IRQS it is not practical
> - * to expect that they will all nest. We give a max of 10 bits for
> - * hardirq nesting. An arch may choose to give less than 10 bits.
> - * m68k expects it to be 8.

m68k needs some changes in arch/m68k/kernel/entry.S, cfr. this check
in arch/m68k/kernel/ints.c:

        /* assembly irq entry code relies on this... */
        if (HARDIRQ_MASK != 0x00ff0000) {
                extern void hardirq_mask_is_broken(void);
                hardirq_mask_is_broken();
        }

Haven't looked into the details yet...

> - * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
> - * - bit 26 is the NMI_MASK
> - * - bit 27 is the PREEMPT_ACTIVE flag
> + * The hardirq count could in theory be the same as the number of
> + * interrupts in the system, but we run all interrupt handlers with
> + * interrupts disabled, so we cannot have nesting interrupts. Though
> + * there are a few palaeontologic drivers which reenable interrupts in
> + * the handler, so we need more than one bit here.
>   *
>   * PREEMPT_MASK: 0x000000ff
>   * SOFTIRQ_MASK: 0x0000ff00
> - * HARDIRQ_MASK: 0x03ff0000
> - *     NMI_MASK: 0x04000000
> + * HARDIRQ_MASK: 0x000f0000
> + *     NMI_MASK: 0x00100000
>   */
>  #define PREEMPT_BITS   8
>  #define SOFTIRQ_BITS   8
> +#define HARDIRQ_BITS   4
>  #define NMI_BITS       1

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 11/11] sched, x86: Optimize the preempt_schedule() call
  2013-09-17  9:10 ` [PATCH 11/11] sched, x86: Optimize the preempt_schedule() call Peter Zijlstra
@ 2013-09-17 20:23   ` Peter Zijlstra
  0 siblings, 0 replies; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-17 20:23 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Andi Kleen, Peter Anvin, Mike Galbraith, Thomas Gleixner,
	Arjan van de Ven, Frederic Weisbecker, linux-kernel, linux-arch

Another build error reported by Thomas Meyer: we need to export the asm symbols.

---
Subject: sched, x86: Optimize the preempt_schedule() call
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed Aug 14 14:51:00 CEST 2013

Remove the bloat of the C calling convention out of the
preempt_enable() sites by creating an ASM wrapper which allows us to
do an asm("call ___preempt_schedule") instead.

calling.h bits by Andi Kleen

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 arch/x86/include/asm/calling.h   |   50 +++++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/preempt.h   |    8 ++++++
 arch/x86/kernel/Makefile         |    2 +
 arch/x86/kernel/i386_ksyms_32.c  |    7 +++++
 arch/x86/kernel/preempt.S        |   25 +++++++++++++++++++
 arch/x86/kernel/x8664_ksyms_64.c |    7 +++++
 include/asm-generic/preempt.h    |   10 +++++++
 include/linux/preempt.h          |   13 +++-------
 8 files changed, 114 insertions(+), 8 deletions(-)

--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -48,6 +48,8 @@ For 32-bit we have the following convent
 
 #include <asm/dwarf2.h>
 
+#ifdef CONFIG_X86_64
+
 /*
  * 64-bit system call stack frame layout defines and helpers,
  * for assembly code:
@@ -192,3 +194,51 @@ For 32-bit we have the following convent
 	.macro icebp
 	.byte 0xf1
 	.endm
+
+#else /* CONFIG_X86_64 */
+
+/*
+ * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
+ * are different from the entry_32.S versions in not changing the segment
+ * registers. So only suitable for in kernel use, not when transitioning
+ * from or to user space. The resulting stack frame is not a standard
+ * pt_regs frame. The main use case is calling C code from assembler
+ * when all the registers need to be preserved.
+ */
+
+	.macro SAVE_ALL
+	pushl_cfi %eax
+	CFI_REL_OFFSET eax, 0
+	pushl_cfi %ebp
+	CFI_REL_OFFSET ebp, 0
+	pushl_cfi %edi
+	CFI_REL_OFFSET edi, 0
+	pushl_cfi %esi
+	CFI_REL_OFFSET esi, 0
+	pushl_cfi %edx
+	CFI_REL_OFFSET edx, 0
+	pushl_cfi %ecx
+	CFI_REL_OFFSET ecx, 0
+	pushl_cfi %ebx
+	CFI_REL_OFFSET ebx, 0
+	.endm
+
+	.macro RESTORE_ALL
+	popl_cfi %ebx
+	CFI_RESTORE ebx
+	popl_cfi %ecx
+	CFI_RESTORE ecx
+	popl_cfi %edx
+	CFI_RESTORE edx
+	popl_cfi %esi
+	CFI_RESTORE esi
+	popl_cfi %edi
+	CFI_RESTORE edi
+	popl_cfi %ebp
+	CFI_RESTORE ebp
+	popl_cfi %eax
+	CFI_RESTORE eax
+	.endm
+
+#endif /* CONFIG_X86_64 */
+
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -95,4 +95,12 @@ static __always_inline bool should_resch
 	return unlikely(!__this_cpu_read_4(__preempt_count));
 }
 
+#ifdef CONFIG_PREEMPT
+#define __preempt_schedule() asm ("call ___preempt_schedule")
+
+#ifdef CONFIG_CONTEXT_TRACKING
+#define __preempt_schedule_context() asm ("call ___preempt_schedule_context")
+#endif
+#endif /* CONFIG_PREEMPT */
+
 #endif /* __ASM_PREEMPT_H */
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,6 +36,8 @@ obj-y			+= tsc.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
 
+obj-$(CONFIG_PREEMPT)	+= preempt.o
+
 obj-y				+= process.o
 obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -37,3 +37,10 @@ EXPORT_SYMBOL(strstr);
 
 EXPORT_SYMBOL(csum_partial);
 EXPORT_SYMBOL(empty_zero_page);
+
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(___preempt_schedule);
+#ifdef CONFIG_CONTEXT_TRACKING
+EXPORT_SYMBOL(___preempt_schedule_context);
+#endif
+#endif
--- /dev/null
+++ b/arch/x86/kernel/preempt.S
@@ -0,0 +1,25 @@
+
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/asm.h>
+#include <asm/calling.h>
+
+ENTRY(___preempt_schedule)
+	CFI_STARTPROC
+	SAVE_ALL
+	call preempt_schedule
+	RESTORE_ALL
+	ret
+	CFI_ENDPROC
+
+#ifdef CONFIG_CONTEXT_TRACKING
+
+ENTRY(___preempt_schedule_context)
+	CFI_STARTPROC
+	SAVE_ALL
+	call preempt_schedule_context
+	RESTORE_ALL
+	ret
+	CFI_ENDPROC
+
+#endif
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -66,3 +66,10 @@ EXPORT_SYMBOL(empty_zero_page);
 #ifndef CONFIG_PARAVIRT
 EXPORT_SYMBOL(native_load_gs_index);
 #endif
+
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(___preempt_schedule);
+#ifdef CONFIG_CONTEXT_TRACKING
+EXPORT_SYMBOL(___preempt_schedule_context);
+#endif
+#endif
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -90,4 +90,14 @@ static __always_inline bool should_resch
 	return unlikely(!*preempt_count_ptr());
 }
 
+#ifdef CONFIG_PREEMPT
+extern asmlinkage void preempt_schedule(void);
+#define __preempt_schedule() preempt_schedule()
+
+#ifdef CONFIG_CONTEXT_TRACKING
+extern asmlinkage void preempt_schedule_context(void);
+#define __preempt_schedule_context() preempt_schedule_context()
+#endif
+#endif /* CONFIG_PREEMPT */
+
 #endif /* __ASM_PREEMPT_H */
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -50,18 +50,17 @@ do { \
 #define preempt_enable_no_resched() sched_preempt_enable_no_resched()
 
 #ifdef CONFIG_PREEMPT
-asmlinkage void preempt_schedule(void);
 #define preempt_enable() \
 do { \
 	barrier(); \
 	if (unlikely(preempt_count_dec_and_test())) \
-		preempt_schedule(); \
+		__preempt_schedule(); \
 } while (0)
 
 #define preempt_check_resched() \
 do { \
 	if (should_resched()) \
-		preempt_schedule(); \
+		__preempt_schedule(); \
 } while (0)
 
 #else
@@ -83,17 +82,15 @@ do { \
 
 #ifdef CONFIG_PREEMPT
 
-#ifdef CONFIG_CONTEXT_TRACKING
-asmlinkage void preempt_schedule_context(void);
-#else
-#define preempt_schedule_context() preempt_schedule()
+#ifndef CONFIG_CONTEXT_TRACKING
+#define __preempt_schedule_context() __preempt_schedule()
 #endif
 
 #define preempt_enable_notrace() \
 do { \
 	barrier(); \
 	if (unlikely(__preempt_count_dec_and_test())) \
-		preempt_schedule_context(); \
+		__preempt_schedule_context(); \
 } while (0)
 #else
 #define preempt_enable_notrace() preempt_enable_no_resched_notrace()


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-09-17 20:00     ` Geert Uytterhoeven
@ 2013-09-17 21:24       ` Thomas Gleixner
  2013-09-18 14:06         ` Thomas Gleixner
  0 siblings, 1 reply; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-17 21:24 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: LKML, Peter Zijlstra, Ingo Molnar, Linux-Arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Linux/m68k

On Tue, 17 Sep 2013, Geert Uytterhoeven wrote:

> On Tue, Sep 17, 2013 at 8:53 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> > --- linux-2.6.orig/arch/m68k/include/asm/hardirq.h
> > +++ linux-2.6/arch/m68k/include/asm/hardirq.h
> > @@ -5,17 +5,6 @@
> >  #include <linux/cache.h>
> >  #include <asm/irq.h>
> >
> > -#define HARDIRQ_BITS   8
> 
> > --- linux-2.6.orig/include/linux/preempt_mask.h
> > +++ linux-2.6/include/linux/preempt_mask.h
> > @@ -11,36 +11,22 @@
> >   * - bits 0-7 are the preemption count (max preemption depth: 256)
> >   * - bits 8-15 are the softirq count (max # of softirqs: 256)
> >   *
> > - * The hardirq count can in theory reach the same as NR_IRQS.
> > - * In reality, the number of nested IRQS is limited to the stack
> > - * size as well. For archs with over 1000 IRQS it is not practical
> > - * to expect that they will all nest. We give a max of 10 bits for
> > - * hardirq nesting. An arch may choose to give less than 10 bits.
> > - * m68k expects it to be 8.
> 
> m68k needs some changes in arch/m68k/kernel/entry.S, cfr. this check
> in arch/m68k/kernel/ints.c:
> 
>         /* assembly irq entry code relies on this... */
>         if (HARDIRQ_MASK != 0x00ff0000) {
>                 extern void hardirq_mask_is_broken(void);
>                 hardirq_mask_is_broken();
>         }
> 
> Haven't looked into the details yet...

Whee. Did not notice that one. Though I can't find anything
interesting in the low-level entry code... Looks like some more
hysterical leftovers.

Thanks,

	tglx


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 5/6] sparc: Use preempt_schedule_irq
  2013-09-17 18:53   ` [patch 5/6] sparc: " Thomas Gleixner
@ 2013-09-17 22:54     ` David Miller
  2013-09-17 23:23       ` Thomas Gleixner
  2013-11-13 19:43     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
  1 sibling, 1 reply; 80+ messages in thread
From: David Miller @ 2013-09-17 22:54 UTC (permalink / raw)
  To: tglx
  Cc: linux-kernel, peterz, mingo, linux-arch, torvalds, ak, hpa,
	bitbucket, arjan, fweisbec

From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Sep 2013 18:53:08 -0000

>  		bne,pn			%xcc, kern_fpucheck
> -		 sethi			%hi(PREEMPT_ACTIVE), %l6
> -		stw			%l6, [%g6 + TI_PRE_COUNT]
> -		call			schedule
> +		call			preempt_schedule_irq
>  		 nop

You've put the function call into the delay slot of the branch,
which you don't want to do.

That's, btw, why we indent instructions with an extra space like
that, to emphasize that it's a delay slot of the preceding
branch.

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 5/6] sparc: Use preempt_schedule_irq
  2013-09-17 22:54     ` David Miller
@ 2013-09-17 23:23       ` Thomas Gleixner
  2013-09-18  0:12         ` David Miller
  0 siblings, 1 reply; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-17 23:23 UTC (permalink / raw)
  To: David Miller
  Cc: linux-kernel, peterz, mingo, linux-arch, torvalds, ak, hpa,
	bitbucket, arjan, fweisbec

On Tue, 17 Sep 2013, David Miller wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> Date: Tue, 17 Sep 2013 18:53:08 -0000
> 
> >  		bne,pn			%xcc, kern_fpucheck
> > -		 sethi			%hi(PREEMPT_ACTIVE), %l6
> > -		stw			%l6, [%g6 + TI_PRE_COUNT]
> > -		call			schedule
> > +		call			preempt_schedule_irq
> >  		 nop
> 
> You've put the function call into the delay slot of the branch,
> which you don't want to do.
> 
> That's, btw, why we indent instructions with an extra space like
> that, to emphasize that it's a delay slot of the preceding
> branch.

I knew that I'd get it wrong :) So is adding another nop the right
thing to do?

 		bne,pn			%xcc, kern_fpucheck
-		 sethi			%hi(PREEMPT_ACTIVE), %l6
-		stw			%l6, [%g6 + TI_PRE_COUNT]
-		call			schedule
+  		 nop
+		call			preempt_schedule_irq
  		 nop
 
Thanks,

	tglx

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 5/6] sparc: Use preempt_schedule_irq
  2013-09-17 23:23       ` Thomas Gleixner
@ 2013-09-18  0:12         ` David Miller
  0 siblings, 0 replies; 80+ messages in thread
From: David Miller @ 2013-09-18  0:12 UTC (permalink / raw)
  To: tglx
  Cc: linux-kernel, peterz, mingo, linux-arch, torvalds, ak, hpa,
	bitbucket, arjan, fweisbec

From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 18 Sep 2013 01:23:58 +0200 (CEST)

> I knew that I'd get it wrong :) So is adding another nop the right
> thing to do?
> 
>  		bne,pn			%xcc, kern_fpucheck
> -		 sethi			%hi(PREEMPT_ACTIVE), %l6
> -		stw			%l6, [%g6 + TI_PRE_COUNT]
> -		call			schedule
> +  		 nop
> +		call			preempt_schedule_irq
>   		 nop

Yep, that's perfect:

Acked-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 6/6] preempt: Make PREEMPT_ACTIVE generic
  2013-09-17 18:53   ` [patch 6/6] preempt: Make PREEMPT_ACTIVE generic Thomas Gleixner
@ 2013-09-18 10:48     ` Peter Zijlstra
  2013-11-13 19:43     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
  1 sibling, 0 replies; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-18 10:48 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, Ingo Molnar, linux-arch, Linus Torvalds, Andi Kleen,
	Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker

On Tue, Sep 17, 2013 at 06:53:09PM -0000, Thomas Gleixner wrote:
> No point in having this bit defined per architecture.
> 
> Index: linux-2.6/include/linux/preempt_mask.h
> ===================================================================
> --- linux-2.6.orig/include/linux/preempt_mask.h
> +++ linux-2.6/include/linux/preempt_mask.h
> @@ -17,10 +17,11 @@
>   * there are a few palaeontologic drivers which reenable interrupts in
>   * the handler, so we need more than one bit here.
>   *
> - * PREEMPT_MASK: 0x000000ff
> - * SOFTIRQ_MASK: 0x0000ff00
> - * HARDIRQ_MASK: 0x000f0000
> - *     NMI_MASK: 0x00100000
> + * PREEMPT_MASK:	0x000000ff
> + * SOFTIRQ_MASK:	0x0000ff00
> + * HARDIRQ_MASK:	0x000f0000
> + *     NMI_MASK:	0x00100000
> + * PREEMPT_ACTIVE:	0x00200000
>   */
>  #define PREEMPT_BITS	8
>  #define SOFTIRQ_BITS	8
> @@ -46,15 +47,9 @@
>  
>  #define SOFTIRQ_DISABLE_OFFSET	(2 * SOFTIRQ_OFFSET)
>  
> -#ifndef PREEMPT_ACTIVE
>  #define PREEMPT_ACTIVE_BITS	1
>  #define PREEMPT_ACTIVE_SHIFT	(NMI_SHIFT + NMI_BITS)
>  #define PREEMPT_ACTIVE	(__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
> -#endif
> -
> -#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
> -#error PREEMPT_ACTIVE is too low!
> -#endif
>  
>  #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
>  #define softirq_count()	(preempt_count() & SOFTIRQ_MASK)

I get:

 init/init_task.c:26:4: error: ‘PREEMPT_ACTIVE’ undeclared here (not in a function)

Because for some obscure reason sched.h doesn't actually include enough.

The below cures things:

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,6 +22,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
+#include <linux/preempt_mask.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-09-17 21:24       ` Thomas Gleixner
@ 2013-09-18 14:06         ` Thomas Gleixner
  2013-09-19 15:14           ` Thomas Gleixner
  0 siblings, 1 reply; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-18 14:06 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: LKML, Peter Zijlstra, Ingo Molnar, Linux-Arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Linux/m68k

On Tue, 17 Sep 2013, Thomas Gleixner wrote:

> On Tue, 17 Sep 2013, Geert Uytterhoeven wrote:
> 
> > On Tue, Sep 17, 2013 at 8:53 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> > > --- linux-2.6.orig/arch/m68k/include/asm/hardirq.h
> > > +++ linux-2.6/arch/m68k/include/asm/hardirq.h
> > > @@ -5,17 +5,6 @@
> > >  #include <linux/cache.h>
> > >  #include <asm/irq.h>
> > >
> > > -#define HARDIRQ_BITS   8
> > 
> > > --- linux-2.6.orig/include/linux/preempt_mask.h
> > > +++ linux-2.6/include/linux/preempt_mask.h
> > > @@ -11,36 +11,22 @@
> > >   * - bits 0-7 are the preemption count (max preemption depth: 256)
> > >   * - bits 8-15 are the softirq count (max # of softirqs: 256)
> > >   *
> > > - * The hardirq count can in theory reach the same as NR_IRQS.
> > > - * In reality, the number of nested IRQS is limited to the stack
> > > - * size as well. For archs with over 1000 IRQS it is not practical
> > > - * to expect that they will all nest. We give a max of 10 bits for
> > > - * hardirq nesting. An arch may choose to give less than 10 bits.
> > > - * m68k expects it to be 8.
> > 
> > m68k needs some changes in arch/m68k/kernel/entry.S, cfr. this check
> > in arch/m68k/kernel/ints.c:
> > 
> >         /* assembly irq entry code relies on this... */
> >         if (HARDIRQ_MASK != 0x00ff0000) {
> >                 extern void hardirq_mask_is_broken(void);
> >                 hardirq_mask_is_broken();
> >         }
> > 
> > Haven't looked into the details yet...
> 
> Whee. Did not notice that one. Though I can't find anything
> interesting in the low-level entry code... Looks like some more
> hysterical leftovers.

Duh. With brain awake I can see it.

The low-level entry code is fiddling with preempt_count by adding
HARDIRQ_OFFSET to it to keep track of nested interrupts. If the count
goes to 0, it invokes do_softirq(). And you have another safety guard:

ret_from_last_interrupt:
        moveq   #(~ALLOWINT>>8)&0xff,%d0
	andb    %sp@(PT_OFF_SR),%d0
	jne     2b

That's due to the irq priority level stuff, which results in nested
interrupts depending on the level of the serviced interrupt, right?
And that's why you fiddle with the HARDIRQ bits in the preempt count
yourself, to prevent the core code from calling do_softirq().

Though this scheme also prevents other parts of irq_exit() from
working correctly, because they depend on the hardirq count being
zero, e.g. the nohz code.

Needs more thought on how to fix that w/o wasting precious bits for
the HARDIRQ count.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 01/11] x86: Use asm goto to implement better modify_and_test() functions
  2013-09-17  9:10 ` [PATCH 01/11] x86: Use asm goto to implement better modify_and_test() functions Peter Zijlstra
@ 2013-09-18 18:44   ` Linus Torvalds
       [not found]     ` <4ec87843-c29a-401a-a54f-2cd4d61fba62@email.android.com>
  0 siblings, 1 reply; 80+ messages in thread
From: Linus Torvalds @ 2013-09-18 18:44 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Andi Kleen, Peter Anvin, Mike Galbraith,
	Thomas Gleixner, Arjan van de Ven, Frederic Weisbecker,
	Linux Kernel Mailing List, linux-arch

[-- Attachment #1: Type: text/plain, Size: 1831 bytes --]

On Tue, Sep 17, 2013 at 4:10 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> Linus suggested using asm goto to get rid of the typical SETcc + TEST
> instruction pair -- which also clobbers an extra register -- for our
> typical modify_and_test() functions.

Thinking about this, we actually have another place in x86 low-level
code where "asm goto" makes a lot of sense: exception handling for
__put_user_asm().

I'd love to use it for __get_user_asm() too, but it doesn't work there
because "asm goto" cannot have outputs (and a get_user obviously needs
an output - the value it gets). But for put_user(), it seems to be a
very good match.

The attached patch is ENTIRELY untested, but I did check some of the
generated assembly language. And the output is absolutely beautiful,
because now gcc sees the error case directly, so the straight-line
code is just the single "mov" instruction, no tests, no nothing. The
exception case will just jump to the local label directly.

Of course, the STAC/CLAC noise is there, and we really should try to
come up with a better model for that (so that code which uses
__put_user() because it wants to do many of them in one go after
a single access_ok() check can avoid the repeated flag flipping),
but that's a separate issue.

hpa, comments? Are you looking at perhaps moving the stac/clac
instructions out? With this, "filldir()" ends up looking something like

   ...
   data32 xchg %ax,%ax  # stac
   mov    %rcx,0x8(%rax)
   data32 xchg %ax,%ax  # clac
   mov    0x10(%rbx),%r13
   data32 xchg %ax,%ax  # stac
   mov    %r8,0x0(%r13)
   data32 xchg %ax,%ax  # clac
   data32 xchg %ax,%ax  # stac
   mov    %r12w,0x10(%r13)
   data32 xchg %ax,%ax  # clac
   ...

which is a bit sad, since the code really is almost perfect aside from
the tons of extra nops/stac/clac instructions...

                        Linus

[-- Attachment #2: patch.diff --]
[-- Type: application/octet-stream, Size: 1436 bytes --]

 arch/x86/include/asm/uaccess.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 5838fa911aa0..33597722cfc1 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -411,6 +411,24 @@ struct __large_struct { unsigned long buf[100]; };
  * we do not write to any memory gcc knows about, so there are no
  * aliasing issues.
  */
+#ifdef CC_HAVE_ASM_GOTO
+#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret)	\
+do {	__label__ error_label;						\
+	asm goto(ASM_STAC "\n"						\
+		 "1:	mov"itype" %"rtype"0,%1\n"			\
+		 "	" ASM_CLAC "\n"					\
+		 _ASM_EXTABLE(1b,%l[error_label])			\
+		 : /* no outputs */					\
+		 : ltype(x), "m" (__m(addr))				\
+		 : /* no clobbers */					\
+		 : error_label);					\
+	err = 0;							\
+	break;								\
+error_label:								\
+	asm volatile(ASM_CLAC);						\
+	err = errret;							\
+} while (0)
+#else
 #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret)	\
 	asm volatile(ASM_STAC "\n"					\
 		     "1:	mov"itype" %"rtype"1,%2\n"		\
@@ -422,6 +440,7 @@ struct __large_struct { unsigned long buf[100]; };
 		     _ASM_EXTABLE(1b, 3b)				\
 		     : "=r"(err)					\
 		     : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
+#endif
 
 #define __put_user_asm_ex(x, addr, itype, rtype, ltype)			\
 	asm volatile("1:	mov"itype" %"rtype"0,%1\n"		\

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* Re: [PATCH 01/11] x86: Use asm goto to implement better modify_and_test() functions
       [not found]     ` <4ec87843-c29a-401a-a54f-2cd4d61fba62@email.android.com>
@ 2013-09-19  8:31       ` Andi Kleen
  2013-09-19  9:39         ` Ingo Molnar
  2013-09-20  4:43         ` H. Peter Anvin
  0 siblings, 2 replies; 80+ messages in thread
From: Andi Kleen @ 2013-09-19  8:31 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Linus Torvalds, Peter Zijlstra, Ingo Molnar, Mike Galbraith,
	Thomas Gleixner, Arjan van de Ven, Frederic Weisbecker,
	Linux Kernel Mailing List, linux-arch

On Wed, Sep 18, 2013 at 02:02:37PM -0500, H. Peter Anvin wrote:
> Yes, a bit sad.  We allow bracketing with the get/put_user_try/catch blocks, but that is x86-specific.
> 
> I don't think a generic option is possible without compiler support, but it might be possible to do better than we do now.

Letting the compiler do it is a bit risky, because it may open it up for
really large blocks, thus defeating the security advantages.

-Andi

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 01/11] x86: Use asm goto to implement better modify_and_test() functions
  2013-09-19  8:31       ` Andi Kleen
@ 2013-09-19  9:39         ` Ingo Molnar
  2013-09-20  4:43         ` H. Peter Anvin
  1 sibling, 0 replies; 80+ messages in thread
From: Ingo Molnar @ 2013-09-19  9:39 UTC (permalink / raw)
  To: Andi Kleen
  Cc: H. Peter Anvin, Linus Torvalds, Peter Zijlstra, Mike Galbraith,
	Thomas Gleixner, Arjan van de Ven, Frederic Weisbecker,
	Linux Kernel Mailing List, linux-arch


* Andi Kleen <ak@linux.intel.com> wrote:

> On Wed, Sep 18, 2013 at 02:02:37PM -0500, H. Peter Anvin wrote:
>
> > Yes, a bit sad.  We allow bracketing with the get/put_user_try/catch 
> > blocks, but that is x86-specific. I don't think a generic option is 
> > possible without compiler support, but it might be possible to do 
> > better than we do now.
> 
> Letting the compiler do it is a bit risky, because it may open it up for 
> really large blocks, thus defeating the security advantages.

Yeah, the compiler could cover other pointer dereferences in the put_user 
block and that won't result in any visible breakage, so it's difficult to 
prevent the compiler from doing it accidentally or even intentionally.

Then again the many repeated STAC/CLAC sequences are really not nice.

So maybe we could add some macro magic to generate better assembly here - 
if we coded up a __put_user_2field() primitive then we could already 
optimize the filldir() case:

before:

        if (__put_user(d_ino, &dirent->d_ino))
                goto efault;
        if (__put_user(reclen, &dirent->d_reclen))
                goto efault;
        if (copy_to_user(dirent->d_name, name, namlen))
                goto efault;
        if (__put_user(0, dirent->d_name + namlen))
                goto efault;
        if (__put_user(d_type, (char __user *) dirent + reclen - 1))
                goto efault;

after:

        if (__put_user_2field(d_ino, &dirent->d_ino, reclen, &dirent->d_reclen))
                goto efault;
        if (copy_to_user(dirent->d_name, name, namlen))
                goto efault;
        if (__put_user_2field(0, dirent->d_name + namlen, d_type, (char __user *) dirent + reclen - 1))
                goto efault;

That cuts down the inlined STAC/CLAC pairs from 4 to 2.

__put_user_2field() would be some truly disgusting (but hidden from most 
people) macro and assembly magic.
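
To make that concrete, here is a completely untested sketch of what
such a primitive could look like (assuming CC_HAVE_ASM_GOTO like
Linus's patch above, and punting on the per-field itype/rtype size
suffixes by pretending both fields are full-register sized):

#define __put_user_2field(x1, p1, x2, p2)				\
({	__label__ __pu2_fault;						\
	int __pu2_err = 0;						\
	asm goto(ASM_STAC "\n"						\
		 "1:	mov %0,%1\n"					\
		 "2:	mov %2,%3\n"					\
		 "	" ASM_CLAC "\n"					\
		 _ASM_EXTABLE(1b, %l[__pu2_fault])			\
		 _ASM_EXTABLE(2b, %l[__pu2_fault])			\
		 : /* no outputs: asm goto cannot have any */		\
		 : "r" (x1), "m" (__m(p1)),				\
		   "r" (x2), "m" (__m(p2))				\
		 : /* no clobbers */					\
		 : __pu2_fault);					\
	if (0) {							\
__pu2_fault:								\
		asm volatile(ASM_CLAC);					\
		__pu2_err = -EFAULT;					\
	}								\
	__pu2_err;							\
})

Two stores share a single STAC/CLAC pair, and both exception table
entries funnel into one local error label. The real version would have
to propagate the size suffixes per field, which is where the
disgusting part starts.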

We could also add __put_user_4field() and slightly reorder filldir():

        if (__put_user_4field(	d_ino,		&dirent->d_ino,
				reclen,		&dirent->d_reclen,
        			0,		dirent->d_name + namlen,
				d_type,		(char __user *) dirent + reclen - 1))
                goto efault;

        if (copy_to_user(dirent->d_name, name, namlen))
                goto efault;

That would reduce the inlined STAC/CLAC pairs to a minimum of 1 (only one of 
which would be visible in the filldir() disassembly).

In theory we could do something generic:

        if (__put_user_fields(	4,
				d_ino,		&dirent->d_ino,
				reclen,		&dirent->d_reclen,
        			0,		 dirent->d_name + namlen,
				d_type,		(char __user *)dirent + reclen-1 ))
                goto efault;

        if (copy_to_user(dirent->d_name, name, namlen))
                goto efault;

and implement it up to 4 or so. It will be some truly disgusting low-level 
code (especially due to the size variations, which could make it explode 
combinatorially), with some generic header fallback that utilizes 
existing put_user primitives.

But it's solvable IMO, if we want to solve it. On the high level it's also 
more readable in a fashion and hence perhaps a bit less fragile than our 
usual __put_user() patterns.

Btw., while at it we could also maybe fix the assignment ordering and use 
copy_to_user() naming:

        if (__copy_to_user_fields(4,

			&dirent->d_ino,				d_ino,
			&dirent->d_reclen,			reclen,		
        		dirent->d_name + namlen,		0,
			(char __user *)dirent + reclen-1,	d_type	))

                goto efault;

        if (copy_to_user(dirent->d_name, name, namlen))
                goto efault;

That would make it even more readable.

(Thinking about the macro tricks needed for something like this gave me a 
bad headache though.)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-09-18 14:06         ` Thomas Gleixner
@ 2013-09-19 15:14           ` Thomas Gleixner
  2013-09-19 17:02             ` Andreas Schwab
  0 siblings, 1 reply; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-19 15:14 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: LKML, Peter Zijlstra, Ingo Molnar, Linux-Arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Linux/m68k

Geert,

On Wed, 18 Sep 2013, Thomas Gleixner wrote:
> The low-level entry code is fiddling with preempt_count by adding
> HARDIRQ_OFFSET to it to keep track of nested interrupts. If the count
> goes to 0, it invokes do_softirq(). And you have another safety guard:
> 
> ret_from_last_interrupt:
>         moveq   #(~ALLOWINT>>8)&0xff,%d0
> 	andb    %sp@(PT_OFF_SR),%d0
> 	jne     2b
> 
> That's due to the irq priority level stuff, which results in nested
> interrupts depending on the level of the serviced interrupt, right?
> And that's why you fiddle yourself with the HARDIRQ bits in the
> preempt count to prevent the core code from calling do_softirq().
> 
> Though this scheme also prevents that other parts of irq_exit() are
> working correctly, because they depend on the hardirq count being
> zero, e.g. the nohz code.
> 
> Needs more thoughts how to fix that w/o wasting precious bits for the
> HARDIRQ count.

So after staring for a while at the m68k code I came up with the
following (untested and uncompiled) solution:

Instead of doing the HARDIRQ fiddling and the softirq handling from
the low-level entry code, I provided an irq_exit_nested() variant
which has an argument to tell the core code that it shouldn't invoke
softirq handling and the nohz exit code. That way 4 HARDIRQ bits in
preempt_count() should be sufficient, and we can move them around as
we see fit without being bound by some magic asm code fiddling with
them.

By modifying do_IRQ to return an indicator whether this is the last
irq in the chain, we can also simplify the asm code significantly.

Can you have a look with m68k eyes on that please? I'm sure that my
vague memory of the 68k ASM tricked me into doing something
fundamentally wrong :)

Thanks,

	tglx
---
Subject: m68k: Deal with interrupt nesting in the core code
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 18 Sep 2013 11:56:58 +0200

m68k increments the HARDIRQ part of preempt_count in the low-level
interrupt entry code, which prevents the core code from restructuring
the preempt_count layout.

This is done to prevent softirq invocation for
nested interrupts. The nesting can happen due to the interrupt
priority scheme of the 68k.

This patch removes the low-level handling and moves it to the
interrupt core code by providing an irq_exit_nested() variant. The
nested argument to this function tells the core code whether to invoke
softirqs or not.

Also let do_IRQ() and the other handler variants return the nest
indicator so the low-level entry code can use it to select either an
immediate return or the expensive work functions.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/m68k/include/asm/irq.h      |    2 +-
 arch/m68k/kernel/entry.S         |   36 +++++++++---------------------------
 arch/m68k/kernel/ints.c          |    6 ------
 arch/m68k/kernel/irq.c           |    7 ++++---
 arch/m68k/platform/68000/entry.S |   19 ++++++-------------
 arch/m68k/platform/68000/ints.c  |    7 +++----
 arch/m68k/platform/68360/entry.S |   25 ++++++++-----------------
 arch/m68k/q40/q40ints.c          |   12 ++++++------
 include/linux/hardirq.h          |    1 +
 kernel/softirq.c                 |   15 +++++++++++----
 10 files changed, 49 insertions(+), 81 deletions(-)
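
For orientation: the core side of this boils down to making the tail
work of irq_exit() conditional. A sketch of the intent (not the
literal kernel/softirq.c hunk):

void irq_exit_nested(int nested)
{
	account_irq_exit_time(current);
	sub_preempt_count(HARDIRQ_OFFSET);

	/*
	 * A non-zero @nested means we interrupted another interrupt,
	 * so softirq and nohz processing are deferred to the
	 * outermost exit.
	 */
	if (!nested) {
		if (!in_interrupt() && local_softirq_pending())
			invoke_softirq();
		tick_irq_exit();
	}
	rcu_irq_exit();
}

with irq_exit() itself becoming irq_exit_nested(0).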

Index: linux-2.6/arch/m68k/include/asm/irq.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/irq.h
+++ linux-2.6/arch/m68k/include/asm/irq.h
@@ -74,7 +74,7 @@ extern unsigned int irq_canonicalize(uns
 #define irq_canonicalize(irq)  (irq)
 #endif /* !(CONFIG_M68020 || CONFIG_M68030 || CONFIG_M68040 || CONFIG_M68060) */
 
-asmlinkage void do_IRQ(int irq, struct pt_regs *regs);
+asmlinkage int do_IRQ(int irq, struct pt_regs *regs);
 extern atomic_t irq_err_count;
 
 #endif /* _M68K_IRQ_H_ */
Index: linux-2.6/arch/m68k/kernel/entry.S
===================================================================
--- linux-2.6.orig/arch/m68k/kernel/entry.S
+++ linux-2.6/arch/m68k/kernel/entry.S
@@ -274,9 +274,6 @@ do_delayed_trace:
 
 ENTRY(auto_inthandler)
 	SAVE_ALL_INT
-	GET_CURRENT(%d0)
-	movel	%d0,%a1
-	addqb	#1,%a1@(TINFO_PREEMPT+1)
 					|  put exception # in d0
 	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
 	subw	#VEC_SPUR,%d0
@@ -284,34 +281,22 @@ ENTRY(auto_inthandler)
 	movel	%sp,%sp@-
 	movel	%d0,%sp@-		|  put vector # on stack
 auto_irqhandler_fixup = . + 2
-	jsr	do_IRQ			|  process the IRQ
+	jsr	do_IRQ			|  process the IRQ, returns nest level
 	addql	#8,%sp			|  pop parameters off stack
 
 ret_from_interrupt:
-	movel	%curptr@(TASK_STACK),%a1
-	subqb	#1,%a1@(TINFO_PREEMPT+1)
-	jeq	ret_from_last_interrupt
-2:	RESTORE_ALL
-
-	ALIGN
-ret_from_last_interrupt:
-	moveq	#(~ALLOWINT>>8)&0xff,%d0
-	andb	%sp@(PT_OFF_SR),%d0
-	jne	2b
-
-	/* check if we need to do software interrupts */
-	tstl	irq_stat+CPUSTAT_SOFTIRQ_PENDING
+	/*
+	 * Only the last interrupt leaving the kernel goes through the
+	 * various exception return checks.
+	 */
+	cmpl	#0, %d0
 	jeq	.Lret_from_exception
-	pea	ret_from_exception
-	jra	do_softirq
+	RESTORE_ALL
 
 /* Handler for user defined interrupt vectors */
 
 ENTRY(user_inthandler)
 	SAVE_ALL_INT
-	GET_CURRENT(%d0)
-	movel	%d0,%a1
-	addqb	#1,%a1@(TINFO_PREEMPT+1)
 					|  put exception # in d0
 	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
 user_irqvec_fixup = . + 2
@@ -319,13 +304,10 @@ user_irqvec_fixup = . + 2
 
 	movel	%sp,%sp@-
 	movel	%d0,%sp@-		|  put vector # on stack
-	jsr	do_IRQ			|  process the IRQ
+	jsr	do_IRQ			|  process the IRQ, returns nest level
 	addql	#8,%sp			|  pop parameters off stack
 
-	movel	%curptr@(TASK_STACK),%a1
-	subqb	#1,%a1@(TINFO_PREEMPT+1)
-	jeq	ret_from_last_interrupt
-	RESTORE_ALL
+	jra	ret_from_interrupt
 
 /* Handler for uninitialized and spurious interrupts */
 
Index: linux-2.6/arch/m68k/kernel/ints.c
===================================================================
--- linux-2.6.orig/arch/m68k/kernel/ints.c
+++ linux-2.6/arch/m68k/kernel/ints.c
@@ -58,12 +58,6 @@ void __init init_IRQ(void)
 {
 	int i;
 
-	/* assembly irq entry code relies on this... */
-	if (HARDIRQ_MASK != 0x00ff0000) {
-		extern void hardirq_mask_is_broken(void);
-		hardirq_mask_is_broken();
-	}
-
 	for (i = IRQ_AUTO_1; i <= IRQ_AUTO_7; i++)
 		irq_set_chip_and_handler(i, &auto_irq_chip, handle_simple_irq);
 
Index: linux-2.6/arch/m68k/kernel/irq.c
===================================================================
--- linux-2.6.orig/arch/m68k/kernel/irq.c
+++ linux-2.6/arch/m68k/kernel/irq.c
@@ -17,18 +17,19 @@
 #include <linux/seq_file.h>
 #include <asm/traps.h>
 
-asmlinkage void do_IRQ(int irq, struct pt_regs *regs)
+asmlinkage int do_IRQ(int irq, struct pt_regs *regs)
 {
 	struct pt_regs *oldregs = set_irq_regs(regs);
+	int nested = regs->sr & ~ALLOWINT;
 
 	irq_enter();
 	generic_handle_irq(irq);
-	irq_exit();
+	irq_exit_nested(nested);
 
 	set_irq_regs(oldregs);
+	return nested;
 }
 
-
 /* The number of spurious interrupts */
 atomic_t irq_err_count;
 
Index: linux-2.6/arch/m68k/platform/68000/entry.S
===================================================================
--- linux-2.6.orig/arch/m68k/platform/68000/entry.S
+++ linux-2.6/arch/m68k/platform/68000/entry.S
@@ -217,20 +217,13 @@ inthandler:
 	bra	ret_from_interrupt
 
 ret_from_interrupt:
-	jeq	1f
-2:
-	RESTORE_ALL
-1:
-	moveb	%sp@(PT_OFF_SR), %d0
-	and	#7, %d0
-	jhi	2b
-
-	/* check if we need to do software interrupts */
+	/*
+	 * Only the last interrupt leaving the kernel goes through the
+	 * various exception return checks.
+	 */
+	cmpl	#0, %d0
 	jeq	ret_from_exception
-
-	pea	ret_from_exception
-	jra	do_softirq
-
+	RESTORE_ALL
 
 /*
  * Handler for uninitialized and spurious interrupts.
Index: linux-2.6/arch/m68k/platform/68000/ints.c
===================================================================
--- linux-2.6.orig/arch/m68k/platform/68000/ints.c
+++ linux-2.6/arch/m68k/platform/68000/ints.c
@@ -74,11 +74,9 @@ asmlinkage irqreturn_t inthandler7(void)
  * into one vector and look in the blasted mask register...
  * This code is designed to be fast, almost constant time, not clean!
  */
-void process_int(int vec, struct pt_regs *fp)
+int process_int(int vec, struct pt_regs *fp)
 {
-	int irq;
-	int mask;
-
+	int irq, mask, nested =fp->sr & ~ALLOWINT;
 	unsigned long pend = ISR;
 
 	while (pend) {
@@ -128,6 +126,7 @@ void process_int(int vec, struct pt_regs
 		do_IRQ(irq, fp);
 		pend &= ~mask;
 	}
+	return nested;
 }
 
 static void intc_irq_unmask(struct irq_data *d)
Index: linux-2.6/arch/m68k/platform/68360/entry.S
===================================================================
--- linux-2.6.orig/arch/m68k/platform/68360/entry.S
+++ linux-2.6/arch/m68k/platform/68360/entry.S
@@ -132,26 +132,17 @@ inthandler:
 
 	movel	%sp,%sp@-
 	movel	%d0,%sp@- 		/*  put vector # on stack*/
-	jbsr	do_IRQ			/*  process the IRQ*/
-3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	jbsr	do_IRQ			/*  process the IRQ, returns nest level */
+     	addql	#8,%sp			/*  pop parameters off stack*/
 
 ret_from_interrupt:
-	jeq	1f
-2:
-	RESTORE_ALL
-1:
-	moveb	%sp@(PT_OFF_SR), %d0
-	and	#7, %d0
-	jhi	2b
-	/* check if we need to do software interrupts */
-
-	movel	irq_stat+CPUSTAT_SOFTIRQ_PENDING,%d0
+	/*
+	 * Only the last interrupt leaving the kernel goes through the
+	 * various exception return checks.
+	 */
+	cmpl	#0, %d0
 	jeq	ret_from_exception
-
-	pea	ret_from_exception
-	jra	do_softirq
-
+	RESTORE_ALL
 
 /*
  * Handler for uninitialized and spurious interrupts.
Index: linux-2.6/arch/m68k/q40/q40ints.c
===================================================================
--- linux-2.6.orig/arch/m68k/q40/q40ints.c
+++ linux-2.6/arch/m68k/q40/q40ints.c
@@ -202,10 +202,10 @@ static int aliased_irq=0;  /* how many t
 
 
 /* got interrupt, dispatch to ISA or keyboard/timer IRQs */
-static void q40_irq_handler(unsigned int irq, struct pt_regs *fp)
+static int q40_irq_handler(unsigned int irq, struct pt_regs *fp)
 {
 	unsigned mir, mer;
-	int i;
+	int i, nested = fp->sr & ~ALLOWINT;
 
 //repeat:
 	mir = master_inb(IIRQ_REG);
@@ -213,14 +213,14 @@ static void q40_irq_handler(unsigned int
 	if ((mir & Q40_IRQ_EXT_MASK) &&
 	    (master_inb(EIRQ_REG) & Q40_IRQ6_MASK)) {
 		floppy_hardint();
-		return;
+		return nested;
 	}
 #endif
 	switch (irq) {
 	case 4:
 	case 6:
 		do_IRQ(Q40_IRQ_SAMPLE, fp);
-		return;
+		return nested;
 	}
 	if (mir & Q40_IRQ_FRAME_MASK) {
 		do_IRQ(Q40_IRQ_FRAME, fp);
@@ -277,7 +277,7 @@ static void q40_irq_handler(unsigned int
 #endif
 				}
 // used to do 'goto repeat;' here, this delayed bh processing too long
-				return;
+				return nested;
 			}
 		}
 		if (mer && ccleirq > 0 && !aliased_irq) {
@@ -291,7 +291,7 @@ static void q40_irq_handler(unsigned int
 	if (mir & Q40_IRQ_KEYB_MASK)
 		do_IRQ(Q40_IRQ_KEYBOARD, fp);
 
-	return;
+	return nested;
 }
 
 void q40_irq_enable(struct irq_data *data)
Index: linux-2.6/include/linux/hardirq.h
===================================================================
--- linux-2.6.orig/include/linux/hardirq.h
+++ linux-2.6/include/linux/hardirq.h
@@ -55,6 +55,7 @@ extern void irq_enter(void);
 /*
  * Exit irq context and process softirqs if needed:
  */
+extern void irq_exit_nested(bool nested);
 extern void irq_exit(void);
 
 #define nmi_enter()						\
Index: linux-2.6/kernel/softirq.c
===================================================================
--- linux-2.6.orig/kernel/softirq.c
+++ linux-2.6/kernel/softirq.c
@@ -350,7 +350,7 @@ static inline void tick_irq_exit(void)
 /*
  * Exit an interrupt context. Process softirqs if needed and possible:
  */
-void irq_exit(void)
+void irq_exit_nested(bool nested)
 {
 #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
 	local_irq_disable();
@@ -361,13 +361,20 @@ void irq_exit(void)
 	account_irq_exit_time(current);
 	trace_hardirq_exit();
 	sub_preempt_count(HARDIRQ_OFFSET);
-	if (!in_interrupt() && local_softirq_pending())
-		invoke_softirq();
 
-	tick_irq_exit();
+	if (!nested) {
+		if (!in_interrupt() && local_softirq_pending())
+			invoke_softirq();
+		tick_irq_exit();
+	}
 	rcu_irq_exit();
 }
 
+void irq_exit(void)
+{
+	irq_exit_nested(false);
+}
+
 /*
  * This function must run with irqs disabled!
  */

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-09-19 15:14           ` Thomas Gleixner
@ 2013-09-19 17:02             ` Andreas Schwab
  2013-09-19 18:19               ` Geert Uytterhoeven
  0 siblings, 1 reply; 80+ messages in thread
From: Andreas Schwab @ 2013-09-19 17:02 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Geert Uytterhoeven, LKML, Peter Zijlstra, Ingo Molnar,
	Linux-Arch, Linus Torvalds, Andi Kleen, Peter Anvin,
	Mike Galbraith, Arjan van de Ven, Frederic Weisbecker,
	Linux/m68k

Thomas Gleixner <tglx@linutronix.de> writes:

> +	/*
> +	 * Only the last interrupt leaving the kernel goes through the
> +	 * various exception return checks.
> +	 */
> +	cmpl	#0, %d0
	tstl	%d0

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-09-19 17:02             ` Andreas Schwab
@ 2013-09-19 18:19               ` Geert Uytterhoeven
  2013-09-20  9:26                 ` Thomas Gleixner
  2013-11-04 12:06                 ` Thomas Gleixner
  0 siblings, 2 replies; 80+ messages in thread
From: Geert Uytterhoeven @ 2013-09-19 18:19 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andreas Schwab, LKML, Peter Zijlstra, Ingo Molnar, Linux-Arch,
	Linus Torvalds, Andi Kleen, Peter Anvin, Mike Galbraith,
	Arjan van de Ven, Frederic Weisbecker, Linux/m68k

On Thu, Sep 19, 2013 at 7:02 PM, Andreas Schwab <schwab@linux-m68k.org> wrote:
> Thomas Gleixner <tglx@linutronix.de> writes:
>> +     /*
>> +      * Only the last interrupt leaving the kernel goes through the
>> +      * various exception return checks.
>> +      */
>> +     cmpl    #0, %d0
>         tstl    %d0

arch/m68k/kernel/built-in.o: In function `bad_inthandler':
(.text+0x2a6): undefined reference to `ret_from_last_interrupt'

I came up with the quick (whitespace-damaged-gmail) fix below.
Or should we handle the nesting in handle_badint(), too?

--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -313,17 +313,11 @@ user_irqvec_fixup = . + 2

 ENTRY(bad_inthandler)
        SAVE_ALL_INT
-       GET_CURRENT(%d0)
-       movel   %d0,%a1
-       addqb   #1,%a1@(TINFO_PREEMPT+1)

        movel   %sp,%sp@-
        jsr     handle_badint
        addql   #4,%sp

-       movel   %curptr@(TASK_STACK),%a1
-       subqb   #1,%a1@(TINFO_PREEMPT+1)
-       jeq     ret_from_last_interrupt
        RESTORE_ALL

However, the resulting kernel hangs (on ARAnyM) after starting userspace:

| INIT: version 2.86 booting

I'll have a deeper look when I have some more time...

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 01/11] x86: Use asm goto to implement better modify_and_test() functions
  2013-09-19  8:31       ` Andi Kleen
  2013-09-19  9:39         ` Ingo Molnar
@ 2013-09-20  4:43         ` H. Peter Anvin
  1 sibling, 0 replies; 80+ messages in thread
From: H. Peter Anvin @ 2013-09-20  4:43 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Linus Torvalds, Peter Zijlstra, Ingo Molnar, Mike Galbraith,
	Thomas Gleixner, Arjan van de Ven, Frederic Weisbecker,
	Linux Kernel Mailing List, linux-arch

On 09/19/2013 03:31 AM, Andi Kleen wrote:
> On Wed, Sep 18, 2013 at 02:02:37PM -0500, H. Peter Anvin wrote:
>> Yes, a bit sad.  We allow bracketing with the get/put_user_try/catch blocks, but that is x86-specific.
>>
>> I don't think a generic option is possible without compiler support, but it might be possible to do better than we do know.
>
> Letting the compiler do it is a bit risky, because it may open it up for
> really large blocks, thus defeating the security advantages.
>
> -Andi
>

The compiler support, if done right, should avoid unannotated memory 
references.  This is a pretty big job in the compiler, though.
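
(For reference, the x86-only bracketing mentioned above looks roughly
like this -- a sketch based on the era's arch/x86/include/asm/uaccess.h,
with "uptr" a hypothetical struct pointer tagged __user:)

	u32 a, b;
	int err;

	get_user_try {
		get_user_ex(a, &uptr->a);
		get_user_ex(b, &uptr->b);
	} get_user_catch(err);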

	-hpa


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-09-19 18:19               ` Geert Uytterhoeven
@ 2013-09-20  9:26                 ` Thomas Gleixner
  2013-11-04 12:06                 ` Thomas Gleixner
  1 sibling, 0 replies; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-20  9:26 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Andreas Schwab, LKML, Peter Zijlstra, Ingo Molnar, Linux-Arch,
	Linus Torvalds, Andi Kleen, Peter Anvin, Mike Galbraith,
	Arjan van de Ven, Frederic Weisbecker, Linux/m68k

On Thu, 19 Sep 2013, Geert Uytterhoeven wrote:

> On Thu, Sep 19, 2013 at 7:02 PM, Andreas Schwab <schwab@linux-m68k.org> wrote:
> > Thomas Gleixner <tglx@linutronix.de> writes:
> >> +     /*
> >> +      * Only the last interrupt leaving the kernel goes through the
> >> +      * various exception return checks.
> >> +      */
> >> +     cmpl    #0, %d0
> >         tstl    %d0
> 
> arch/m68k/kernel/built-in.o: In function `bad_inthandler':
> (.text+0x2a6): undefined reference to `ret_from_last_interrupt'
> 
> I came up with the quick (whitespace-damaged-gmail) fix below.
> Or should we handle the nesting in handle_badint(), too?

Hmm, probably yes. If a badint gets interrupted by a good one, you
would fail to go through ret_from_exception.
 
> --- a/arch/m68k/kernel/entry.S
> +++ b/arch/m68k/kernel/entry.S
> @@ -313,17 +313,11 @@ user_irqvec_fixup = . + 2
> 
>  ENTRY(bad_inthandler)
>         SAVE_ALL_INT
> -       GET_CURRENT(%d0)
> -       movel   %d0,%a1
> -       addqb   #1,%a1@(TINFO_PREEMPT+1)
> 
>         movel   %sp,%sp@-
>         jsr     handle_badint
>         addql   #4,%sp
> 
> -       movel   %curptr@(TASK_STACK),%a1
> -       subqb   #1,%a1@(TINFO_PREEMPT+1)
> -       jeq     ret_from_last_interrupt
>         RESTORE_ALL
> 
> However, the resulting kernel hangs (on ARAnyM) after starting userspace:
> 
> | INIT: version 2.86 booting

Hmm.
 
> I'll have a deeper look when I have some more time...

Thanks a lot!

       tglx

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 2/6] h8300: Use schedule_preempt_irq
  2013-09-17 18:53   ` [patch 2/6] h8300: Use schedule_preempt_irq Thomas Gleixner
@ 2013-09-20 17:41     ` Guenter Roeck
  2013-09-20 21:46       ` Thomas Gleixner
  0 siblings, 1 reply; 80+ messages in thread
From: Guenter Roeck @ 2013-09-20 17:41 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, Peter Zijlstra, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven

On Tue, Sep 17, 2013 at 06:53:06PM -0000, Thomas Gleixner wrote:
> Use the proper function instead of fiddling with PREEMPT_ACTIVE and
> interrupt enable/disable in the low level code.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
> Cc: Geert Uytterhoeven <geert@linux-m68k.org>
> 
This will result in a conflict in -next as h8300 support has been removed
there, and I'll send a pull request to Linus in the next commit window.

Guenter

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 2/6] h8300: Use schedule_preempt_irq
  2013-09-20 17:41     ` Guenter Roeck
@ 2013-09-20 21:46       ` Thomas Gleixner
  0 siblings, 0 replies; 80+ messages in thread
From: Thomas Gleixner @ 2013-09-20 21:46 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: LKML, Peter Zijlstra, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven

On Fri, 20 Sep 2013, Guenter Roeck wrote:

> On Tue, Sep 17, 2013 at 06:53:06PM -0000, Thomas Gleixner wrote:
> > Use the proper function instead of fiddling with PREEMPT_ACTIVE and
> > interrupt enable/disable in the low level code.
> > 
> > Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> > Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
> > Cc: Geert Uytterhoeven <geert@linux-m68k.org>
> > 
> This will result in a conflict in -next as h8300 support has been removed
> there, and I'll send a pull request to Linus in the next commit window.

An easy to resolve one methinks :)

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 02/11] sched, rcu: Make RCU use resched_cpu()
  2013-09-17 14:40   ` Peter Zijlstra
@ 2013-09-23 16:55     ` Paul E. McKenney
  2013-09-23 21:18       ` Paul E. McKenney
  0 siblings, 1 reply; 80+ messages in thread
From: Paul E. McKenney @ 2013-09-23 16:55 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Ingo Molnar, Andi Kleen, Peter Anvin,
	Mike Galbraith, Thomas Gleixner, Arjan van de Ven,
	Frederic Weisbecker, linux-kernel, linux-arch, Thomas Meyer

On Tue, Sep 17, 2013 at 04:40:12PM +0200, Peter Zijlstra wrote:
> Thomas Meyer reported a UP build fail, should be fixed.
> 
> ---
> Subject: sched, rcu: Make RCU use resched_cpu()
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Tue Sep 17 09:30:55 CEST 2013
> 
> We're going to deprecate and remove set_need_resched() for it will do
> the wrong thing. Make an exception for RCU and allow it to use
> resched_cpu() which will do the right thing.
> 
> Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
> Signed-off-by: Peter Zijlstra <peterz@infradead.org>

Queued for 3.13, thank you!

							Thanx, Paul

> ---
>  kernel/rcutree.c    |   15 ++++++++++++++-
>  kernel/sched/core.c |   10 ++--------
>  2 files changed, 16 insertions(+), 9 deletions(-)
> 
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -898,6 +898,12 @@ static void print_other_cpu_stall(struct
>  	force_quiescent_state(rsp);  /* Kick them all. */
>  }
> 
> +/*
> + * This function really isn't for public consumption, but RCU is special in
> + * that context switches can allow the state machine to make progress.
> + */
> +extern void resched_cpu(int cpu);
> +
>  static void print_cpu_stall(struct rcu_state *rsp)
>  {
>  	int cpu;
> @@ -927,7 +933,14 @@ static void print_cpu_stall(struct rcu_s
>  				     3 * rcu_jiffies_till_stall_check() + 3;
>  	raw_spin_unlock_irqrestore(&rnp->lock, flags);
> 
> -	set_need_resched();  /* kick ourselves to get things going. */
> +	/*
> +	 * Attempt to revive the RCU machinery by forcing a context switch.
> +	 *
> +	 * A context switch would normally allow the RCU state machine to make
> +	 * progress and it could be we're stuck in kernel space without context
> +	 * switches for an entirely unreasonable amount of time.
> +	 */
> +	resched_cpu(smp_processor_id());
>  }
> 
>  static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -513,12 +513,11 @@ static inline void init_hrtick(void)
>   * might also involve a cross-CPU call to trigger the scheduler on
>   * the target CPU.
>   */
> -#ifdef CONFIG_SMP
>  void resched_task(struct task_struct *p)
>  {
>  	int cpu;
> 
> -	assert_raw_spin_locked(&task_rq(p)->lock);
> +	lockdep_assert_held(&task_rq(p)->lock);
> 
>  	if (test_tsk_need_resched(p))
>  		return;
> @@ -546,6 +545,7 @@ void resched_cpu(int cpu)
>  	raw_spin_unlock_irqrestore(&rq->lock, flags);
>  }
> 
> +#ifdef CONFIG_SMP
>  #ifdef CONFIG_NO_HZ_COMMON
>  /*
>   * In the semi idle case, use the nearest busy cpu for migrating timers
> @@ -693,12 +693,6 @@ void sched_avg_update(struct rq *rq)
>  	}
>  }
> 
> -#else /* !CONFIG_SMP */
> -void resched_task(struct task_struct *p)
> -{
> -	assert_raw_spin_locked(&task_rq(p)->lock);
> -	set_tsk_need_resched(p);
> -}
>  #endif /* CONFIG_SMP */
> 
>  #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
> 
> 


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 02/11] sched, rcu: Make RCU use resched_cpu()
  2013-09-23 16:55     ` Paul E. McKenney
@ 2013-09-23 21:18       ` Paul E. McKenney
  2013-09-24  8:07         ` Peter Zijlstra
  0 siblings, 1 reply; 80+ messages in thread
From: Paul E. McKenney @ 2013-09-23 21:18 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Ingo Molnar, Andi Kleen, Peter Anvin,
	Mike Galbraith, Thomas Gleixner, Arjan van de Ven,
	Frederic Weisbecker, linux-kernel, linux-arch, Thomas Meyer

On Mon, Sep 23, 2013 at 09:55:40AM -0700, Paul E. McKenney wrote:
> On Tue, Sep 17, 2013 at 04:40:12PM +0200, Peter Zijlstra wrote:
> > Thomas Meyer reported a UP build fail, should be fixed.
> > 
> > ---
> > Subject: sched, rcu: Make RCU use resched_cpu()
> > From: Peter Zijlstra <peterz@infradead.org>
> > Date: Tue Sep 17 09:30:55 CEST 2013
> > 
> > We're going to deprecate and remove set_need_resched() for it will do
> > the wrong thing. Make an exception for RCU and allow it to use
> > resched_cpu() which will do the right thing.
> > 
> > Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
> > Signed-off-by: Peter Zijlstra <peterz@infradead.org>
> 
> Queued for 3.13, thank you!

Passes light testing, but I am confused about why the change to
resched_task() when only resched_cpu() is invoked elsewhere in the patch.
Enlightenment?

							Thanx, Paul

> > ---
> >  kernel/rcutree.c    |   15 ++++++++++++++-
> >  kernel/sched/core.c |   10 ++--------
> >  2 files changed, 16 insertions(+), 9 deletions(-)
> > 
> > --- a/kernel/rcutree.c
> > +++ b/kernel/rcutree.c
> > @@ -898,6 +898,12 @@ static void print_other_cpu_stall(struct
> >  	force_quiescent_state(rsp);  /* Kick them all. */
> >  }
> > 
> > +/*
> > + * This function really isn't for public consumption, but RCU is special in
> > + * that context switches can allow the state machine to make progress.
> > + */
> > +extern void resched_cpu(int cpu);
> > +
> >  static void print_cpu_stall(struct rcu_state *rsp)
> >  {
> >  	int cpu;
> > @@ -927,7 +933,14 @@ static void print_cpu_stall(struct rcu_s
> >  				     3 * rcu_jiffies_till_stall_check() + 3;
> >  	raw_spin_unlock_irqrestore(&rnp->lock, flags);
> > 
> > -	set_need_resched();  /* kick ourselves to get things going. */
> > +	/*
> > +	 * Attempt to revive the RCU machinery by forcing a context switch.
> > +	 *
> > +	 * A context switch would normally allow the RCU state machine to make
> > +	 * progress and it could be we're stuck in kernel space without context
> > +	 * switches for an entirely unreasonable amount of time.
> > +	 */
> > +	resched_cpu(smp_processor_id());
> >  }
> > 
> >  static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -513,12 +513,11 @@ static inline void init_hrtick(void)
> >   * might also involve a cross-CPU call to trigger the scheduler on
> >   * the target CPU.
> >   */
> > -#ifdef CONFIG_SMP
> >  void resched_task(struct task_struct *p)
> >  {
> >  	int cpu;
> > 
> > -	assert_raw_spin_locked(&task_rq(p)->lock);
> > +	lockdep_assert_held(&task_rq(p)->lock);
> > 
> >  	if (test_tsk_need_resched(p))
> >  		return;
> > @@ -546,6 +545,7 @@ void resched_cpu(int cpu)
> >  	raw_spin_unlock_irqrestore(&rq->lock, flags);
> >  }
> > 
> > +#ifdef CONFIG_SMP
> >  #ifdef CONFIG_NO_HZ_COMMON
> >  /*
> >   * In the semi idle case, use the nearest busy cpu for migrating timers
> > @@ -693,12 +693,6 @@ void sched_avg_update(struct rq *rq)
> >  	}
> >  }
> > 
> > -#else /* !CONFIG_SMP */
> > -void resched_task(struct task_struct *p)
> > -{
> > -	assert_raw_spin_locked(&task_rq(p)->lock);
> > -	set_tsk_need_resched(p);
> > -}
> >  #endif /* CONFIG_SMP */
> > 
> >  #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
> > 
> > 


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 02/11] sched, rcu: Make RCU use resched_cpu()
  2013-09-23 21:18       ` Paul E. McKenney
@ 2013-09-24  8:07         ` Peter Zijlstra
  2013-09-24 13:37           ` Paul E. McKenney
  0 siblings, 1 reply; 80+ messages in thread
From: Peter Zijlstra @ 2013-09-24  8:07 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Linus Torvalds, Ingo Molnar, Andi Kleen, Peter Anvin,
	Mike Galbraith, Thomas Gleixner, Arjan van de Ven,
	Frederic Weisbecker, linux-kernel, linux-arch, Thomas Meyer

On Mon, Sep 23, 2013 at 02:18:27PM -0700, Paul E. McKenney wrote:
> Passes light testing, but I am confused about why the change to
> resched_task() when only resched_cpu() is invoked elsewhere in the patch.
> Enlightenment?
> > > -#ifdef CONFIG_SMP
> > >  void resched_task(struct task_struct *p)
> > >  {
> > >  	int cpu;
> > > 
> > > -	assert_raw_spin_locked(&task_rq(p)->lock);
> > > +	lockdep_assert_held(&task_rq(p)->lock);
> > > 
> > >  	if (test_tsk_need_resched(p))
> > >  		return;

That one? Fly-by fixup, I suppose.. the lockdep check is cheaper as it
compiles away for !lockdep kernels, and it's also more correct.
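
For reference, the two in rough paraphrase (from the era's
include/linux/spinlock.h and include/linux/lockdep.h; a sketch, not
verbatim):

	/* always a runtime check, and only "is anybody holding it?" */
	#define assert_raw_spin_locked(x)	BUG_ON(!raw_spin_is_locked(x))

	/* compiles away without CONFIG_LOCKDEP; with it, checks that the
	   current context actually holds the lock */
	#ifdef CONFIG_LOCKDEP
	# define lockdep_assert_held(l)	do {				\
			WARN_ON(debug_locks && !lockdep_is_held(l));	\
		} while (0)
	#else
	# define lockdep_assert_held(l)	do { } while (0)
	#endif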

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 02/11] sched, rcu: Make RCU use resched_cpu()
  2013-09-24  8:07         ` Peter Zijlstra
@ 2013-09-24 13:37           ` Paul E. McKenney
  0 siblings, 0 replies; 80+ messages in thread
From: Paul E. McKenney @ 2013-09-24 13:37 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Ingo Molnar, Andi Kleen, Peter Anvin,
	Mike Galbraith, Thomas Gleixner, Arjan van de Ven,
	Frederic Weisbecker, linux-kernel, linux-arch, Thomas Meyer

On Tue, Sep 24, 2013 at 10:07:06AM +0200, Peter Zijlstra wrote:
> On Mon, Sep 23, 2013 at 02:18:27PM -0700, Paul E. McKenney wrote:
> > Passes light testing, but I am confused about why the change to
> > resched_task() when only resched_cpu() is invoked elsewhere in the patch.
> > Enlightenment?
> > > > -#ifdef CONFIG_SMP
> > > >  void resched_task(struct task_struct *p)
> > > >  {
> > > >  	int cpu;
> > > > 
> > > > -	assert_raw_spin_locked(&task_rq(p)->lock);
> > > > +	lockdep_assert_held(&task_rq(p)->lock);
> > > > 
> > > >  	if (test_tsk_need_resched(p))
> > > >  		return;
> 
> That one? Fly-by fixup, I suppose.. the lockdep check is cheaper as it
> compiles away for !lockdep kernels, and it's also more correct.

OK, happy to carry it, just wanted to make sure it was supposed to be there.

							Thanx, Paul


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-09-19 18:19               ` Geert Uytterhoeven
  2013-09-20  9:26                 ` Thomas Gleixner
@ 2013-11-04 12:06                 ` Thomas Gleixner
  2013-11-04 19:44                     ` Geert Uytterhoeven
  1 sibling, 1 reply; 80+ messages in thread
From: Thomas Gleixner @ 2013-11-04 12:06 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Andreas Schwab, LKML, Peter Zijlstra, Ingo Molnar, Linux-Arch,
	Linus Torvalds, Andi Kleen, Peter Anvin, Mike Galbraith,
	Arjan van de Ven, Frederic Weisbecker, Linux/m68k

On Thu, 19 Sep 2013, Geert Uytterhoeven wrote:
> However, the resulting kernel hangs (on ARAnyM) after starting userspace:
> 
> | INIT: version 2.86 booting
> 
> I'll have a deeper look when I have some more time...

Any chance that you find some more time? :)

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-04 12:06                 ` Thomas Gleixner
@ 2013-11-04 19:44                     ` Geert Uytterhoeven
  0 siblings, 0 replies; 80+ messages in thread
From: Geert Uytterhoeven @ 2013-11-04 19:44 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andreas Schwab, LKML, Peter Zijlstra, Ingo Molnar, Linux-Arch,
	Linus Torvalds, Andi Kleen, Peter Anvin, Mike Galbraith,
	Arjan van de Ven, Frederic Weisbecker, Linux/m68k

	Hi Thomas,

On Mon, 4 Nov 2013, Thomas Gleixner wrote:
> On Thu, 19 Sep 2013, Geert Uytterhoeven wrote:
> > However, the resulting kernel hangs (on ARAnyM) after starting userspace:
> > 
> > | INIT: version 2.86 booting
> > 
> > I'll have a deeper look when I have some more time...
> 
> Any chance that you find some more time? :)

Sure!

But only if you look at "[m68k] IRQ: add handle_polled_irq() for timer
based soft interrupt" (http://www.spinics.net/lists/linux-m68k/msg05889.html)
first ;-)

Below is a patch with some fixups, on top of your two patches.

Unfortunately it still hangs somewhere after mounting the root filesystem.

Using this debug code for do_IRQ():

diff --git a/arch/m68k/kernel/irq.c b/arch/m68k/kernel/irq.c
index aaf7b15fad41..da9687803d98 100644
--- a/arch/m68k/kernel/irq.c
+++ b/arch/m68k/kernel/irq.c
@@ -22,11 +22,21 @@ asmlinkage int do_IRQ(int irq, struct pt_regs *regs)
 	struct pt_regs *oldregs = set_irq_regs(regs);
 	int nested = regs->sr & ~ALLOWINT;
 
+static int nesting;
+const char prefix[] = "                ";
+unsigned long flags;
+local_irq_save(flags);
+nesting++;
+printk("# %sirq %d nested %d\n", &prefix[16-2*nesting], irq, nested);
+local_irq_restore(flags);
 	irq_enter();
 	generic_handle_irq(irq);
 	irq_exit_nested(nested);
 
 	set_irq_regs(oldregs);
+local_irq_save(flags);
+nesting--;
+local_irq_restore(flags);
 	return nested;
 }
 
I get output like

#   irq 15 nested 0
#     irq 15 nested 1024

irq 15 while irq 15 in progress??

#     irq 15 nested 1024
#     irq 15 nested 1024
#     irq 15 nested 1024
#     irq 13 nested 1024
#     irq 13 nested 1024
#     irq 13 nested 1024
#     irq 13 nested 1024
#     irq 13 nested 1024
#     irq 4 nested 0
#       irq 13 nested 1024
#     irq 4 nested 0
#       irq 13 nested 1024
#     irq 4 nested 0
#       irq 13 nested 1024
#       irq 13 nested 1024
#       irq 13 nested 1024
#     irq 4 nested 0
#       irq 13 nested 1024
#       irq 13 nested 1024
#       irq 13 nested 1024
#       irq 13 nested 1024
#       irq 13 nested 1024
#       irq 13 nested 1024
#       irq 13 nested 1024
#     irq 4 nested 0
#       irq 13 nested 1024
#   irq 13 nested 1024

[...]

#   irq 13 nested 1024
#   irq 13 nested 1024
#   irq 4 nested 0
#     irq 13 nested 1024
#     irq 4 nested 0

irq 4 while irq 4 in progress?

#   irq 13 nested 1024
#   irq 4 nested 0
#   irq 13 nested 0

and then it stops printing anything.

With similar debug code on the old working do_IRQ(), I get
  - slightly less deep nesting,
  - do_IRQ() is never re-entered with the same irq number.

Also note that the value of "nested" doesn't match the indentation level,
which depends on my own bookkeeping using "nesting".

Anyone with an idea where it's going wrong?

Thanks!

From 209b6ac37811297cd305821c5689dff75226af48 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 22 Sep 2013 11:31:25 +0200
Subject: [PATCH] m68k/hardirq: Make hardirq bits generic fixups

  - tstl instead of cmpl #0 (from Andreas)
  - arch/m68k/kernel/built-in.o: In function `bad_inthandler': (.text+0x2a6): undefined reference to `ret_from_last_interrupt'
  - Handle nesting in bad_inthandler() and handle_badint(),
  - As do_IRQ() now returns int, m68k_setup_auto_interrupt() should take a
    function that returns int, too,
  - Forgot to update forward declaration of q40_irq_handler(),
  - Whitespace fixes

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/irq.h      |    4 ++--
 arch/m68k/kernel/entry.S         |   10 ++--------
 arch/m68k/kernel/ints.c          |    9 +++++++--
 arch/m68k/platform/68000/entry.S |    2 +-
 arch/m68k/platform/68000/ints.c  |    2 +-
 arch/m68k/platform/68360/entry.S |    4 ++--
 arch/m68k/q40/q40ints.c          |    2 +-
 7 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/arch/m68k/include/asm/irq.h b/arch/m68k/include/asm/irq.h
index 8d8e0f835275..fa7f079aeafa 100644
--- a/arch/m68k/include/asm/irq.h
+++ b/arch/m68k/include/asm/irq.h
@@ -60,8 +60,8 @@ struct irq_desc;
 extern unsigned int m68k_irq_startup(struct irq_data *data);
 extern unsigned int m68k_irq_startup_irq(unsigned int irq);
 extern void m68k_irq_shutdown(struct irq_data *data);
-extern void m68k_setup_auto_interrupt(void (*handler)(unsigned int,
-						      struct pt_regs *));
+extern void m68k_setup_auto_interrupt(int (*handler)(unsigned int,
+						     struct pt_regs *));
 extern void m68k_setup_user_interrupt(unsigned int vec, unsigned int cnt);
 extern void m68k_setup_irq_controller(struct irq_chip *,
 				      void (*handle)(unsigned int irq,
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index d35c2a22398a..ca355813ce51 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -289,7 +289,7 @@ ret_from_interrupt:
 	 * Only the last interrupt leaving the kernel goes through the
 	 * various exception return checks.
 	 */
-	cmpl	#0, %d0
+	tstl	%d0
 	jeq	.Lret_from_exception
 	RESTORE_ALL
 
@@ -313,18 +313,12 @@ user_irqvec_fixup = . + 2
 
 ENTRY(bad_inthandler)
 	SAVE_ALL_INT
-	GET_CURRENT(%d0)
-	movel	%d0,%a1
-	addqb	#1,%a1@(TINFO_PREEMPT+1)
 
 	movel	%sp,%sp@-
 	jsr	handle_badint
 	addql	#4,%sp
 
-	movel	%curptr@(TASK_STACK),%a1
-	subqb	#1,%a1@(TINFO_PREEMPT+1)
-	jeq	ret_from_last_interrupt
-	RESTORE_ALL
+	jra	ret_from_interrupt
 
 
 resume:
diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c
index 077d3a70fed1..ec1648b97dc8 100644
--- a/arch/m68k/kernel/ints.c
+++ b/arch/m68k/kernel/ints.c
@@ -72,7 +72,8 @@ void __init init_IRQ(void)
  * standard do_IRQ(), it will be called with irq numbers in the range
  * from IRQ_AUTO_1 - IRQ_AUTO_7.
  */
-void __init m68k_setup_auto_interrupt(void (*handler)(unsigned int, struct pt_regs *))
+void __init m68k_setup_auto_interrupt(int (*handler)(unsigned int,
+						     struct pt_regs *))
 {
 	if (handler)
 		*auto_irqhandler_fixup = (u32)handler;
@@ -162,8 +163,12 @@ unsigned int irq_canonicalize(unsigned int irq)
 EXPORT_SYMBOL(irq_canonicalize);
 
 
-asmlinkage void handle_badint(struct pt_regs *regs)
+asmlinkage int handle_badint(struct pt_regs *regs)
 {
+	int nested = regs->sr & ~ALLOWINT;
+
 	atomic_inc(&irq_err_count);
 	pr_warn("unexpected interrupt from %u\n", regs->vector);
+
+	return nested;
 }
diff --git a/arch/m68k/platform/68000/entry.S b/arch/m68k/platform/68000/entry.S
index afc49235c3c7..b32c6c423c90 100644
--- a/arch/m68k/platform/68000/entry.S
+++ b/arch/m68k/platform/68000/entry.S
@@ -221,7 +221,7 @@ ret_from_interrupt:
 	 * Only the last interrupt leaving the kernel goes through the
 	 * various exception return checks.
 	 */
-	cmpl	#0, %d0
+	tstl	%d0
 	jeq	ret_from_exception
 	RESTORE_ALL
 
diff --git a/arch/m68k/platform/68000/ints.c b/arch/m68k/platform/68000/ints.c
index fadd2b9ff0d9..a6c05a60a9e5 100644
--- a/arch/m68k/platform/68000/ints.c
+++ b/arch/m68k/platform/68000/ints.c
@@ -76,7 +76,7 @@ asmlinkage irqreturn_t inthandler7(void);
  */
 int process_int(int vec, struct pt_regs *fp)
 {
-	int irq, mask, nested =fp->sr & ~ALLOWINT;
+	int irq, mask, nested = fp->sr & ~ALLOWINT;
 	unsigned long pend = ISR;
 
 	while (pend) {
diff --git a/arch/m68k/platform/68360/entry.S b/arch/m68k/platform/68360/entry.S
index 795abe505c35..e818794edfa7 100644
--- a/arch/m68k/platform/68360/entry.S
+++ b/arch/m68k/platform/68360/entry.S
@@ -133,14 +133,14 @@ inthandler:
 	movel	%sp,%sp@-
 	movel	%d0,%sp@- 		/*  put vector # on stack*/
 	jbsr	do_IRQ			/*  process the IRQ, returns nest level */
-     	addql	#8,%sp			/*  pop parameters off stack*/
+	addql	#8,%sp			/*  pop parameters off stack*/
 
 ret_from_interrupt:
 	/*
 	 * Only the last interrupt leaving the kernel goes through the
 	 * various exception return checks.
 	 */
-	cmpl	#0, %d0
+	tstl	%d0
 	jeq	ret_from_exception
 	RESTORE_ALL
 
diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c
index 179aee3a6498..a7525f189264 100644
--- a/arch/m68k/q40/q40ints.c
+++ b/arch/m68k/q40/q40ints.c
@@ -33,7 +33,7 @@
  *
 */
 
-static void q40_irq_handler(unsigned int, struct pt_regs *fp);
+static int q40_irq_handler(unsigned int, struct pt_regs *fp);
 static void q40_irq_enable(struct irq_data *data);
 static void q40_irq_disable(struct irq_data *data);
 
-- 
1.7.9.5

Gr{oetje,eeting}s,

						Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
							    -- Linus Torvalds

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-04 19:44                     ` Geert Uytterhoeven
  (?)
@ 2013-11-06 17:23                     ` Thomas Gleixner
  2013-11-07 14:12                       ` Geert Uytterhoeven
  -1 siblings, 1 reply; 80+ messages in thread
From: Thomas Gleixner @ 2013-11-06 17:23 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Andreas Schwab, LKML, Peter Zijlstra, Ingo Molnar, Linux-Arch,
	Linus Torvalds, Andi Kleen, Peter Anvin, Mike Galbraith,
	Arjan van de Ven, Frederic Weisbecker, Linux/m68k

Geert,

On Mon, 4 Nov 2013, Geert Uytterhoeven wrote:
> But only if you look at "[m68k] IRQ: add handle_polled_irq() for timer
> based soft interrupt" (http://www.spinics.net/lists/linux-m68k/msg05889.html)
> first ;-)

Done. Thanks for the reminder!
 
> Below is a patch with some fixups, on top of your two patches.
> 
> Unfortunately it still hangs somewhere after mounting the root filesystem.
> 
> Using this debug code for do_IRQ():
> 
> diff --git a/arch/m68k/kernel/irq.c b/arch/m68k/kernel/irq.c
> index aaf7b15fad41..da9687803d98 100644
> --- a/arch/m68k/kernel/irq.c
> +++ b/arch/m68k/kernel/irq.c
> @@ -22,11 +22,21 @@ asmlinkage int do_IRQ(int irq, struct pt_regs *regs)
>  	struct pt_regs *oldregs = set_irq_regs(regs);
>  	int nested = regs->sr & ~ALLOWINT;
>  
> +static int nesting;
> +const char prefix[] = "                ";
> +unsigned long flags;
> +local_irq_save(flags);
> +nesting++;
> +printk("# %sirq %d nested %d\n", &prefix[16-2*nesting], irq, nested);
> +local_irq_restore(flags);
>  	irq_enter();
>  	generic_handle_irq(irq);
>  	irq_exit_nested(nested);
>  
>  	set_irq_regs(oldregs);
> +local_irq_save(flags);
> +nesting--;
> +local_irq_restore(flags);
>  	return nested;
>  }
>  
> I get output like
> 
> #   irq 15 nested 0
> #     irq 15 nested 1024
> 
> irq 15 while irq 15 in progress??

Huch, that's odd.
 
> With similar debug code on the old working do_IRQ(), I get
>   - slightly less deep nesting,
>   - do_IRQ() is never re-entered with the same irq number.
> 
> Also note that the value of "nested" doesn't match the indentation level,
> which depends on my own bookkeeping using "nesting".

Well, nested is just an indicator. It's not the nest level.

      nested = pt->sr & ~ALLOWINT;
i.e.:
      nested = pt->sr & 0x0700;

So in the case above nested is 0x400
 
> Anyone with an idea where it's going wrong?

The original code does:

    add_preempt_count(HARDIRQ_OFFSET);

    do_IRQ()
	irq_enter();
	  add_preempt_count(HARDIRQ_OFFSET);

	handle_irq();

	irq_exit();
	    local_irq_disable();
	    sub_preempt_count(HARDIRQ_OFFSET);

    sub_preempt_count(HARDIRQ_OFFSET);
    
    /* Check for nested irq */
    if (in_hardirq())
       reti();

    /* Check for nested irq again */
    if (pt->sr & ~ALLOWINT != 0)
       reti();

    do_softirq();
       ....
    ret_from_exception();

With the patches in place it looks like this:

     do_IRQ()
	nested = pt->sr & ~ALLOWINT;

	irq_enter();
	  add_preempt_count(HARDIRQ_OFFSET);

	handle_irq();

	irq_exit_nested(nested);
	    local_irq_disable();
	    sub_preempt_count(HARDIRQ_OFFSET);
	    if (!nested && !in_hardirq())
	       do_softirq()
		  
	return nested;

      if (nested)
      	 reti();

      ret_from_exception();

So all it does essentially is to move the softirq invocation in the
non nested case a tad earlier. I'm really puzzled as I can't spot the
point where this change makes a real difference.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-06 17:23                     ` Thomas Gleixner
@ 2013-11-07 14:12                       ` Geert Uytterhoeven
  2013-11-07 16:39                         ` Thomas Gleixner
  0 siblings, 1 reply; 80+ messages in thread
From: Geert Uytterhoeven @ 2013-11-07 14:12 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andreas Schwab, LKML, Peter Zijlstra, Ingo Molnar, Linux-Arch,
	Linus Torvalds, Andi Kleen, Peter Anvin, Mike Galbraith,
	Arjan van de Ven, Frederic Weisbecker, Linux/m68k

Hi Thomas,

On Wed, Nov 6, 2013 at 6:23 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
>> Also note that the value of "nested" doesn't match the indentation level,
>> which depends on my own bookkeeping using "nesting".
>
> Well, nested is just an indicator. It's not the nest level.

I know, the only thing that matters is whether it's zero or not.
But it should always be zero if there's no nesting, and non-zero if there
is, right?

So:

#   irq 13 nested 1024

nested should be 0 here.

#   irq 4 nested 0

ok

#     irq 13 nested 1024

ok (two extra spaces in front of "irq").

#     irq 4 nested 0

nested should be non-zero here.

>       nested = pt->sr & ~ALLOWINT;
> i.e.:
>       nested = pt->sr & 0x0700;
>
> So in the case above nested is 0x400
>
>> Anyone with an idea where it's going wrong?
>
> The original code does:
>
>     add_preempt_count(HARDIRQ_OFFSET);
>
>     do_IRQ()
>         irq_enter();
>           add_preempt_count(HARDIRQ_OFFSET);
>
>         handle_irq();
>
>         irq_exit();
>             local_irq_disable();
>             sub_preempt_count(HARDIRQ_OFFSET);
>
>     sub_preempt_count(HARDIRQ_OFFSET);
>
>     /* Check for nested irq */
>     if (in_hardirq())
>        reti();
>
>     /* Check for nested irq again */
>     if (pt->sr & ~ALLOWINT != 0)
>        reti();
>
>     do_softirq();
>        ....
>     ret_from_exception();
>
> With the patches in place it looks like this:
>
>      do_IRQ()
>         nested = pt->sr & ~ALLOWINT;
>
>         irq_enter();
>           add_preempt_count(HARDIRQ_OFFSET);
>
>         handle_irq();
>
>         irq_exit_nested(nested);
>             local_irq_disable();
>             sub_preempt_count(HARDIRQ_OFFSET);
>             if (!nested && !in_hardirq())
>                do_softirq()
>
>         return nested;
>
>       if (nested)
>          reti();
>
>       ret_from_exception();
>
> So all it does essentially is to move the softirq invocation in the
> non nested case a tad earlier. I'm really puzzled as I can't spot the
> point where this change makes a real difference.

Yes, that's also my understanding.
But I can't spot it either :-(

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-07 14:12                       ` Geert Uytterhoeven
@ 2013-11-07 16:39                         ` Thomas Gleixner
  2013-11-10  8:49                           ` Michael Schmitz
  0 siblings, 1 reply; 80+ messages in thread
From: Thomas Gleixner @ 2013-11-07 16:39 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Andreas Schwab, LKML, Peter Zijlstra, Ingo Molnar, Linux-Arch,
	Linus Torvalds, Andi Kleen, Peter Anvin, Mike Galbraith,
	Arjan van de Ven, Frederic Weisbecker, Linux/m68k

On Thu, 7 Nov 2013, Geert Uytterhoeven wrote:

> Hi Thomas,
> 
> On Wed, Nov 6, 2013 at 6:23 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> >> Also note that the value of "nested" doesn't match the indentation level,
> >> which depends on my own bookkeeping using "nesting".
> >
> > Well, nested is just an indicator. It's not the nest level.
> 
> I know, the only thing that matters is whether it's zero or not.
> But it should always be zero if there's no nesting, and non-zero if there
> is, right?
> 
> So:
> 
> #   irq 13 nested 1024
> 
> nested should be 0 here.
> 
> #   irq 4 nested 0
> 
> ok
> 
> #     irq 13 nested 1024
> 
> ok (two extra spaces in front of "irq").
> 
> #     irq 4 nested 0
> 
> nested should be non-zero here.

Hmm. The softirq code reenables interrupts unconditionally. So when an
interrupt hits there, the SR on the stack has the priority bits cleared.
You could verify that by checking in_serving_softirq() at the entry to
do_IRQ(). That could also explain the "irq 4 nested in irq 4" issue. You
can't observe that with the original code, as the softirq invocation, and
therefore the interrupt enable, happens outside of do_IRQ().
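
A minimal check for that, assuming the 3.12-era helpers (debug only,
not meant for merging):

	/* at the top of do_IRQ(), before irq_enter(): */
	if (in_serving_softirq())
		pr_info("irq %d arrived in softirq context, sr bits 0x%x\n",
			irq, regs->sr & ~ALLOWINT);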

Though that does not explain the non-nested case where nested is !=
0. But it looks like irq 13 has a higher level than 4:

> #     irq 13 nested 1024
>
> ok (two extra spaces in front of "irq").

So it could actually be the following:

   irq X arrives, SR I2/1/0 is set to 4

   Now before we reach do_IRQ()
   
	irq 13 arrives and interrupts irq X as it has a higher level

   	Your nest accounting shows 0, but the SR says nested, which is
   	actually the correct state.

Is there an easy to setup/use emulator around on which I could try to
dig into that myself?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-07 16:39                         ` Thomas Gleixner
@ 2013-11-10  8:49                           ` Michael Schmitz
  2013-11-10  9:12                             ` Geert Uytterhoeven
  0 siblings, 1 reply; 80+ messages in thread
From: Michael Schmitz @ 2013-11-10  8:49 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: LKML, Linux/m68k, Geert Uytterhoeven

Thomas,

> Is there an easy to setup/use emulator around on which I could try to
> dig into that myself?

I believe Geert uses ARAnyM for his tests of m68k kernels on emulators 
- it is reasonably easy to set up and use. I've used it to debug 
problems we had with the SLUB allocator two years ago.

Emulation is for Atari Falcon 040 hardware; things like interrupt 
priorities ought to be reproduced faithfully.

It wouldn't hurt, though, to try Geert's last non-booting kernel on real 
hardware - either send the kernel image or a patch to apply to your 
current tree, please, Geert.

Regards,

	Michael


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-10  8:49                           ` Michael Schmitz
@ 2013-11-10  9:12                             ` Geert Uytterhoeven
  2013-11-11 14:11                               ` Thomas Gleixner
  0 siblings, 1 reply; 80+ messages in thread
From: Geert Uytterhoeven @ 2013-11-10  9:12 UTC (permalink / raw)
  To: Michael Schmitz; +Cc: Thomas Gleixner, LKML, Linux/m68k

On Sun, Nov 10, 2013 at 9:49 AM, Michael Schmitz
<schmitz@biophys.uni-duesseldorf.de> wrote:
>> Is there an easy to setup/use emulator around on which I could try to
>> dig into that myself?
>
> I believe Geert uses ARAnyM for his tests of m68k kernels on emulators - it
> is reasonably easy to set up and use. I've used it to debug problems we had
> with the SLUB allocator two years ago.

Indeed. ARAnyM is the way to go.

> It wouldn't hurt, though, to try Geert's last non-booting kernel on real
> hardware - either send the kernel image or a patch to apply to your current
> tree, please, Geert.

Patches sent by private email.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-10  9:12                             ` Geert Uytterhoeven
@ 2013-11-11 14:11                               ` Thomas Gleixner
  2013-11-11 19:34                                 ` Thomas Gleixner
  2013-11-11 19:42                                 ` Andreas Schwab
  0 siblings, 2 replies; 80+ messages in thread
From: Thomas Gleixner @ 2013-11-11 14:11 UTC (permalink / raw)
  To: Geert Uytterhoeven; +Cc: Michael Schmitz, LKML, Linux/m68k

On Sun, 10 Nov 2013, Geert Uytterhoeven wrote:

> On Sun, Nov 10, 2013 at 9:49 AM, Michael Schmitz
> <schmitz@biophys.uni-duesseldorf.de> wrote:
> >> Is there an easy to setup/use emulator around on which I could try to
> >> dig into that myself?
> >
> > I believe Geert uses ARAnyM for his tests of m68k kernels on emulators - it
> > is reasonably easy to set up and use. I've used it to debug problems we had
> > with the SLUB allocator two years ago.
> 
> Indeed. ARAnyM is the way to go.

Ok. Got it running and looked a bit deeper. I haven't yet found the
root cause, but there are quite a few fishy things going on. Adding
enough debug printks makes the thing boot. Aside from that, there seems
to be a violation of the 68k interrupt model.

The 68k interrupt handling allows only interrupts which have a higher
level than the value of the interrupt priority mask in SR. Further, the
CPU sets the SR priority mask on interrupt entry to the level of the
interrupt which is serviced.
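
In rough pseudo-C (with level 7 being the non-maskable case):

	/* a pending request at 'level' is taken iff */
	if (level == 7 || level > ((sr >> 8) & 7)) {
		/* ... exception entry; the CPU then raises the mask: */
		sr = (sr & ~0x0700) | (level << 8);
	}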

So now with aranym I can see a different behaviour. I just added the
debug patch below to a vanilla 3.12.

And I can see ever repeating

  IRQ 13 flags 0x400 regs->sr 0x400

with a few

  IRQ 15 flags 0x400 regs->sr 0x400

sprinkled in.

Not what you would expect, right?

Thanks,

	tglx

Index: linux-2.6/arch/m68k/kernel/irq.c
===================================================================
--- linux-2.6.orig/arch/m68k/kernel/irq.c
+++ linux-2.6/arch/m68k/kernel/irq.c
@@ -20,6 +20,12 @@
 asmlinkage void do_IRQ(int irq, struct pt_regs *regs)
 {
 	struct pt_regs *oldregs = set_irq_regs(regs);
+	unsigned long nested = regs->sr & ~ALLOWINT;
+	unsigned long flags = arch_local_save_flags() & ~ALLOWINT;
+
+	if (nested >= flags)
+		printk(KERN_ERR "IRQ %d flags 0x%lx regs->sr 0x%lx\n",
+		       irq, flags, nested);
 
 	irq_enter();
 	generic_handle_irq(irq);

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-11 14:11                               ` Thomas Gleixner
@ 2013-11-11 19:34                                 ` Thomas Gleixner
  2013-11-11 20:52                                   ` Thomas Gleixner
  2013-11-12 14:09                                   ` [patch 1/6] hardirq: Make hardirq bits generic Geert Uytterhoeven
  2013-11-11 19:42                                 ` Andreas Schwab
  1 sibling, 2 replies; 80+ messages in thread
From: Thomas Gleixner @ 2013-11-11 19:34 UTC (permalink / raw)
  To: Geert Uytterhoeven; +Cc: Michael Schmitz, LKML, Linux/m68k

On Mon, 11 Nov 2013, Thomas Gleixner wrote:
> Not what you would expect, right?

Finally found the issue. The patch below fixes the problem here. The
little missing detail is that I zapped GET_CURRENT(), blindly assuming
that it is only needed for the preempt_count hackery. But in fact the
world and some more depends on it, which leads to interesting
explosions.

Thanks,

	tglx
----
Index: linux-2.6/arch/m68k/kernel/entry.S
===================================================================
--- linux-2.6.orig/arch/m68k/kernel/entry.S
+++ linux-2.6/arch/m68k/kernel/entry.S
@@ -274,6 +274,7 @@ do_delayed_trace:
 
 ENTRY(auto_inthandler)
 	SAVE_ALL_INT
+	GET_CURRENT(%d0)
 					|  put exception # in d0
 	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
 	subw	#VEC_SPUR,%d0
@@ -297,6 +298,7 @@ ret_from_interrupt:
 
 ENTRY(user_inthandler)
 	SAVE_ALL_INT
+	GET_CURRENT(%d0)
 					|  put exception # in d0
 	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
 user_irqvec_fixup = . + 2
@@ -313,6 +315,7 @@ user_irqvec_fixup = . + 2
 
 ENTRY(bad_inthandler)
 	SAVE_ALL_INT
+	GET_CURRENT(%d0)
 
 	movel	%sp,%sp@-
 	jsr	handle_badint


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-11 14:11                               ` Thomas Gleixner
  2013-11-11 19:34                                 ` Thomas Gleixner
@ 2013-11-11 19:42                                 ` Andreas Schwab
  2013-11-12  9:18                                   ` Thomas Gleixner
  1 sibling, 1 reply; 80+ messages in thread
From: Andreas Schwab @ 2013-11-11 19:42 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Geert Uytterhoeven, Michael Schmitz, LKML, Linux/m68k

Thomas Gleixner <tglx@linutronix.de> writes:

> And I can see ever repeating
>
>   IRQ 13 flags 0x400 regs->sr 0x400
>
> with a few
>
>   IRQ 15 flags 0x400 regs->sr 0x400
>
> sprinkled in.
>
> Not what you would expect, right?

If you configured for ATARI only, then ALLOWINT filters out the I1 bit,
which makes irq level 4 and level 6 look the same (all MFP interrupts
are at level 6).
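
Concretely, assuming the standard SR layout with the IPL mask in bits
8-10 (a worked example, not code from the tree):

	level 6: sr & 0x0700 == 0x0600	/* I2 I1 I0 = 110 */
	level 4: sr & 0x0700 == 0x0400	/* I2 I1 I0 = 100 */

	/* with the I1 bit (0x0200) filtered out by ALLOWINT: */
	0x0600 & ~0x0200 == 0x0400	/* level 6 now reads as level 4 */

which is exactly the "flags 0x400" in the debug output quoted above.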

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-11 19:34                                 ` Thomas Gleixner
@ 2013-11-11 20:52                                   ` Thomas Gleixner
  2013-11-12  6:56                                       ` Michael Schmitz
                                                       ` (2 more replies)
  2013-11-12 14:09                                   ` [patch 1/6] hardirq: Make hardirq bits generic Geert Uytterhoeven
  1 sibling, 3 replies; 80+ messages in thread
From: Thomas Gleixner @ 2013-11-11 20:52 UTC (permalink / raw)
  To: Geert Uytterhoeven; +Cc: Michael Schmitz, LKML, Linux/m68k

On Mon, 11 Nov 2013, Thomas Gleixner wrote:

> On Mon, 11 Nov 2013, Thomas Gleixner wrote:
> > Not what you would expect, right?
> 
> Finally found the issue. The patch below fixes the problem here. The
> little missing detail is that I zapped GET_CURRENT(), blindly assuming
> that it is only needed for the preempt_count hackery. But in fact the
> world and some more depends on it, which leads to interesting
> explosions.

Some more thoughts on this.

The whole nesting check in the existing low level entry code, and what
I tried to replicate with irq_exit_nested(), is pretty pointless.

Let's look at auto_inthandler and ret_from_exception

ENTRY(auto_inthandler)
	SAVE_ALL_INT
	GET_CURRENT(%d0)
	movel	%d0,%a1
	addqb	#1,%a1@(TINFO_PREEMPT+1)
					|  put exception # in d0
	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
	subw	#VEC_SPUR,%d0

	movel	%sp,%sp@-
	movel	%d0,%sp@-		|  put vector # on stack
auto_irqhandler_fixup = . + 2
	jsr	do_IRQ			|  process the IRQ
	addql	#8,%sp			|  pop parameters off stack

ret_from_interrupt:
	movel	%curptr@(TASK_STACK),%a1
	subqb	#1,%a1@(TINFO_PREEMPT+1)
	jeq	ret_from_last_interrupt
2:	RESTORE_ALL

	ALIGN
ret_from_last_interrupt:
	moveq	#(~ALLOWINT>>8)&0xff,%d0
	andb	%sp@(PT_OFF_SR),%d0
	jne	2b

	/* check if we need to do software interrupts */
	tstl	irq_stat+CPUSTAT_SOFTIRQ_PENDING
	jeq	.Lret_from_exception
	pea	ret_from_exception
	jra	do_softirq


ENTRY(ret_from_exception)
.Lret_from_exception:
	btst	#5,%sp@(PT_OFF_SR)	| check if returning to kernel
	bnes	1f			| if so, skip resched, signals
	....
1:	RESTORE_ALL

So in every interrupt exit path we check:

   1) Whether the hardirq part of preempt_count is zero

   2) Whether the interrupt prio mask of SR on stack is zero

and if we finally reach ret_from_exception we have the final check:

   3) whether we return to kernel or user space.

And this final check is the only one which matters, really.
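
In C terms that last check is roughly (a sketch; the S bit, bit 13 of
SR, is set when the interrupted context was kernel mode):

	/* sketch: the only exit check which really matters */
	if (regs->sr & 0x2000)	/* S bit: we interrupted kernel code */
		return;		/* skip the resched/signal work */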

If you look at the probability of the first two checks catching
anything, then it's pretty low. Most interrupt returns go through
ret_from_exception. Yes, I added counters which prove that, at least
on the aranym, but I doubt that it will make a real difference if you
run this on real hardware.

So what's the point of having these checks in the hotpath? The patch
below against 3.12 vanilla works nicely on the aranym and I don't see
a reason why this extra hackery is necessary at all. It's just code
bloat in a hotpath.

Now the only valid concern might be do_softirq itself, but that's
pointless as well. If the softirq is interrupted, then we do not
invoke it again. If the nested interrupt happens before irq_exit()
actually disables interrupts, then we won't invoke it either as the
hardirq part of preempt_count is still not zero.
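
For reference, the relevant part of irq_exit() boils down to roughly
this (a simplified sketch of the 3.12-era kernel/softirq.c, not the
literal source):

	/* simplified sketch of irq_exit() */
	void irq_exit(void)
	{
		sub_preempt_count(HARDIRQ_OFFSET);
		if (!in_interrupt() && local_softirq_pending())
			do_softirq();
	}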

As a side note: The do_softirq calls in the platform/68xxx entry
paths are just copied leftovers as well. Both entry code paths do
not fiddle with the preempt count and both call do_IRQ(), which will
call irq_exit() at the end, which will invoke do_softirq(), so the
check for more softirqs in the irq return path is just pointless.

Thanks,

	tglx
---
 kernel/entry.S         |   40 ++++------------------------------------
 kernel/ints.c          |    6 ------
 platform/68000/entry.S |   33 ++++++++-------------------------
 platform/68360/entry.S |   24 +++---------------------
 4 files changed, 15 insertions(+), 88 deletions(-)


Index: linux-2.6/arch/m68k/kernel/entry.S
===================================================================
--- linux-2.6.orig/arch/m68k/kernel/entry.S
+++ linux-2.6/arch/m68k/kernel/entry.S
@@ -45,7 +45,7 @@
 .globl system_call, buserr, trap, resume
 .globl sys_call_table
 .globl __sys_fork, __sys_clone, __sys_vfork
-.globl ret_from_interrupt, bad_interrupt
+.globl bad_interrupt
 .globl auto_irqhandler_fixup
 .globl user_irqvec_fixup
 
@@ -275,8 +275,6 @@ do_delayed_trace:
 ENTRY(auto_inthandler)
 	SAVE_ALL_INT
 	GET_CURRENT(%d0)
-	movel	%d0,%a1
-	addqb	#1,%a1@(TINFO_PREEMPT+1)
 					|  put exception # in d0
 	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
 	subw	#VEC_SPUR,%d0
@@ -286,32 +284,13 @@ ENTRY(auto_inthandler)
 auto_irqhandler_fixup = . + 2
 	jsr	do_IRQ			|  process the IRQ
 	addql	#8,%sp			|  pop parameters off stack
-
-ret_from_interrupt:
-	movel	%curptr@(TASK_STACK),%a1
-	subqb	#1,%a1@(TINFO_PREEMPT+1)
-	jeq	ret_from_last_interrupt
-2:	RESTORE_ALL
-
-	ALIGN
-ret_from_last_interrupt:
-	moveq	#(~ALLOWINT>>8)&0xff,%d0
-	andb	%sp@(PT_OFF_SR),%d0
-	jne	2b
-
-	/* check if we need to do software interrupts */
-	tstl	irq_stat+CPUSTAT_SOFTIRQ_PENDING
-	jeq	.Lret_from_exception
-	pea	ret_from_exception
-	jra	do_softirq
+	jra	ret_from_exception
 
 /* Handler for user defined interrupt vectors */
 
 ENTRY(user_inthandler)
 	SAVE_ALL_INT
 	GET_CURRENT(%d0)
-	movel	%d0,%a1
-	addqb	#1,%a1@(TINFO_PREEMPT+1)
 					|  put exception # in d0
 	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
 user_irqvec_fixup = . + 2
@@ -321,29 +300,18 @@ user_irqvec_fixup = . + 2
 	movel	%d0,%sp@-		|  put vector # on stack
 	jsr	do_IRQ			|  process the IRQ
 	addql	#8,%sp			|  pop parameters off stack
-
-	movel	%curptr@(TASK_STACK),%a1
-	subqb	#1,%a1@(TINFO_PREEMPT+1)
-	jeq	ret_from_last_interrupt
-	RESTORE_ALL
+	jra	ret_from_exception
 
 /* Handler for uninitialized and spurious interrupts */
 
 ENTRY(bad_inthandler)
 	SAVE_ALL_INT
 	GET_CURRENT(%d0)
-	movel	%d0,%a1
-	addqb	#1,%a1@(TINFO_PREEMPT+1)
 
 	movel	%sp,%sp@-
 	jsr	handle_badint
 	addql	#4,%sp
-
-	movel	%curptr@(TASK_STACK),%a1
-	subqb	#1,%a1@(TINFO_PREEMPT+1)
-	jeq	ret_from_last_interrupt
-	RESTORE_ALL
-
+	jra	ret_from_exception
 
 resume:
 	/*
Index: linux-2.6/arch/m68k/kernel/ints.c
===================================================================
--- linux-2.6.orig/arch/m68k/kernel/ints.c
+++ linux-2.6/arch/m68k/kernel/ints.c
@@ -58,12 +58,6 @@ void __init init_IRQ(void)
 {
 	int i;
 
-	/* assembly irq entry code relies on this... */
-	if (HARDIRQ_MASK != 0x00ff0000) {
-		extern void hardirq_mask_is_broken(void);
-		hardirq_mask_is_broken();
-	}
-
 	for (i = IRQ_AUTO_1; i <= IRQ_AUTO_7; i++)
 		irq_set_chip_and_handler(i, &auto_irq_chip, handle_simple_irq);
 
Index: linux-2.6/arch/m68k/platform/68000/entry.S
===================================================================
--- linux-2.6.orig/arch/m68k/platform/68000/entry.S
+++ linux-2.6/arch/m68k/platform/68000/entry.S
@@ -27,7 +27,6 @@
 .globl ret_from_exception
 .globl ret_from_signal
 .globl sys_call_table
-.globl ret_from_interrupt
 .globl bad_interrupt
 .globl inthandler1
 .globl inthandler2
@@ -137,7 +136,7 @@ inthandler1:
 	movel	#65,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler2:
 	SAVE_ALL_INT
@@ -148,7 +147,7 @@ inthandler2:
 	movel	#66,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler3:
 	SAVE_ALL_INT
@@ -159,7 +158,7 @@ inthandler3:
 	movel	#67,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler4:
 	SAVE_ALL_INT
@@ -170,7 +169,7 @@ inthandler4:
 	movel	#68,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler5:
 	SAVE_ALL_INT
@@ -181,7 +180,7 @@ inthandler5:
 	movel	#69,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler6:
 	SAVE_ALL_INT
@@ -192,7 +191,7 @@ inthandler6:
 	movel	#70,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler7:
 	SAVE_ALL_INT
@@ -203,7 +202,7 @@ inthandler7:
 	movel	#71,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler:
 	SAVE_ALL_INT
@@ -214,23 +213,7 @@ inthandler:
 	movel	%d0,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
-
-ret_from_interrupt:
-	jeq	1f
-2:
-	RESTORE_ALL
-1:
-	moveb	%sp@(PT_OFF_SR), %d0
-	and	#7, %d0
-	jhi	2b
-
-	/* check if we need to do software interrupts */
-	jeq	ret_from_exception
-
-	pea	ret_from_exception
-	jra	do_softirq
-
+	bra	ret_from_exception
 
 /*
  * Handler for uninitialized and spurious interrupts.
Index: linux-2.6/arch/m68k/platform/68360/entry.S
===================================================================
--- linux-2.6.orig/arch/m68k/platform/68360/entry.S
+++ linux-2.6/arch/m68k/platform/68360/entry.S
@@ -29,7 +29,6 @@
 .globl ret_from_exception
 .globl ret_from_signal
 .globl sys_call_table
-.globl ret_from_interrupt
 .globl bad_interrupt
 .globl inthandler
 
@@ -132,26 +131,9 @@ inthandler:
 
 	movel	%sp,%sp@-
 	movel	%d0,%sp@- 		/*  put vector # on stack*/
-	jbsr	do_IRQ			/*  process the IRQ*/
-3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
-
-ret_from_interrupt:
-	jeq	1f
-2:
-	RESTORE_ALL
-1:
-	moveb	%sp@(PT_OFF_SR), %d0
-	and	#7, %d0
-	jhi	2b
-	/* check if we need to do software interrupts */
-
-	movel	irq_stat+CPUSTAT_SOFTIRQ_PENDING,%d0
-	jeq	ret_from_exception
-
-	pea	ret_from_exception
-	jra	do_softirq
-
+	jbsr	do_IRQ			/*  process the IRQ */
+	addql	#8,%sp			/*  pop parameters off stack*/
+	jra	ret_from_exception
 
 /*
  * Handler for uninitialized and spurious interrupts.





^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-11 20:52                                   ` Thomas Gleixner
@ 2013-11-12  6:56                                       ` Michael Schmitz
  2013-11-12 15:08                                     ` Geert Uytterhoeven
  2013-11-13 19:42                                     ` [tip:irq/urgent] m68k: Simplify low level interrupt handling code tip-bot for Thomas Gleixner
  2 siblings, 0 replies; 80+ messages in thread
From: Michael Schmitz @ 2013-11-12  6:56 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: LKML, Linux/m68k, Geert Uytterhoeven

Thomas,

>> Finally found the issue. The patch below fixes the problem here. The
>> little missing detail is that I zapped GET_CURRENT(), blindly assuming
>> that it is only needed for the preempt_count hackery. But in fact the
>> world and some more depends on it, which leads to interesting
>> explosions.

Thanks for debugging this - I won't speculate on why we attempt to
handle softirqs explicitly, as I have only a passing acquaintance with
this code, from many years back.

>
> Some more thoughts on this.
>
> The whole nesting check in the existing low level entry code, and what
> I tried to replicate with irq_exit_nested(), is pretty pointless.
>
> Let's look at auto_inthandler and ret_from_exception
>
> ENTRY(auto_inthandler)
> 	SAVE_ALL_INT
> 	GET_CURRENT(%d0)
> 	movel	%d0,%a1
> 	addqb	#1,%a1@(TINFO_PREEMPT+1)
> 					|  put exception # in d0
> 	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
> 	subw	#VEC_SPUR,%d0
>
> 	movel	%sp,%sp@-
> 	movel	%d0,%sp@-		|  put vector # on stack
> auto_irqhandler_fixup = . + 2
> 	jsr	do_IRQ			|  process the IRQ
> 	addql	#8,%sp			|  pop parameters off stack
>
> ret_from_interrupt:
> 	movel	%curptr@(TASK_STACK),%a1
> 	subqb	#1,%a1@(TINFO_PREEMPT+1)
> 	jeq	ret_from_last_interrupt
> 2:	RESTORE_ALL
>
> 	ALIGN
> ret_from_last_interrupt:
> 	moveq	#(~ALLOWINT>>8)&0xff,%d0
> 	andb	%sp@(PT_OFF_SR),%d0
> 	jne	2b
>
> 	/* check if we need to do software interrupts */
> 	tstl	irq_stat+CPUSTAT_SOFTIRQ_PENDING
> 	jeq	.Lret_from_exception
> 	pea	ret_from_exception
> 	jra	do_softirq
>
>
> ENTRY(ret_from_exception)
> .Lret_from_exception:
> 	btst	#5,%sp@(PT_OFF_SR)	| check if returning to kernel
> 	bnes	1f			| if so, skip resched, signals
> 	....
> 1:	RESTORE_ALL
>
> So in every interrupt exit path we check:
>
>    1) Whether the hardirq part of preempt_count is zero
>
>    2) Whether the interrupt prio mask of SR on stack is zero
>
> and if we finally reach ret_from_exception we have the final check:
>
>    3) whether we return to kernel or user space.
>
> And this final check is the only one which matters, really.
>
> If you look at the probability of the first two checks catching
> anything, then it's pretty low. Most interrupt returns go through
> ret_from_exception. Yes, I added counters which prove that, at least
> on the aranym, but I doubt that it will make a real difference if you
> run this on real hardware.

I'm happy to try that (indeed, verify your patch works on Atari 
hardware first) - I trust the patch is relative to m68k git, not your 
previous patches?

Cheers,


	Michael


>
> So what's the point of having these checks in the hotpath? The patch
> below against 3.12 vanilla works nicely on the aranym and I don't see
> a reason why this extra hackery is necessary at all. It's just code
> bloat in a hotpath.
>
> Now the only valid concern might be do_softirq itself, but that's
> pointless as well. If the softirq is interrupted, then we do not
> invoke it again. If the nested interrupt happens before irq_exit()
> actually disables interrupts, then we won't invoke it either as the
> hardirq part of preempt_count is still not zero.
>
> As a side note: The do_softirq calls in the platform/68xxx entry
> paths are just copied leftovers as well. Both entry code paths do
> not fiddle with the preempt count and both call do_IRQ(), which will
> call irq_exit() at the end, which will invoke do_softirq(), so the
> check for more softirqs in the irq return path is just pointless.
>
> Thanks,
>
> 	tglx
> ---
>  kernel/entry.S         |   40 ++++------------------------------------
>  kernel/ints.c          |    6 ------
>  platform/68000/entry.S |   33 ++++++++-------------------------
>  platform/68360/entry.S |   24 +++---------------------
>  4 files changed, 15 insertions(+), 88 deletions(-)
>
>
> Index: linux-2.6/arch/m68k/kernel/entry.S
> ===================================================================
> --- linux-2.6.orig/arch/m68k/kernel/entry.S
> +++ linux-2.6/arch/m68k/kernel/entry.S
> @@ -45,7 +45,7 @@
>  .globl system_call, buserr, trap, resume
>  .globl sys_call_table
>  .globl __sys_fork, __sys_clone, __sys_vfork
> -.globl ret_from_interrupt, bad_interrupt
> +.globl bad_interrupt
>  .globl auto_irqhandler_fixup
>  .globl user_irqvec_fixup
>
> @@ -275,8 +275,6 @@ do_delayed_trace:
>  ENTRY(auto_inthandler)
>  	SAVE_ALL_INT
>  	GET_CURRENT(%d0)
> -	movel	%d0,%a1
> -	addqb	#1,%a1@(TINFO_PREEMPT+1)
>  					|  put exception # in d0
>  	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
>  	subw	#VEC_SPUR,%d0
> @@ -286,32 +284,13 @@ ENTRY(auto_inthandler)
>  auto_irqhandler_fixup = . + 2
>  	jsr	do_IRQ			|  process the IRQ
>  	addql	#8,%sp			|  pop parameters off stack
> -
> -ret_from_interrupt:
> -	movel	%curptr@(TASK_STACK),%a1
> -	subqb	#1,%a1@(TINFO_PREEMPT+1)
> -	jeq	ret_from_last_interrupt
> -2:	RESTORE_ALL
> -
> -	ALIGN
> -ret_from_last_interrupt:
> -	moveq	#(~ALLOWINT>>8)&0xff,%d0
> -	andb	%sp@(PT_OFF_SR),%d0
> -	jne	2b
> -
> -	/* check if we need to do software interrupts */
> -	tstl	irq_stat+CPUSTAT_SOFTIRQ_PENDING
> -	jeq	.Lret_from_exception
> -	pea	ret_from_exception
> -	jra	do_softirq
> +	jra	ret_from_exception
>
>  /* Handler for user defined interrupt vectors */
>
>  ENTRY(user_inthandler)
>  	SAVE_ALL_INT
>  	GET_CURRENT(%d0)
> -	movel	%d0,%a1
> -	addqb	#1,%a1@(TINFO_PREEMPT+1)
>  					|  put exception # in d0
>  	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
>  user_irqvec_fixup = . + 2
> @@ -321,29 +300,18 @@ user_irqvec_fixup = . + 2
>  	movel	%d0,%sp@-		|  put vector # on stack
>  	jsr	do_IRQ			|  process the IRQ
>  	addql	#8,%sp			|  pop parameters off stack
> -
> -	movel	%curptr@(TASK_STACK),%a1
> -	subqb	#1,%a1@(TINFO_PREEMPT+1)
> -	jeq	ret_from_last_interrupt
> -	RESTORE_ALL
> +	jra	ret_from_exception
>
>  /* Handler for uninitialized and spurious interrupts */
>
>  ENTRY(bad_inthandler)
>  	SAVE_ALL_INT
>  	GET_CURRENT(%d0)
> -	movel	%d0,%a1
> -	addqb	#1,%a1@(TINFO_PREEMPT+1)
>
>  	movel	%sp,%sp@-
>  	jsr	handle_badint
>  	addql	#4,%sp
> -
> -	movel	%curptr@(TASK_STACK),%a1
> -	subqb	#1,%a1@(TINFO_PREEMPT+1)
> -	jeq	ret_from_last_interrupt
> -	RESTORE_ALL
> -
> +	jra	ret_from_exception
>
>  resume:
>  	/*
> Index: linux-2.6/arch/m68k/kernel/ints.c
> ===================================================================
> --- linux-2.6.orig/arch/m68k/kernel/ints.c
> +++ linux-2.6/arch/m68k/kernel/ints.c
> @@ -58,12 +58,6 @@ void __init init_IRQ(void)
>  {
>  	int i;
>
> -	/* assembly irq entry code relies on this... */
> -	if (HARDIRQ_MASK != 0x00ff0000) {
> -		extern void hardirq_mask_is_broken(void);
> -		hardirq_mask_is_broken();
> -	}
> -
>  	for (i = IRQ_AUTO_1; i <= IRQ_AUTO_7; i++)
>  		irq_set_chip_and_handler(i, &auto_irq_chip, handle_simple_irq);
>
> Index: linux-2.6/arch/m68k/platform/68000/entry.S
> ===================================================================
> --- linux-2.6.orig/arch/m68k/platform/68000/entry.S
> +++ linux-2.6/arch/m68k/platform/68000/entry.S
> @@ -27,7 +27,6 @@
>  .globl ret_from_exception
>  .globl ret_from_signal
>  .globl sys_call_table
> -.globl ret_from_interrupt
>  .globl bad_interrupt
>  .globl inthandler1
>  .globl inthandler2
> @@ -137,7 +136,7 @@ inthandler1:
>  	movel	#65,%sp@- 		/*  put vector # on stack*/
>  	jbsr	process_int		/*  process the IRQ*/
>  3:     	addql	#8,%sp			/*  pop parameters off stack*/
> -	bra	ret_from_interrupt
> +	bra	ret_from_exception
>
>  inthandler2:
>  	SAVE_ALL_INT
> @@ -148,7 +147,7 @@ inthandler2:
>  	movel	#66,%sp@- 		/*  put vector # on stack*/
>  	jbsr	process_int		/*  process the IRQ*/
>  3:     	addql	#8,%sp			/*  pop parameters off stack*/
> -	bra	ret_from_interrupt
> +	bra	ret_from_exception
>
>  inthandler3:
>  	SAVE_ALL_INT
> @@ -159,7 +158,7 @@ inthandler3:
>  	movel	#67,%sp@- 		/*  put vector # on stack*/
>  	jbsr	process_int		/*  process the IRQ*/
>  3:     	addql	#8,%sp			/*  pop parameters off stack*/
> -	bra	ret_from_interrupt
> +	bra	ret_from_exception
>
>  inthandler4:
>  	SAVE_ALL_INT
> @@ -170,7 +169,7 @@ inthandler4:
>  	movel	#68,%sp@- 		/*  put vector # on stack*/
>  	jbsr	process_int		/*  process the IRQ*/
>  3:     	addql	#8,%sp			/*  pop parameters off stack*/
> -	bra	ret_from_interrupt
> +	bra	ret_from_exception
>
>  inthandler5:
>  	SAVE_ALL_INT
> @@ -181,7 +180,7 @@ inthandler5:
>  	movel	#69,%sp@- 		/*  put vector # on stack*/
>  	jbsr	process_int		/*  process the IRQ*/
>  3:     	addql	#8,%sp			/*  pop parameters off stack*/
> -	bra	ret_from_interrupt
> +	bra	ret_from_exception
>
>  inthandler6:
>  	SAVE_ALL_INT
> @@ -192,7 +191,7 @@ inthandler6:
>  	movel	#70,%sp@- 		/*  put vector # on stack*/
>  	jbsr	process_int		/*  process the IRQ*/
>  3:     	addql	#8,%sp			/*  pop parameters off stack*/
> -	bra	ret_from_interrupt
> +	bra	ret_from_exception
>
>  inthandler7:
>  	SAVE_ALL_INT
> @@ -203,7 +202,7 @@ inthandler7:
>  	movel	#71,%sp@- 		/*  put vector # on stack*/
>  	jbsr	process_int		/*  process the IRQ*/
>  3:     	addql	#8,%sp			/*  pop parameters off stack*/
> -	bra	ret_from_interrupt
> +	bra	ret_from_exception
>
>  inthandler:
>  	SAVE_ALL_INT
> @@ -214,23 +213,7 @@ inthandler:
>  	movel	%d0,%sp@- 		/*  put vector # on stack*/
>  	jbsr	process_int		/*  process the IRQ*/
>  3:     	addql	#8,%sp			/*  pop parameters off stack*/
> -	bra	ret_from_interrupt
> -
> -ret_from_interrupt:
> -	jeq	1f
> -2:
> -	RESTORE_ALL
> -1:
> -	moveb	%sp@(PT_OFF_SR), %d0
> -	and	#7, %d0
> -	jhi	2b
> -
> -	/* check if we need to do software interrupts */
> -	jeq	ret_from_exception
> -
> -	pea	ret_from_exception
> -	jra	do_softirq
> -
> +	bra	ret_from_exception
>
>  /*
>   * Handler for uninitialized and spurious interrupts.
> Index: linux-2.6/arch/m68k/platform/68360/entry.S
> ===================================================================
> --- linux-2.6.orig/arch/m68k/platform/68360/entry.S
> +++ linux-2.6/arch/m68k/platform/68360/entry.S
> @@ -29,7 +29,6 @@
>  .globl ret_from_exception
>  .globl ret_from_signal
>  .globl sys_call_table
> -.globl ret_from_interrupt
>  .globl bad_interrupt
>  .globl inthandler
>
> @@ -132,26 +131,9 @@ inthandler:
>
>  	movel	%sp,%sp@-
>  	movel	%d0,%sp@- 		/*  put vector # on stack*/
> -	jbsr	do_IRQ			/*  process the IRQ*/
> -3:     	addql	#8,%sp			/*  pop parameters off stack*/
> -	bra	ret_from_interrupt
> -
> -ret_from_interrupt:
> -	jeq	1f
> -2:
> -	RESTORE_ALL
> -1:
> -	moveb	%sp@(PT_OFF_SR), %d0
> -	and	#7, %d0
> -	jhi	2b
> -	/* check if we need to do software interrupts */
> -
> -	movel	irq_stat+CPUSTAT_SOFTIRQ_PENDING,%d0
> -	jeq	ret_from_exception
> -
> -	pea	ret_from_exception
> -	jra	do_softirq
> -
> +	jbsr	do_IRQ			/*  process the IRQ */
> +	addql	#8,%sp			/*  pop parameters off stack*/
> +	jra	ret_from_exception
>
>  /*
>   * Handler for uninitialized and spurious interrupts.


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-12  6:56                                       ` Michael Schmitz
@ 2013-11-12  8:44                                         ` schmitz
  0 siblings, 0 replies; 80+ messages in thread
From: schmitz @ 2013-11-12  8:44 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: LKML, Linux/m68k, Geert Uytterhoeven

Thomas,
>
>>
>> If you look at the probability of the first two checks catching
>> anything, then it's pretty low. Most interrupt returns go through
>> ret_from_exception. Yes, I added counters which prove that, at least
>> on the aranym, but I doubt that it will make a real difference if you
>> run this on real hardware.
>
> I'm happy to try that (indeed, verify your patch works on Atari 
> hardware first) - I trust the patch is relative to m68k git, not your 
> previous patches?

Pleased to report the patch works, as expected, on the actual hardware. 
I doubt we still need to profile the interrupt return path, Geert?

Cheers,

    Michael


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-11 19:42                                 ` Andreas Schwab
@ 2013-11-12  9:18                                   ` Thomas Gleixner
  0 siblings, 0 replies; 80+ messages in thread
From: Thomas Gleixner @ 2013-11-12  9:18 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: Geert Uytterhoeven, Michael Schmitz, LKML, Linux/m68k

On Mon, 11 Nov 2013, Andreas Schwab wrote:

> Thomas Gleixner <tglx@linutronix.de> writes:
> 
> > And I can see ever repeating
> >
> >   IRQ 13 flags 0x400 regs->sr 0x400
> >
> > with a few
> >
> >   IRQ 15 flags 0x400 regs->sr 0x400
> >
> > sprinkled in.
> >
> > Not what you would expect, right?
> 
> If you configured for ATARI only, then ALLOWINT filters out the I1 bit,
> which makes irq level 4 and level 6 look the same (all MFP interrupts
> are at level 6).

Fair enough.




^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-11 19:34                                 ` Thomas Gleixner
  2013-11-11 20:52                                   ` Thomas Gleixner
@ 2013-11-12 14:09                                   ` Geert Uytterhoeven
  1 sibling, 0 replies; 80+ messages in thread
From: Geert Uytterhoeven @ 2013-11-12 14:09 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Michael Schmitz, LKML, Linux/m68k

Hi Thomas,

On Mon, Nov 11, 2013 at 8:34 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> Finally found the issue. The patch below fixes the problem here. The
> little missing detail is that I zapped GET_CURRENT(), blindly assuming
> that it is only needed for the preempt_count hackery. But in fact the
> world and some more depends on it, which leads to interesting
> explosions.

Yes, GET_CURRENT() sets up the current task in %a2.
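
Roughly, in C terms, it computes the following (a sketch of what the
asm macro does; the real thing lives in arch/m68k/include/asm/entry.h):

	/* sketch: thread_info sits at the base of the kernel stack */
	struct thread_info *ti =
		(struct thread_info *)(sp & ~(THREAD_SIZE - 1));
	current = ti->task;	/* the entry code keeps this in %a2 */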

Many thanks for tracking this down!

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 1/6] hardirq: Make hardirq bits generic
  2013-11-11 20:52                                   ` Thomas Gleixner
  2013-11-12  6:56                                       ` Michael Schmitz
@ 2013-11-12 15:08                                     ` Geert Uytterhoeven
  2013-11-13 19:42                                     ` [tip:irq/urgent] m68k: Simplify low level interrupt handling code tip-bot for Thomas Gleixner
  2 siblings, 0 replies; 80+ messages in thread
From: Geert Uytterhoeven @ 2013-11-12 15:08 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Michael Schmitz, LKML, Linux/m68k

Hi Thomas,

On Mon, Nov 11, 2013 at 9:52 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> Some more thoughts on this.
>
> The whole nesting check in the existing low level entry code, and what
> I tried to replicate with irq_exit_nested(), is pretty pointless.
>
> Let's look at auto_inthandler and ret_from_exception
>
> ENTRY(auto_inthandler)
>         SAVE_ALL_INT
>         GET_CURRENT(%d0)
>         movel   %d0,%a1
>         addqb   #1,%a1@(TINFO_PREEMPT+1)
>                                         |  put exception # in d0
>         bfextu  %sp@(PT_OFF_FORMATVEC){#4,#10},%d0
>         subw    #VEC_SPUR,%d0
>
>         movel   %sp,%sp@-
>         movel   %d0,%sp@-               |  put vector # on stack
> auto_irqhandler_fixup = . + 2
>         jsr     do_IRQ                  |  process the IRQ
>         addql   #8,%sp                  |  pop parameters off stack
>
> ret_from_interrupt:
>         movel   %curptr@(TASK_STACK),%a1
>         subqb   #1,%a1@(TINFO_PREEMPT+1)
>         jeq     ret_from_last_interrupt
> 2:      RESTORE_ALL
>
>         ALIGN
> ret_from_last_interrupt:
>         moveq   #(~ALLOWINT>>8)&0xff,%d0
>         andb    %sp@(PT_OFF_SR),%d0
>         jne     2b
>
>         /* check if we need to do software interrupts */
>         tstl    irq_stat+CPUSTAT_SOFTIRQ_PENDING
>         jeq     .Lret_from_exception
>         pea     ret_from_exception
>         jra     do_softirq
>
>
> ENTRY(ret_from_exception)
> .Lret_from_exception:
>         btst    #5,%sp@(PT_OFF_SR)      | check if returning to kernel
>         bnes    1f                      | if so, skip resched, signals
>         ....
> 1:      RESTORE_ALL
>
> So in every interrupt exit path we check:
>
>    1) Whether the hardirq part of preempt_count is zero
>
>    2) Whether the interrupt prio mask of SR on stack is zero
>
> and if we finally reach ret_from_exception we have the final check:
>
>    3) whether we return to kernel or user space.
>
> And this final check is the only one which matters, really.
>
> If you look at the probability of the first two checks catching
> anything, then it's pretty low. Most interrupt returns go through
> ret_from_exception. Yes, I added counters which prove that, at least
> on the aranym, but I doubt that it will make a real difference if you
> run this on real hardware.
>
> So what's the point of having these checks in the hotpath? The patch

Most of this seems to be as old as the stone age. It was rewritten for
v2.5.29, but the initial bookkeeping was there in v2.1, and even in some
form in v1.3.94.

> below against 3.12 vanilla works nicely on the aranym and I don't see
> a reason why this extra hackery is necessary at all. It's just code
> bloat in a hotpath.
>
> Now the only valid concern might be do_softirq itself, but that's
> pointless as well. If the softirq is interrupted, then we do not
> invoke it again. If the nested interrupt happens before irq_exit()
> actually disables interrupts, then we won't invoke it either as the
> hardirq part of preempt_count is still not zero.
>
> As a side note: The do_softirq calls in the platform/68xxx entry
> paths are just copied leftovers as well. Both entry code paths do
> not fiddle with the preempt count and both call do_IRQ(), which will
> call irq_exit() at the end, which will invoke do_softirq(), so the
> check for more softirqs in the irq return path is just pointless.

Your reasoning sounds OK, and it works on ARAnyM, so thanks again, and
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>

BTW, do you plan to get the "make hardirq bits generic" series in 3.13?
Or do you want me to take this patch through the m68k tree?
So far I don't have any plans to send another pull request for 3.13.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [tip:irq/urgent] m68k: Simplify low level interrupt handling code
  2013-11-11 20:52                                   ` Thomas Gleixner
  2013-11-12  6:56                                       ` Michael Schmitz
  2013-11-12 15:08                                     ` Geert Uytterhoeven
@ 2013-11-13 19:42                                     ` tip-bot for Thomas Gleixner
  2 siblings, 0 replies; 80+ messages in thread
From: tip-bot for Thomas Gleixner @ 2013-11-13 19:42 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, geert, schmitz, tglx, schwab, linux-m68k

Commit-ID:  09f90f6685cd88b6b904c141035d096169958cc4
Gitweb:     http://git.kernel.org/tip/09f90f6685cd88b6b904c141035d096169958cc4
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Mon, 11 Nov 2013 21:01:03 +0100
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 13 Nov 2013 20:21:46 +0100

m68k: Simplify low level interrupt handling code

The low level interrupt entry code of m68k contains the following:

    add_preempt_count(HARDIRQ_OFFSET);

    do_IRQ();
	irq_enter();
	    add_preempt_count(HARDIRQ_OFFSET);
	handle_interrupt();    
	irq_exit();    
	    sub_preempt_count(HARDIRQ_OFFSET);
	    if (in_interrupt())
       	       return; <---- On m68k always taken!
	    if (local_softirq_pending())
       	       do_softirq();

    sub_preempt_count(HARDIRQ_OFFSET);
    if (in_hardirq())
       return;
    if (status_on_stack_has_interrupt_priority_mask > 0)
       return;
    if (local_softirq_pending())
       do_softirq();

    ret_from_exception:
	if (interrupted_context_is_kernel)
	   return:
	....

I tried to find a proper explanation for this, but the changelog is
sparse and there are no mails explaining it further. But obviously
this relates to the interrupt priority levels of the m68k and tries to
be extra clever with nested interrupts. Though this cleverness just
adds code bloat to the interrupt hotpath.

For the common case of non-nested interrupts the code runs through two
extra conditionals before reaching the only important one, which checks
whether the return is to kernel or user space.

For the nested case, the checks for in_hardirq() and the priority mask
value on stack catch only the case where the nested interrupt happens
inside the hard irq context of the first interrupt. If the nested
interrupt happens while the first interrupt handles soft interrupts,
then these extra checks buy nothing. The nested interrupt will fall
through to the final kernel/user space return check at
ret_from_exception.

Changing the code flow in the following way:

    do_IRQ();
	irq_enter();
	    add_preempt_count(HARDIRQ_OFFSET);
	handle_interrupt();    
	irq_exit();    
	    sub_preempt_count(HARDIRQ_OFFSET);
	    if (in_interrupt())
       	       return;
	    if (local_softirq_pending())
       	       do_softirq();

    ret_from_exception:
	if (interrupted_context_is_kernel)
	   return:

makes the region protected by the hardirq count slightly smaller, and
the softirq handling is invoked on a minimally deeper stack. But
otherwise it's completely functionally equivalent, and it saves 104
bytes of text in arch/m68k/kernel/entry.o.

This modification further allows us to get rid of the limitations
which m68k puts on the preempt_count layout, so we can make the
preempt count bits completely generic.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Schmitz <schmitz@biophys.uni-duesseldorf.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Linux/m68k <linux-m68k@vger.kernel.org>
Cc: Andreas Schwab <schwab@linux-m68k.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1311112052360.30673@ionos.tec.linutronix.de
---
 arch/m68k/kernel/entry.S         | 40 ++++------------------------------------
 arch/m68k/kernel/ints.c          |  6 ------
 arch/m68k/platform/68000/entry.S | 33 ++++++++-------------------------
 arch/m68k/platform/68360/entry.S | 24 +++---------------------
 4 files changed, 15 insertions(+), 88 deletions(-)

diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index a78f564..b54ac7a 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -45,7 +45,7 @@
 .globl system_call, buserr, trap, resume
 .globl sys_call_table
 .globl __sys_fork, __sys_clone, __sys_vfork
-.globl ret_from_interrupt, bad_interrupt
+.globl bad_interrupt
 .globl auto_irqhandler_fixup
 .globl user_irqvec_fixup
 
@@ -275,8 +275,6 @@ do_delayed_trace:
 ENTRY(auto_inthandler)
 	SAVE_ALL_INT
 	GET_CURRENT(%d0)
-	movel	%d0,%a1
-	addqb	#1,%a1@(TINFO_PREEMPT+1)
 					|  put exception # in d0
 	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
 	subw	#VEC_SPUR,%d0
@@ -286,32 +284,13 @@ ENTRY(auto_inthandler)
 auto_irqhandler_fixup = . + 2
 	jsr	do_IRQ			|  process the IRQ
 	addql	#8,%sp			|  pop parameters off stack
-
-ret_from_interrupt:
-	movel	%curptr@(TASK_STACK),%a1
-	subqb	#1,%a1@(TINFO_PREEMPT+1)
-	jeq	ret_from_last_interrupt
-2:	RESTORE_ALL
-
-	ALIGN
-ret_from_last_interrupt:
-	moveq	#(~ALLOWINT>>8)&0xff,%d0
-	andb	%sp@(PT_OFF_SR),%d0
-	jne	2b
-
-	/* check if we need to do software interrupts */
-	tstl	irq_stat+CPUSTAT_SOFTIRQ_PENDING
-	jeq	.Lret_from_exception
-	pea	ret_from_exception
-	jra	do_softirq
+	jra	ret_from_exception
 
 /* Handler for user defined interrupt vectors */
 
 ENTRY(user_inthandler)
 	SAVE_ALL_INT
 	GET_CURRENT(%d0)
-	movel	%d0,%a1
-	addqb	#1,%a1@(TINFO_PREEMPT+1)
 					|  put exception # in d0
 	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
 user_irqvec_fixup = . + 2
@@ -321,29 +300,18 @@ user_irqvec_fixup = . + 2
 	movel	%d0,%sp@-		|  put vector # on stack
 	jsr	do_IRQ			|  process the IRQ
 	addql	#8,%sp			|  pop parameters off stack
-
-	movel	%curptr@(TASK_STACK),%a1
-	subqb	#1,%a1@(TINFO_PREEMPT+1)
-	jeq	ret_from_last_interrupt
-	RESTORE_ALL
+	jra	ret_from_exception
 
 /* Handler for uninitialized and spurious interrupts */
 
 ENTRY(bad_inthandler)
 	SAVE_ALL_INT
 	GET_CURRENT(%d0)
-	movel	%d0,%a1
-	addqb	#1,%a1@(TINFO_PREEMPT+1)
 
 	movel	%sp,%sp@-
 	jsr	handle_badint
 	addql	#4,%sp
-
-	movel	%curptr@(TASK_STACK),%a1
-	subqb	#1,%a1@(TINFO_PREEMPT+1)
-	jeq	ret_from_last_interrupt
-	RESTORE_ALL
-
+	jra	ret_from_exception
 
 resume:
 	/*
diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c
index 4d7da38..077d3a7 100644
--- a/arch/m68k/kernel/ints.c
+++ b/arch/m68k/kernel/ints.c
@@ -58,12 +58,6 @@ void __init init_IRQ(void)
 {
 	int i;
 
-	/* assembly irq entry code relies on this... */
-	if (HARDIRQ_MASK != 0x00ff0000) {
-		extern void hardirq_mask_is_broken(void);
-		hardirq_mask_is_broken();
-	}
-
 	for (i = IRQ_AUTO_1; i <= IRQ_AUTO_7; i++)
 		irq_set_chip_and_handler(i, &auto_irq_chip, handle_simple_irq);
 
diff --git a/arch/m68k/platform/68000/entry.S b/arch/m68k/platform/68000/entry.S
index 7f91c2f..23ac054 100644
--- a/arch/m68k/platform/68000/entry.S
+++ b/arch/m68k/platform/68000/entry.S
@@ -27,7 +27,6 @@
 .globl ret_from_exception
 .globl ret_from_signal
 .globl sys_call_table
-.globl ret_from_interrupt
 .globl bad_interrupt
 .globl inthandler1
 .globl inthandler2
@@ -137,7 +136,7 @@ inthandler1:
 	movel	#65,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler2:
 	SAVE_ALL_INT
@@ -148,7 +147,7 @@ inthandler2:
 	movel	#66,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler3:
 	SAVE_ALL_INT
@@ -159,7 +158,7 @@ inthandler3:
 	movel	#67,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler4:
 	SAVE_ALL_INT
@@ -170,7 +169,7 @@ inthandler4:
 	movel	#68,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler5:
 	SAVE_ALL_INT
@@ -181,7 +180,7 @@ inthandler5:
 	movel	#69,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler6:
 	SAVE_ALL_INT
@@ -192,7 +191,7 @@ inthandler6:
 	movel	#70,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler7:
 	SAVE_ALL_INT
@@ -203,7 +202,7 @@ inthandler7:
 	movel	#71,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
+	bra	ret_from_exception
 
 inthandler:
 	SAVE_ALL_INT
@@ -214,23 +213,7 @@ inthandler:
 	movel	%d0,%sp@- 		/*  put vector # on stack*/
 	jbsr	process_int		/*  process the IRQ*/
 3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
-
-ret_from_interrupt:
-	jeq	1f
-2:
-	RESTORE_ALL
-1:
-	moveb	%sp@(PT_OFF_SR), %d0
-	and	#7, %d0
-	jhi	2b
-
-	/* check if we need to do software interrupts */
-	jeq	ret_from_exception
-
-	pea	ret_from_exception
-	jra	do_softirq
-
+	bra	ret_from_exception
 
 /*
  * Handler for uninitialized and spurious interrupts.
diff --git a/arch/m68k/platform/68360/entry.S b/arch/m68k/platform/68360/entry.S
index 904fd9a..447c33e 100644
--- a/arch/m68k/platform/68360/entry.S
+++ b/arch/m68k/platform/68360/entry.S
@@ -29,7 +29,6 @@
 .globl ret_from_exception
 .globl ret_from_signal
 .globl sys_call_table
-.globl ret_from_interrupt
 .globl bad_interrupt
 .globl inthandler
 
@@ -132,26 +131,9 @@ inthandler:
 
 	movel	%sp,%sp@-
 	movel	%d0,%sp@- 		/*  put vector # on stack*/
-	jbsr	do_IRQ			/*  process the IRQ*/
-3:     	addql	#8,%sp			/*  pop parameters off stack*/
-	bra	ret_from_interrupt
-
-ret_from_interrupt:
-	jeq	1f
-2:
-	RESTORE_ALL
-1:
-	moveb	%sp@(PT_OFF_SR), %d0
-	and	#7, %d0
-	jhi	2b
-	/* check if we need to do software interrupts */
-
-	movel	irq_stat+CPUSTAT_SOFTIRQ_PENDING,%d0
-	jeq	ret_from_exception
-
-	pea	ret_from_exception
-	jra	do_softirq
-
+	jbsr	do_IRQ			/*  process the IRQ */
+	addql	#8,%sp			/*  pop parameters off stack*/
+	jra	ret_from_exception
 
 /*
  * Handler for uninitialized and spurious interrupts.

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [tip:irq/urgent] hardirq: Make hardirq bits generic
  2013-09-17 18:53   ` [patch 1/6] hardirq: Make hardirq bits generic Thomas Gleixner
  2013-09-17 20:00     ` Geert Uytterhoeven
@ 2013-11-13 19:42     ` tip-bot for Thomas Gleixner
  1 sibling, 0 replies; 80+ messages in thread
From: tip-bot for Thomas Gleixner @ 2013-11-13 19:42 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, peterz, tglx

Commit-ID:  54197e43a4a9a0f3fc406d72d9815754e84fab1e
Gitweb:     http://git.kernel.org/tip/54197e43a4a9a0f3fc406d72d9815754e84fab1e
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Tue, 17 Sep 2013 18:53:05 +0000
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 13 Nov 2013 20:21:46 +0100

hardirq: Make hardirq bits generic

There is no reason for per-arch hardirq bits. Make them all generic.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20130917183628.534494408@linutronix.de
---
 arch/blackfin/include/asm/hardirq.h |  3 ---
 arch/cris/include/asm/hardirq.h     | 12 ------------
 arch/m32r/include/asm/hardirq.h     | 16 ----------------
 arch/m68k/include/asm/hardirq.h     | 11 -----------
 arch/s390/include/asm/hardirq.h     |  2 --
 arch/sparc/include/asm/hardirq_32.h |  1 -
 arch/sparc/include/asm/hardirq_64.h |  2 --
 arch/tile/include/asm/hardirq.h     |  2 --
 include/linux/preempt_mask.h        | 30 ++++++++----------------------
 9 files changed, 8 insertions(+), 71 deletions(-)

diff --git a/arch/blackfin/include/asm/hardirq.h b/arch/blackfin/include/asm/hardirq.h
index c078dd7..58b54a6 100644
--- a/arch/blackfin/include/asm/hardirq.h
+++ b/arch/blackfin/include/asm/hardirq.h
@@ -12,9 +12,6 @@
 extern void ack_bad_irq(unsigned int irq);
 #define ack_bad_irq ack_bad_irq
 
-/* Define until common code gets sane defaults */
-#define HARDIRQ_BITS 9
-
 #include <asm-generic/hardirq.h>
 
 #endif
diff --git a/arch/cris/include/asm/hardirq.h b/arch/cris/include/asm/hardirq.h
index 17bb12d..04126f7 100644
--- a/arch/cris/include/asm/hardirq.h
+++ b/arch/cris/include/asm/hardirq.h
@@ -2,18 +2,6 @@
 #define __ASM_HARDIRQ_H
 
 #include <asm/irq.h>
-
-#define HARDIRQ_BITS	8
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #include <asm-generic/hardirq.h>
 
 #endif /* __ASM_HARDIRQ_H */
diff --git a/arch/m32r/include/asm/hardirq.h b/arch/m32r/include/asm/hardirq.h
index 4c31c0a..5f2ac4f 100644
--- a/arch/m32r/include/asm/hardirq.h
+++ b/arch/m32r/include/asm/hardirq.h
@@ -3,22 +3,6 @@
 #define __ASM_HARDIRQ_H
 
 #include <asm/irq.h>
-
-#if NR_IRQS > 256
-#define HARDIRQ_BITS	9
-#else
-#define HARDIRQ_BITS	8
-#endif
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #include <asm-generic/hardirq.h>
 
 #endif /* __ASM_HARDIRQ_H */
diff --git a/arch/m68k/include/asm/hardirq.h b/arch/m68k/include/asm/hardirq.h
index db30ed2..6c61852 100644
--- a/arch/m68k/include/asm/hardirq.h
+++ b/arch/m68k/include/asm/hardirq.h
@@ -5,17 +5,6 @@
 #include <linux/cache.h>
 #include <asm/irq.h>
 
-#define HARDIRQ_BITS	8
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #ifdef CONFIG_MMU
 
 static inline void ack_bad_irq(unsigned int irq)
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index a908d29..b7eabaa 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -18,8 +18,6 @@
 #define __ARCH_HAS_DO_SOFTIRQ
 #define __ARCH_IRQ_EXIT_IRQS_DISABLED
 
-#define HARDIRQ_BITS	8
-
 static inline void ack_bad_irq(unsigned int irq)
 {
 	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
diff --git a/arch/sparc/include/asm/hardirq_32.h b/arch/sparc/include/asm/hardirq_32.h
index 1620076..ee93923 100644
--- a/arch/sparc/include/asm/hardirq_32.h
+++ b/arch/sparc/include/asm/hardirq_32.h
@@ -7,7 +7,6 @@
 #ifndef __SPARC_HARDIRQ_H
 #define __SPARC_HARDIRQ_H
 
-#define HARDIRQ_BITS    8
 #include <asm-generic/hardirq.h>
 
 #endif /* __SPARC_HARDIRQ_H */
diff --git a/arch/sparc/include/asm/hardirq_64.h b/arch/sparc/include/asm/hardirq_64.h
index 7c29fd1..f478ff1 100644
--- a/arch/sparc/include/asm/hardirq_64.h
+++ b/arch/sparc/include/asm/hardirq_64.h
@@ -14,6 +14,4 @@
 
 void ack_bad_irq(unsigned int irq);
 
-#define HARDIRQ_BITS	8
-
 #endif /* !(__SPARC64_HARDIRQ_H) */
diff --git a/arch/tile/include/asm/hardirq.h b/arch/tile/include/asm/hardirq.h
index 822390f..54110af 100644
--- a/arch/tile/include/asm/hardirq.h
+++ b/arch/tile/include/asm/hardirq.h
@@ -42,6 +42,4 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
 
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
 
-#define HARDIRQ_BITS	8
-
 #endif /* _ASM_TILE_HARDIRQ_H */
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
index 931bc61..810d7e3 100644
--- a/include/linux/preempt_mask.h
+++ b/include/linux/preempt_mask.h
@@ -11,36 +11,22 @@
  * - bits 0-7 are the preemption count (max preemption depth: 256)
  * - bits 8-15 are the softirq count (max # of softirqs: 256)
  *
- * The hardirq count can in theory reach the same as NR_IRQS.
- * In reality, the number of nested IRQS is limited to the stack
- * size as well. For archs with over 1000 IRQS it is not practical
- * to expect that they will all nest. We give a max of 10 bits for
- * hardirq nesting. An arch may choose to give less than 10 bits.
- * m68k expects it to be 8.
- *
- * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
- * - bit 26 is the NMI_MASK
- * - bit 27 is the PREEMPT_ACTIVE flag
+ * The hardirq count could in theory be the same as the number of
+ * interrupts in the system, but we run all interrupt handlers with
+ * interrupts disabled, so we cannot have nesting interrupts. Though
+ * there are a few palaeontologic drivers which reenable interrupts in
+ * the handler, so we need more than one bit here.
  *
  * PREEMPT_MASK: 0x000000ff
  * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x03ff0000
- *     NMI_MASK: 0x04000000
+ * HARDIRQ_MASK: 0x000f0000
+ *     NMI_MASK: 0x00100000
  */
 #define PREEMPT_BITS	8
 #define SOFTIRQ_BITS	8
+#define HARDIRQ_BITS	4
 #define NMI_BITS	1
 
-#define MAX_HARDIRQ_BITS 10
-
-#ifndef HARDIRQ_BITS
-# define HARDIRQ_BITS	MAX_HARDIRQ_BITS
-#endif
-
-#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
-#error HARDIRQ_BITS too high!
-#endif
-
 #define PREEMPT_SHIFT	0
 #define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
 #define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
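
For readers keeping count: the new layout can be verified in isolation.
A minimal, hypothetical userspace check using the same macro shapes as
the diff (__IRQ_MASK is the existing helper from preempt_mask.h):

#include <assert.h>

#define PREEMPT_BITS	8
#define SOFTIRQ_BITS	8
#define HARDIRQ_BITS	4
#define NMI_BITS	1

#define PREEMPT_SHIFT	0
#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)

#define __IRQ_MASK(x)	((1UL << (x)) - 1)
#define PREEMPT_MASK	(__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
#define SOFTIRQ_MASK	(__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
#define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK	(__IRQ_MASK(NMI_BITS) << NMI_SHIFT)

int main(void)
{
	/* matches the layout comment: 4 hardirq bits, NMI just above */
	assert(PREEMPT_MASK == 0x000000ffUL);
	assert(SOFTIRQ_MASK == 0x0000ff00UL);
	assert(HARDIRQ_MASK == 0x000f0000UL);
	assert(NMI_MASK     == 0x00100000UL);
	return 0;
}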

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [tip:irq/urgent] m32r: Use preempt_schedule_irq
  2013-09-17 18:53   ` [patch 3/6] m32r: Use preempt_schedule_irq Thomas Gleixner
@ 2013-11-13 19:42     ` tip-bot for Thomas Gleixner
  0 siblings, 0 replies; 80+ messages in thread
From: tip-bot for Thomas Gleixner @ 2013-11-13 19:42 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, takata, tglx

Commit-ID:  650e4dc2a75959fdca1eecc0147bbb21dbfadf0f
Gitweb:     http://git.kernel.org/tip/650e4dc2a75959fdca1eecc0147bbb21dbfadf0f
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Tue, 17 Sep 2013 18:53:07 +0000
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 13 Nov 2013 20:21:46 +0100

m32r: Use preempt_schedule_irq

Use the proper core function instead of fiddling with PREEMPT_ACTIVE
and interrupt enabling in the low level code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: linux-m32r-ja@ml.linux-m32r.org
Link: http://lkml.kernel.org/r/20130917183628.758421136@linutronix.de
---
 arch/m32r/kernel/entry.S | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/m32r/kernel/entry.S b/arch/m32r/kernel/entry.S
index 0c01543..7c3db99 100644
--- a/arch/m32r/kernel/entry.S
+++ b/arch/m32r/kernel/entry.S
@@ -182,13 +182,7 @@ need_resched:
 	ld	r4, PSW(sp)		; interrupts off (exception path) ?
 	and3	r4, r4, #0x4000
 	beqz	r4, restore_all
-	LDIMM	(r4, PREEMPT_ACTIVE)
-	st	r4, @(TI_PRE_COUNT, r8)
-	ENABLE_INTERRUPTS(r4)
-	bl	schedule
-	ldi	r4, #0
-	st	r4, @(TI_PRE_COUNT, r8)
-	DISABLE_INTERRUPTS(r4)
+	bl	preempt_schedule_irq
 	bra	need_resched
 #endif
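
(What the branch now lands in: preempt_schedule_irq() is the core's
purpose-built preemption point for interrupt return. Abridged from
kernel/sched/core.c of roughly this vintage -- the context-tracking
hooks are omitted, so treat this as a sketch rather than the exact
source:

asmlinkage void __sched preempt_schedule_irq(void)
{
	/* must be entered with IRQs off and no preemption held */
	BUG_ON(preempt_count() || !irqs_disabled());

	do {
		add_preempt_count(PREEMPT_ACTIVE);
		local_irq_enable();
		__schedule();
		local_irq_disable();	/* contract: IRQs off again */
		sub_preempt_count(PREEMPT_ACTIVE);

		/* re-check in case a new resched request raced in */
		barrier();
	} while (need_resched());
}

This is exactly the enable/schedule/disable dance the deleted m32r
assembly used to open-code.)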
 

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [tip:irq/urgent] ia64: Use preempt_schedule_irq
  2013-09-17 18:53   ` [patch 4/6] ia64: " Thomas Gleixner
@ 2013-11-13 19:43     ` tip-bot for Thomas Gleixner
  2013-11-20 19:59     ` [patch 4/6] " Tony Luck
  1 sibling, 0 replies; 80+ messages in thread
From: tip-bot for Thomas Gleixner @ 2013-11-13 19:43 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, fenghua.yu, tony.luck, tglx

Commit-ID:  aa0d53260596c1bef23547537724d4bad78e6f52
Gitweb:     http://git.kernel.org/tip/aa0d53260596c1bef23547537724d4bad78e6f52
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Tue, 17 Sep 2013 18:53:08 +0000
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 13 Nov 2013 20:21:47 +0100

ia64: Use preempt_schedule_irq

Use the proper core function instead of fiddling with PREEMPT_ACTIVE
and enable/disable interrupts in the low level code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: linux-ia64@vger.kernel.org
Link: http://lkml.kernel.org/r/20130917183628.857145384@linutronix.de
---
 arch/ia64/kernel/entry.S | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 7a53530..ddea607f 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1169,21 +1169,8 @@ skip_rbs_switch:
 .work_pending:
 	tbit.z p6,p0=r31,TIF_NEED_RESCHED	// is resched not needed?
 (p6)	br.cond.sptk.few .notify
-#ifdef CONFIG_PREEMPT
-(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
-	;;
-(pKStk) st4 [r20]=r21
-#endif
-	SSM_PSR_I(p0, p6, r2)	// enable interrupts
-	br.call.spnt.many rp=schedule
+	br.call.spnt.many rp=preempt_schedule_irq
 .ret9:	cmp.eq p6,p0=r0,r0	// p6 <- 1 (re-check)
-	RSM_PSR_I(p0, r2, r20)	// disable interrupts
-	;;
-#ifdef CONFIG_PREEMPT
-(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
-	;;
-(pKStk)	st4 [r20]=r0		// preempt_count() <- 0
-#endif
 (pLvSys)br.cond.sptk.few  __paravirt_pending_syscall_end
 	br.cond.sptk.many .work_processed_kernel
 

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [tip:irq/urgent] sparc: Use preempt_schedule_irq
  2013-09-17 18:53   ` [patch 5/6] sparc: " Thomas Gleixner
  2013-09-17 22:54     ` David Miller
@ 2013-11-13 19:43     ` tip-bot for Thomas Gleixner
  1 sibling, 0 replies; 80+ messages in thread
From: tip-bot for Thomas Gleixner @ 2013-11-13 19:43 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, tglx, davem

Commit-ID:  9385d949d5bd0eb642ed05ea263c3638c9f4e372
Gitweb:     http://git.kernel.org/tip/9385d949d5bd0eb642ed05ea263c3638c9f4e372
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Tue, 17 Sep 2013 18:53:08 +0000
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 13 Nov 2013 20:21:47 +0100

sparc: Use preempt_schedule_irq

The low level preemption code fiddles with the PREEMPT_ACTIVE bit for
no reason and calls schedule() with interrupts disabled, which is
wrong to begin with. Remove the PREEMPT_ACTIVE fiddling and call the
proper preempt_schedule_irq() function.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: sparclinux@vger.kernel.org
Link: http://lkml.kernel.org/r/20130917183628.966769884@linutronix.de

---
 arch/sparc/kernel/rtrap_64.S | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index afa2a9e..76213db 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -306,12 +306,10 @@ to_kernel:
 		 nop
 		cmp			%l4, 0
 		bne,pn			%xcc, kern_fpucheck
-		 sethi			%hi(PREEMPT_ACTIVE), %l6
-		stw			%l6, [%g6 + TI_PRE_COUNT]
-		call			schedule
+		 nop
+		call			preempt_schedule_irq
 		 nop
 		ba,pt			%xcc, rtrap
-		 stw			%g0, [%g6 + TI_PRE_COUNT]
 #endif
 kern_fpucheck:	ldub			[%g6 + TI_FPDEPTH], %l5
 		brz,pt			%l5, rt_continue

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [tip:irq/urgent] preempt: Make PREEMPT_ACTIVE generic
  2013-09-17 18:53   ` [patch 6/6] preempt: Make PREEMPT_ACTIVE generic Thomas Gleixner
  2013-09-18 10:48     ` Peter Zijlstra
@ 2013-11-13 19:43     ` tip-bot for Thomas Gleixner
  1 sibling, 0 replies; 80+ messages in thread
From: tip-bot for Thomas Gleixner @ 2013-11-13 19:43 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, peterz, tglx

Commit-ID:  00d1a39e69d5afa7523dad515a05b21abd17c389
Gitweb:     http://git.kernel.org/tip/00d1a39e69d5afa7523dad515a05b21abd17c389
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Tue, 17 Sep 2013 18:53:09 +0000
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 13 Nov 2013 20:21:47 +0100

preempt: Make PREEMPT_ACTIVE generic

No point in having this bit defined per architecture.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20130917183629.090698799@linutronix.de
---
 arch/alpha/include/asm/thread_info.h      |  2 --
 arch/arc/include/asm/thread_info.h        |  2 --
 arch/arm/include/asm/thread_info.h        |  6 ------
 arch/arm64/include/asm/thread_info.h      |  6 ------
 arch/avr32/include/asm/thread_info.h      |  2 --
 arch/blackfin/include/asm/thread_info.h   |  2 --
 arch/c6x/include/asm/thread_info.h        |  2 --
 arch/cris/include/asm/thread_info.h       |  2 --
 arch/frv/include/asm/thread_info.h        |  2 --
 arch/hexagon/include/asm/thread_info.h    |  4 ----
 arch/ia64/include/asm/thread_info.h       |  3 ---
 arch/m32r/include/asm/thread_info.h       |  2 --
 arch/m68k/include/asm/thread_info.h       |  2 --
 arch/metag/include/asm/thread_info.h      |  2 --
 arch/microblaze/include/asm/thread_info.h |  2 --
 arch/mips/include/asm/thread_info.h       |  2 --
 arch/mn10300/include/asm/thread_info.h    |  2 --
 arch/parisc/include/asm/thread_info.h     |  3 ---
 arch/powerpc/include/asm/thread_info.h    |  2 --
 arch/s390/include/asm/thread_info.h       |  2 --
 arch/score/include/asm/thread_info.h      |  2 --
 arch/sh/include/asm/thread_info.h         |  2 --
 arch/sh/kernel/entry-common.S             |  6 ++----
 arch/sparc/include/asm/thread_info_32.h   |  2 --
 arch/sparc/include/asm/thread_info_64.h   |  2 --
 arch/tile/include/asm/thread_info.h       |  2 --
 arch/um/include/asm/thread_info.h         |  2 --
 arch/unicore32/include/asm/thread_info.h  |  6 ------
 arch/x86/include/asm/thread_info.h        |  2 --
 arch/xtensa/include/asm/thread_info.h     |  2 --
 include/linux/preempt_mask.h              | 15 +++++----------
 include/linux/sched.h                     |  2 +-
 32 files changed, 8 insertions(+), 89 deletions(-)

diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 52cd2a4..453597b 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -58,8 +58,6 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define THREAD_SIZE_ORDER 1
 #define THREAD_SIZE (2*PAGE_SIZE)
 
-#define PREEMPT_ACTIVE		0x40000000
-
 /*
  * Thread information flags:
  * - these are process state flags and used from assembly
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 2d50a4c..45be216 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -80,8 +80,6 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE      0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index df5e13d..71a06b2 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -141,12 +141,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #endif
 
 /*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring.  See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE	0x40000000
-
-/*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
  *  TIF_SYSCAL_AUDIT	- syscall auditing active
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 23a3c47..720e70b 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -89,12 +89,6 @@ static inline struct thread_info *current_thread_info(void)
 #endif
 
 /*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring.  See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE	0x40000000
-
-/*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
  *  TIF_SIGPENDING	- signal pending
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index 6dc62e1..a978f3f 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -66,8 +66,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x40000000
-
 /*
  * Thread information flags
  * - these are process state flags that various assembly files may need to access
diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h
index 3894005..55f473b 100644
--- a/arch/blackfin/include/asm/thread_info.h
+++ b/arch/blackfin/include/asm/thread_info.h
@@ -88,8 +88,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TI_CPU		12
 #define TI_PREEMPT	16
 
-#define	PREEMPT_ACTIVE	0x4000000
-
 /*
  * thread information flag bit numbers
  */
diff --git a/arch/c6x/include/asm/thread_info.h b/arch/c6x/include/asm/thread_info.h
index 4c8dc56..d4e9ef8 100644
--- a/arch/c6x/include/asm/thread_info.h
+++ b/arch/c6x/include/asm/thread_info.h
@@ -84,8 +84,6 @@ struct thread_info *current_thread_info(void)
 #define put_thread_info(ti)	put_task_struct((ti)->task)
 #endif /* __ASSEMBLY__ */
 
-#define	PREEMPT_ACTIVE	0x10000000
-
 /*
  * thread information flag bit numbers
  * - pending work-to-be-done flags are in LSW
diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h
index 07c8c40..55dede1 100644
--- a/arch/cris/include/asm/thread_info.h
+++ b/arch/cris/include/asm/thread_info.h
@@ -44,8 +44,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h
index bebd7ea..af29e17 100644
--- a/arch/frv/include/asm/thread_info.h
+++ b/arch/frv/include/asm/thread_info.h
@@ -52,8 +52,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
diff --git a/arch/hexagon/include/asm/thread_info.h b/arch/hexagon/include/asm/thread_info.h
index f7c3240..a59dad3 100644
--- a/arch/hexagon/include/asm/thread_info.h
+++ b/arch/hexagon/include/asm/thread_info.h
@@ -73,10 +73,6 @@ struct thread_info {
 
 #endif  /* __ASSEMBLY__  */
 
-/*  looks like "linux/hardirq.h" uses this.  */
-
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifndef __ASSEMBLY__
 
 #define INIT_THREAD_INFO(tsk)                   \
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index cade13d..5957cf6 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -11,9 +11,6 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 
-#define PREEMPT_ACTIVE_BIT 30
-#define PREEMPT_ACTIVE	(1 << PREEMPT_ACTIVE_BIT)
-
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h
index c074f4c..0017170 100644
--- a/arch/m32r/include/asm/thread_info.h
+++ b/arch/m32r/include/asm/thread_info.h
@@ -53,8 +53,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #define THREAD_SIZE		(PAGE_SIZE << 1)
 #define THREAD_SIZE_ORDER	1
 /*
diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h
index 126131f..21a4784 100644
--- a/arch/m68k/include/asm/thread_info.h
+++ b/arch/m68k/include/asm/thread_info.h
@@ -35,8 +35,6 @@ struct thread_info {
 };
 #endif /* __ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x4000000
-
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h
index 7c4a330..b19e9c5 100644
--- a/arch/metag/include/asm/thread_info.h
+++ b/arch/metag/include/asm/thread_info.h
@@ -46,8 +46,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifdef CONFIG_4KSTACKS
 #define THREAD_SHIFT		12
 #else
diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h
index de26ea6..8c9d365 100644
--- a/arch/microblaze/include/asm/thread_info.h
+++ b/arch/microblaze/include/asm/thread_info.h
@@ -106,8 +106,6 @@ static inline struct thread_info *current_thread_info(void)
 /* thread information allocation */
 #endif /* __ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index f9b24bf..4f58ef6 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -92,8 +92,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #define STACK_WARN	(THREAD_SIZE / 8)
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h
index 224b426..bf280ea 100644
--- a/arch/mn10300/include/asm/thread_info.h
+++ b/arch/mn10300/include/asm/thread_info.h
@@ -16,8 +16,6 @@
 
 #include <asm/page.h>
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifdef CONFIG_4KSTACKS
 #define THREAD_SIZE		(4096)
 #define THREAD_SIZE_ORDER	(0)
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index bc7cf12..d5f97ea 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -46,9 +46,6 @@ struct thread_info {
 #define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define THREAD_SHIFT            (PAGE_SHIFT + THREAD_SIZE_ORDER)
 
-#define PREEMPT_ACTIVE_BIT	28
-#define PREEMPT_ACTIVE		(1 << PREEMPT_ACTIVE_BIT)
-
 /*
  * thread information flags
  */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index ba7b197..8fd6cf6 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -82,8 +82,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif /* __ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flag bit numbers
  */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index eb5f64d..10e0fcd 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -111,6 +111,4 @@ static inline struct thread_info *current_thread_info(void)
 #define is_32bit_task()		(1)
 #endif
 
-#define PREEMPT_ACTIVE		0x4000000
-
 #endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/score/include/asm/thread_info.h b/arch/score/include/asm/thread_info.h
index 1425cc0..656b7ad 100644
--- a/arch/score/include/asm/thread_info.h
+++ b/arch/score/include/asm/thread_info.h
@@ -72,8 +72,6 @@ register struct thread_info *__current_thread_info __asm__("r28");
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index 45a9366..ad27ffa 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -41,8 +41,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #if defined(CONFIG_4KSTACKS)
 #define THREAD_SHIFT	12
 #else
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index 9b6e4be..ca46834 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -108,7 +108,7 @@ need_resched:
 	and	#(0xf0>>1), r0		! interrupts off (exception path)?
 	cmp/eq	#(0xf0>>1), r0
 	bt	noresched
-	mov.l	3f, r0
+	mov.l	1f, r0
 	jsr	@r0			! call preempt_schedule_irq
 	 nop
 	bra	need_resched
@@ -119,9 +119,7 @@ noresched:
 	 nop
 
 	.align 2
-1:	.long	PREEMPT_ACTIVE
-2:	.long	schedule
-3:	.long	preempt_schedule_irq
+1:	.long	preempt_schedule_irq
 #endif
 
 ENTRY(resume_userspace)
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index dd38075..96efa7a 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -105,8 +105,6 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define TI_W_SAVED	0x250
 /* #define TI_RESTART_BLOCK 0x25n */ /* Nobody cares */
 
-#define PREEMPT_ACTIVE		0x4000000
-
 /*
  * thread information flag bit numbers
  */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index d5e5042..2b4e17b 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -111,8 +111,6 @@ struct thread_info {
 #define THREAD_SHIFT PAGE_SHIFT
 #endif /* PAGE_SHIFT == 13 */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index b8aa6df..729aa10 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -113,8 +113,6 @@ extern void _cpu_idle(void);
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * Thread information flags that various assembly files may need to access.
  * Keep flags accessed frequently in low bits, particular since it makes
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index 2c8eeb2..1c5b2a8 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -60,8 +60,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_SIGPENDING		1	/* signal pending */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
diff --git a/arch/unicore32/include/asm/thread_info.h b/arch/unicore32/include/asm/thread_info.h
index 818b4a1..af36d8e 100644
--- a/arch/unicore32/include/asm/thread_info.h
+++ b/arch/unicore32/include/asm/thread_info.h
@@ -118,12 +118,6 @@ static inline struct thread_info *current_thread_info(void)
 #endif
 
 /*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring.  See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE	0x40000000
-
-/*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
  *  TIF_SIGPENDING	- signal pending
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c46a46b..3ba3de4 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -153,8 +153,6 @@ struct thread_info {
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifdef CONFIG_X86_32
 
 #define STACK_WARN	(THREAD_SIZE/8)
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index 9481004..470153e 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -76,8 +76,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
index 810d7e3..d169820 100644
--- a/include/linux/preempt_mask.h
+++ b/include/linux/preempt_mask.h
@@ -17,10 +17,11 @@
  * there are a few palaeontologic drivers which reenable interrupts in
  * the handler, so we need more than one bit here.
  *
- * PREEMPT_MASK: 0x000000ff
- * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x000f0000
- *     NMI_MASK: 0x00100000
+ * PREEMPT_MASK:	0x000000ff
+ * SOFTIRQ_MASK:	0x0000ff00
+ * HARDIRQ_MASK:	0x000f0000
+ *     NMI_MASK:	0x00100000
+ * PREEMPT_ACTIVE:	0x00200000
  */
 #define PREEMPT_BITS	8
 #define SOFTIRQ_BITS	8
@@ -46,15 +47,9 @@
 
 #define SOFTIRQ_DISABLE_OFFSET	(2 * SOFTIRQ_OFFSET)
 
-#ifndef PREEMPT_ACTIVE
 #define PREEMPT_ACTIVE_BITS	1
 #define PREEMPT_ACTIVE_SHIFT	(NMI_SHIFT + NMI_BITS)
 #define PREEMPT_ACTIVE	(__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
-#endif
-
-#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
-#error PREEMPT_ACTIVE is too low!
-#endif
 
 #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
 #define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 045b0d2..55080df 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,7 +22,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
-#include <linux/preempt.h>
+#include <linux/preempt_mask.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
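
(Arithmetic check: with the generic layout, PREEMPT_ACTIVE_SHIFT =
NMI_SHIFT + NMI_BITS = (8 + 8 + 4) + 1 = 21, so every architecture now
gets the same bit. A hypothetical compile-time assertion of that value:

/* generic PREEMPT_ACTIVE, one bit above NMI -- sketch */
#define PREEMPT_ACTIVE_SHIFT	21
#define PREEMPT_ACTIVE		(1UL << PREEMPT_ACTIVE_SHIFT)

_Static_assert(PREEMPT_ACTIVE == 0x00200000UL,
	       "matches the layout comment in preempt_mask.h");

This single definition replaces per-arch values that ranged from
0x4000000 to 0x40000000.)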

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* Re: [patch 4/6] ia64: Use preempt_schedule_irq
  2013-09-17 18:53   ` [patch 4/6] ia64: " Thomas Gleixner
  2013-11-13 19:43     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
@ 2013-11-20 19:59     ` Tony Luck
  2013-11-20 20:57       ` Thomas Gleixner
  1 sibling, 1 reply; 80+ messages in thread
From: Tony Luck @ 2013-11-20 19:59 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, Peter Zijlstra, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Fenghua Yu

On Tue, Sep 17, 2013 at 11:53 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> Use the proper core function instead of fiddling with PREEMPT_ACTIVE
> and enable/disable interrupts in the low level code.
>
> Index: linux-2.6/arch/ia64/kernel/entry.S
> ===================================================================
> --- linux-2.6.orig/arch/ia64/kernel/entry.S
> +++ linux-2.6/arch/ia64/kernel/entry.S
...
> -       br.call.spnt.many rp=schedule
> +       br.call.spnt.many rp=preempt_schedule_irq

This just hit mainline ... and I'm getting

arch/ia64/kernel/built-in.o: In function `skip_rbs_switch':
(.text+0xef2): undefined reference to `preempt_schedule_irq'
make: *** [vmlinux] Error 1

Probably because I have CONFIG_PREEMPT=n, and there seems only
to be a definition for this function in kernel/sched/core.c when it is =y.

Do we need a stub for the =n case:

asmlinkage void __sched preempt_schedule_irq(void)
{
           schedule();
}

Or is life more complicated than that?

-Tony

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 4/6] ia64: Use preempt_schedule_irq
  2013-11-20 19:59     ` [patch 4/6] " Tony Luck
@ 2013-11-20 20:57       ` Thomas Gleixner
  2013-11-21 11:41         ` Thomas Gleixner
  0 siblings, 1 reply; 80+ messages in thread
From: Thomas Gleixner @ 2013-11-20 20:57 UTC (permalink / raw)
  To: Tony Luck
  Cc: LKML, Peter Zijlstra, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Fenghua Yu



On Wed, 20 Nov 2013, Tony Luck wrote:

> On Tue, Sep 17, 2013 at 11:53 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> > Use the proper core function instead of fiddling with PREEMPT_ACTIVE
> > and enable/disable interrupts in the low level code.
> >
> > Index: linux-2.6/arch/ia64/kernel/entry.S
> > ===================================================================
> > --- linux-2.6.orig/arch/ia64/kernel/entry.S
> > +++ linux-2.6/arch/ia64/kernel/entry.S
> ...
> > -       br.call.spnt.many rp=schedule
> > +       br.call.spnt.many rp=preempt_schedule_irq
> 
> This just hit mainline ... and I'm getting
> 
> arch/ia64/kernel/built-in.o: In function `skip_rbs_switch':
> (.text+0xef2): undefined reference to `preempt_schedule_irq'
> make: *** [vmlinux] Error 1

Hrmpf.
 
> Probably because I have CONFIG_PREEMPT=n, and there seems only
> to be a definition for this function in kernel/sched/core.c when it is =y.

Bah. I probably compiled that with preempt=y ...
 
> Do we need a stub for the =n case:
> 
> asmlinkage void __sched preempt_schedule_irq(void)
> {
>            schedule();
> }
> 
> Or is life more complicated than that?

Hmm, I think I fubared that and you decided to ignore my patch :) 

Let me look at it tomorrow morning with full awake brain cells.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 4/6] ia64: Use preempt_schedule_irq
  2013-11-20 20:57       ` Thomas Gleixner
@ 2013-11-21 11:41         ` Thomas Gleixner
  2013-11-21 12:39           ` Frederic Weisbecker
                             ` (2 more replies)
  0 siblings, 3 replies; 80+ messages in thread
From: Thomas Gleixner @ 2013-11-21 11:41 UTC (permalink / raw)
  To: Tony Luck
  Cc: LKML, Peter Zijlstra, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Fenghua Yu

On Wed, 20 Nov 2013, Thomas Gleixner wrote:
> On Wed, 20 Nov 2013, Tony Luck wrote:
> > asmlinkage void __sched preempt_schedule_irq(void)
> > {
> >            schedule();
> > }
> > 
> > Or is life more complicated than that?
> 
> Hmm, I think I fubared that and you decided to ignore my patch :) 
> 
> Let me look at it tomorrow morning with full awake brain cells.

Ok, wrapped my brain around it. I tripped over the magic asm foo which
has a single need_resched check and schedule point for both sys call
return and interrupt return.

So you need the preempt_schedule_irq() for kernel preemption from
interrupt return, while on a normal syscall preemption a schedule would
be sufficient. But using preempt_schedule_irq() is not harmful here in
any way. It just sets the preempt_active bit also in cases where it
would not be required. 

Even on preempt=n kernels adding the preempt_active bit is completely
harmless. So instead of having an extra function, moving the existing
one out of the ifdef PREEMPT looks like the sanest thing to do.

Peter, Ingo ?

Thanks,

	tglx
------
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c180860..0c59642 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2660,6 +2660,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
 	} while (need_resched());
 }
 EXPORT_SYMBOL(preempt_schedule);
+#endif /* CONFIG_PREEMPT */
 
 /*
  * this is the entry point to schedule() from kernel preemption
@@ -2693,8 +2694,6 @@ asmlinkage void __sched preempt_schedule_irq(void)
 	exception_exit(prev_state);
 }
 
-#endif /* CONFIG_PREEMPT */
-
 int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {





^ permalink raw reply related	[flat|nested] 80+ messages in thread

* Re: [patch 4/6] ia64: Use preempt_schedule_irq
  2013-11-21 11:41         ` Thomas Gleixner
@ 2013-11-21 12:39           ` Frederic Weisbecker
  2013-11-21 13:06           ` Peter Zijlstra
  2013-11-27 14:07           ` [tip:sched/urgent] sched: Expose preempt_schedule_irq() tip-bot for Thomas Gleixner
  2 siblings, 0 replies; 80+ messages in thread
From: Frederic Weisbecker @ 2013-11-21 12:39 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Tony Luck, LKML, Peter Zijlstra, Ingo Molnar, linux-arch,
	Linus Torvalds, Andi Kleen, Peter Anvin, Mike Galbraith,
	Arjan van de Ven, Fenghua Yu

On Thu, Nov 21, 2013 at 12:41:44PM +0100, Thomas Gleixner wrote:
> On Wed, 20 Nov 2013, Thomas Gleixner wrote:
> > On Wed, 20 Nov 2013, Tony Luck wrote:
> > > asmlinkage void __sched preempt_schedule_irq(void)
> > > {
> > >            schedule();
> > > }
> > > 
> > > Or is life more complicated than that?
> > 
> > Hmm, I think I fubared that and you decided to ignore my patch :) 
> > 
> > Let me look at it tomorrow morning with full awake brain cells.
> 
> Ok, wrapped my brain around it. I tripped over the magic asm foo which
> has a single need_resched check and schedule point for both sys call
> return and interrupt return.
> 
> So you need the preempt_schedule_irq() for kernel preemption from
> interrupt return, while on a normal syscall preemption a schedule would
> be sufficient. But using preempt_schedule_irq() is not harmful here in
> any way. It just sets the preempt_active bit also in cases where it
> would not be required. 
> 
> Even on preempt=n kernels adding the preempt_active bit is completely
> harmless. So instead of having an extra function, moving the existing
> one out of the ifdef PREEMPT looks like the sanest thing to do.
> 
> Peter, Ingo ?

So that's because the schedule point is on a common user and kernel return path?

If necessary, why not have a SCHEDULE_IRQ macro in ia64 that maps to either schedule() or
preempt_schedule_irq() instead?

Unless that problem happens elsewhere as well?
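
(The alternative Frederic sketches would look something like this --
hypothetical, never merged:

/* resolve the schedule point at build time in the ia64 entry code */
#ifdef CONFIG_PREEMPT
# define SCHEDULE_IRQ	preempt_schedule_irq
#else
# define SCHEDULE_IRQ	schedule
#endif

Peter's reply below explains why plain schedule() would still be the
wrong tool on a path that runs with interrupts disabled.)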

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 4/6] ia64: Use preempt_schedule_irq
  2013-11-21 11:41         ` Thomas Gleixner
  2013-11-21 12:39           ` Frederic Weisbecker
@ 2013-11-21 13:06           ` Peter Zijlstra
  2013-11-21 13:30             ` Thomas Gleixner
  2013-11-27 14:07           ` [tip:sched/urgent] sched: Expose preempt_schedule_irq() tip-bot for Thomas Gleixner
  2 siblings, 1 reply; 80+ messages in thread
From: Peter Zijlstra @ 2013-11-21 13:06 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Tony Luck, LKML, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Fenghua Yu

On Thu, Nov 21, 2013 at 12:41:44PM +0100, Thomas Gleixner wrote:
> On Wed, 20 Nov 2013, Thomas Gleixner wrote:
> > On Wed, 20 Nov 2013, Tony Luck wrote:
> > > asmlinkage void __sched preempt_schedule_irq(void)
> > > {
> > >            schedule();
> > > }
> > > 
> > > Or is life more complicated than that?
> > 
> > Hmm, I think I fubared that and you decided to ignore my patch :) 
> > 
> > Let me look at it tomorrow morning with full awake brain cells.
> 
> Ok, wrapped my brain around it. I tripped over the magic asm foo which
> has a single need_resched check and schedule point for both sys call
> return and interrupt return.
> 
> So you need the preempt_schedule_irq() for kernel preemption from
> interrupt return, while on a normal syscall preemption a schedule would
> be sufficient. But using preempt_schedule_irq() is not harmful here in
> any way. It just sets the preempt_active bit also in cases where it
> would not be required. 
> 
> Even on preempt=n kernels adding the preempt_active bit is completely
> harmless. So instead of having an extra function, moving the existing
> one out of the ifdef PREEMPT looks like the sanest thing to do.
> 
> Peter, Ingo ?

Uhm, preempt_schedule_irq() assumes interrupts are disabled and
explicitly returns with interrupts disabled again. Does the ia64
callsite conform?

If so, schedule() would actually be actively wrong, because that will
whinge when called with interrupts disabled, and will return with
interrupts enabled.

Anyway, I don't object to the patch per se, but it might bloat a few
!ia64 kernels for having to carry the extra text.
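
(The contract in code form -- a hypothetical caller mirroring what the
entry assembly must guarantee:

WARN_ON_ONCE(!irqs_disabled());		/* IRQs off on entry */
if (need_resched())
	preempt_schedule_irq();		/* enables IRQs only internally */
WARN_ON_ONCE(!irqs_disabled());		/* and off again on return */

whereas schedule() expects to be called with interrupts enabled and
returns with them enabled.)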

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 4/6] ia64: Use preempt_schedule_irq
  2013-11-21 13:06           ` Peter Zijlstra
@ 2013-11-21 13:30             ` Thomas Gleixner
  2013-11-21 18:57               ` Tony Luck
  0 siblings, 1 reply; 80+ messages in thread
From: Thomas Gleixner @ 2013-11-21 13:30 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Tony Luck, LKML, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Fenghua Yu

On Thu, 21 Nov 2013, Peter Zijlstra wrote:

> On Thu, Nov 21, 2013 at 12:41:44PM +0100, Thomas Gleixner wrote:
> > On Wed, 20 Nov 2013, Thomas Gleixner wrote:
> > > On Wed, 20 Nov 2013, Tony Luck wrote:
> > > > asmlinkage void __sched preempt_schedule_irq(void)
> > > > {
> > > >            schedule();
> > > > }
> > > > 
> > > > Or is life more complicated than that?
> > > 
> > > Hmm, I think I fubared that and you decided to ignore my patch :) 
> > > 
> > > Let me look at it tomorrow morning with full awake brain cells.
> > 
> > Ok, wrapped my brain around it. I tripped over the magic asm foo which
> > has a single need_resched check and schedule point for both sys call
> > return and interrupt return.
> > 
> > So you need the preempt_schedule_irq() for kernel preemption from
> > interrupt return, while on a normal syscall preemption a schedule would
> > be sufficient. But using preempt_schedule_irq() is not harmful here in
> > any way. It just sets the preempt_active bit also in cases where it
> > would not be required. 
> > 
> > Even on preempt=n kernels adding the preempt_active bit is completely
> > harmless. So instead of having an extra function, moving the existing
> > one out of the ifdef PREEMPT looks like the sanest thing to do.
> > 
> > Peter, Ingo ?
> 
> Uhm, preempt_schedule_irq() assumes interrupts are disabled and
> explicitly returns with interrupts disabled again. Does the ia64
> callsite conform?

Yep.
 
> If so, schedule() would actually be actively wrong, because that will
> whinge when called with interrupts disabled, and will return with
> interrupts enabled.
> 
> Anyway, I don't object to the patch per se, but it might bloat a few
> !ia64 kernels for having to carry the extra text.
 
Well, we could get rid of quite some other sti/schedule/cli asm magic
all over the archs.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 4/6] ia64: Use preempt_schedule_irq
  2013-11-21 13:30             ` Thomas Gleixner
@ 2013-11-21 18:57               ` Tony Luck
  2013-11-26 18:37                 ` Tony Luck
  0 siblings, 1 reply; 80+ messages in thread
From: Tony Luck @ 2013-11-21 18:57 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, LKML, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Fenghua Yu

On Thu, Nov 21, 2013 at 5:30 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> On Thu, 21 Nov 2013, Peter Zijlstra wrote:
>> Anyway, I don't object to the patch per se, but it might bloat a few
>> !ia64 kernels for having to carry the extra text.

I tried it out ... it does fix both build & boot for ia64 with
PREEMPT=n and PREEMPT=y

Tested-by: Tony Luck <tony.luck@intel.com>

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 4/6] ia64: Use preempt_schedule_irq
  2013-11-21 18:57               ` Tony Luck
@ 2013-11-26 18:37                 ` Tony Luck
  2013-11-26 18:58                   ` Peter Zijlstra
  0 siblings, 1 reply; 80+ messages in thread
From: Tony Luck @ 2013-11-26 18:37 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, LKML, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Fenghua Yu

On Thu, Nov 21, 2013 at 10:57 AM, Tony Luck <tony.luck@gmail.com> wrote:
> On Thu, Nov 21, 2013 at 5:30 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
>> On Thu, 21 Nov 2013, Peter Zijlstra wrote:
>>> Anyway, I don't object to the patch per se, but it might bloat a few
>>> !ia64 kernels for having to carry the extra text.
>
> I tried it out ... it does fix both build & boot for ia64 with
> PREEMPT=n and PREEMPT=y
>
> Tested-by: Tony Luck <tony.luck@intel.com>

So - are we going with this patch?  Or still thinking of something better?

-Tony

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 4/6] ia64: Use preempt_schedule_irq
  2013-11-26 18:37                 ` Tony Luck
@ 2013-11-26 18:58                   ` Peter Zijlstra
  2013-11-27 13:36                     ` Ingo Molnar
  0 siblings, 1 reply; 80+ messages in thread
From: Peter Zijlstra @ 2013-11-26 18:58 UTC (permalink / raw)
  To: Tony Luck
  Cc: Thomas Gleixner, LKML, Ingo Molnar, linux-arch, Linus Torvalds,
	Andi Kleen, Peter Anvin, Mike Galbraith, Arjan van de Ven,
	Frederic Weisbecker, Fenghua Yu

On Tue, Nov 26, 2013 at 10:37:59AM -0800, Tony Luck wrote:
> On Thu, Nov 21, 2013 at 10:57 AM, Tony Luck <tony.luck@gmail.com> wrote:
> > On Thu, Nov 21, 2013 at 5:30 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> >> On Thu, 21 Nov 2013, Peter Zijlstra wrote:
> >>> Anyway, I don't object to the patch per se, but it might bloat a few
> >>> !ia64 kernels for having to carry the extra text.
> >
> > I tried it out ... it does fix both build & boot for ia64 with
> > PREEMPT=n and PREEMPT=y
> >
> > Tested-by: Tony Luck <tony.luck@intel.com>
> 
> So - are we going with this patch?  Or still thinking of something better?

I queued it; let me prod Ingo (again) into actually merging it.

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch 4/6] ia64: Use preempt_schedule_irq
  2013-11-26 18:58                   ` Peter Zijlstra
@ 2013-11-27 13:36                     ` Ingo Molnar
  0 siblings, 0 replies; 80+ messages in thread
From: Ingo Molnar @ 2013-11-27 13:36 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Tony Luck, Thomas Gleixner, LKML, Ingo Molnar, linux-arch,
	Linus Torvalds, Andi Kleen, Peter Anvin, Mike Galbraith,
	Arjan van de Ven, Frederic Weisbecker, Fenghua Yu


* Peter Zijlstra <peterz@infradead.org> wrote:

> On Tue, Nov 26, 2013 at 10:37:59AM -0800, Tony Luck wrote:
> > On Thu, Nov 21, 2013 at 10:57 AM, Tony Luck <tony.luck@gmail.com> wrote:
> > > On Thu, Nov 21, 2013 at 5:30 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> > >> On Thu, 21 Nov 2013, Peter Zijlstra wrote:
> > >>> Anyway, I don't object to the patch per se, but it might bloat a few
> > >>> !ia64 kernels for having to carry the extra text.
> > >
> > > I tried it out ... it does fix both build & boot for ia64 with
> > > PREEMPT=n and PREEMPT=y
> > >
> > > Tested-by: Tony Luck <tony.luck@intel.com>
> > 
> > So - are we going with this patch?  Or still thinking of something better?
> 
> I queued it; let me prod Ingo (again) into actually merging it.

I merged it from you today, will try to get it to Linus later today 
(hopefully).

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [tip:sched/urgent] sched: Expose preempt_schedule_irq()
  2013-11-21 11:41         ` Thomas Gleixner
  2013-11-21 12:39           ` Frederic Weisbecker
  2013-11-21 13:06           ` Peter Zijlstra
@ 2013-11-27 14:07           ` tip-bot for Thomas Gleixner
  2 siblings, 0 replies; 80+ messages in thread
From: tip-bot for Thomas Gleixner @ 2013-11-27 14:07 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, peterz, tony.luck, tglx

Commit-ID:  32e475d76a3e40879cd9ee4f69b19615062280d7
Gitweb:     http://git.kernel.org/tip/32e475d76a3e40879cd9ee4f69b19615062280d7
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Thu, 21 Nov 2013 12:41:44 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 27 Nov 2013 11:04:53 +0100

sched: Expose preempt_schedule_irq()

Tony reported that aa0d53260596 ("ia64: Use preempt_schedule_irq")
broke PREEMPT=n builds on ia64.

Ok, wrapped my brain around it. I tripped over the magic asm foo which
has a single need_resched check and schedule point for both sys call
return and interrupt return.

So you need the preempt_schedule_irq() for kernel preemption from
interrupt return, while on a normal syscall preemption a schedule would
be sufficient. But using preempt_schedule_irq() is not harmful here in
any way. It just sets the preempt_active bit also in cases where it
would not be required.

Even on preempt=n kernels adding the preempt_active bit is completely
harmless. So instead of having an extra function, moving the existing
one out of the ifdef PREEMPT looks like the sanest thing to do.

It would also allow getting rid of various other sti/schedule/cli asm
magic in other archs.

Reported-and-Tested-by: Tony Luck <tony.luck@gmail.com>
Fixes: aa0d53260596 ("ia64: Use preempt_schedule_irq")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[slightly edited Changelog]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1311211230030.30673@ionos.tec.linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 718730d..e85cda2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2660,6 +2660,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
 	} while (need_resched());
 }
 EXPORT_SYMBOL(preempt_schedule);
+#endif /* CONFIG_PREEMPT */
 
 /*
  * this is the entry point to schedule() from kernel preemption
@@ -2693,8 +2694,6 @@ asmlinkage void __sched preempt_schedule_irq(void)
 	exception_exit(prev_state);
 }
 
-#endif /* CONFIG_PREEMPT */
-
 int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {

^ permalink raw reply related	[flat|nested] 80+ messages in thread

end of thread, other threads:[~2013-11-27 14:08 UTC | newest]

Thread overview: 80+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-17  9:10 [PATCH 00/11] preempt_count rework -v3 Peter Zijlstra
2013-09-17  9:10 ` [PATCH 01/11] x86: Use asm goto to implement better modify_and_test() functions Peter Zijlstra
2013-09-18 18:44   ` Linus Torvalds
     [not found]     ` <4ec87843-c29a-401a-a54f-2cd4d61fba62@email.android.com>
2013-09-19  8:31       ` Andi Kleen
2013-09-19  9:39         ` Ingo Molnar
2013-09-20  4:43         ` H. Peter Anvin
2013-09-17  9:10 ` [PATCH 02/11] sched, rcu: Make RCU use resched_cpu() Peter Zijlstra
2013-09-17 14:40   ` Peter Zijlstra
2013-09-23 16:55     ` Paul E. McKenney
2013-09-23 21:18       ` Paul E. McKenney
2013-09-24  8:07         ` Peter Zijlstra
2013-09-24 13:37           ` Paul E. McKenney
2013-09-17  9:10 ` [PATCH 03/11] sched: Remove {set,clear}_need_resched Peter Zijlstra
2013-09-17  9:10 ` [PATCH 04/11] sched, idle: Fix the idle polling state logic Peter Zijlstra
2013-09-17  9:10 ` [PATCH 05/11] sched: Introduce preempt_count accessor functions Peter Zijlstra
2013-09-17  9:10 ` [PATCH 06/11] sched: Add NEED_RESCHED to the preempt_count Peter Zijlstra
2013-09-17  9:10 ` [PATCH 07/11] sched, arch: Create asm/preempt.h Peter Zijlstra
2013-09-17  9:10 ` [PATCH 08/11] sched: Create more preempt_count accessors Peter Zijlstra
2013-09-17  9:10 ` [PATCH 09/11] sched: Extract the basic add/sub preempt_count modifiers Peter Zijlstra
2013-09-17  9:10 ` [PATCH 10/11] sched, x86: Provide a per-cpu preempt_count implementation Peter Zijlstra
2013-09-17  9:10 ` [PATCH 11/11] sched, x86: Optimize the preempt_schedule() call Peter Zijlstra
2013-09-17 20:23   ` Peter Zijlstra
2013-09-17 10:53 ` [PATCH 00/11] preempt_count rework -v3 Ingo Molnar
2013-09-17 11:22   ` Peter Zijlstra
2013-09-17 18:53 ` [patch 0/6] Make all preempt_count related constants generic Thomas Gleixner
2013-09-17 18:53   ` [patch 1/6] hardirq: Make hardirq bits generic Thomas Gleixner
2013-09-17 20:00     ` Geert Uytterhoeven
2013-09-17 21:24       ` Thomas Gleixner
2013-09-18 14:06         ` Thomas Gleixner
2013-09-19 15:14           ` Thomas Gleixner
2013-09-19 17:02             ` Andreas Schwab
2013-09-19 18:19               ` Geert Uytterhoeven
2013-09-20  9:26                 ` Thomas Gleixner
2013-11-04 12:06                 ` Thomas Gleixner
2013-11-04 19:44                   ` Geert Uytterhoeven
2013-11-04 19:44                     ` Geert Uytterhoeven
2013-11-06 17:23                     ` Thomas Gleixner
2013-11-07 14:12                       ` Geert Uytterhoeven
2013-11-07 16:39                         ` Thomas Gleixner
2013-11-10  8:49                           ` Michael Schmitz
2013-11-10  9:12                             ` Geert Uytterhoeven
2013-11-11 14:11                               ` Thomas Gleixner
2013-11-11 19:34                                 ` Thomas Gleixner
2013-11-11 20:52                                   ` Thomas Gleixner
2013-11-12  6:56                                     ` Michael Schmitz
2013-11-12  6:56                                       ` Michael Schmitz
2013-11-12  8:44                                       ` schmitz
2013-11-12  8:44                                         ` schmitz
2013-11-12 15:08                                     ` Geert Uytterhoeven
2013-11-13 19:42                                     ` [tip:irq/urgent] m68k: Simplify low level interrupt handling code tip-bot for Thomas Gleixner
2013-11-12 14:09                                   ` [patch 1/6] hardirq: Make hardirq bits generic Geert Uytterhoeven
2013-11-11 19:42                                 ` Andreas Schwab
2013-11-12  9:18                                   ` Thomas Gleixner
2013-11-13 19:42     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
2013-09-17 18:53   ` [patch 2/6] h8300: Use schedule_preempt_irq Thomas Gleixner
2013-09-20 17:41     ` Guenter Roeck
2013-09-20 21:46       ` Thomas Gleixner
2013-09-17 18:53   ` [patch 3/6] m32r: Use preempt_schedule_irq Thomas Gleixner
2013-11-13 19:42     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
2013-09-17 18:53   ` [patch 5/6] sparc: " Thomas Gleixner
2013-09-17 22:54     ` David Miller
2013-09-17 23:23       ` Thomas Gleixner
2013-09-18  0:12         ` David Miller
2013-11-13 19:43     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
2013-09-17 18:53   ` [patch 4/6] ia64: " Thomas Gleixner
2013-11-13 19:43     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
2013-11-20 19:59     ` [patch 4/6] " Tony Luck
2013-11-20 20:57       ` Thomas Gleixner
2013-11-21 11:41         ` Thomas Gleixner
2013-11-21 12:39           ` Frederic Weisbecker
2013-11-21 13:06           ` Peter Zijlstra
2013-11-21 13:30             ` Thomas Gleixner
2013-11-21 18:57               ` Tony Luck
2013-11-26 18:37                 ` Tony Luck
2013-11-26 18:58                   ` Peter Zijlstra
2013-11-27 13:36                     ` Ingo Molnar
2013-11-27 14:07           ` [tip:sched/urgent] sched: Expose preempt_schedule_irq() tip-bot for Thomas Gleixner
2013-09-17 18:53   ` [patch 6/6] preempt: Make PREEMPT_ACTIVE generic Thomas Gleixner
2013-09-18 10:48     ` Peter Zijlstra
2013-11-13 19:43     ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
