From: Peter Zijlstra <peterz@infradead.org>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Ingo Molnar <mingo@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>, Peter Anvin <hpa@zytor.com>,
	Mike Galbraith <bitbucket@online.de>,
	Thomas Gleixner <tglx@linutronix.de>,
	Arjan van de Ven <arjan@linux.intel.com>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Peter Zijlstra <peterz@infradead.org>
Subject: [PATCH 04/11] sched, idle: Fix the idle polling state logic
Date: Tue, 17 Sep 2013 11:10:50 +0200	[thread overview]
Message-ID: <20130917091143.579449546@infradead.org> (raw)
In-Reply-To: <20130917082838.218329307@infradead.org>

[-- Attachment #1: peterz-idle-need_resched.patch --]
[-- Type: text/plain, Size: 8388 bytes --]

Mike reported that commit 7d1a9417 ("x86: Use generic idle loop")
regressed several workloads and caused excessive reschedule
interrupts.

The patch in question failed to notice that the x86 code had an
inverted sense of the polling state versus the new generic code (x86:
default polling, generic: default !polling).
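
The polling state matters because the scheduler only sends a reschedule
IPI when the target CPU does not advertise that it is polling on
TIF_NEED_RESCHED; a simplified sketch of the resched_task() logic of
that era (not the exact code):

	set_tsk_need_resched(p);
	smp_mb();
	if (!tsk_is_polling(p))
		smp_send_reschedule(cpu);

With the mwait idle CPUs no longer advertising polling, every remote
wakeup fell back to an IPI, hence the excessive reschedule interrupts.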

Fix the two prominent x86 mwait-based idle drivers and introduce a few
new generic polling helpers (fixing the incorrect smp_mb__after_clear_bit()
usage).
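
The intended pattern for an mwait style idle driver then becomes (a
sketch; the intel_idle hunk below is the real instance):

	if (!current_set_polling_and_test()) {
		/* polling is advertised and NEED_RESCHED was not already set */
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__mwait(eax, ecx);
	}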

Also switch the idle routines to tif_need_resched(), which is an
immediate test of TIF_NEED_RESCHED, as opposed to need_resched(), which
will end up being slightly different later in this series.
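
Roughly, the distinction (a sketch, not the exact definitions):

	/*
	 * Always a raw test of the thread flag -- the thing a remote
	 * wakeup sets and an mwait monitor actually watches:
	 */
	#define tif_need_resched()	test_thread_flag(TIF_NEED_RESCHED)

	/*
	 * need_resched() tests the same flag today, but is expected to
	 * change once NEED_RESCHED is folded into the preempt_count
	 * later in this series.
	 */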

Cc: lenb@kernel.org
Cc: tglx@linutronix.de
Reported-by: Mike Galbraith <bitbucket@online.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 arch/x86/kernel/process.c     |    6 +--
 drivers/acpi/processor_idle.c |   46 +++++-------------------
 drivers/idle/intel_idle.c     |    2 -
 include/linux/sched.h         |   78 ++++++++++++++++++++++++++++++++++++++----
 include/linux/thread_info.h   |    2 +
 kernel/cpu/idle.c             |    9 ++--
 6 files changed, 91 insertions(+), 52 deletions(-)

--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -391,9 +391,9 @@ static void amd_e400_idle(void)
 		 * The switch back from broadcast mode needs to be
 		 * called with interrupts disabled.
 		 */
-		 local_irq_disable();
-		 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
-		 local_irq_enable();
+		local_irq_disable();
+		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+		local_irq_enable();
 	} else
 		default_idle();
 }
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -119,17 +119,10 @@ static struct dmi_system_id processor_po
  */
 static void acpi_safe_halt(void)
 {
-	current_thread_info()->status &= ~TS_POLLING;
-	/*
-	 * TS_POLLING-cleared state must be visible before we
-	 * test NEED_RESCHED:
-	 */
-	smp_mb();
-	if (!need_resched()) {
+	if (!tif_need_resched()) {
 		safe_halt();
 		local_irq_disable();
 	}
-	current_thread_info()->status |= TS_POLLING;
 }
 
 #ifdef ARCH_APICTIMER_STOPS_ON_C3
@@ -737,6 +730,11 @@ static int acpi_idle_enter_c1(struct cpu
 	if (unlikely(!pr))
 		return -EINVAL;
 
+	if (cx->entry_method == ACPI_CSTATE_FFH) {
+		if (current_set_polling_and_test())
+			return -EINVAL;
+	}
+
 	lapic_timer_state_broadcast(pr, cx, 1);
 	acpi_idle_do_entry(cx);
 
@@ -790,18 +788,9 @@ static int acpi_idle_enter_simple(struct
 	if (unlikely(!pr))
 		return -EINVAL;
 
-	if (cx->entry_method != ACPI_CSTATE_FFH) {
-		current_thread_info()->status &= ~TS_POLLING;
-		/*
-		 * TS_POLLING-cleared state must be visible before we test
-		 * NEED_RESCHED:
-		 */
-		smp_mb();
-
-		if (unlikely(need_resched())) {
-			current_thread_info()->status |= TS_POLLING;
+	if (cx->entry_method == ACPI_CSTATE_FFH) {
+		if (current_set_polling_and_test())
 			return -EINVAL;
-		}
 	}
 
 	/*
@@ -819,9 +808,6 @@ static int acpi_idle_enter_simple(struct
 
 	sched_clock_idle_wakeup_event(0);
 
-	if (cx->entry_method != ACPI_CSTATE_FFH)
-		current_thread_info()->status |= TS_POLLING;
-
 	lapic_timer_state_broadcast(pr, cx, 0);
 	return index;
 }
@@ -858,18 +844,9 @@ static int acpi_idle_enter_bm(struct cpu
 		}
 	}
 
-	if (cx->entry_method != ACPI_CSTATE_FFH) {
-		current_thread_info()->status &= ~TS_POLLING;
-		/*
-		 * TS_POLLING-cleared state must be visible before we test
-		 * NEED_RESCHED:
-		 */
-		smp_mb();
-
-		if (unlikely(need_resched())) {
-			current_thread_info()->status |= TS_POLLING;
+	if (cx->entry_method == ACPI_CSTATE_FFH) {
+		if (current_set_polling_and_test())
 			return -EINVAL;
-		}
 	}
 
 	acpi_unlazy_tlb(smp_processor_id());
@@ -915,9 +892,6 @@ static int acpi_idle_enter_bm(struct cpu
 
 	sched_clock_idle_wakeup_event(0);
 
-	if (cx->entry_method != ACPI_CSTATE_FFH)
-		current_thread_info()->status |= TS_POLLING;
-
 	lapic_timer_state_broadcast(pr, cx, 0);
 	return index;
 }
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -359,7 +359,7 @@ static int intel_idle(struct cpuidle_dev
 	if (!(lapic_timer_reliable_states & (1 << (cstate))))
 		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
 
-	if (!need_resched()) {
+	if (!current_set_polling_and_test()) {
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2479,34 +2479,98 @@ static inline int tsk_is_polling(struct
 {
 	return task_thread_info(p)->status & TS_POLLING;
 }
-static inline void current_set_polling(void)
+static inline void __current_set_polling(void)
 {
 	current_thread_info()->status |= TS_POLLING;
 }
 
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
 {
 	current_thread_info()->status &= ~TS_POLLING;
-	smp_mb__after_clear_bit();
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb();
+
+	return unlikely(tif_need_resched());
 }
 #elif defined(TIF_POLLING_NRFLAG)
 static inline int tsk_is_polling(struct task_struct *p)
 {
 	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
 }
-static inline void current_set_polling(void)
+
+static inline void __current_set_polling(void)
 {
 	set_thread_flag(TIF_POLLING_NRFLAG);
 }
 
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 *
+	 * XXX: assumes set/clear bit are identical barrier wise.
+	 */
+	smp_mb__after_clear_bit();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
 {
 	clear_thread_flag(TIF_POLLING_NRFLAG);
 }
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb__after_clear_bit();
+
+	return unlikely(tif_need_resched());
+}
+
 #else
 static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void current_set_polling(void) { }
-static inline void current_clr_polling(void) { }
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
 #endif
 
 /*
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -118,6 +118,8 @@ static inline __deprecated void set_need
 	 */
 }
 
+#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+
 #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
 /*
  * An arch can define its own version of set_restore_sigmask() to get the
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -44,7 +44,7 @@ static inline int cpu_idle_poll(void)
 	rcu_idle_enter();
 	trace_cpu_idle_rcuidle(0, smp_processor_id());
 	local_irq_enable();
-	while (!need_resched())
+	while (!tif_need_resched())
 		cpu_relax();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 	rcu_idle_exit();
@@ -92,8 +92,7 @@ static void cpu_idle_loop(void)
 			if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
 				cpu_idle_poll();
 			} else {
-				current_clr_polling();
-				if (!need_resched()) {
+				if (!current_clr_polling_and_test()) {
 					stop_critical_timings();
 					rcu_idle_enter();
 					arch_cpu_idle();
@@ -103,7 +102,7 @@ static void cpu_idle_loop(void)
 				} else {
 					local_irq_enable();
 				}
-				current_set_polling();
+				__current_set_polling();
 			}
 			arch_cpu_idle_exit();
 		}
@@ -129,7 +128,7 @@ void cpu_startup_entry(enum cpuhp_state
 	 */
 	boot_init_stack_canary();
 #endif
-	current_set_polling();
+	__current_set_polling();
 	arch_cpu_idle_prepare();
 	cpu_idle_loop();
 }


