All of lore.kernel.org
 help / color / mirror / Atom feed
From: tip-bot for Peter Zijlstra <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: torvalds@linux-foundation.org, peterz@infradead.org,
	linux-kernel@vger.kernel.org, cs.os.kernel@gmail.com,
	hpa@zytor.com, tglx@linutronix.de, oleg@redhat.com,
	mingo@kernel.org
Subject: [tip:sched/core] sched/core: Optimize __schedule()
Date: Thu, 22 Sep 2016 06:59:51 -0700	[thread overview]
Message-ID: <tip-9af6528ee9b682df7f29dbee86fbba0b67eab944@git.kernel.org> (raw)
In-Reply-To: <20160913163729.GB5012@twins.programming.kicks-ass.net>

Commit-ID:  9af6528ee9b682df7f29dbee86fbba0b67eab944
Gitweb:     http://git.kernel.org/tip/9af6528ee9b682df7f29dbee86fbba0b67eab944
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Tue, 13 Sep 2016 18:37:29 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 22 Sep 2016 14:53:45 +0200

sched/core: Optimize __schedule()

Oleg noted that by making do_exit() use __schedule() for the TASK_DEAD
context switch, we can avoid the TASK_DEAD special case currently in
__schedule() because that avoids the extra preempt_disable() from
schedule().

In order to facilitate this, create a do_task_dead() helper which we
place in the scheduler code, such that it can access __schedule().

Also add some __noreturn annotations to the functions, there's no
coming back from do_exit().

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Cheng Chao <cs.os.kernel@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: chris@chris-wilson.co.uk
Cc: tj@kernel.org
Link: http://lkml.kernel.org/r/20160913163729.GB5012@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kernel.h |  9 +++------
 include/linux/sched.h  |  2 ++
 kernel/exit.c          | 26 ++------------------------
 kernel/sched/core.c    | 38 +++++++++++++++++++++++++++-----------
 4 files changed, 34 insertions(+), 41 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d96a611..74fd6f0 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -259,17 +259,14 @@ static inline void might_fault(void) { }
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
 __printf(1, 2)
-void panic(const char *fmt, ...)
-	__noreturn __cold;
+void panic(const char *fmt, ...) __noreturn __cold;
 void nmi_panic(struct pt_regs *regs, const char *msg);
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
 extern int oops_may_print(void);
-void do_exit(long error_code)
-	__noreturn;
-void complete_and_exit(struct completion *, long)
-	__noreturn;
+void do_exit(long error_code) __noreturn;
+void complete_and_exit(struct completion *, long) __noreturn;
 
 /* Internal, do not use. */
 int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d750240..f00ee8e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -448,6 +448,8 @@ static inline void io_schedule(void)
 	io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
 }
 
+void __noreturn do_task_dead(void);
+
 struct nsproxy;
 struct user_namespace;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 091a78b..1e1d913 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -725,7 +725,7 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
-void do_exit(long code)
+void __noreturn do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
@@ -882,29 +882,7 @@ void do_exit(long code)
 	exit_rcu();
 	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
 
-	/*
-	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
-	 * when the following two conditions become true.
-	 *   - There is race condition of mmap_sem (It is acquired by
-	 *     exit_mm()), and
-	 *   - SMI occurs before setting TASK_RUNINNG.
-	 *     (or hypervisor of virtual machine switches to other guest)
-	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
-	 *
-	 * To avoid it, we have to wait for releasing tsk->pi_lock which
-	 * is held by try_to_wake_up()
-	 */
-	smp_mb();
-	raw_spin_unlock_wait(&tsk->pi_lock);
-
-	/* causes final put_task_struct in finish_task_switch(). */
-	tsk->state = TASK_DEAD;
-	tsk->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */
-	schedule();
-	BUG();
-	/* Avoid "noreturn function does return".  */
-	for (;;)
-		cpu_relax();	/* For when BUG is null */
+	do_task_dead();
 }
 EXPORT_SYMBOL_GPL(do_exit);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ff4e3c0..b2ec53c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3331,17 +3331,6 @@ static void __sched notrace __schedule(bool preempt)
 	rq = cpu_rq(cpu);
 	prev = rq->curr;
 
-	/*
-	 * do_exit() calls schedule() with preemption disabled as an exception;
-	 * however we must fix that up, otherwise the next task will see an
-	 * inconsistent (higher) preempt count.
-	 *
-	 * It also avoids the below schedule_debug() test from complaining
-	 * about this.
-	 */
-	if (unlikely(prev->state == TASK_DEAD))
-		preempt_enable_no_resched_notrace();
-
 	schedule_debug(prev);
 
 	if (sched_feat(HRTICK))
@@ -3409,6 +3398,33 @@ static void __sched notrace __schedule(bool preempt)
 }
 STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
 
+void __noreturn do_task_dead(void)
+{
+	/*
+	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
+	 * when the following two conditions become true.
+	 *   - There is race condition of mmap_sem (It is acquired by
+	 *     exit_mm()), and
+	 *   - SMI occurs before setting TASK_RUNINNG.
+	 *     (or hypervisor of virtual machine switches to other guest)
+	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
+	 *
+	 * To avoid it, we have to wait for releasing tsk->pi_lock which
+	 * is held by try_to_wake_up()
+	 */
+	smp_mb();
+	raw_spin_unlock_wait(&current->pi_lock);
+
+	/* causes final put_task_struct in finish_task_switch(). */
+	__set_current_state(TASK_DEAD);
+	current->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */
+	__schedule(false);
+	BUG();
+	/* Avoid "noreturn function does return".  */
+	for (;;)
+		cpu_relax();	/* For when BUG is null */
+}
+
 static inline void sched_submit_work(struct task_struct *tsk)
 {
 	if (!tsk->state || tsk_is_pi_blocked(tsk))

  parent reply	other threads:[~2016-09-22 14:00 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <OF73BCE1E4.D2224FEC-ON48258025.0022CFA7-48258025.0022E3DC@kedacom.com>
2016-09-09  8:13 ` [PATCH v2] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE Cheng Chao
2016-09-09  8:19   ` chengchao
2016-09-09 13:14   ` Oleg Nesterov
2016-09-09 16:24     ` Peter Zijlstra
2016-09-10  8:52   ` [PATCH v3] " Cheng Chao
2016-09-10  9:51     ` Cheng Chao
2016-09-10 16:33       ` Peter Zijlstra
2016-09-12 11:03     ` Oleg Nesterov
2016-09-13  2:45       ` Cheng Chao
2016-09-12 11:37     ` Peter Zijlstra
2016-09-12 11:41       ` Peter Zijlstra
2016-09-12 13:05         ` Oleg Nesterov
2016-09-12 15:01           ` Peter Zijlstra
2016-09-13 16:14             ` Oleg Nesterov
2016-09-13 16:37               ` Peter Zijlstra
2016-09-14  2:07                 ` Cheng Chao
2016-09-14  7:50                   ` Peter Zijlstra
2016-09-14 15:45                 ` Oleg Nesterov
2016-09-22 13:59                 ` tip-bot for Peter Zijlstra [this message]
2016-09-13  4:03         ` Cheng Chao
2016-09-13  8:45           ` Peter Zijlstra
2016-09-14  2:01     ` [PATCH v4] stop_machine: Avoid a sleep and wakeup in the stop_one_cpu() Cheng Chao
2016-09-14 15:53       ` Oleg Nesterov
2016-09-18  2:07       ` Cheng Chao
2016-09-22 13:59       ` [tip:sched/core] stop_machine: Avoid a sleep and wakeup in stop_one_cpu() tip-bot for Cheng Chao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=tip-9af6528ee9b682df7f29dbee86fbba0b67eab944@git.kernel.org \
    --to=tipbot@zytor.com \
    --cc=cs.os.kernel@gmail.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.