From: Mike Galbraith <umgwanakikbuti@gmail.com>
To: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	linux-rt-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	Steven Rostedt <rostedt@goodmis.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [rfc patch 1/2] rt/locking/hotplug: Kill hotplug_lock()/hotplug_unlock()
Date: Tue, 05 Apr 2016 14:49:47 +0200	[thread overview]
Message-ID: <1459860587.7776.1.camel@gmail.com> (raw)
In-Reply-To: <1459837988.26938.16.camel@gmail.com>


This lock is itself a source of hotplug deadlocks for RT:

1. kernfs_mutex is taken during hotplug, so any non-hotplug path that
holds kernfs_mutex when it meets hotplug_lock() deadlocks us.

2. notifiers depend upon RCU GP threads; an RCU GP thread meeting
hotplug_lock() deadlocks us the same way.

Remove it.  Start migrating tasks immediately, and do not stop migrating
until the cpu is down.  Have the caller poll ->refcount before actually
taking the cpu down.  If someone manages to sneak in before the stopper
runs, take_cpu_down() returns -EBUSY, and the caller polls one more time.
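
In outline, the scheme is just a flag plus a bare refcount, drained by
polling, with a single retry if a late pinner sneaks in.  A minimal
userspace analogue (illustration only, not the patch; the kernel side
polls with cpu_chill() and does the final check in take_cpu_down()
under stop_machine()):

	#include <stdatomic.h>
	#include <errno.h>

	static atomic_int migrate;	/* analogue of hp->migrate  */
	static atomic_int refcount;	/* analogue of hp->refcount */

	static void pin(void)
	{
		/* kernel: tries migrate_me() first when migrate is set */
		atomic_fetch_add(&refcount, 1);
	}

	static void unpin(void)
	{
		atomic_fetch_sub(&refcount, 1);
	}

	static int take_down(void)
	{
		/* analogue of take_cpu_down(): refuse if someone snuck in */
		return atomic_load(&refcount) ? -EBUSY : 0;
	}

	static int unplug(void)
	{
		int err, once = 0;

		atomic_store(&migrate, 1);
	again:
		while (atomic_load(&refcount))
			;		/* kernel: cpu_chill() */
		err = take_down();
		if (err == -EBUSY && !once++)
			goto again;	/* poll one more time */
		return err;
	}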

Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
---
 kernel/cpu.c |  267 ++++++++++-------------------------------------------------
 1 file changed, 47 insertions(+), 220 deletions(-)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -23,6 +23,7 @@
 #include <linux/tick.h>
 #include <linux/irq.h>
 #include <linux/smpboot.h>
+#include <linux/delay.h>
 
 #include <trace/events/power.h>
 #define CREATE_TRACE_POINTS
@@ -166,49 +167,14 @@ static struct {
 
 /**
  * hotplug_pcp	- per cpu hotplug descriptor
- * @unplug:	set when pin_current_cpu() needs to sync tasks
- * @sync_tsk:	the task that waits for tasks to finish pinned sections
  * @refcount:	counter of tasks in pinned sections
- * @grab_lock:	set when the tasks entering pinned sections should wait
- * @synced:	notifier for @sync_tsk to tell cpu_down it's finished
- * @mutex:	the mutex to make tasks wait (used when @grab_lock is true)
- * @mutex_init:	zero if the mutex hasn't been initialized yet.
- *
- * Although @unplug and @sync_tsk may point to the same task, the @unplug
- * is used as a flag and still exists after @sync_tsk has exited and
- * @sync_tsk set to NULL.
+ * @migrate:	set when the tasks entering/leaving pinned sections should migrate
  */
 struct hotplug_pcp {
-	struct task_struct *unplug;
-	struct task_struct *sync_tsk;
 	int refcount;
-	int grab_lock;
-	struct completion synced;
-	struct completion unplug_wait;
-#ifdef CONFIG_PREEMPT_RT_FULL
-	/*
-	 * Note, on PREEMPT_RT, the hotplug lock must save the state of
-	 * the task, otherwise the mutex will cause the task to fail
-	 * to sleep when required. (Because it's called from migrate_disable())
-	 *
-	 * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
-	 * state.
-	 */
-	spinlock_t lock;
-#else
-	struct mutex mutex;
-#endif
-	int mutex_init;
+	int migrate;
 };
 
-#ifdef CONFIG_PREEMPT_RT_FULL
-# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock)
-# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock)
-#else
-# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
-# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
-#endif
-
 static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
 
 /**
@@ -221,42 +187,20 @@ static DEFINE_PER_CPU(struct hotplug_pcp
  */
 void pin_current_cpu(void)
 {
-	struct hotplug_pcp *hp;
-	int force = 0;
-
-retry:
-	hp = this_cpu_ptr(&hotplug_pcp);
+	struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
 
-	if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
-	    hp->unplug == current) {
+	if (!hp->migrate) {
 		hp->refcount++;
 		return;
 	}
-	if (hp->grab_lock) {
-		preempt_enable();
-		hotplug_lock(hp);
-		hotplug_unlock(hp);
-	} else {
-		preempt_enable();
-		/*
-		 * Try to push this task off of this CPU.
-		 */
-		if (!migrate_me()) {
-			preempt_disable();
-			hp = this_cpu_ptr(&hotplug_pcp);
-			if (!hp->grab_lock) {
-				/*
-				 * Just let it continue it's already pinned
-				 * or about to sleep.
-				 */
-				force = 1;
-				goto retry;
-			}
-			preempt_enable();
-		}
-	}
+
+	/*
+	 * Try to push this task off of this CPU.
+	 */
+	preempt_enable_no_resched();
+	migrate_me();
 	preempt_disable();
-	goto retry;
+	this_cpu_ptr(&hotplug_pcp)->refcount++;
 }
 
 /**
@@ -268,182 +212,54 @@ void unpin_current_cpu(void)
 {
 	struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
 
-	WARN_ON(hp->refcount <= 0);
-
-	/* This is safe. sync_unplug_thread is pinned to this cpu */
-	if (!--hp->refcount && hp->unplug && hp->unplug != current)
-		wake_up_process(hp->unplug);
-}
-
-static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
-{
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	while (hp->refcount) {
-		schedule_preempt_disabled();
-		set_current_state(TASK_UNINTERRUPTIBLE);
-	}
-}
-
-static int sync_unplug_thread(void *data)
-{
-	struct hotplug_pcp *hp = data;
-
-	wait_for_completion(&hp->unplug_wait);
-	preempt_disable();
-	hp->unplug = current;
-	wait_for_pinned_cpus(hp);
-
-	/*
-	 * This thread will synchronize the cpu_down() with threads
-	 * that have pinned the CPU. When the pinned CPU count reaches
-	 * zero, we inform the cpu_down code to continue to the next step.
-	 */
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	preempt_enable();
-	complete(&hp->synced);
-
-	/*
-	 * If all succeeds, the next step will need tasks to wait till
-	 * the CPU is offline before continuing. To do this, the grab_lock
-	 * is set and tasks going into pin_current_cpu() will block on the
-	 * mutex. But we still need to wait for those that are already in
-	 * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
-	 * will kick this thread out.
-	 */
-	while (!hp->grab_lock && !kthread_should_stop()) {
-		schedule();
-		set_current_state(TASK_UNINTERRUPTIBLE);
-	}
-
-	/* Make sure grab_lock is seen before we see a stale completion */
-	smp_mb();
-
-	/*
-	 * Now just before cpu_down() enters stop machine, we need to make
-	 * sure all tasks that are in pinned CPU sections are out, and new
-	 * tasks will now grab the lock, keeping them from entering pinned
-	 * CPU sections.
-	 */
-	if (!kthread_should_stop()) {
-		preempt_disable();
-		wait_for_pinned_cpus(hp);
-		preempt_enable();
-		complete(&hp->synced);
-	}
+	WARN_ON(hp->refcount-- <= 0);
 
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	while (!kthread_should_stop()) {
-		schedule();
-		set_current_state(TASK_UNINTERRUPTIBLE);
-	}
-	set_current_state(TASK_RUNNING);
+	if (!hp->migrate)
+		return;
 
 	/*
-	 * Force this thread off this CPU as it's going down and
-	 * we don't want any more work on this CPU.
+	 * Try to push this task off of this CPU.
 	 */
-	current->flags &= ~PF_NO_SETAFFINITY;
-	set_cpus_allowed_ptr(current, cpu_present_mask);
+	preempt_enable_no_resched();
 	migrate_me();
-	return 0;
-}
-
-static void __cpu_unplug_sync(struct hotplug_pcp *hp)
-{
-	wake_up_process(hp->sync_tsk);
-	wait_for_completion(&hp->synced);
+	preempt_disable();
 }
 
-static void __cpu_unplug_wait(unsigned int cpu)
+static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
 {
-	struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
-
-	complete(&hp->unplug_wait);
-	wait_for_completion(&hp->synced);
+	while (hp->refcount) {
+		trace_printk("CHILL\n");
+		cpu_chill();
+	}
 }
 
 /*
- * Start the sync_unplug_thread on the target cpu and wait for it to
- * complete.
+ * Start migration of pinned tasks on the target cpu.
  */
 static int cpu_unplug_begin(unsigned int cpu)
 {
 	struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
-	int err;
-
-	/* Protected by cpu_hotplug.lock */
-	if (!hp->mutex_init) {
-#ifdef CONFIG_PREEMPT_RT_FULL
-		spin_lock_init(&hp->lock);
-#else
-		mutex_init(&hp->mutex);
-#endif
-		hp->mutex_init = 1;
-	}
 
 	/* Inform the scheduler to migrate tasks off this CPU */
 	tell_sched_cpu_down_begin(cpu);
+	hp->migrate = 1;
 
-	init_completion(&hp->synced);
-	init_completion(&hp->unplug_wait);
-
-	hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
-	if (IS_ERR(hp->sync_tsk)) {
-		err = PTR_ERR(hp->sync_tsk);
-		hp->sync_tsk = NULL;
-		return err;
-	}
-	kthread_bind(hp->sync_tsk, cpu);
+	/* Let all tasks know cpu unplug is starting */
+	smp_rmb();
 
-	/*
-	 * Wait for tasks to get out of the pinned sections,
-	 * it's still OK if new tasks enter. Some CPU notifiers will
-	 * wait for tasks that are going to enter these sections and
-	 * we must not have them block.
-	 */
-	wake_up_process(hp->sync_tsk);
 	return 0;
 }
 
-static void cpu_unplug_sync(unsigned int cpu)
-{
-	struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
-
-	init_completion(&hp->synced);
-	/* The completion needs to be initialzied before setting grab_lock */
-	smp_wmb();
-
-	/* Grab the mutex before setting grab_lock */
-	hotplug_lock(hp);
-	hp->grab_lock = 1;
-
-	/*
-	 * The CPU notifiers have been completed.
-	 * Wait for tasks to get out of pinned CPU sections and have new
-	 * tasks block until the CPU is completely down.
-	 */
-	__cpu_unplug_sync(hp);
-
-	/* All done with the sync thread */
-	kthread_stop(hp->sync_tsk);
-	hp->sync_tsk = NULL;
-}
-
 static void cpu_unplug_done(unsigned int cpu)
 {
 	struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
 
-	hp->unplug = NULL;
-	/* Let all tasks know cpu unplug is finished before cleaning up */
+	/* Let all tasks know cpu unplug is finished */
 	smp_wmb();
 
-	if (hp->sync_tsk)
-		kthread_stop(hp->sync_tsk);
-
-	if (hp->grab_lock) {
-		hotplug_unlock(hp);
+	if (hp->migrate) {
 		/* protected by cpu_hotplug.lock */
-		hp->grab_lock = 0;
+		hp->migrate = 0;
 	}
 	tell_sched_cpu_down_done(cpu);
 }
@@ -951,6 +767,10 @@ static int take_cpu_down(void *_param)
 	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
 	int err, cpu = smp_processor_id();
 
+	/* RT: too bad, some task snuck in on the way here */
+	if (this_cpu_ptr(&hotplug_pcp)->refcount)
+		return -EBUSY;
+
 	/* Ensure this CPU doesn't handle any more interrupts. */
 	err = __cpu_disable();
 	if (err < 0)
@@ -972,7 +792,7 @@ static int take_cpu_down(void *_param)
 static int takedown_cpu(unsigned int cpu)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-	int err;
+	int err, once = 0;
 
 	/*
 	 * By now we've cleared cpu_active_mask, wait for all preempt-disabled
@@ -989,25 +809,32 @@ static int takedown_cpu(unsigned int cpu
 	else
 		synchronize_rcu();
 
-	__cpu_unplug_wait(cpu);
-
 	/* Park the smpboot threads */
 	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
 	smpboot_park_threads(cpu);
 
-	/* Notifiers are done. Don't let any more tasks pin this CPU. */
-	cpu_unplug_sync(cpu);
-
 	/*
 	 * Prevent irq alloc/free while the dying cpu reorganizes the
 	 * interrupt affinities.
 	 */
 	irq_lock_sparse();
 
+again:
+	/* Notifiers are done.  Check for late references */
+	wait_for_pinned_cpus(&per_cpu(hotplug_pcp, cpu));
+
 	/*
 	 * So now all preempt/rcu users must observe !cpu_active().
 	 */
 	err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
+	if (err == -EBUSY) {
+		if (!once) {
+			trace_printk("BUSY, trying again\n");
+			once++;
+			goto again;
+		}
+		trace_printk("CRAP, still busy.  Deal with it caller\n");
+	}
 	if (err) {
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		cpu_notify_nofail(CPU_DOWN_FAILED, cpu);


Thread overview: 42+ messages
2016-02-12 23:02 [PATCH RT 1/6] kernel: softirq: unlock with irqs on Sebastian Andrzej Siewior
2016-02-12 23:02 ` [PATCH RT 2/6] kernel: migrate_disable() do fastpath in atomic & irqs-off Sebastian Andrzej Siewior
2016-02-12 23:02 ` [PATCH RT 3/6] rtmutex: push down migrate_disable() into rt_spin_lock() Sebastian Andrzej Siewior
2016-02-12 23:02 ` [PATCH RT 4/6] rt/locking: Reenable migration accross schedule Sebastian Andrzej Siewior
2016-03-20  8:43   ` Mike Galbraith
2016-03-24 10:07     ` Mike Galbraith
2016-03-24 10:44       ` Thomas Gleixner
2016-03-24 11:06         ` Mike Galbraith
2016-03-25  5:38           ` Mike Galbraith
2016-03-25  8:52             ` Thomas Gleixner
2016-03-25  9:13               ` Mike Galbraith
2016-03-25  9:14                 ` Mike Galbraith
2016-03-25 16:24                 ` Mike Galbraith
2016-03-29  4:05                   ` Mike Galbraith
2016-03-31  6:31         ` Mike Galbraith
2016-04-01 21:11           ` Sebastian Andrzej Siewior
2016-04-02  3:12             ` Mike Galbraith
2016-04-05 12:49               ` [rfc patch 0/2] Kill hotplug_lock()/hotplug_unlock() Mike Galbraith
     [not found]               ` <1459837988.26938.16.camel@gmail.com>
2016-04-05 12:49                 ` Mike Galbraith [this message]
2016-04-05 12:49                 ` [rfc patch 2/2] rt/locking/hotplug: Fix rt_spin_lock_slowlock() migrate_disable() bug Mike Galbraith
2016-04-06 12:00                   ` Mike Galbraith
2016-04-07  4:37                     ` Mike Galbraith
2016-04-07 16:48                       ` Sebastian Andrzej Siewior
2016-04-07 19:08                         ` Mike Galbraith
2016-04-07 16:47               ` [PATCH RT 4/6] rt/locking: Reenable migration accross schedule Sebastian Andrzej Siewior
2016-04-07 19:04                 ` Mike Galbraith
2016-04-08 10:30                   ` Sebastian Andrzej Siewior
2016-04-08 12:10                     ` Mike Galbraith
2016-04-08  6:35                 ` Mike Galbraith
2016-04-08 13:44                 ` Mike Galbraith
2016-04-08 13:44                   ` Mike Galbraith
2016-04-08 13:58                   ` Sebastian Andrzej Siewior
2016-04-08 14:16                     ` Mike Galbraith
2016-04-08 14:51                       ` Sebastian Andrzej Siewior
2016-04-08 16:49                         ` Mike Galbraith
2016-04-18 17:15                           ` Sebastian Andrzej Siewior
2016-04-18 17:55                             ` Mike Galbraith
2016-04-19  7:07                               ` Sebastian Andrzej Siewior
2016-04-19  8:55                                 ` Mike Galbraith
2016-04-19  9:02                                   ` Sebastian Andrzej Siewior
2016-02-12 23:02 ` [PATCH RT 5/6] kernel/stop_machine: partly revert "stop_machine: Use raw spinlocks" Sebastian Andrzej Siewior
2016-02-12 23:02 ` [PATCH RT 6/6] rcu: disable more spots of rcu_bh Sebastian Andrzej Siewior
