All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] fix might sleep oops in irq affinity callback hook
@ 2013-08-20 17:59 Joe Korty
  2013-08-21 15:53 ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 7+ messages in thread
From: Joe Korty @ 2013-08-20 17:59 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-rt-users

Fix might_sleep oops in the irq affinity notifier logic.

Bug was due to the generic support function
irq_set_affinity() invoking schedule_work() underneath
a raw spin lock.  Our workaround is to move the
schedule_work() down a few lines to where the lock has
just been dropped.  This may not be ideal, as the resultant
code is a bit ugly.

This bug was discovered when the sfc networking driver
was loaded.  Perusal of the kernel source shows that the
mellanox and infiniband drivers are also likely to suffer
from this problem.

(applies to 3.6.11.6-rt38, appears to be applicable to
all later rt's and I expect it is valid for all earlier
rt's as well).

Signed-off-by: Joe Korty <joe.korty@ccur.com>

Index: b/kernel/irq/manage.c
===================================================================
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -163,7 +163,7 @@ int irq_do_set_affinity(struct irq_data 
 	return ret;
 }
 
-int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
+static int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
 {
 	struct irq_chip *chip = irq_data_get_irq_chip(data);
 	struct irq_desc *desc = irq_data_to_desc(data);
@@ -179,10 +179,6 @@ int __irq_set_affinity_locked(struct irq
 		irq_copy_pending(desc, mask);
 	}
 
-	if (desc->affinity_notify) {
-		kref_get(&desc->affinity_notify->kref);
-		schedule_work(&desc->affinity_notify->work);
-	}
 	irqd_set(data, IRQD_AFFINITY_SET);
 
 	return ret;
@@ -205,7 +201,14 @@ int irq_set_affinity(unsigned int irq, c
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
 	ret =  __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask);
-	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	if (!ret && desc->affinity_notify) {
+		kref_get(&desc->affinity_notify->kref);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+		schedule_work(&desc->affinity_notify->work);
+	} else
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+
 	return ret;
 }
 
Index: b/include/linux/irq.h
===================================================================
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -381,7 +381,6 @@ extern void remove_percpu_irq(unsigned i
 
 extern void irq_cpu_online(void);
 extern void irq_cpu_offline(void);
-extern int __irq_set_affinity_locked(struct irq_data *data,  const struct cpumask *cpumask);
 
 #ifdef CONFIG_GENERIC_HARDIRQS
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] fix might sleep oops in irq affinity callback hook
  2013-08-20 17:59 [PATCH] fix might sleep oops in irq affinity callback hook Joe Korty
@ 2013-08-21 15:53 ` Sebastian Andrzej Siewior
  2013-08-21 21:09   ` Joe Korty
  0 siblings, 1 reply; 7+ messages in thread
From: Sebastian Andrzej Siewior @ 2013-08-21 15:53 UTC (permalink / raw)
  To: Joe Korty; +Cc: Steven Rostedt, linux-rt-users

Joe could you please test this one? In 3.10 we have one user of
__irq_set_affinity_locked() and we might get more.

Steven: in the longterm we might want a helper for this sort of things?
I remember the MCE had the same problem.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/interrupt.h |  1 +
 kernel/irq/manage.c       | 79 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 11bdb1e..838d4bd 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -263,6 +263,7 @@ struct irq_affinity_notify {
 	unsigned int irq;
 	struct kref kref;
 	struct work_struct work;
+	struct list_head list;
 	void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
 	void (*release)(struct kref *ref);
 };
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0e34a98..5999a67 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -164,6 +164,62 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	return ret;
 }
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void _irq_affinity_notify(struct irq_affinity_notify *notify);
+static struct task_struct *set_affinity_helper;
+static LIST_HEAD(affinity_list);
+static DEFINE_RAW_SPINLOCK(affinity_list_lock);
+
+static int set_affinity_thread(void *unused)
+{
+	while (1) {
+		struct irq_affinity_notify *notify;
+		int empty;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		raw_spin_lock_irq(&affinity_list_lock);
+		empty = list_empty(&affinity_list);
+		raw_spin_unlock_irq(&affinity_list_lock);
+
+		if (empty)
+			schedule();
+		if (kthread_should_stop())
+			break;
+		set_current_state(TASK_RUNNING);
+try_next:
+		notify = NULL;
+
+		raw_spin_lock_irq(&affinity_list_lock);
+		if (!list_empty(&affinity_list)) {
+			notify = list_first_entry(&affinity_list,
+					struct irq_affinity_notify, list);
+			list_del(&notify->list);
+		}
+		raw_spin_unlock_irq(&affinity_list_lock);
+
+		if (!notify)
+			continue;
+		_irq_affinity_notify(notify);
+		goto try_next;
+	}
+	return 0;
+}
+
+static void init_helper_thread(void)
+{
+	if (set_affinity_helper)
+		return;
+	set_affinity_helper = kthread_run(set_affinity_thread, NULL,
+			"affinity-cb");
+	WARN_ON(IS_ERR(set_affinity_helper));
+}
+#else
+
+static inline void init_helper_thread(void) { }
+
+#endif
+
 int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
 {
 	struct irq_chip *chip = irq_data_get_irq_chip(data);
@@ -182,7 +238,17 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
 
 	if (desc->affinity_notify) {
 		kref_get(&desc->affinity_notify->kref);
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+		raw_spin_lock(&affinity_list_lock);
+		if (list_empty(&desc->affinity_notify->list))
+			list_add_tail(&affinity_list,
+					&desc->affinity_notify->list);
+		raw_spin_unlock(&affinity_list_lock);
+		wake_up_process(set_affinity_helper);
+#else
 		schedule_work(&desc->affinity_notify->work);
+#endif
 	}
 	irqd_set(data, IRQD_AFFINITY_SET);
 
@@ -223,10 +289,8 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
 }
 EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
 
-static void irq_affinity_notify(struct work_struct *work)
+static void _irq_affinity_notify(struct irq_affinity_notify *notify)
 {
-	struct irq_affinity_notify *notify =
-		container_of(work, struct irq_affinity_notify, work);
 	struct irq_desc *desc = irq_to_desc(notify->irq);
 	cpumask_var_t cpumask;
 	unsigned long flags;
@@ -248,6 +312,13 @@ static void irq_affinity_notify(struct work_struct *work)
 	kref_put(&notify->kref, notify->release);
 }
 
+static void irq_affinity_notify(struct work_struct *work)
+{
+	struct irq_affinity_notify *notify =
+		container_of(work, struct irq_affinity_notify, work);
+	_irq_affinity_notify(notify);
+}
+
 /**
  *	irq_set_affinity_notifier - control notification of IRQ affinity changes
  *	@irq:		Interrupt for which to enable/disable notification
@@ -277,6 +348,8 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
 		notify->irq = irq;
 		kref_init(&notify->kref);
 		INIT_WORK(&notify->work, irq_affinity_notify);
+		INIT_LIST_HEAD(&notify->list);
+		init_helper_thread();
 	}
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
-- 
1.8.4.rc3


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] fix might sleep oops in irq affinity callback hook
  2013-08-21 15:53 ` Sebastian Andrzej Siewior
@ 2013-08-21 21:09   ` Joe Korty
  2013-08-22 19:01     ` Joe Korty
  0 siblings, 1 reply; 7+ messages in thread
From: Joe Korty @ 2013-08-21 21:09 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior; +Cc: Steven Rostedt, linux-rt-users

On Wed, Aug 21, 2013 at 11:53:47AM -0400, Sebastian Andrzej Siewior wrote:
> Joe could you please test this one? In 3.10 we have one user of
> __irq_set_affinity_locked() and we might get more.

Hi Sebastian,
I backported the patch to 3.6.11.6-rt38 and added a few printk's so that
when daemon started I would see that and when each affinity hook was
executed I would see that too.

The system came up and the printk for daemon start showed up near where
the sfc driver started, but no affinity action printk appeared.  Thus
the patch as written does not seem to work.

Joe

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] fix might sleep oops in irq affinity callback hook
  2013-08-21 21:09   ` Joe Korty
@ 2013-08-22 19:01     ` Joe Korty
  2013-08-23  7:23       ` Sebastian Andrzej Siewior
  2013-08-29 11:22       ` Sebastian Andrzej Siewior
  0 siblings, 2 replies; 7+ messages in thread
From: Joe Korty @ 2013-08-22 19:01 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior; +Cc: Steven Rostedt, linux-rt-users

On Wed, Aug 21, 2013 at 05:09:28PM -0400, Joe Korty wrote:
> On Wed, Aug 21, 2013 at 11:53:47AM -0400, Sebastian Andrzej Siewior wrote:
> > Joe could you please test this one? In 3.10 we have one user of
> > __irq_set_affinity_locked() and we might get more.
> 
> Hi Sebastian,
> I backported the patch to 3.6.11.6-rt38 and added a few printk's so that
> when daemon started I would see that and when each affinity hook was
> executed I would see that too.
> 
> The system came up and the printk for daemon start showed up near where
> the sfc driver started, but no affinity action printk appeared.  Thus
> the patch as written does not seem to work.


Hi Sebastian,
Today I intrumented both the nort and rt paths through the
affinity hook logic, and built and booted both nort and
rt kernels.  Here is what I found:

For the nort kernel, each time I change the affinity of
one of the SFC driver IRQs listed in /proc/interrupts,
I get a notice in dmesg that the SFC callback was invoked.

For the rt kernel, I get the message only for the first
time the affinity for some SFC IRQ is changed.  Subsequent
changes of the _same_ IRQ do not have the callback invoked.
If I go and change some other SFC IRQ that will also invoke
the callback the first time only.

   finny# echo 1 >/proc/irq/46/smp_affinity; dmesg | fgrep genirq
   genirq: set_affinity_thread STARTED
   genirq: NOTIFIER CALLED ...
   finny# echo 1 >/proc/irq/46/smp_affinity; dmesg | fgrep genirq
   genirq: set_affinity_thread STARTED
   genirq: NOTIFIER CALLED ...
   finny# echo 1 >/proc/irq/46/smp_affinity; dmesg | fgrep genirq
   genirq: set_affinity_thread STARTED
   genirq: NOTIFIER CALLED ...
   finny# echo 2 >/proc/irq/46/smp_affinity; dmesg | fgrep genirq
   genirq: set_affinity_thread STARTED
   genirq: NOTIFIER CALLED ...
   finny# echo 2 >/proc/irq/47/smp_affinity; dmesg | fgrep genirq
   genirq: set_affinity_thread STARTED
   genirq: NOTIFIER CALLED ...
   genirq: NOTIFIER CALLED ...
   finny# echo 2 >/proc/irq/47/smp_affinity; dmesg | fgrep genirq
   genirq: set_affinity_thread STARTED
   genirq: NOTIFIER CALLED ...
   genirq: NOTIFIER CALLED ...
   finny# echo 1 >/proc/irq/47/smp_affinity; dmesg | fgrep genirq
   genirq: set_affinity_thread STARTED
   genirq: NOTIFIER CALLED ...
   genirq: NOTIFIER CALLED ...


Regards,
Joe

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] fix might sleep oops in irq affinity callback hook
  2013-08-22 19:01     ` Joe Korty
@ 2013-08-23  7:23       ` Sebastian Andrzej Siewior
  2013-08-29 11:22       ` Sebastian Andrzej Siewior
  1 sibling, 0 replies; 7+ messages in thread
From: Sebastian Andrzej Siewior @ 2013-08-23  7:23 UTC (permalink / raw)
  To: Joe Korty; +Cc: Steven Rostedt, linux-rt-users

On 08/22/2013 09:01 PM, Joe Korty wrote:
> Hi Sebastian,

Hi Joe,

> For the rt kernel, I get the message only for the first
> time the affinity for some SFC IRQ is changed.  Subsequent
> changes of the _same_ IRQ do not have the callback invoked.
> If I go and change some other SFC IRQ that will also invoke
> the callback the first time only.

it seems the thread does not get woken up second invocation. I try to
check this somehow here.

> 
> 
> Regards,
> Joe
> 
Sebastian

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] fix might sleep oops in irq affinity callback hook
  2013-08-22 19:01     ` Joe Korty
  2013-08-23  7:23       ` Sebastian Andrzej Siewior
@ 2013-08-29 11:22       ` Sebastian Andrzej Siewior
  2013-08-30 13:53         ` Joe Korty
  1 sibling, 1 reply; 7+ messages in thread
From: Sebastian Andrzej Siewior @ 2013-08-29 11:22 UTC (permalink / raw)
  To: Joe Korty; +Cc: Steven Rostedt, linux-rt-users

* Joe Korty | 2013-08-22 15:01:03 [-0400]:

>Hi Sebastian,
Hi Joe,

>For the rt kernel, I get the message only for the first
>time the affinity for some SFC IRQ is changed.  Subsequent
>changes of the _same_ IRQ do not have the callback invoked.
>If I go and change some other SFC IRQ that will also invoke
>the callback the first time only.

I forget to remove that item from the list so your observing was
correct. Thanks for testing.
I'm going to tae that one, as it seems to work:

--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -261,6 +261,7 @@ struct irq_affinity_notify {
 	unsigned int irq;
 	struct kref kref;
 	struct work_struct work;
+	struct list_head list;
 	void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
 	void (*release)(struct kref *ref);
 };
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -164,6 +164,62 @@ int irq_do_set_affinity(struct irq_data
 	return ret;
 }
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void _irq_affinity_notify(struct irq_affinity_notify *notify);
+static struct task_struct *set_affinity_helper;
+static LIST_HEAD(affinity_list);
+static DEFINE_RAW_SPINLOCK(affinity_list_lock);
+
+static int set_affinity_thread(void *unused)
+{
+	while (1) {
+		struct irq_affinity_notify *notify;
+		int empty;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		raw_spin_lock_irq(&affinity_list_lock);
+		empty = list_empty(&affinity_list);
+		raw_spin_unlock_irq(&affinity_list_lock);
+
+		if (empty)
+			schedule();
+		if (kthread_should_stop())
+			break;
+		set_current_state(TASK_RUNNING);
+try_next:
+		notify = NULL;
+
+		raw_spin_lock_irq(&affinity_list_lock);
+		if (!list_empty(&affinity_list)) {
+			notify = list_first_entry(&affinity_list,
+					struct irq_affinity_notify, list);
+			list_del_init(&notify->list);
+		}
+		raw_spin_unlock_irq(&affinity_list_lock);
+
+		if (!notify)
+			continue;
+		_irq_affinity_notify(notify);
+		goto try_next;
+	}
+	return 0;
+}
+
+static void init_helper_thread(void)
+{
+	if (set_affinity_helper)
+		return;
+	set_affinity_helper = kthread_run(set_affinity_thread, NULL,
+			"affinity-cb");
+	WARN_ON(IS_ERR(set_affinity_helper));
+}
+#else
+
+static inline void init_helper_thread(void) { }
+
+#endif
+
 int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
 {
 	struct irq_chip *chip = irq_data_get_irq_chip(data);
@@ -182,7 +238,17 @@ int __irq_set_affinity_locked(struct irq
 
 	if (desc->affinity_notify) {
 		kref_get(&desc->affinity_notify->kref);
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+		raw_spin_lock(&affinity_list_lock);
+		if (list_empty(&desc->affinity_notify->list))
+			list_add_tail(&affinity_list,
+					&desc->affinity_notify->list);
+		raw_spin_unlock(&affinity_list_lock);
+		wake_up_process(set_affinity_helper);
+#else
 		schedule_work(&desc->affinity_notify->work);
+#endif
 	}
 	irqd_set(data, IRQD_AFFINITY_SET);
 
@@ -223,10 +289,8 @@ int irq_set_affinity_hint(unsigned int i
 }
 EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
 
-static void irq_affinity_notify(struct work_struct *work)
+static void _irq_affinity_notify(struct irq_affinity_notify *notify)
 {
-	struct irq_affinity_notify *notify =
-		container_of(work, struct irq_affinity_notify, work);
 	struct irq_desc *desc = irq_to_desc(notify->irq);
 	cpumask_var_t cpumask;
 	unsigned long flags;
@@ -248,6 +312,13 @@ out:
 	kref_put(&notify->kref, notify->release);
 }
 
+static void irq_affinity_notify(struct work_struct *work)
+{
+	struct irq_affinity_notify *notify =
+		container_of(work, struct irq_affinity_notify, work);
+	_irq_affinity_notify(notify);
+}
+
 /**
  *	irq_set_affinity_notifier - control notification of IRQ affinity changes
  *	@irq:		Interrupt for which to enable/disable notification
@@ -277,6 +348,8 @@ irq_set_affinity_notifier(unsigned int i
 		notify->irq = irq;
 		kref_init(&notify->kref);
 		INIT_WORK(&notify->work, irq_affinity_notify);
+		INIT_LIST_HEAD(&notify->list);
+		init_helper_thread();
 	}
 
 	raw_spin_lock_irqsave(&desc->lock, flags);

>Regards,
>Joe

Sebastian

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] fix might sleep oops in irq affinity callback hook
  2013-08-29 11:22       ` Sebastian Andrzej Siewior
@ 2013-08-30 13:53         ` Joe Korty
  0 siblings, 0 replies; 7+ messages in thread
From: Joe Korty @ 2013-08-30 13:53 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior; +Cc: Steven Rostedt, linux-rt-users

On Thu, Aug 29, 2013 at 07:22:57AM -0400, Sebastian Andrzej Siewior wrote:
>> For the rt kernel, I get the message only for the first
>> time the affinity for some SFC IRQ is changed.  Subsequent
>> changes of the _same_ IRQ do not have the callback invoked.
>> If I go and change some other SFC IRQ that will also invoke
>> the callback the first time only.
> 
> I forget to remove that item from the list so your observing was
> correct. Thanks for testing.
> I'm going to tae that one, as it seems to work:


Hi Sebastian,
I backported the new version to 3.6.11.6-rt38, it works perfectly.
Thanks!

Joe

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2013-08-30 13:55 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-20 17:59 [PATCH] fix might sleep oops in irq affinity callback hook Joe Korty
2013-08-21 15:53 ` Sebastian Andrzej Siewior
2013-08-21 21:09   ` Joe Korty
2013-08-22 19:01     ` Joe Korty
2013-08-23  7:23       ` Sebastian Andrzej Siewior
2013-08-29 11:22       ` Sebastian Andrzej Siewior
2013-08-30 13:53         ` Joe Korty

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.