All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
@ 2014-07-24 21:26 Peter Zijlstra
  2014-07-24 22:02 ` Rafael J. Wysocki
                   ` (2 more replies)
  0 siblings, 3 replies; 75+ messages in thread
From: Peter Zijlstra @ 2014-07-24 21:26 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: rjw, linux-kernel

Subject: irq: Rework IRQF_NO_SUSPENDED
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu Jul 24 22:34:50 CEST 2014

Typically when devices are suspended they're quiesced such that they
will not generate any further interrupts.

However some devices should still generate interrupts, even when
suspended, typically used to wake the machine back up.

Such logic should ideally be contained inside each driver, if it can
generate interrupts when suspended, it knows about this and the
interrupt handler can deal with it.

Except of course for shared interrupts, when such a wakeup device is
sharing an interrupt line with a device that does not expect
interrupts while suspended things can go funny.

This is where IRQF_NO_SUSPEND comes in, the idea is that drivers that
have the capability to wake the machine set IRQF_NO_SUSPEND and their
handler will still be called, even when the IRQ subsystem is formally
suspended. Handlers without IRQF_NO_SUSPEND will not be called.

This replaced the prior implementation of IRQF_NO_SUSPEND which had
a number of fatal issues in that it didn't actually work for the
shared case, exactly the case it should be helping.

There is still enable_irq_wake()/IRQD_WAKEUP_STATE that tries to serve
a similar purpose but is equially wrecked for shared interrupts,
ideally this would be removed.

Cc: rjw@rjwysocki.net
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 kernel/irq/chip.c      |    4 +---
 kernel/irq/handle.c    |   24 +++++++++++++++++++++---
 kernel/irq/internals.h |    6 ++++--
 kernel/irq/manage.c    |   31 ++++++-------------------------
 kernel/irq/pm.c        |   17 +++++++++--------
 5 files changed, 41 insertions(+), 41 deletions(-)

--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -677,9 +677,7 @@ void handle_percpu_devid_irq(unsigned in
 	if (chip->irq_ack)
 		chip->irq_ack(&desc->irq_data);
 
-	trace_irq_handler_entry(irq, action);
-	res = action->handler(irq, dev_id);
-	trace_irq_handler_exit(irq, action, res);
+	res = do_irqaction(desc, action, irq, dev_id);
 
 	if (chip->irq_eoi)
 		chip->irq_eoi(&desc->irq_data);
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -131,6 +131,23 @@ void __irq_wake_thread(struct irq_desc *
 }
 
 irqreturn_t
+do_irqaction(struct irq_desc *desc, struct irqaction *action,
+	     unsigned int irq, void *dev_id)
+{
+	irqreturn_t ret;
+
+	if (unlikely((desc->istate & IRQS_SUSPENDED) &&
+		     !(action->flags & IRQF_NO_SUSPEND)))
+		return IRQ_NONE;
+
+	trace_irq_handler_entry(irq, action);
+	ret = action->handler(irq, dev_id);
+	trace_irq_handler_exit(irq, action, ret);
+
+	return ret;
+}
+
+irqreturn_t
 handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
 {
 	irqreturn_t retval = IRQ_NONE;
@@ -139,9 +156,7 @@ handle_irq_event_percpu(struct irq_desc
 	do {
 		irqreturn_t res;
 
-		trace_irq_handler_entry(irq, action);
-		res = action->handler(irq, action->dev_id);
-		trace_irq_handler_exit(irq, action, res);
+		res = do_irqaction(desc, action, irq, action->dev_id);
 
 		if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
 			      irq, action->handler))
@@ -175,6 +190,9 @@ handle_irq_event_percpu(struct irq_desc
 
 	add_interrupt_randomness(irq, flags);
 
+	if (unlikely(desc->istate & IRQS_SUSPENDED) && retval == IRQ_NONE)
+		retval = IRQ_HANDLED;
+
 	if (!noirqdebug)
 		note_interrupt(irq, desc, retval);
 	return retval;
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -63,8 +63,10 @@ enum {
 
 extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags);
-extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
-extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
+
+extern irqreturn_t
+do_irqaction(struct irq_desc *desc, struct irqaction *action,
+	     unsigned int irq, void *dev_id);
 
 extern int irq_startup(struct irq_desc *desc, bool resend);
 extern void irq_shutdown(struct irq_desc *desc);
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -382,15 +382,8 @@ setup_affinity(unsigned int irq, struct
 }
 #endif
 
-void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
+void __disable_irq(struct irq_desc *desc, unsigned int irq)
 {
-	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND) ||
-		    irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
-			return;
-		desc->istate |= IRQS_SUSPENDED;
-	}
-
 	if (!desc->depth++)
 		irq_disable(desc);
 }
@@ -402,7 +395,7 @@ static int __disable_irq_nosync(unsigned
 
 	if (!desc)
 		return -EINVAL;
-	__disable_irq(desc, irq, false);
+	__disable_irq(desc, irq);
 	irq_put_desc_busunlock(desc, flags);
 	return 0;
 }
@@ -443,20 +436,8 @@ void disable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(disable_irq);
 
-void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
+void __enable_irq(struct irq_desc *desc, unsigned int irq)
 {
-	if (resume) {
-		if (!(desc->istate & IRQS_SUSPENDED)) {
-			if (!desc->action)
-				return;
-			if (!(desc->action->flags & IRQF_FORCE_RESUME))
-				return;
-			/* Pretend that it got disabled ! */
-			desc->depth++;
-		}
-		desc->istate &= ~IRQS_SUSPENDED;
-	}
-
 	switch (desc->depth) {
 	case 0:
  err_out:
@@ -498,7 +479,7 @@ void enable_irq(unsigned int irq)
 		 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
 		goto out;
 
-	__enable_irq(desc, irq, false);
+	__enable_irq(desc, irq);
 out:
 	irq_put_desc_busunlock(desc, flags);
 }
@@ -1079,7 +1060,7 @@ __setup_irq(unsigned int irq, struct irq
 		 */
 
 #define IRQF_MISMATCH \
-	(IRQF_TRIGGER_MASK | IRQF_ONESHOT | IRQF_NO_SUSPEND)
+	(IRQF_TRIGGER_MASK | IRQF_ONESHOT)
 
 		if (!((old->flags & new->flags) & IRQF_SHARED) ||
 		    ((old->flags ^ new->flags) & IRQF_MISMATCH))
@@ -1232,7 +1213,7 @@ __setup_irq(unsigned int irq, struct irq
 	 */
 	if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) {
 		desc->istate &= ~IRQS_SPURIOUS_DISABLED;
-		__enable_irq(desc, irq, false);
+		__enable_irq(desc, irq);
 	}
 
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -29,14 +29,20 @@ void suspend_device_irqs(void)
 	for_each_irq_desc(irq, desc) {
 		unsigned long flags;
 
+		/*
+		 * Ideally this would be a global state, but we cannot
+		 * for the trainwreck that is IRQD_WAKEUP_STATE.
+		 */
 		raw_spin_lock_irqsave(&desc->lock, flags);
-		__disable_irq(desc, irq, true);
+		if (!irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
+			desc->istate |= IRQS_SUSPENDED;
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 
-	for_each_irq_desc(irq, desc)
+	for_each_irq_desc(irq, desc) {
 		if (desc->istate & IRQS_SUSPENDED)
 			synchronize_irq(irq);
+	}
 }
 EXPORT_SYMBOL_GPL(suspend_device_irqs);
 
@@ -47,14 +53,9 @@ static void resume_irqs(bool want_early)
 
 	for_each_irq_desc(irq, desc) {
 		unsigned long flags;
-		bool is_early = desc->action &&
-			desc->action->flags & IRQF_EARLY_RESUME;
-
-		if (!is_early && want_early)
-			continue;
 
 		raw_spin_lock_irqsave(&desc->lock, flags);
-		__enable_irq(desc, irq, true);
+		desc->istate &= ~IRQS_SUSPENDED;
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-24 21:26 [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED Peter Zijlstra
@ 2014-07-24 22:02 ` Rafael J. Wysocki
  2014-07-24 23:10 ` Rafael J. Wysocki
  2014-07-25  9:40 ` Thomas Gleixner
  2 siblings, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-24 22:02 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Thomas Gleixner, linux-kernel, Linux PM list

On Thursday, July 24, 2014 11:26:20 PM Peter Zijlstra wrote:
> Subject: irq: Rework IRQF_NO_SUSPENDED
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Thu Jul 24 22:34:50 CEST 2014
> 
> Typically when devices are suspended they're quiesced such that they
> will not generate any further interrupts.
> 
> However some devices should still generate interrupts, even when
> suspended, typically used to wake the machine back up.
> 
> Such logic should ideally be contained inside each driver, if it can
> generate interrupts when suspended, it knows about this and the
> interrupt handler can deal with it.
> 
> Except of course for shared interrupts, when such a wakeup device is
> sharing an interrupt line with a device that does not expect
> interrupts while suspended things can go funny.
> 
> This is where IRQF_NO_SUSPEND comes in, the idea is that drivers that
> have the capability to wake the machine set IRQF_NO_SUSPEND and their
> handler will still be called, even when the IRQ subsystem is formally
> suspended. Handlers without IRQF_NO_SUSPEND will not be called.
> 
> This replaced the prior implementation of IRQF_NO_SUSPEND which had
> a number of fatal issues in that it didn't actually work for the
> shared case, exactly the case it should be helping.
> 
> There is still enable_irq_wake()/IRQD_WAKEUP_STATE that tries to serve
> a similar purpose but is equially wrecked for shared interrupts,
> ideally this would be removed.
> 
> Cc: rjw@rjwysocki.net
> Signed-off-by: Peter Zijlstra <peterz@infradead.org>

Acked-by: Rafael J. Wysocki <rjw@rjwysocki.net>

> ---
>  kernel/irq/chip.c      |    4 +---
>  kernel/irq/handle.c    |   24 +++++++++++++++++++++---
>  kernel/irq/internals.h |    6 ++++--
>  kernel/irq/manage.c    |   31 ++++++-------------------------
>  kernel/irq/pm.c        |   17 +++++++++--------
>  5 files changed, 41 insertions(+), 41 deletions(-)
> 
> --- a/kernel/irq/chip.c
> +++ b/kernel/irq/chip.c
> @@ -677,9 +677,7 @@ void handle_percpu_devid_irq(unsigned in
>  	if (chip->irq_ack)
>  		chip->irq_ack(&desc->irq_data);
>  
> -	trace_irq_handler_entry(irq, action);
> -	res = action->handler(irq, dev_id);
> -	trace_irq_handler_exit(irq, action, res);
> +	res = do_irqaction(desc, action, irq, dev_id);
>  
>  	if (chip->irq_eoi)
>  		chip->irq_eoi(&desc->irq_data);
> --- a/kernel/irq/handle.c
> +++ b/kernel/irq/handle.c
> @@ -131,6 +131,23 @@ void __irq_wake_thread(struct irq_desc *
>  }
>  
>  irqreturn_t
> +do_irqaction(struct irq_desc *desc, struct irqaction *action,
> +	     unsigned int irq, void *dev_id)
> +{
> +	irqreturn_t ret;
> +
> +	if (unlikely((desc->istate & IRQS_SUSPENDED) &&
> +		     !(action->flags & IRQF_NO_SUSPEND)))
> +		return IRQ_NONE;
> +
> +	trace_irq_handler_entry(irq, action);
> +	ret = action->handler(irq, dev_id);
> +	trace_irq_handler_exit(irq, action, ret);
> +
> +	return ret;
> +}
> +
> +irqreturn_t
>  handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
>  {
>  	irqreturn_t retval = IRQ_NONE;
> @@ -139,9 +156,7 @@ handle_irq_event_percpu(struct irq_desc
>  	do {
>  		irqreturn_t res;
>  
> -		trace_irq_handler_entry(irq, action);
> -		res = action->handler(irq, action->dev_id);
> -		trace_irq_handler_exit(irq, action, res);
> +		res = do_irqaction(desc, action, irq, action->dev_id);
>  
>  		if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
>  			      irq, action->handler))
> @@ -175,6 +190,9 @@ handle_irq_event_percpu(struct irq_desc
>  
>  	add_interrupt_randomness(irq, flags);
>  
> +	if (unlikely(desc->istate & IRQS_SUSPENDED) && retval == IRQ_NONE)
> +		retval = IRQ_HANDLED;
> +
>  	if (!noirqdebug)
>  		note_interrupt(irq, desc, retval);
>  	return retval;
> --- a/kernel/irq/internals.h
> +++ b/kernel/irq/internals.h
> @@ -63,8 +63,10 @@ enum {
>  
>  extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
>  		unsigned long flags);
> -extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
> -extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
> +
> +extern irqreturn_t
> +do_irqaction(struct irq_desc *desc, struct irqaction *action,
> +	     unsigned int irq, void *dev_id);
>  
>  extern int irq_startup(struct irq_desc *desc, bool resend);
>  extern void irq_shutdown(struct irq_desc *desc);
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -382,15 +382,8 @@ setup_affinity(unsigned int irq, struct
>  }
>  #endif
>  
> -void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
> +void __disable_irq(struct irq_desc *desc, unsigned int irq)
>  {
> -	if (suspend) {
> -		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND) ||
> -		    irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
> -			return;
> -		desc->istate |= IRQS_SUSPENDED;
> -	}
> -
>  	if (!desc->depth++)
>  		irq_disable(desc);
>  }
> @@ -402,7 +395,7 @@ static int __disable_irq_nosync(unsigned
>  
>  	if (!desc)
>  		return -EINVAL;
> -	__disable_irq(desc, irq, false);
> +	__disable_irq(desc, irq);
>  	irq_put_desc_busunlock(desc, flags);
>  	return 0;
>  }
> @@ -443,20 +436,8 @@ void disable_irq(unsigned int irq)
>  }
>  EXPORT_SYMBOL(disable_irq);
>  
> -void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
> +void __enable_irq(struct irq_desc *desc, unsigned int irq)
>  {
> -	if (resume) {
> -		if (!(desc->istate & IRQS_SUSPENDED)) {
> -			if (!desc->action)
> -				return;
> -			if (!(desc->action->flags & IRQF_FORCE_RESUME))
> -				return;
> -			/* Pretend that it got disabled ! */
> -			desc->depth++;
> -		}
> -		desc->istate &= ~IRQS_SUSPENDED;
> -	}
> -
>  	switch (desc->depth) {
>  	case 0:
>   err_out:
> @@ -498,7 +479,7 @@ void enable_irq(unsigned int irq)
>  		 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
>  		goto out;
>  
> -	__enable_irq(desc, irq, false);
> +	__enable_irq(desc, irq);
>  out:
>  	irq_put_desc_busunlock(desc, flags);
>  }
> @@ -1079,7 +1060,7 @@ __setup_irq(unsigned int irq, struct irq
>  		 */
>  
>  #define IRQF_MISMATCH \
> -	(IRQF_TRIGGER_MASK | IRQF_ONESHOT | IRQF_NO_SUSPEND)
> +	(IRQF_TRIGGER_MASK | IRQF_ONESHOT)
>  
>  		if (!((old->flags & new->flags) & IRQF_SHARED) ||
>  		    ((old->flags ^ new->flags) & IRQF_MISMATCH))
> @@ -1232,7 +1213,7 @@ __setup_irq(unsigned int irq, struct irq
>  	 */
>  	if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) {
>  		desc->istate &= ~IRQS_SPURIOUS_DISABLED;
> -		__enable_irq(desc, irq, false);
> +		__enable_irq(desc, irq);
>  	}
>  
>  	raw_spin_unlock_irqrestore(&desc->lock, flags);
> --- a/kernel/irq/pm.c
> +++ b/kernel/irq/pm.c
> @@ -29,14 +29,20 @@ void suspend_device_irqs(void)
>  	for_each_irq_desc(irq, desc) {
>  		unsigned long flags;
>  
> +		/*
> +		 * Ideally this would be a global state, but we cannot
> +		 * for the trainwreck that is IRQD_WAKEUP_STATE.
> +		 */
>  		raw_spin_lock_irqsave(&desc->lock, flags);
> -		__disable_irq(desc, irq, true);
> +		if (!irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
> +			desc->istate |= IRQS_SUSPENDED;
>  		raw_spin_unlock_irqrestore(&desc->lock, flags);
>  	}
>  
> -	for_each_irq_desc(irq, desc)
> +	for_each_irq_desc(irq, desc) {
>  		if (desc->istate & IRQS_SUSPENDED)
>  			synchronize_irq(irq);
> +	}
>  }
>  EXPORT_SYMBOL_GPL(suspend_device_irqs);
>  
> @@ -47,14 +53,9 @@ static void resume_irqs(bool want_early)
>  
>  	for_each_irq_desc(irq, desc) {
>  		unsigned long flags;
> -		bool is_early = desc->action &&
> -			desc->action->flags & IRQF_EARLY_RESUME;
> -
> -		if (!is_early && want_early)
> -			continue;
>  
>  		raw_spin_lock_irqsave(&desc->lock, flags);
> -		__enable_irq(desc, irq, true);
> +		desc->istate &= ~IRQS_SUSPENDED;
>  		raw_spin_unlock_irqrestore(&desc->lock, flags);
>  	}
>  }
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-24 21:26 [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED Peter Zijlstra
  2014-07-24 22:02 ` Rafael J. Wysocki
@ 2014-07-24 23:10 ` Rafael J. Wysocki
  2014-07-25  5:58   ` Peter Zijlstra
  2014-07-25  9:27   ` Thomas Gleixner
  2014-07-25  9:40 ` Thomas Gleixner
  2 siblings, 2 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-24 23:10 UTC (permalink / raw)
  To: Peter Zijlstra, Thomas Gleixner
  Cc: linux-kernel, Dmitry Torokhov, Linux PM list, Dmitry Torokhov

On Thursday, July 24, 2014 11:26:20 PM Peter Zijlstra wrote:
> Subject: irq: Rework IRQF_NO_SUSPENDED
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Thu Jul 24 22:34:50 CEST 2014
> 
> Typically when devices are suspended they're quiesced such that they
> will not generate any further interrupts.
> 
> However some devices should still generate interrupts, even when
> suspended, typically used to wake the machine back up.
> 
> Such logic should ideally be contained inside each driver, if it can
> generate interrupts when suspended, it knows about this and the
> interrupt handler can deal with it.
> 
> Except of course for shared interrupts, when such a wakeup device is
> sharing an interrupt line with a device that does not expect
> interrupts while suspended things can go funny.
> 
> This is where IRQF_NO_SUSPEND comes in, the idea is that drivers that
> have the capability to wake the machine set IRQF_NO_SUSPEND and their
> handler will still be called, even when the IRQ subsystem is formally
> suspended. Handlers without IRQF_NO_SUSPEND will not be called.
> 
> This replaced the prior implementation of IRQF_NO_SUSPEND which had
> a number of fatal issues in that it didn't actually work for the
> shared case, exactly the case it should be helping.
> 
> There is still enable_irq_wake()/IRQD_WAKEUP_STATE that tries to serve
> a similar purpose but is equially wrecked for shared interrupts,
> ideally this would be removed.

Let me comment about this particular thing.

I had a discussion with Dmitry about that and his argument was that
enable_irq_wake() should imply IRQF_NO_SUSPEND, because drivers that
set up interrupts for system wakeup should expect those interrupts to
trigger at any time, including system suspend.  Hence the patch that
added the IRQD_WAKEUP_STATE check to __disable_irq().

However, in the face of the problem that is being addressed here I'm
not really sure that this argument is valid, because if the driver
calling enable_irq_wake() is sharing the IRQ with another one, the
other driver may not actually know that the IRQ will be a wakeup one
and still may not expect interrupts to come to it during system
suspend/resume.

Yes, drivers using enable_irq_wake() will likely want IRQF_NO_SUSPEND to
be set for their irqactions, but that should not imply "no suspend" for
all irqactions sharing the same desc.  So I guess it may be better to go
forth and use a global "interrupts suspended" state variable instead of the
IRQS_SUSPENDED flag for each desc and throw away the IRQD_WAKEUP_STATE
check from suspend_device_irqs() entirely.

Peter, it looks like you'd prefer that?

Rafael


> Cc: rjw@rjwysocki.net
> Signed-off-by: Peter Zijlstra <peterz@infradead.org>
> ---
>  kernel/irq/chip.c      |    4 +---
>  kernel/irq/handle.c    |   24 +++++++++++++++++++++---
>  kernel/irq/internals.h |    6 ++++--
>  kernel/irq/manage.c    |   31 ++++++-------------------------
>  kernel/irq/pm.c        |   17 +++++++++--------
>  5 files changed, 41 insertions(+), 41 deletions(-)
> 
> --- a/kernel/irq/chip.c
> +++ b/kernel/irq/chip.c
> @@ -677,9 +677,7 @@ void handle_percpu_devid_irq(unsigned in
>  	if (chip->irq_ack)
>  		chip->irq_ack(&desc->irq_data);
>  
> -	trace_irq_handler_entry(irq, action);
> -	res = action->handler(irq, dev_id);
> -	trace_irq_handler_exit(irq, action, res);
> +	res = do_irqaction(desc, action, irq, dev_id);
>  
>  	if (chip->irq_eoi)
>  		chip->irq_eoi(&desc->irq_data);
> --- a/kernel/irq/handle.c
> +++ b/kernel/irq/handle.c
> @@ -131,6 +131,23 @@ void __irq_wake_thread(struct irq_desc *
>  }
>  
>  irqreturn_t
> +do_irqaction(struct irq_desc *desc, struct irqaction *action,
> +	     unsigned int irq, void *dev_id)
> +{
> +	irqreturn_t ret;
> +
> +	if (unlikely((desc->istate & IRQS_SUSPENDED) &&
> +		     !(action->flags & IRQF_NO_SUSPEND)))
> +		return IRQ_NONE;
> +
> +	trace_irq_handler_entry(irq, action);
> +	ret = action->handler(irq, dev_id);
> +	trace_irq_handler_exit(irq, action, ret);
> +
> +	return ret;
> +}
> +
> +irqreturn_t
>  handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
>  {
>  	irqreturn_t retval = IRQ_NONE;
> @@ -139,9 +156,7 @@ handle_irq_event_percpu(struct irq_desc
>  	do {
>  		irqreturn_t res;
>  
> -		trace_irq_handler_entry(irq, action);
> -		res = action->handler(irq, action->dev_id);
> -		trace_irq_handler_exit(irq, action, res);
> +		res = do_irqaction(desc, action, irq, action->dev_id);
>  
>  		if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
>  			      irq, action->handler))
> @@ -175,6 +190,9 @@ handle_irq_event_percpu(struct irq_desc
>  
>  	add_interrupt_randomness(irq, flags);
>  
> +	if (unlikely(desc->istate & IRQS_SUSPENDED) && retval == IRQ_NONE)
> +		retval = IRQ_HANDLED;
> +
>  	if (!noirqdebug)
>  		note_interrupt(irq, desc, retval);
>  	return retval;
> --- a/kernel/irq/internals.h
> +++ b/kernel/irq/internals.h
> @@ -63,8 +63,10 @@ enum {
>  
>  extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
>  		unsigned long flags);
> -extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
> -extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
> +
> +extern irqreturn_t
> +do_irqaction(struct irq_desc *desc, struct irqaction *action,
> +	     unsigned int irq, void *dev_id);
>  
>  extern int irq_startup(struct irq_desc *desc, bool resend);
>  extern void irq_shutdown(struct irq_desc *desc);
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -382,15 +382,8 @@ setup_affinity(unsigned int irq, struct
>  }
>  #endif
>  
> -void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
> +void __disable_irq(struct irq_desc *desc, unsigned int irq)
>  {
> -	if (suspend) {
> -		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND) ||
> -		    irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
> -			return;
> -		desc->istate |= IRQS_SUSPENDED;
> -	}
> -
>  	if (!desc->depth++)
>  		irq_disable(desc);
>  }
> @@ -402,7 +395,7 @@ static int __disable_irq_nosync(unsigned
>  
>  	if (!desc)
>  		return -EINVAL;
> -	__disable_irq(desc, irq, false);
> +	__disable_irq(desc, irq);
>  	irq_put_desc_busunlock(desc, flags);
>  	return 0;
>  }
> @@ -443,20 +436,8 @@ void disable_irq(unsigned int irq)
>  }
>  EXPORT_SYMBOL(disable_irq);
>  
> -void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
> +void __enable_irq(struct irq_desc *desc, unsigned int irq)
>  {
> -	if (resume) {
> -		if (!(desc->istate & IRQS_SUSPENDED)) {
> -			if (!desc->action)
> -				return;
> -			if (!(desc->action->flags & IRQF_FORCE_RESUME))
> -				return;
> -			/* Pretend that it got disabled ! */
> -			desc->depth++;
> -		}
> -		desc->istate &= ~IRQS_SUSPENDED;
> -	}
> -
>  	switch (desc->depth) {
>  	case 0:
>   err_out:
> @@ -498,7 +479,7 @@ void enable_irq(unsigned int irq)
>  		 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
>  		goto out;
>  
> -	__enable_irq(desc, irq, false);
> +	__enable_irq(desc, irq);
>  out:
>  	irq_put_desc_busunlock(desc, flags);
>  }
> @@ -1079,7 +1060,7 @@ __setup_irq(unsigned int irq, struct irq
>  		 */
>  
>  #define IRQF_MISMATCH \
> -	(IRQF_TRIGGER_MASK | IRQF_ONESHOT | IRQF_NO_SUSPEND)
> +	(IRQF_TRIGGER_MASK | IRQF_ONESHOT)
>  
>  		if (!((old->flags & new->flags) & IRQF_SHARED) ||
>  		    ((old->flags ^ new->flags) & IRQF_MISMATCH))
> @@ -1232,7 +1213,7 @@ __setup_irq(unsigned int irq, struct irq
>  	 */
>  	if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) {
>  		desc->istate &= ~IRQS_SPURIOUS_DISABLED;
> -		__enable_irq(desc, irq, false);
> +		__enable_irq(desc, irq);
>  	}
>  
>  	raw_spin_unlock_irqrestore(&desc->lock, flags);
> --- a/kernel/irq/pm.c
> +++ b/kernel/irq/pm.c
> @@ -29,14 +29,20 @@ void suspend_device_irqs(void)
>  	for_each_irq_desc(irq, desc) {
>  		unsigned long flags;
>  
> +		/*
> +		 * Ideally this would be a global state, but we cannot
> +		 * for the trainwreck that is IRQD_WAKEUP_STATE.
> +		 */
>  		raw_spin_lock_irqsave(&desc->lock, flags);
> -		__disable_irq(desc, irq, true);
> +		if (!irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
> +			desc->istate |= IRQS_SUSPENDED;
>  		raw_spin_unlock_irqrestore(&desc->lock, flags);
>  	}
>  
> -	for_each_irq_desc(irq, desc)
> +	for_each_irq_desc(irq, desc) {
>  		if (desc->istate & IRQS_SUSPENDED)
>  			synchronize_irq(irq);
> +	}
>  }
>  EXPORT_SYMBOL_GPL(suspend_device_irqs);
>  
> @@ -47,14 +53,9 @@ static void resume_irqs(bool want_early)
>  
>  	for_each_irq_desc(irq, desc) {
>  		unsigned long flags;
> -		bool is_early = desc->action &&
> -			desc->action->flags & IRQF_EARLY_RESUME;
> -
> -		if (!is_early && want_early)
> -			continue;
>  
>  		raw_spin_lock_irqsave(&desc->lock, flags);
> -		__enable_irq(desc, irq, true);
> +		desc->istate &= ~IRQS_SUSPENDED;
>  		raw_spin_unlock_irqrestore(&desc->lock, flags);
>  	}
>  }
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-24 23:10 ` Rafael J. Wysocki
@ 2014-07-25  5:58   ` Peter Zijlstra
  2014-07-29 19:20     ` Brian Norris
  2014-07-25  9:27   ` Thomas Gleixner
  1 sibling, 1 reply; 75+ messages in thread
From: Peter Zijlstra @ 2014-07-25  5:58 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Thomas Gleixner, linux-kernel, Dmitry Torokhov, Linux PM list,
	Dmitry Torokhov

On Fri, Jul 25, 2014 at 01:10:36AM +0200, Rafael J. Wysocki wrote:
> > There is still enable_irq_wake()/IRQD_WAKEUP_STATE that tries to serve
> > a similar purpose but is equially wrecked for shared interrupts,
> > ideally this would be removed.
> 
> Let me comment about this particular thing.
> 
> I had a discussion with Dmitry about that and his argument was that
> enable_irq_wake() should imply IRQF_NO_SUSPEND, because drivers that
> set up interrupts for system wakeup should expect those interrupts to
> trigger at any time, including system suspend.  Hence the patch that
> added the IRQD_WAKEUP_STATE check to __disable_irq().
> 
> However, in the face of the problem that is being addressed here I'm
> not really sure that this argument is valid, because if the driver
> calling enable_irq_wake() is sharing the IRQ with another one, the
> other driver may not actually know that the IRQ will be a wakeup one
> and still may not expect interrupts to come to it during system
> suspend/resume.
> 
> Yes, drivers using enable_irq_wake() will likely want IRQF_NO_SUSPEND to
> be set for their irqactions, but that should not imply "no suspend" for
> all irqactions sharing the same desc.  So I guess it may be better to go
> forth and use a global "interrupts suspended" state variable instead of the
> IRQS_SUSPENDED flag for each desc and throw away the IRQD_WAKEUP_STATE
> check from suspend_device_irqs() entirely.
> 
> Peter, it looks like you'd prefer that?

My preference would be to shoot enable_irq_wake() in the head, its
fundamentally broken.

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-24 23:10 ` Rafael J. Wysocki
  2014-07-25  5:58   ` Peter Zijlstra
@ 2014-07-25  9:27   ` Thomas Gleixner
  2014-07-25 12:49     ` Rafael J. Wysocki
  1 sibling, 1 reply; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-25  9:27 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Peter Zijlstra, linux-kernel, Dmitry Torokhov, Linux PM list,
	Dmitry Torokhov

On Fri, 25 Jul 2014, Rafael J. Wysocki wrote:
> On Thursday, July 24, 2014 11:26:20 PM Peter Zijlstra wrote:
> > Subject: irq: Rework IRQF_NO_SUSPENDED
> > From: Peter Zijlstra <peterz@infradead.org>
> > Date: Thu Jul 24 22:34:50 CEST 2014
> > 
> > Typically when devices are suspended they're quiesced such that they
> > will not generate any further interrupts.
> > 
> > However some devices should still generate interrupts, even when
> > suspended, typically used to wake the machine back up.
> > 
> > Such logic should ideally be contained inside each driver, if it can
> > generate interrupts when suspended, it knows about this and the
> > interrupt handler can deal with it.
> > 
> > Except of course for shared interrupts, when such a wakeup device is
> > sharing an interrupt line with a device that does not expect
> > interrupts while suspended things can go funny.
> > 
> > This is where IRQF_NO_SUSPEND comes in, the idea is that drivers that
> > have the capability to wake the machine set IRQF_NO_SUSPEND and their
> > handler will still be called, even when the IRQ subsystem is formally
> > suspended. Handlers without IRQF_NO_SUSPEND will not be called.
> > 
> > This replaced the prior implementation of IRQF_NO_SUSPEND which had
> > a number of fatal issues in that it didn't actually work for the
> > shared case, exactly the case it should be helping.
> > 
> > There is still enable_irq_wake()/IRQD_WAKEUP_STATE that tries to serve
> > a similar purpose but is equially wrecked for shared interrupts,
> > ideally this would be removed.
> 
> Let me comment about this particular thing.
> 
> I had a discussion with Dmitry about that and his argument was that
> enable_irq_wake() should imply IRQF_NO_SUSPEND, because drivers that
> set up interrupts for system wakeup should expect those interrupts to
> trigger at any time, including system suspend.  Hence the patch that
> added the IRQD_WAKEUP_STATE check to __disable_irq().
> 
> However, in the face of the problem that is being addressed here I'm
> not really sure that this argument is valid, because if the driver
> calling enable_irq_wake() is sharing the IRQ with another one, the
> other driver may not actually know that the IRQ will be a wakeup one
> and still may not expect interrupts to come to it during system
> suspend/resume.
> 
> Yes, drivers using enable_irq_wake() will likely want IRQF_NO_SUSPEND to
> be set for their irqactions, but that should not imply "no suspend" for
> all irqactions sharing the same desc.  So I guess it may be better to go
> forth and use a global "interrupts suspended" state variable instead of the
> IRQS_SUSPENDED flag for each desc and throw away the IRQD_WAKEUP_STATE
> check from suspend_device_irqs() entirely.

How should that global state work?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-24 21:26 [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED Peter Zijlstra
  2014-07-24 22:02 ` Rafael J. Wysocki
  2014-07-24 23:10 ` Rafael J. Wysocki
@ 2014-07-25  9:40 ` Thomas Gleixner
  2014-07-25 12:40   ` Peter Zijlstra
  2014-07-25 12:47   ` [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED Rafael J. Wysocki
  2 siblings, 2 replies; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-25  9:40 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: rjw, linux-kernel

On Thu, 24 Jul 2014, Peter Zijlstra wrote:
> @@ -29,14 +29,20 @@ void suspend_device_irqs(void)
>  	for_each_irq_desc(irq, desc) {
>  		unsigned long flags;
>  
> +		/*
> +		 * Ideally this would be a global state, but we cannot
> +		 * for the trainwreck that is IRQD_WAKEUP_STATE.
> +		 */
>  		raw_spin_lock_irqsave(&desc->lock, flags);
> -		__disable_irq(desc, irq, true);
> +		if (!irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
> +			desc->istate |= IRQS_SUSPENDED;
>  		raw_spin_unlock_irqrestore(&desc->lock, flags);
>  	}
>  
> -	for_each_irq_desc(irq, desc)
> +	for_each_irq_desc(irq, desc) {
>  		if (desc->istate & IRQS_SUSPENDED)
>  			synchronize_irq(irq);
> +	}
>  }

So, instead of disabling the interrupt you just mark it
suspended. Good luck with level triggered interrupt lines then.

Assume the interrupt fires after you marked it suspended. Then the
flow handler will call handle_irq_event() which will do nothing and
return handled. So the flow handler will reenable the interrupt line,
which will cause the interrupt to fire immediately again after the
RETI. Guess how much progress the system is going to make when that
happens.

Thanks,

	tglx



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25  9:40 ` Thomas Gleixner
@ 2014-07-25 12:40   ` Peter Zijlstra
  2014-07-25 13:25     ` Peter Zijlstra
  2014-07-25 12:47   ` [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED Rafael J. Wysocki
  1 sibling, 1 reply; 75+ messages in thread
From: Peter Zijlstra @ 2014-07-25 12:40 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: rjw, linux-kernel

On Fri, Jul 25, 2014 at 11:40:48AM +0200, Thomas Gleixner wrote:
> On Thu, 24 Jul 2014, Peter Zijlstra wrote:
> > @@ -29,14 +29,20 @@ void suspend_device_irqs(void)
> >  	for_each_irq_desc(irq, desc) {
> >  		unsigned long flags;
> >  
> > +		/*
> > +		 * Ideally this would be a global state, but we cannot
> > +		 * for the trainwreck that is IRQD_WAKEUP_STATE.
> > +		 */
> >  		raw_spin_lock_irqsave(&desc->lock, flags);
> > -		__disable_irq(desc, irq, true);
> > +		if (!irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
> > +			desc->istate |= IRQS_SUSPENDED;
> >  		raw_spin_unlock_irqrestore(&desc->lock, flags);
> >  	}
> >  
> > -	for_each_irq_desc(irq, desc)
> > +	for_each_irq_desc(irq, desc) {
> >  		if (desc->istate & IRQS_SUSPENDED)
> >  			synchronize_irq(irq);
> > +	}
> >  }
> 
> So, instead of disabling the interrupt you just mark it
> suspended. Good luck with level triggered interrupt lines then.
> 
> Assume the interrupt fires after you marked it suspended. Then the
> flow handler will call handle_irq_event() which will do nothing and
> return handled. So the flow handler will reenable the interrupt line,
> which will cause the interrupt to fire immediately again after the
> RETI. Guess how much progress the system is going to make when that
> happens.

Urgh, right. I knew it was too easy. Can we have do_irqhandler() ACK the
interrupt and not call the handler?

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25  9:40 ` Thomas Gleixner
  2014-07-25 12:40   ` Peter Zijlstra
@ 2014-07-25 12:47   ` Rafael J. Wysocki
  2014-07-25 13:22     ` Peter Zijlstra
  1 sibling, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-25 12:47 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Friday, July 25, 2014 11:40:48 AM Thomas Gleixner wrote:
> On Thu, 24 Jul 2014, Peter Zijlstra wrote:
> > @@ -29,14 +29,20 @@ void suspend_device_irqs(void)
> >  	for_each_irq_desc(irq, desc) {
> >  		unsigned long flags;
> >  
> > +		/*
> > +		 * Ideally this would be a global state, but we cannot
> > +		 * for the trainwreck that is IRQD_WAKEUP_STATE.
> > +		 */
> >  		raw_spin_lock_irqsave(&desc->lock, flags);
> > -		__disable_irq(desc, irq, true);
> > +		if (!irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
> > +			desc->istate |= IRQS_SUSPENDED;
> >  		raw_spin_unlock_irqrestore(&desc->lock, flags);
> >  	}
> >  
> > -	for_each_irq_desc(irq, desc)
> > +	for_each_irq_desc(irq, desc) {
> >  		if (desc->istate & IRQS_SUSPENDED)
> >  			synchronize_irq(irq);
> > +	}
> >  }
> 
> So, instead of disabling the interrupt you just mark it
> suspended. Good luck with level triggered interrupt lines then.
> 
> Assume the interrupt fires after you marked it suspended. Then the
> flow handler will call handle_irq_event() which will do nothing and
> return handled. So the flow handler will reenable the interrupt line,
> which will cause the interrupt to fire immediately again after the
> RETI. Guess how much progress the system is going to make when that
> happens.

Good point.

So it looks like we really need the "suspend" thing to either disable
the interrupt entirely (in which case all handlers for all actions
will not be invoked after it's been suspended) or leave it enabled
(causing all handlers to be invoked all the time).

I'm not sure if we can do much beyond what's already in your tree
about that, then.

Well, perhaps the patch failing the irq request in case of the
IRQF_NO_SUSPEND mismatch is a bit too drastic.  Instead, we could
just print a warning in __setup_irq() in that case and then check
IRQF_NO_SUSPEND for all actions in __disable_irq().

What about this?

---
 kernel/irq/manage.c |   22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -385,8 +385,17 @@ setup_affinity(unsigned int irq, struct
 void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
 {
 	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)
-		    || irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
+		struct irqaction *action = desc->action;
+		unsigned int flags;
+
+		if (!action || irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
+			return;
+		flags = IRQF_NO_SUSPEND;
+		do {
+			flags &= action->flags;
+			action = action->next;
+		} while (action);
+		if (flags)
 			return;
 		desc->istate |= IRQS_SUSPENDED;
 	}
@@ -1079,7 +1088,7 @@ __setup_irq(unsigned int irq, struct irq
 		 */
 
 #define IRQF_MISMATCH \
-	(IRQF_TRIGGER_MASK | IRQF_ONESHOT | IRQF_NO_SUSPEND)
+	(IRQF_TRIGGER_MASK | IRQF_ONESHOT)
 
 		if (!((old->flags & new->flags) & IRQF_SHARED) ||
 		    ((old->flags ^ new->flags) & IRQF_MISMATCH))
@@ -1090,6 +1099,13 @@ __setup_irq(unsigned int irq, struct irq
 		    (new->flags & IRQF_PERCPU))
 			goto mismatch;
 
+		if ((old->flags & IRQF_NO_SUSPEND) !=
+		    (new->flags & IRQF_NO_SUSPEND)) {
+			pr_err("irq %d: IRQF_NO_SUSPEND mismatch %08x (%s) vs. %08x (%s)\n",
+			       irq, new->flags, new->name, old->flags, old->name);
+			pr_err("irq %d: IRQF_NO_SUSPEND will be ignored\n");
+		}
+
 		/* add new interrupt at end of irq queue */
 		do {
 			/*


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25  9:27   ` Thomas Gleixner
@ 2014-07-25 12:49     ` Rafael J. Wysocki
  2014-07-25 13:55       ` Thomas Gleixner
  0 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-25 12:49 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Dmitry Torokhov, Linux PM list,
	Dmitry Torokhov

On Friday, July 25, 2014 11:27:25 AM Thomas Gleixner wrote:
> On Fri, 25 Jul 2014, Rafael J. Wysocki wrote:
> > On Thursday, July 24, 2014 11:26:20 PM Peter Zijlstra wrote:
> > > Subject: irq: Rework IRQF_NO_SUSPENDED
> > > From: Peter Zijlstra <peterz@infradead.org>
> > > Date: Thu Jul 24 22:34:50 CEST 2014
> > > 
> > > Typically when devices are suspended they're quiesced such that they
> > > will not generate any further interrupts.
> > > 
> > > However some devices should still generate interrupts, even when
> > > suspended, typically used to wake the machine back up.
> > > 
> > > Such logic should ideally be contained inside each driver, if it can
> > > generate interrupts when suspended, it knows about this and the
> > > interrupt handler can deal with it.
> > > 
> > > Except of course for shared interrupts, when such a wakeup device is
> > > sharing an interrupt line with a device that does not expect
> > > interrupts while suspended things can go funny.
> > > 
> > > This is where IRQF_NO_SUSPEND comes in, the idea is that drivers that
> > > have the capability to wake the machine set IRQF_NO_SUSPEND and their
> > > handler will still be called, even when the IRQ subsystem is formally
> > > suspended. Handlers without IRQF_NO_SUSPEND will not be called.
> > > 
> > > This replaced the prior implementation of IRQF_NO_SUSPEND which had
> > > a number of fatal issues in that it didn't actually work for the
> > > shared case, exactly the case it should be helping.
> > > 
> > > There is still enable_irq_wake()/IRQD_WAKEUP_STATE that tries to serve
> > > a similar purpose but is equially wrecked for shared interrupts,
> > > ideally this would be removed.
> > 
> > Let me comment about this particular thing.
> > 
> > I had a discussion with Dmitry about that and his argument was that
> > enable_irq_wake() should imply IRQF_NO_SUSPEND, because drivers that
> > set up interrupts for system wakeup should expect those interrupts to
> > trigger at any time, including system suspend.  Hence the patch that
> > added the IRQD_WAKEUP_STATE check to __disable_irq().
> > 
> > However, in the face of the problem that is being addressed here I'm
> > not really sure that this argument is valid, because if the driver
> > calling enable_irq_wake() is sharing the IRQ with another one, the
> > other driver may not actually know that the IRQ will be a wakeup one
> > and still may not expect interrupts to come to it during system
> > suspend/resume.
> > 
> > Yes, drivers using enable_irq_wake() will likely want IRQF_NO_SUSPEND to
> > be set for their irqactions, but that should not imply "no suspend" for
> > all irqactions sharing the same desc.  So I guess it may be better to go
> > forth and use a global "interrupts suspended" state variable instead of the
> > IRQS_SUSPENDED flag for each desc and throw away the IRQD_WAKEUP_STATE
> > check from suspend_device_irqs() entirely.
> 
> How should that global state work?

Well, never mind.

If the face of your remark about the level-triggered interrupts that's all moot.

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25 12:47   ` [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED Rafael J. Wysocki
@ 2014-07-25 13:22     ` Peter Zijlstra
  0 siblings, 0 replies; 75+ messages in thread
From: Peter Zijlstra @ 2014-07-25 13:22 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Thomas Gleixner, linux-kernel, Linux PM list

On Fri, Jul 25, 2014 at 02:47:25PM +0200, Rafael J. Wysocki wrote:
> So it looks like we really need the "suspend" thing to either disable
> the interrupt entirely (in which case all handlers for all actions
> will not be invoked after it's been suspended) or leave it enabled
> (causing all handlers to be invoked all the time).
> 
> I'm not sure if we can do much beyond what's already in your tree
> about that, then.
> 
> Well, perhaps the patch failing the irq request in case of the
> IRQF_NO_SUSPEND mismatch is a bit too drastic.  Instead, we could
> just print a warning in __setup_irq() in that case and then check
> IRQF_NO_SUSPEND for all actions in __disable_irq().
> 
> What about this?

I suppose, not failing the request_irq allows the machine to boot and
mostly work, any funnies will be when the machine gets suspended.

And disabling suspend might be overkill because it might just all work
anyhow because the user didn't need that one particular wakeup source.

What a terrible mess this.


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25 12:40   ` Peter Zijlstra
@ 2014-07-25 13:25     ` Peter Zijlstra
  2014-07-25 17:03       ` Rafael J. Wysocki
  0 siblings, 1 reply; 75+ messages in thread
From: Peter Zijlstra @ 2014-07-25 13:25 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: rjw, linux-kernel

On Fri, Jul 25, 2014 at 02:40:37PM +0200, Peter Zijlstra wrote:
> On Fri, Jul 25, 2014 at 11:40:48AM +0200, Thomas Gleixner wrote:
> > On Thu, 24 Jul 2014, Peter Zijlstra wrote:
> > > @@ -29,14 +29,20 @@ void suspend_device_irqs(void)
> > >  	for_each_irq_desc(irq, desc) {
> > >  		unsigned long flags;
> > >  
> > > +		/*
> > > +		 * Ideally this would be a global state, but we cannot
> > > +		 * for the trainwreck that is IRQD_WAKEUP_STATE.
> > > +		 */
> > >  		raw_spin_lock_irqsave(&desc->lock, flags);
> > > -		__disable_irq(desc, irq, true);
> > > +		if (!irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
> > > +			desc->istate |= IRQS_SUSPENDED;
> > >  		raw_spin_unlock_irqrestore(&desc->lock, flags);
> > >  	}
> > >  
> > > -	for_each_irq_desc(irq, desc)
> > > +	for_each_irq_desc(irq, desc) {
> > >  		if (desc->istate & IRQS_SUSPENDED)
> > >  			synchronize_irq(irq);
> > > +	}
> > >  }
> > 
> > So, instead of disabling the interrupt you just mark it
> > suspended. Good luck with level triggered interrupt lines then.
> > 
> > Assume the interrupt fires after you marked it suspended. Then the
> > flow handler will call handle_irq_event() which will do nothing and
> > return handled. So the flow handler will reenable the interrupt line,
> > which will cause the interrupt to fire immediately again after the
> > RETI. Guess how much progress the system is going to make when that
> > happens.
> 
> Urgh, right. I knew it was too easy. Can we have do_irqhandler() ACK the
> interrupt and not call the handler?

OK, so Rafael said there's devices that keep on raising their interrupt
until they get attention. Ideally this won't happen because the device
is suspended etc.. But I'm sure there's some broken piece of hardware
out there that'll make it go boom.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25 12:49     ` Rafael J. Wysocki
@ 2014-07-25 13:55       ` Thomas Gleixner
  0 siblings, 0 replies; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-25 13:55 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Peter Zijlstra, linux-kernel, Dmitry Torokhov, Linux PM list,
	Dmitry Torokhov

On Fri, 25 Jul 2014, Rafael J. Wysocki wrote:
> If the face of your remark about the level-triggered interrupts that's all moot.

It's not only level. That you notice right away.

But for edge you lose the edge and some devices will wait forever that
their irq is handled. Not fun to debug :)

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25 17:03       ` Rafael J. Wysocki
@ 2014-07-25 16:58         ` Peter Zijlstra
  2014-07-25 21:00         ` Thomas Gleixner
  1 sibling, 0 replies; 75+ messages in thread
From: Peter Zijlstra @ 2014-07-25 16:58 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Thomas Gleixner, linux-kernel, Linux PM list

[-- Attachment #1: Type: text/plain, Size: 896 bytes --]

On Fri, Jul 25, 2014 at 07:03:38PM +0200, Rafael J. Wysocki wrote:
> So here's an idea.
> 
> What about returning IRQ_NONE rather than IRQ_HANDLED for "suspended"
> interrupts (after all, that's what a sane driver would do for a
> suspended device I suppose)?
> 
> If the line is really shared and the interrupt is taken care of by
> the other guy sharing the line, we'll be all fine.
> 
> If that is not the case, on the other hand, and something's really
> broken, we'll end up disabling the interrupt and marking it as
> IRQS_SPURIOUS_DISABLED (if I understand things correctly).
> 
> But then, we can re-enable it and clear the IRQS_SPURIOUS_DISABLED
> flag at the resume_device_irqs() time so the driver can use it again.
> And we'll have a trace of the breakage in dmesg, so possibly we can
> go forth and fix the bad guy.
> 
> Would that make sense?

Works for me.. 

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25 13:25     ` Peter Zijlstra
@ 2014-07-25 17:03       ` Rafael J. Wysocki
  2014-07-25 16:58         ` Peter Zijlstra
  2014-07-25 21:00         ` Thomas Gleixner
  0 siblings, 2 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-25 17:03 UTC (permalink / raw)
  To: Peter Zijlstra, Thomas Gleixner; +Cc: linux-kernel, Linux PM list

On Friday, July 25, 2014 03:25:41 PM Peter Zijlstra wrote:
> On Fri, Jul 25, 2014 at 02:40:37PM +0200, Peter Zijlstra wrote:
> > On Fri, Jul 25, 2014 at 11:40:48AM +0200, Thomas Gleixner wrote:
> > > On Thu, 24 Jul 2014, Peter Zijlstra wrote:
> > > > @@ -29,14 +29,20 @@ void suspend_device_irqs(void)
> > > >  	for_each_irq_desc(irq, desc) {
> > > >  		unsigned long flags;
> > > >  
> > > > +		/*
> > > > +		 * Ideally this would be a global state, but we cannot
> > > > +		 * for the trainwreck that is IRQD_WAKEUP_STATE.
> > > > +		 */
> > > >  		raw_spin_lock_irqsave(&desc->lock, flags);
> > > > -		__disable_irq(desc, irq, true);
> > > > +		if (!irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
> > > > +			desc->istate |= IRQS_SUSPENDED;
> > > >  		raw_spin_unlock_irqrestore(&desc->lock, flags);
> > > >  	}
> > > >  
> > > > -	for_each_irq_desc(irq, desc)
> > > > +	for_each_irq_desc(irq, desc) {
> > > >  		if (desc->istate & IRQS_SUSPENDED)
> > > >  			synchronize_irq(irq);
> > > > +	}
> > > >  }
> > > 
> > > So, instead of disabling the interrupt you just mark it
> > > suspended. Good luck with level triggered interrupt lines then.
> > > 
> > > Assume the interrupt fires after you marked it suspended. Then the
> > > flow handler will call handle_irq_event() which will do nothing and
> > > return handled. So the flow handler will reenable the interrupt line,
> > > which will cause the interrupt to fire immediately again after the
> > > RETI. Guess how much progress the system is going to make when that
> > > happens.
> > 
> > Urgh, right. I knew it was too easy. Can we have do_irqhandler() ACK the
> > interrupt and not call the handler?
> 
> OK, so Rafael said there's devices that keep on raising their interrupt
> until they get attention. Ideally this won't happen because the device
> is suspended etc.. But I'm sure there's some broken piece of hardware
> out there that'll make it go boom.

So here's an idea.

What about returning IRQ_NONE rather than IRQ_HANDLED for "suspended"
interrupts (after all, that's what a sane driver would do for a
suspended device I suppose)?

If the line is really shared and the interrupt is taken care of by
the other guy sharing the line, we'll be all fine.

If that is not the case, on the other hand, and something's really
broken, we'll end up disabling the interrupt and marking it as
IRQS_SPURIOUS_DISABLED (if I understand things correctly).

But then, we can re-enable it and clear the IRQS_SPURIOUS_DISABLED
flag at the resume_device_irqs() time so the driver can use it again.
And we'll have a trace of the breakage in dmesg, so possibly we can
go forth and fix the bad guy.

Would that make sense?

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25 17:03       ` Rafael J. Wysocki
  2014-07-25 16:58         ` Peter Zijlstra
@ 2014-07-25 21:00         ` Thomas Gleixner
  2014-07-25 22:25           ` Rafael J. Wysocki
  1 sibling, 1 reply; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-25 21:00 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Fri, 25 Jul 2014, Rafael J. Wysocki wrote:
> On Friday, July 25, 2014 03:25:41 PM Peter Zijlstra wrote:
> > OK, so Rafael said there's devices that keep on raising their interrupt
> > until they get attention. Ideally this won't happen because the device
> > is suspended etc.. But I'm sure there's some broken piece of hardware
> > out there that'll make it go boom.
> 
> So here's an idea.
> 
> What about returning IRQ_NONE rather than IRQ_HANDLED for "suspended"
> interrupts (after all, that's what a sane driver would do for a
> suspended device I suppose)?
> 
> If the line is really shared and the interrupt is taken care of by
> the other guy sharing the line, we'll be all fine.
> 
> If that is not the case, on the other hand, and something's really
> broken, we'll end up disabling the interrupt and marking it as
> IRQS_SPURIOUS_DISABLED (if I understand things correctly).

We should not wait 100k unhandled interrupts in that case. We know
already at the first unhandled interrupt that the shit hit the fan.

I'll have a deeper look how we can sanitize the whole wake/no_suspend
logic vs. shared interrupts. Need to look at the usage sites first.

Thanks,

	tglx








^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25 21:00         ` Thomas Gleixner
@ 2014-07-25 22:25           ` Rafael J. Wysocki
  2014-07-25 23:07             ` Rafael J. Wysocki
                               ` (2 more replies)
  0 siblings, 3 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-25 22:25 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Friday, July 25, 2014 11:00:12 PM Thomas Gleixner wrote:
> On Fri, 25 Jul 2014, Rafael J. Wysocki wrote:
> > On Friday, July 25, 2014 03:25:41 PM Peter Zijlstra wrote:
> > > OK, so Rafael said there's devices that keep on raising their interrupt
> > > until they get attention. Ideally this won't happen because the device
> > > is suspended etc.. But I'm sure there's some broken piece of hardware
> > > out there that'll make it go boom.
> > 
> > So here's an idea.
> > 
> > What about returning IRQ_NONE rather than IRQ_HANDLED for "suspended"
> > interrupts (after all, that's what a sane driver would do for a
> > suspended device I suppose)?
> > 
> > If the line is really shared and the interrupt is taken care of by
> > the other guy sharing the line, we'll be all fine.
> > 
> > If that is not the case, on the other hand, and something's really
> > broken, we'll end up disabling the interrupt and marking it as
> > IRQS_SPURIOUS_DISABLED (if I understand things correctly).
> 
> We should not wait 100k unhandled interrupts in that case. We know
> already at the first unhandled interrupt that the shit hit the fan.

The first one may be a bus glitch or some such.  Also I guess we still need to
allow the legitimate "no suspend" guy to handle his interrupts until it gets
too worse.

Also does it really hurt to rely on the generic mechanism here?  We regard
it as fine at all other times after all.

> I'll have a deeper look how we can sanitize the whole wake/no_suspend
> logic vs. shared interrupts.

Cool, thanks!

> Need to look at the usage sites first.

There will be more of them, like this:

https://patchwork.kernel.org/patch/4618531/

Essentially, all wakeup interrupts will need at least one no_suspend irqaction
going forward.

Below is my take on this (untested) in case it is useful for anything.

It is targeted at the problematic case (that is, a shared interrupt with at least
one irqaction that has IRQF_NO_SUSPEND set and at least one that doesn't) only and
is not supposed to change behavior in the other cases (the do_irqaction thing
shamelessly stolen from the Peter's patch).  It drops the IRQD_WAKEUP_STATE check,
because that has the same problem with shared interrupts as no_suspend.

Rafael


---
 kernel/irq/handle.c |   21 ++++++++++++++++++---
 kernel/irq/manage.c |   30 +++++++++++++++++++++++++-----
 2 files changed, 43 insertions(+), 8 deletions(-)

Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -385,10 +385,23 @@ setup_affinity(unsigned int irq, struct
 void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
 {
 	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)
-		    || irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
+		struct irqaction *action = desc->action;
+		unsigned int no_suspend, flags;
+
+		if (!action)
+			return;
+		no_suspend = IRQF_NO_SUSPEND;
+		flags = 0;
+		do {
+			no_suspend &= action->flags;
+			flags |= action->flags;
+			action = action->next;
+		} while (action);
+		if (no_suspend)
 			return;
 		desc->istate |= IRQS_SUSPENDED;
+		if (flags & IRQF_NO_SUSPEND)
+			return;
 	}
 
 	if (!desc->depth++)
@@ -446,7 +459,15 @@ EXPORT_SYMBOL(disable_irq);
 void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
 {
 	if (resume) {
-		if (!(desc->istate & IRQS_SUSPENDED)) {
+		if (desc->istate & IRQS_SUSPENDED) {
+			desc->istate &= ~IRQS_SUSPENDED;
+			if (desc->istate & IRQS_SPURIOUS_DISABLED) {
+				pr_err("WARNING! Unhandled events during suspend for IRQ %d\n", irq);
+				desc->istate &= ~IRQS_SPURIOUS_DISABLED;
+			} else if (desc->depth == 0) {
+				return;
+			}
+		} else {
 			if (!desc->action)
 				return;
 			if (!(desc->action->flags & IRQF_FORCE_RESUME))
@@ -454,7 +475,6 @@ void __enable_irq(struct irq_desc *desc,
 			/* Pretend that it got disabled ! */
 			desc->depth++;
 		}
-		desc->istate &= ~IRQS_SUSPENDED;
 	}
 
 	switch (desc->depth) {
@@ -1079,7 +1099,7 @@ __setup_irq(unsigned int irq, struct irq
 		 */
 
 #define IRQF_MISMATCH \
-	(IRQF_TRIGGER_MASK | IRQF_ONESHOT | IRQF_NO_SUSPEND)
+	(IRQF_TRIGGER_MASK | IRQF_ONESHOT)
 
 		if (!((old->flags & new->flags) & IRQF_SHARED) ||
 		    ((old->flags ^ new->flags) & IRQF_MISMATCH))
Index: linux-pm/kernel/irq/handle.c
===================================================================
--- linux-pm.orig/kernel/irq/handle.c
+++ linux-pm/kernel/irq/handle.c
@@ -131,6 +131,23 @@ void __irq_wake_thread(struct irq_desc *
 }
 
 irqreturn_t
+do_irqaction(struct irq_desc *desc, struct irqaction *action,
+	     unsigned int irq, void *dev_id)
+{
+	irqreturn_t ret;
+
+	if (unlikely((desc->istate & IRQS_SUSPENDED) &&
+		     !(action->flags & IRQF_NO_SUSPEND)))
+		return IRQ_NONE;
+
+	trace_irq_handler_entry(irq, action);
+	ret = action->handler(irq, dev_id);
+	trace_irq_handler_exit(irq, action, ret);
+
+	return ret;
+}
+
+irqreturn_t
 handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
 {
 	irqreturn_t retval = IRQ_NONE;
@@ -139,9 +156,7 @@ handle_irq_event_percpu(struct irq_desc
 	do {
 		irqreturn_t res;
 
-		trace_irq_handler_entry(irq, action);
-		res = action->handler(irq, action->dev_id);
-		trace_irq_handler_exit(irq, action, res);
+		res = do_irqaction(desc, action, irq, action->dev_id);
 
 		if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
 			      irq, action->handler))


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25 22:25           ` Rafael J. Wysocki
@ 2014-07-25 23:07             ` Rafael J. Wysocki
  2014-07-26 11:49             ` Rafael J. Wysocki
  2014-07-27 15:53             ` Rafael J. Wysocki
  2 siblings, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-25 23:07 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Saturday, July 26, 2014 12:25:29 AM Rafael J. Wysocki wrote:
> On Friday, July 25, 2014 11:00:12 PM Thomas Gleixner wrote:
> > On Fri, 25 Jul 2014, Rafael J. Wysocki wrote:
> > > On Friday, July 25, 2014 03:25:41 PM Peter Zijlstra wrote:
> > > > OK, so Rafael said there's devices that keep on raising their interrupt
> > > > until they get attention. Ideally this won't happen because the device
> > > > is suspended etc.. But I'm sure there's some broken piece of hardware
> > > > out there that'll make it go boom.
> > > 
> > > So here's an idea.
> > > 
> > > What about returning IRQ_NONE rather than IRQ_HANDLED for "suspended"
> > > interrupts (after all, that's what a sane driver would do for a
> > > suspended device I suppose)?
> > > 
> > > If the line is really shared and the interrupt is taken care of by
> > > the other guy sharing the line, we'll be all fine.
> > > 
> > > If that is not the case, on the other hand, and something's really
> > > broken, we'll end up disabling the interrupt and marking it as
> > > IRQS_SPURIOUS_DISABLED (if I understand things correctly).
> > 
> > We should not wait 100k unhandled interrupts in that case. We know
> > already at the first unhandled interrupt that the shit hit the fan.
> 
> The first one may be a bus glitch or some such.  Also I guess we still need to
> allow the legitimate "no suspend" guy to handle his interrupts until it gets
> too worse.

s/worse/bad/ (ah, grammar).

> Also does it really hurt to rely on the generic mechanism here?  We regard
> it as fine at all other times after all.
> 
> > I'll have a deeper look how we can sanitize the whole wake/no_suspend
> > logic vs. shared interrupts.
> 
> Cool, thanks!
> 
> > Need to look at the usage sites first.
> 
> There will be more of them, like this:
> 
> https://patchwork.kernel.org/patch/4618531/
> 
> Essentially, all wakeup interrupts will need at least one no_suspend irqaction
> going forward.
> 
> Below is my take on this (untested) in case it is useful for anything.
> 
> It is targeted at the problematic case (that is, a shared interrupt with at least
> one irqaction that has IRQF_NO_SUSPEND set and at least one that doesn't) only and
> is not supposed to change behavior in the other cases (the do_irqaction thing
> shamelessly stolen from the Peter's patch).  It drops the IRQD_WAKEUP_STATE check,
> because that has the same problem with shared interrupts as no_suspend.

Self-correction ->

> ---
>  kernel/irq/handle.c |   21 ++++++++++++++++++---
>  kernel/irq/manage.c |   30 +++++++++++++++++++++++++-----
>  2 files changed, 43 insertions(+), 8 deletions(-)
> 
> Index: linux-pm/kernel/irq/manage.c
> ===================================================================
> --- linux-pm.orig/kernel/irq/manage.c
> +++ linux-pm/kernel/irq/manage.c

[cut]

> @@ -446,7 +459,15 @@ EXPORT_SYMBOL(disable_irq);
>  void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
>  {
>  	if (resume) {
> -		if (!(desc->istate & IRQS_SUSPENDED)) {
> +		if (desc->istate & IRQS_SUSPENDED) {
> +			desc->istate &= ~IRQS_SUSPENDED;
> +			if (desc->istate & IRQS_SPURIOUS_DISABLED) {
> +				pr_err("WARNING! Unhandled events during suspend for IRQ %d\n", irq);

-> This should be printed for desc->irqs_unhandled > 0 I suppose.  That will cover
the cases when we don't have to disable it too.  The value of desc->irqs_unhandled
can be included into the warning too.

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25 22:25           ` Rafael J. Wysocki
  2014-07-25 23:07             ` Rafael J. Wysocki
@ 2014-07-26 11:49             ` Rafael J. Wysocki
  2014-07-26 11:53               ` Rafael J. Wysocki
  2014-07-28  6:49               ` Peter Zijlstra
  2014-07-27 15:53             ` Rafael J. Wysocki
  2 siblings, 2 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-26 11:49 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Saturday, July 26, 2014 12:25:29 AM Rafael J. Wysocki wrote:
> On Friday, July 25, 2014 11:00:12 PM Thomas Gleixner wrote:
> > On Fri, 25 Jul 2014, Rafael J. Wysocki wrote:
> > > On Friday, July 25, 2014 03:25:41 PM Peter Zijlstra wrote:
> > > > OK, so Rafael said there's devices that keep on raising their interrupt
> > > > until they get attention. Ideally this won't happen because the device
> > > > is suspended etc.. But I'm sure there's some broken piece of hardware
> > > > out there that'll make it go boom.
> > > 
> > > So here's an idea.
> > > 
> > > What about returning IRQ_NONE rather than IRQ_HANDLED for "suspended"
> > > interrupts (after all, that's what a sane driver would do for a
> > > suspended device I suppose)?
> > > 
> > > If the line is really shared and the interrupt is taken care of by
> > > the other guy sharing the line, we'll be all fine.
> > > 
> > > If that is not the case, on the other hand, and something's really
> > > broken, we'll end up disabling the interrupt and marking it as
> > > IRQS_SPURIOUS_DISABLED (if I understand things correctly).
> > 
> > We should not wait 100k unhandled interrupts in that case. We know
> > already at the first unhandled interrupt that the shit hit the fan.
> 
> The first one may be a bus glitch or some such.  Also I guess we still need to
> allow the legitimate "no suspend" guy to handle his interrupts until it gets
> too worse.
> 
> Also does it really hurt to rely on the generic mechanism here?  We regard
> it as fine at all other times after all.
> 
> > I'll have a deeper look how we can sanitize the whole wake/no_suspend
> > logic vs. shared interrupts.

One more idea, on top of the prototype patch that I posted
(https://patchwork.kernel.org/patch/4625921/).

The problem with enable_irq_wake() is that it only takes one argument, so
if that's a shared interrupt, we can't really say which irqaction is supposed
to handle wakeup interrupts should they occur.

To address this we can introduce enable_device_irq_wake() that will take
an additional dev_id argument (that must be the one passed to request_irq() or
the operation will fail) that can be used to identify the irqaction for
handling the wakeup interrupts.  It can work by setting IRQF_NO_SUSPEND
for the irqaction in question and doing the rest along the lines of
irq_set_irq_wake(irq, 1).  disable_device_irq_wake() will then clear
IRQF_NO_SUSPEND (it also has to be passed the dev_id argument).

If we have that, the guys who need to set up device interrupts (ie. interrupts
normally used for signaling input events etc) for system wakeup will need to
use enable_device_irq_wake() and that should just work.

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-26 11:49             ` Rafael J. Wysocki
@ 2014-07-26 11:53               ` Rafael J. Wysocki
  2014-07-28  6:49               ` Peter Zijlstra
  1 sibling, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-26 11:53 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Saturday, July 26, 2014 01:49:17 PM Rafael J. Wysocki wrote:
> On Saturday, July 26, 2014 12:25:29 AM Rafael J. Wysocki wrote:
> > On Friday, July 25, 2014 11:00:12 PM Thomas Gleixner wrote:
> > > On Fri, 25 Jul 2014, Rafael J. Wysocki wrote:
> > > > On Friday, July 25, 2014 03:25:41 PM Peter Zijlstra wrote:
> > > > > OK, so Rafael said there's devices that keep on raising their interrupt
> > > > > until they get attention. Ideally this won't happen because the device
> > > > > is suspended etc.. But I'm sure there's some broken piece of hardware
> > > > > out there that'll make it go boom.
> > > > 
> > > > So here's an idea.
> > > > 
> > > > What about returning IRQ_NONE rather than IRQ_HANDLED for "suspended"
> > > > interrupts (after all, that's what a sane driver would do for a
> > > > suspended device I suppose)?
> > > > 
> > > > If the line is really shared and the interrupt is taken care of by
> > > > the other guy sharing the line, we'll be all fine.
> > > > 
> > > > If that is not the case, on the other hand, and something's really
> > > > broken, we'll end up disabling the interrupt and marking it as
> > > > IRQS_SPURIOUS_DISABLED (if I understand things correctly).
> > > 
> > > We should not wait 100k unhandled interrupts in that case. We know
> > > already at the first unhandled interrupt that the shit hit the fan.
> > 
> > The first one may be a bus glitch or some such.  Also I guess we still need to
> > allow the legitimate "no suspend" guy to handle his interrupts until it gets
> > too worse.
> > 
> > Also does it really hurt to rely on the generic mechanism here?  We regard
> > it as fine at all other times after all.
> > 
> > > I'll have a deeper look how we can sanitize the whole wake/no_suspend
> > > logic vs. shared interrupts.
> 
> One more idea, on top of the prototype patch that I posted
> (https://patchwork.kernel.org/patch/4625921/).
> 
> The problem with enable_irq_wake() is that it only takes one argument, so
> if that's a shared interrupt, we can't really say which irqaction is supposed
> to handle wakeup interrupts should they occur.
> 
> To address this we can introduce enable_device_irq_wake() that will take
> an additional dev_id argument (that must be the one passed to request_irq() or
> the operation will fail) that can be used to identify the irqaction for
> handling the wakeup interrupts.  It can work by setting IRQF_NO_SUSPEND
> for the irqaction in question and doing the rest along the lines of
> irq_set_irq_wake(irq, 1).  disable_device_irq_wake() will then clear
> IRQF_NO_SUSPEND (it also has to be passed the dev_id argument).
> 
> If we have that, the guys who need to set up device interrupts (ie. interrupts
> normally used for signaling input events etc) for system wakeup will need to
> use enable_device_irq_wake() and that should just work.

That could be used to address the PCIe PME interrupt wakeup and gpio-keys driver
wakeup in the same way at least.

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25 22:25           ` Rafael J. Wysocki
  2014-07-25 23:07             ` Rafael J. Wysocki
  2014-07-26 11:49             ` Rafael J. Wysocki
@ 2014-07-27 15:53             ` Rafael J. Wysocki
  2014-07-27 22:00               ` [PATCH, v2] Rafael J. Wysocki
  2 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-27 15:53 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Saturday, July 26, 2014 12:25:29 AM Rafael J. Wysocki wrote:
> On Friday, July 25, 2014 11:00:12 PM Thomas Gleixner wrote:
> > On Fri, 25 Jul 2014, Rafael J. Wysocki wrote:
> > > On Friday, July 25, 2014 03:25:41 PM Peter Zijlstra wrote:
> > > > OK, so Rafael said there's devices that keep on raising their interrupt
> > > > until they get attention. Ideally this won't happen because the device
> > > > is suspended etc.. But I'm sure there's some broken piece of hardware
> > > > out there that'll make it go boom.
> > > 
> > > So here's an idea.
> > > 
> > > What about returning IRQ_NONE rather than IRQ_HANDLED for "suspended"
> > > interrupts (after all, that's what a sane driver would do for a
> > > suspended device I suppose)?
> > > 
> > > If the line is really shared and the interrupt is taken care of by
> > > the other guy sharing the line, we'll be all fine.
> > > 
> > > If that is not the case, on the other hand, and something's really
> > > broken, we'll end up disabling the interrupt and marking it as
> > > IRQS_SPURIOUS_DISABLED (if I understand things correctly).
> > 
> > We should not wait 100k unhandled interrupts in that case. We know
> > already at the first unhandled interrupt that the shit hit the fan.
> 
> The first one may be a bus glitch or some such.  Also I guess we still need to
> allow the legitimate "no suspend" guy to handle his interrupts until it gets
> too worse.
> 
> Also does it really hurt to rely on the generic mechanism here?  We regard
> it as fine at all other times after all.

Having considered this for some more time, I think that we need to address
the IRQF_NO_SUSPEND problem with shared interrupts regardless of what's
decided about the device wakeup and suspending interrupts, because (1) it's
already there (and the Peter's patch to add IRQF_NO_SUSPEND to "mismatch"
flags may break working systems that aren't even buggy, sorry for noticing
that so late) and (2) we'll need something like IRQF_NO_SUSPEND idependently
of wakeup anyway, for timers and things like the ACPI SCI.

Below is my attempt to do that based on the prototype I sent previously.
It contains a mechanism to disable IRQs generating spurious interrupts
during system suspend/resume faster, but otherwise works along the same
lines.

Rafael


---
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Subject: irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts

Since __disable_irq() only checks IRQF_NO_SUSPEND for the first
irqaction in a given irq_desc, that value of that bit for the
first irqaction affects all of the other irqactions sharing the
interrupt with it.  This is problematic in two cases.

First, if IRQF_NO_SUSPEND is set in the first irqaction and unset
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
unset will be invoked after suspend_device_irqs() has returned even
though they are not supposed to.  That shouldn't be a problem if
those interrupt handlers are implemented correctly and the
corresponding devices are properly suspended, but it may lead to
functional issues otherwise.

Second, if IRQF_NO_SUSPEND is unset in the first irqaction and set
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
set will not be invoked after suspend_device_irqs() has returned, but
they are supposed to be invoked then.  That may be a problem if the
corresponding devices are wakeup devices or if their interrupts have
to be properly handled during system suspend/resume too for other
reasons, which is the case for timer interrupts or the ACPI SCI for
example.

To address this, modify the handling of IRQF_NO_SUSPEND by
suspend_device_irqs() and resume_device_irqs() and make
handle_irq_event_percpu() and note_interrupt() handle IRQS_SUSPENDED.

First of all, make suspend_device_irqs() check IRQF_NO_SUSPEND for
all irqactions sharing the same irq_desc and avoid disabling the IRQ
if at least one of them has IRQF_NO_SUSPEND unset.  If that flag
is set for at least one irqaction in the given irq_desc, however,
suspend_device_irqs() will stil mark the IRQ as IRQS_SUSPENDED.

Second, make handle_irq_event_percpu() return IRQ_NONE without
invoking the interrupt handler for irqactions with IRQF_NO_SUSPEND
unset if their irq_desc is marked as IRQS_SUSPENDED.

Next, modify note_interrupt() to disable misbehaving IRQs faster if
spuroius interrupts occur during system suspend/resume (that is, for
an irq_desc with IRQS_SUSPENDED set).

Finally, make resume_device_irqs() clear the IRQS_SPURIOUS_DISABLED
flag for IRQs that were emergency disabled after suspend_device_irqs()
had returned and log warning messages for them.

This should address the problematic case (when IRQF_NO_SUSPEND is set
for at least one and unset for at least one irqaction sharing one
irq_desc) and that case only without changing behavior in any other
cases.

It also effectively reverts a previous change to refuse interrupt
requests with IRQF_NO_SUSPEND settings that do not match the
existing ones (as it is not needed any more) and a previous change
that added a IRQD_WAKEUP_STATE check to __disable_irqs() (as that
mechanism has the same problem with shared interrupts as described
above).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/irq/handle.c   |   21 ++++++++++++++++++---
 kernel/irq/manage.c   |   31 ++++++++++++++++++++++++++-----
 kernel/irq/spurious.c |   27 ++++++++++++++++++---------
 3 files changed, 62 insertions(+), 17 deletions(-)

Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -385,10 +385,23 @@ setup_affinity(unsigned int irq, struct
 void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
 {
 	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)
-		    || irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
+		struct irqaction *action = desc->action;
+		unsigned int no_suspend, flags;
+
+		if (!action || (desc->istate & IRQS_SPURIOUS_DISABLED))
+			return;
+		no_suspend = IRQF_NO_SUSPEND;
+		flags = 0;
+		do {
+			no_suspend &= action->flags;
+			flags |= action->flags;
+			action = action->next;
+		} while (action);
+		if (no_suspend)
 			return;
 		desc->istate |= IRQS_SUSPENDED;
+		if (flags & IRQF_NO_SUSPEND)
+			return;
 	}
 
 	if (!desc->depth++)
@@ -446,7 +459,16 @@ EXPORT_SYMBOL(disable_irq);
 void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
 {
 	if (resume) {
-		if (!(desc->istate & IRQS_SUSPENDED)) {
+		if (desc->istate & IRQS_SUSPENDED) {
+			desc->istate &= ~IRQS_SUSPENDED;
+			if (desc->istate & IRQS_SPURIOUS_DISABLED) {
+				pr_err("Re-enabling emergency disabled IRQ %d\n",
+				       irq);
+				desc->istate &= ~IRQS_SPURIOUS_DISABLED;
+			} else if (desc->depth == 0) {
+				return;
+			}
+		} else {
 			if (!desc->action)
 				return;
 			if (!(desc->action->flags & IRQF_FORCE_RESUME))
@@ -454,7 +476,6 @@ void __enable_irq(struct irq_desc *desc,
 			/* Pretend that it got disabled ! */
 			desc->depth++;
 		}
-		desc->istate &= ~IRQS_SUSPENDED;
 	}
 
 	switch (desc->depth) {
@@ -1079,7 +1100,7 @@ __setup_irq(unsigned int irq, struct irq
 		 */
 
 #define IRQF_MISMATCH \
-	(IRQF_TRIGGER_MASK | IRQF_ONESHOT | IRQF_NO_SUSPEND)
+	(IRQF_TRIGGER_MASK | IRQF_ONESHOT)
 
 		if (!((old->flags & new->flags) & IRQF_SHARED) ||
 		    ((old->flags ^ new->flags) & IRQF_MISMATCH))
Index: linux-pm/kernel/irq/handle.c
===================================================================
--- linux-pm.orig/kernel/irq/handle.c
+++ linux-pm/kernel/irq/handle.c
@@ -131,6 +131,23 @@ void __irq_wake_thread(struct irq_desc *
 }
 
 irqreturn_t
+do_irqaction(struct irq_desc *desc, struct irqaction *action,
+	     unsigned int irq, void *dev_id)
+{
+	irqreturn_t ret;
+
+	if (unlikely((desc->istate & IRQS_SUSPENDED) &&
+		     !(action->flags & IRQF_NO_SUSPEND)))
+		return IRQ_NONE;
+
+	trace_irq_handler_entry(irq, action);
+	ret = action->handler(irq, dev_id);
+	trace_irq_handler_exit(irq, action, ret);
+
+	return ret;
+}
+
+irqreturn_t
 handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
 {
 	irqreturn_t retval = IRQ_NONE;
@@ -139,9 +156,7 @@ handle_irq_event_percpu(struct irq_desc
 	do {
 		irqreturn_t res;
 
-		trace_irq_handler_entry(irq, action);
-		res = action->handler(irq, action->dev_id);
-		trace_irq_handler_exit(irq, action, res);
+		res = do_irqaction(desc, action, irq, action->dev_id);
 
 		if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
 			      irq, action->handler))
Index: linux-pm/kernel/irq/spurious.c
===================================================================
--- linux-pm.orig/kernel/irq/spurious.c
+++ linux-pm/kernel/irq/spurious.c
@@ -275,6 +275,8 @@ try_misrouted_irq(unsigned int irq, stru
 void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		    irqreturn_t action_ret)
 {
+	int misrouted;
+
 	if (desc->istate & IRQS_POLL_INPROGRESS ||
 	    irq_settings_is_polled(desc))
 		return;
@@ -384,6 +386,9 @@ void note_interrupt(unsigned int irq, st
 		}
 	}
 
+	misrouted = unlikely(try_misrouted_irq(irq, desc, action_ret)) ?
+			misrouted_irq(irq) : 0;
+
 	if (unlikely(action_ret == IRQ_NONE)) {
 		/*
 		 * If we are seeing only the odd spurious IRQ caused by
@@ -391,19 +396,23 @@ void note_interrupt(unsigned int irq, st
 		 * otherwise the counter becomes a doomsday timer for otherwise
 		 * working systems
 		 */
-		if (time_after(jiffies, desc->last_unhandled + HZ/10))
-			desc->irqs_unhandled = 1;
-		else
+		if (time_after(jiffies, desc->last_unhandled + HZ/10)) {
+			desc->irqs_unhandled = 1 - misrouted;
+		} else if (!misrouted) {
 			desc->irqs_unhandled++;
+			if (unlikely(desc->istate & IRQS_SUSPENDED)) {
+				/*
+				 * That shouldn't happen.  It means IRQs from
+				 * a device that is supposed to be suspended at
+				 * this point.  Decay faster.
+				 */
+				desc->irqs_unhandled += 999;
+				desc->irq_count += 999;
+			}
+		}
 		desc->last_unhandled = jiffies;
 	}
 
-	if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
-		int ok = misrouted_irq(irq);
-		if (action_ret == IRQ_NONE)
-			desc->irqs_unhandled -= ok;
-	}
-
 	desc->irq_count++;
 	if (likely(desc->irq_count < 100000))
 		return;


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH, v2]
  2014-07-27 15:53             ` Rafael J. Wysocki
@ 2014-07-27 22:00               ` Rafael J. Wysocki
  2014-07-28 12:11                 ` Thomas Gleixner
  0 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-27 22:00 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Sunday, July 27, 2014 05:53:07 PM Rafael J. Wysocki wrote:
> On Saturday, July 26, 2014 12:25:29 AM Rafael J. Wysocki wrote:
> > On Friday, July 25, 2014 11:00:12 PM Thomas Gleixner wrote:
> > > On Fri, 25 Jul 2014, Rafael J. Wysocki wrote:
> > > > On Friday, July 25, 2014 03:25:41 PM Peter Zijlstra wrote:
> > > > > OK, so Rafael said there's devices that keep on raising their interrupt
> > > > > until they get attention. Ideally this won't happen because the device
> > > > > is suspended etc.. But I'm sure there's some broken piece of hardware
> > > > > out there that'll make it go boom.
> > > > 
> > > > So here's an idea.
> > > > 
> > > > What about returning IRQ_NONE rather than IRQ_HANDLED for "suspended"
> > > > interrupts (after all, that's what a sane driver would do for a
> > > > suspended device I suppose)?
> > > > 
> > > > If the line is really shared and the interrupt is taken care of by
> > > > the other guy sharing the line, we'll be all fine.
> > > > 
> > > > If that is not the case, on the other hand, and something's really
> > > > broken, we'll end up disabling the interrupt and marking it as
> > > > IRQS_SPURIOUS_DISABLED (if I understand things correctly).
> > > 
> > > We should not wait 100k unhandled interrupts in that case. We know
> > > already at the first unhandled interrupt that the shit hit the fan.
> > 
> > The first one may be a bus glitch or some such.  Also I guess we still need to
> > allow the legitimate "no suspend" guy to handle his interrupts until it gets
> > too worse.
> > 
> > Also does it really hurt to rely on the generic mechanism here?  We regard
> > it as fine at all other times after all.
> 
> Having considered this for some more time, I think that we need to address
> the IRQF_NO_SUSPEND problem with shared interrupts regardless of what's
> decided about the device wakeup and suspending interrupts, because (1) it's
> already there (and the Peter's patch to add IRQF_NO_SUSPEND to "mismatch"
> flags may break working systems that aren't even buggy, sorry for noticing
> that so late) and (2) we'll need something like IRQF_NO_SUSPEND idependently
> of wakeup anyway, for timers and things like the ACPI SCI.
> 
> Below is my attempt to do that based on the prototype I sent previously.
> It contains a mechanism to disable IRQs generating spurious interrupts
> during system suspend/resume faster, but otherwise works along the same
> lines.

I realized that it had a problem where resume_device_irqs() theoretically
could re-enable an IRQ that was not disabled by suspend_device_irqs().

Unfortunately, to plug that hole I needed a new IRQS_ flag that would be
set for shared suspended interrupts that required special handling.

I also fixed up the changelog as it had one thing described upside down.

Updated patch below (I tested it on my MSI Wind in which PCI PME IRQs are
shared with other devices).

Rafael


---
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Subject: irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts

Since __disable_irq() only checks IRQF_NO_SUSPEND for the first
irqaction in a given irq_desc, that value of that bit for the
first irqaction affects all of the other irqactions sharing the
interrupt with it.  This is problematic in two cases.

First, if IRQF_NO_SUSPEND is set in the first irqaction and unset
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
unset will be invoked after suspend_device_irqs() has returned even
though they are not supposed to.  That shouldn't be a problem if
those interrupt handlers are implemented correctly and the
corresponding devices are properly suspended, but it may lead to
functional issues otherwise.

Second, if IRQF_NO_SUSPEND is unset in the first irqaction and set
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
set will not be invoked after suspend_device_irqs() has returned, but
they are supposed to be invoked then.  That may be a problem if the
corresponding devices are wakeup devices or if their interrupts have
to be properly handled during system suspend/resume too for other
reasons, which is the case for timer interrupts or the ACPI SCI for
example.

To address this, modify the handling of IRQF_NO_SUSPEND by
suspend_device_irqs() and resume_device_irqs() and make
handle_irq_event_percpu() and note_interrupt() handle suspended
interrupts.

First of all, make suspend_device_irqs() check IRQF_NO_SUSPEND for
all irqactions sharing the same irq_desc and avoid disabling the IRQ
if at least one of them has IRQF_NO_SUSPEND set.  If that flag is
also unset for at least one irqaction in the given irq_desc, however,
suspend_device_irqs() will mark the IRQ as IRQS_SHARED_SUSPENDED
(new flag).

Second, make handle_irq_event_percpu() return IRQ_NONE without
invoking the interrupt handler for irqactions with IRQF_NO_SUSPEND
unset if their irq_desc is marked as IRQS_SHARED_SUSPENDED.

Next, modify note_interrupt() to disable misbehaving IRQs faster if
spuroius interrupts occur during system suspend/resume (that is, for
an irq_desc with IRQS_SHARED_SUSPENDED set).

Finally, make resume_device_irqs() clear the IRQS_SPURIOUS_DISABLED
flag for IRQS_SHARED_SUSPENDED IRQs that were emergency disabled
after suspend_device_irqs() had returned and log warning messages for
them.

This should address the problematic case (when IRQF_NO_SUSPEND is set
for at least one and unset for at least one irqaction sharing one
irq_desc) and that case only without changing behavior in any other
cases.

It also effectively reverts a previous change to refuse interrupt
requests with IRQF_NO_SUSPEND settings that do not match the
existing ones (as it is not needed any more) and a previous change
that added a IRQD_WAKEUP_STATE check to __disable_irqs() (as that
mechanism has the same problem with shared interrupts as described
above).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/irq/handle.c    |   21 ++++++++++++++++++---
 kernel/irq/internals.h |    2 ++
 kernel/irq/manage.c    |   35 +++++++++++++++++++++++++++++++----
 kernel/irq/spurious.c  |   27 ++++++++++++++++++---------
 4 files changed, 69 insertions(+), 16 deletions(-)

Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -385,9 +385,27 @@ setup_affinity(unsigned int irq, struct
 void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
 {
 	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)
-		    || irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
+		struct irqaction *action = desc->action;
+
+		if (!action)
 			return;
+		if (!(desc->istate & IRQS_SPURIOUS_DISABLED)) {
+			unsigned int no_suspend, flags;
+
+			no_suspend = IRQF_NO_SUSPEND;
+			flags = 0;
+			do {
+				no_suspend &= action->flags;
+				flags |= action->flags;
+				action = action->next;
+			} while (action);
+			if (no_suspend)
+				return;
+			if (flags & IRQF_NO_SUSPEND) {
+				desc->istate |= IRQS_SHARED_SUSPENDED;
+				return;
+			}
+		}
 		desc->istate |= IRQS_SUSPENDED;
 	}
 
@@ -446,7 +464,16 @@ EXPORT_SYMBOL(disable_irq);
 void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
 {
 	if (resume) {
-		if (!(desc->istate & IRQS_SUSPENDED)) {
+		if (desc->istate & IRQS_SHARED_SUSPENDED) {
+			desc->istate &= ~IRQS_SHARED_SUSPENDED;
+			if (desc->istate & IRQS_SPURIOUS_DISABLED) {
+				pr_err("Re-enabling emergency disabled IRQ %d\n",
+				       irq);
+				desc->istate &= ~IRQS_SPURIOUS_DISABLED;
+			} else {
+				return;
+			}
+		} else if (!(desc->istate & IRQS_SUSPENDED)) {
 			if (!desc->action)
 				return;
 			if (!(desc->action->flags & IRQF_FORCE_RESUME))
@@ -1079,7 +1106,7 @@ __setup_irq(unsigned int irq, struct irq
 		 */
 
 #define IRQF_MISMATCH \
-	(IRQF_TRIGGER_MASK | IRQF_ONESHOT | IRQF_NO_SUSPEND)
+	(IRQF_TRIGGER_MASK | IRQF_ONESHOT)
 
 		if (!((old->flags & new->flags) & IRQF_SHARED) ||
 		    ((old->flags ^ new->flags) & IRQF_MISMATCH))
Index: linux-pm/kernel/irq/handle.c
===================================================================
--- linux-pm.orig/kernel/irq/handle.c
+++ linux-pm/kernel/irq/handle.c
@@ -131,6 +131,23 @@ void __irq_wake_thread(struct irq_desc *
 }
 
 irqreturn_t
+do_irqaction(struct irq_desc *desc, struct irqaction *action,
+	     unsigned int irq, void *dev_id)
+{
+	irqreturn_t ret;
+
+	if (unlikely((desc->istate & IRQS_SHARED_SUSPENDED) &&
+		     !(action->flags & IRQF_NO_SUSPEND)))
+		return IRQ_NONE;
+
+	trace_irq_handler_entry(irq, action);
+	ret = action->handler(irq, dev_id);
+	trace_irq_handler_exit(irq, action, ret);
+
+	return ret;
+}
+
+irqreturn_t
 handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
 {
 	irqreturn_t retval = IRQ_NONE;
@@ -139,9 +156,7 @@ handle_irq_event_percpu(struct irq_desc
 	do {
 		irqreturn_t res;
 
-		trace_irq_handler_entry(irq, action);
-		res = action->handler(irq, action->dev_id);
-		trace_irq_handler_exit(irq, action, res);
+		res = do_irqaction(desc, action, irq, action->dev_id);
 
 		if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
 			      irq, action->handler))
Index: linux-pm/kernel/irq/spurious.c
===================================================================
--- linux-pm.orig/kernel/irq/spurious.c
+++ linux-pm/kernel/irq/spurious.c
@@ -275,6 +275,8 @@ try_misrouted_irq(unsigned int irq, stru
 void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		    irqreturn_t action_ret)
 {
+	int misrouted;
+
 	if (desc->istate & IRQS_POLL_INPROGRESS ||
 	    irq_settings_is_polled(desc))
 		return;
@@ -384,6 +386,9 @@ void note_interrupt(unsigned int irq, st
 		}
 	}
 
+	misrouted = unlikely(try_misrouted_irq(irq, desc, action_ret)) ?
+			misrouted_irq(irq) : 0;
+
 	if (unlikely(action_ret == IRQ_NONE)) {
 		/*
 		 * If we are seeing only the odd spurious IRQ caused by
@@ -391,19 +396,23 @@ void note_interrupt(unsigned int irq, st
 		 * otherwise the counter becomes a doomsday timer for otherwise
 		 * working systems
 		 */
-		if (time_after(jiffies, desc->last_unhandled + HZ/10))
-			desc->irqs_unhandled = 1;
-		else
+		if (time_after(jiffies, desc->last_unhandled + HZ/10)) {
+			desc->irqs_unhandled = 1 - misrouted;
+		} else if (!misrouted) {
 			desc->irqs_unhandled++;
+			if (unlikely(desc->istate & IRQS_SHARED_SUSPENDED)) {
+				/*
+				 * That shouldn't happen.  It means IRQs from
+				 * a device that is supposed to be suspended at
+				 * this point.  Decay faster.
+				 */
+				desc->irqs_unhandled += 999;
+				desc->irq_count += 999;
+			}
+		}
 		desc->last_unhandled = jiffies;
 	}
 
-	if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
-		int ok = misrouted_irq(irq);
-		if (action_ret == IRQ_NONE)
-			desc->irqs_unhandled -= ok;
-	}
-
 	desc->irq_count++;
 	if (likely(desc->irq_count < 100000))
 		return;
Index: linux-pm/kernel/irq/internals.h
===================================================================
--- linux-pm.orig/kernel/irq/internals.h
+++ linux-pm/kernel/irq/internals.h
@@ -43,6 +43,7 @@ enum {
  * IRQS_REPLAY			- irq is replayed
  * IRQS_WAITING			- irq is waiting
  * IRQS_PENDING			- irq is pending and replayed later
+ * IRQS_SHARED_SUSPENDED	- irq is shared, some handlers are suspended
  * IRQS_SUSPENDED		- irq is suspended
  */
 enum {
@@ -53,6 +54,7 @@ enum {
 	IRQS_REPLAY		= 0x00000040,
 	IRQS_WAITING		= 0x00000080,
 	IRQS_PENDING		= 0x00000200,
+	IRQS_SHARED_SUSPENDED	= 0x00000400,
 	IRQS_SUSPENDED		= 0x00000800,
 };
 


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-26 11:49             ` Rafael J. Wysocki
  2014-07-26 11:53               ` Rafael J. Wysocki
@ 2014-07-28  6:49               ` Peter Zijlstra
  2014-07-28 12:33                 ` Thomas Gleixner
  2014-07-28 21:27                 ` [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED Rafael J. Wysocki
  1 sibling, 2 replies; 75+ messages in thread
From: Peter Zijlstra @ 2014-07-28  6:49 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Thomas Gleixner, linux-kernel, Linux PM list

On Sat, Jul 26, 2014 at 01:49:17PM +0200, Rafael J. Wysocki wrote:
> One more idea, on top of the prototype patch that I posted
> (https://patchwork.kernel.org/patch/4625921/).
> 
> The problem with enable_irq_wake() is that it only takes one argument, so
> if that's a shared interrupt, we can't really say which irqaction is supposed
> to handle wakeup interrupts should they occur.

Right.

> To address this we can introduce enable_device_irq_wake() that will take
> an additional dev_id argument (that must be the one passed to request_irq() or
> the operation will fail) that can be used to identify the irqaction for
> handling the wakeup interrupts.  It can work by setting IRQF_NO_SUSPEND
> for the irqaction in question and doing the rest along the lines of
> irq_set_irq_wake(irq, 1).  disable_device_irq_wake() will then clear
> IRQF_NO_SUSPEND (it also has to be passed the dev_id argument).
> 
> If we have that, the guys who need to set up device interrupts (ie. interrupts
> normally used for signaling input events etc) for system wakeup will need to
> use enable_device_irq_wake() and that should just work.

So in the patch I posted I described why NO_SUSPEND is useful, I still
have to hear a solid reason for why we also need enable_irq_wake()? What
does it do that cannot be achieved with NO_SUSPEND?

I realize its dynamic, but that's crap, at device registration time it
pretty much already knows if its a wakeup source or not, right?

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH, v2]
  2014-07-27 22:00               ` [PATCH, v2] Rafael J. Wysocki
@ 2014-07-28 12:11                 ` Thomas Gleixner
  2014-07-28 21:17                   ` [PATCH, v3] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts (was: Re: [PATCH, v2]) Rafael J. Wysocki
  0 siblings, 1 reply; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-28 12:11 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Mon, 28 Jul 2014, Rafael J. Wysocki wrote:
> On Sunday, July 27, 2014 05:53:07 PM Rafael J. Wysocki wrote:
>  irqreturn_t
> +do_irqaction(struct irq_desc *desc, struct irqaction *action,
> +	     unsigned int irq, void *dev_id)
> +{
> +	irqreturn_t ret;
> +
> +	if (unlikely((desc->istate & IRQS_SHARED_SUSPENDED) &&
> +		     !(action->flags & IRQF_NO_SUSPEND)))
> +		return IRQ_NONE;

I really want to avoid that conditional. We burden it on every
interrupt just to deal with this nonsense.

A simple solution for this is to add irq_desc::action_suspended and
move the shared actions which are not flagged NO_SUSPEND over and
bring them back on resume.

> Index: linux-pm/kernel/irq/spurious.c
> ===================================================================
> --- linux-pm.orig/kernel/irq/spurious.c
> +++ linux-pm/kernel/irq/spurious.c
> @@ -275,6 +275,8 @@ try_misrouted_irq(unsigned int irq, stru
>  void note_interrupt(unsigned int irq, struct irq_desc *desc,
>  		    irqreturn_t action_ret)
>  {
> +	int misrouted;
> +
>  	if (desc->istate & IRQS_POLL_INPROGRESS ||
>  	    irq_settings_is_polled(desc))
>  		return;
> @@ -384,6 +386,9 @@ void note_interrupt(unsigned int irq, st
>  		}
>  	}
>  
> +	misrouted = unlikely(try_misrouted_irq(irq, desc, action_ret)) ?
> +			misrouted_irq(irq) : 0;

If the system is suspended, why would we try misrouted irqs at all?
All non wakeup irqs are disabled, so we just spend a gazillion of
cycles for nothing.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-28  6:49               ` Peter Zijlstra
@ 2014-07-28 12:33                 ` Thomas Gleixner
  2014-07-28 13:04                   ` Peter Zijlstra
  2014-07-28 21:53                   ` Rafael J. Wysocki
  2014-07-28 21:27                 ` [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED Rafael J. Wysocki
  1 sibling, 2 replies; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-28 12:33 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Rafael J. Wysocki, linux-kernel, Linux PM list

On Mon, 28 Jul 2014, Peter Zijlstra wrote:
> On Sat, Jul 26, 2014 at 01:49:17PM +0200, Rafael J. Wysocki wrote:
> > One more idea, on top of the prototype patch that I posted
> > (https://patchwork.kernel.org/patch/4625921/).
> > 
> > The problem with enable_irq_wake() is that it only takes one argument, so
> > if that's a shared interrupt, we can't really say which irqaction is supposed
> > to handle wakeup interrupts should they occur.
> 
> Right.

Historically no hardware manufacturer was so stupid to have wakeup
sources on shared interrupts. Just because x86 is brain damaged is no
reason to claim that stuff that worked fine for years is crap.

We never needed this until now, so we simply go and do what we've done
always in such situations. We look for a solution which copes with the
new hardware brain farts and keeps the existing stuff working. It's
that simple.

> > To address this we can introduce enable_device_irq_wake() that will take
> > an additional dev_id argument (that must be the one passed to request_irq() or
> > the operation will fail) that can be used to identify the irqaction for
> > handling the wakeup interrupts.  It can work by setting IRQF_NO_SUSPEND
> > for the irqaction in question and doing the rest along the lines of
> > irq_set_irq_wake(irq, 1).  disable_device_irq_wake() will then clear
> > IRQF_NO_SUSPEND (it also has to be passed the dev_id argument).
> > 
> > If we have that, the guys who need to set up device interrupts (ie. interrupts
> > normally used for signaling input events etc) for system wakeup will need to
> > use enable_device_irq_wake() and that should just work.
> 
> So in the patch I posted I described why NO_SUSPEND is useful, I still
> have to hear a solid reason for why we also need enable_irq_wake()? What
> does it do that cannot be achieved with NO_SUSPEND?
> 
> I realize its dynamic, but that's crap, at device registration time it
> pretty much already knows if its a wakeup source or not, right?

It does, but that doesn't make it crap. There are situations where you
want certain wakeup sources enabled or disabled and you can't do that
with IRQF_NO_SUSPEND. And to support this, you need the wake_depth
counter as well. And that's what we always had.

I'd rather say, that the "I can wakeup the machine and will do so no
matter what flag" is more stupid than the wake mechanism.

It was added to support XEN wreckage and I wish we've never done that
in the first place.

So we are not going to make everything a single stupid flag and limit
the usability of existing code. We rather go and try to remove the
stupid flag before it becomes more wide spread.

And we cannot treat the wakeup thing the same way as the
IRQF_NO_SUSPEND flag, because there is hardware where the irq line
must be disabled at the normal (non suspend) interrupt controller, and
the wake mechanism tells the PM microcontroller to monitor the
interrupt line and kick the machine back to life.

So we need to very carefully look at all the existing cases instead of
yelling crap and inflicting x86 specific horror on everyone. I said on
friday, that I need to look at ALL use cases first and I meant it.

Thanks,

	tglx










^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-28 12:33                 ` Thomas Gleixner
@ 2014-07-28 13:04                   ` Peter Zijlstra
  2014-07-28 21:53                   ` Rafael J. Wysocki
  1 sibling, 0 replies; 75+ messages in thread
From: Peter Zijlstra @ 2014-07-28 13:04 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Rafael J. Wysocki, linux-kernel, Linux PM list

On Mon, Jul 28, 2014 at 02:33:41PM +0200, Thomas Gleixner wrote:
> > I realize its dynamic, but that's crap, at device registration time it
> > pretty much already knows if its a wakeup source or not, right?
> 
> It does, but that doesn't make it crap. There are situations where you
> want certain wakeup sources enabled or disabled and you can't do that
> with IRQF_NO_SUSPEND. And to support this, you need the wake_depth
> counter as well. And that's what we always had.

But if the driver is PM aware it can change its behaviour in runtime. We
don't need to disable the line in the PIC if its well behaved. This
force disable is more a bandaid to quiesce ignorant drivers than
anything else.

> I'd rather say, that the "I can wakeup the machine and will do so no
> matter what flag" is more stupid than the wake mechanism.

I never said such. I think the NO_SUSPEND name bad one that confuses
thing more than anything as it describes the implementation not the
semantics.

But as above, if a driver sets this it basically says I'm PM aware and
will behave right, no need to go force the PIC on me.

With that the driver can or can not, depending on runtime circumstances
wake or not etc.

> So we are not going to make everything a single stupid flag and limit
> the usability of existing code. We rather go and try to remove the
> stupid flag before it becomes more wide spread.

Sure, having two different means of PM for the irq layer is one too
many.

> And we cannot treat the wakeup thing the same way as the
> IRQF_NO_SUSPEND flag, because there is hardware where the irq line
> must be disabled at the normal (non suspend) interrupt controller, and
> the wake mechanism tells the PM microcontroller to monitor the
> interrupt line and kick the machine back to life.

Ah, see that is useful information.. Yes such a thing would invalidate
the flag scheme.

> So we need to very carefully look at all the existing cases instead of
> yelling crap and inflicting x86 specific horror on everyone. I said on
> friday, that I need to look at ALL use cases first and I meant it.

Well, the reason I yelled crap is because its IRQ nr based and not
handler based. Yes, shared lines are a pain, but we have to deal with
them, so allowing 'new' interfaces in that do not deal with it is very
unfortunate at best.

What further didn't help is that it wasn't at all clear to me how it was
supposed to work.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH, v3] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts (was: Re: [PATCH, v2])
  2014-07-28 12:11                 ` Thomas Gleixner
@ 2014-07-28 21:17                   ` Rafael J. Wysocki
  2014-07-29  7:28                     ` [PATCH, v4] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts Rafael J. Wysocki
  0 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-28 21:17 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Monday, July 28, 2014 02:11:35 PM Thomas Gleixner wrote:
> On Mon, 28 Jul 2014, Rafael J. Wysocki wrote:
> > On Sunday, July 27, 2014 05:53:07 PM Rafael J. Wysocki wrote:
> >  irqreturn_t
> > +do_irqaction(struct irq_desc *desc, struct irqaction *action,
> > +	     unsigned int irq, void *dev_id)
> > +{
> > +	irqreturn_t ret;
> > +
> > +	if (unlikely((desc->istate & IRQS_SHARED_SUSPENDED) &&
> > +		     !(action->flags & IRQF_NO_SUSPEND)))
> > +		return IRQ_NONE;
> 
> I really want to avoid that conditional. We burden it on every
> interrupt just to deal with this nonsense.

Yes, that's concerning.  I had an idea to use a dummy interrupt
handler for the suspended ones, but ->

> A simple solution for this is to add irq_desc::action_suspended and
> move the shared actions which are not flagged NO_SUSPEND over and
> bring them back on resume.

-> this is a better idea even, thanks!  And it doesn't matter if they are
reordered in the process, right?

> > Index: linux-pm/kernel/irq/spurious.c
> > ===================================================================
> > --- linux-pm.orig/kernel/irq/spurious.c
> > +++ linux-pm/kernel/irq/spurious.c
> > @@ -275,6 +275,8 @@ try_misrouted_irq(unsigned int irq, stru
> >  void note_interrupt(unsigned int irq, struct irq_desc *desc,
> >  		    irqreturn_t action_ret)
> >  {
> > +	int misrouted;
> > +
> >  	if (desc->istate & IRQS_POLL_INPROGRESS ||
> >  	    irq_settings_is_polled(desc))
> >  		return;
> > @@ -384,6 +386,9 @@ void note_interrupt(unsigned int irq, st
> >  		}
> >  	}
> >  
> > +	misrouted = unlikely(try_misrouted_irq(irq, desc, action_ret)) ?
> > +			misrouted_irq(irq) : 0;
> 
> If the system is suspended, why would we try misrouted irqs at all?
> All non wakeup irqs are disabled,

Well, not only wakeup interrupts are enabled then.  Timer interrupts are still
enabled too as well as the ACPI ones, for example.

> so we just spend a gazillion of cycles for nothing.

My concern was that if turned out to be misrouted for some oddball reason, it
would get the penalty for nothing, but perhaps that doesn't matter anyway.

Version 3 of the patch is below.  I moved the PM-related code to pm.c in this
one.  Tested on MSI Wind with some debug stuff on top.

Rafael


---
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Subject: irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts

Since __disable_irq() only checks IRQF_NO_SUSPEND for the first
irqaction in a given irq_desc, that value of that bit for the
first irqaction affects all of the other irqactions sharing the
interrupt with it.  This is problematic in two cases.

First, if IRQF_NO_SUSPEND is set in the first irqaction and unset
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
unset will be invoked after suspend_device_irqs() has returned even
though they are not supposed to.  That shouldn't be a problem if
those interrupt handlers are implemented correctly and the
corresponding devices are properly suspended, but it may lead to
functional issues otherwise.

Second, if IRQF_NO_SUSPEND is unset in the first irqaction and set
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
set will not be invoked after suspend_device_irqs() has returned, but
they are supposed to be invoked then.  That may be a problem if the
corresponding devices are wakeup devices or if their interrupts have
to be properly handled during system suspend/resume too for other
reasons, which is the case for timer interrupts or the ACPI SCI for
example.

To address this, modify the handling of IRQF_NO_SUSPEND by
suspend_device_irqs() and resume_device_irqs() and make
note_interrupt() handle suspended interrupts.

First of all, make suspend_device_irqs() check IRQF_NO_SUSPEND for
all irqactions sharing the same irq_desc and avoid disabling the IRQ
if at least one of them has IRQF_NO_SUSPEND set.  If that flag is
also unset for at least one irqaction in the given irq_desc, however,
suspend_device_irqs() will move the irqactions with IRQF_NO_SUSPEND
unset over to a list of "suspended actions" whose interrupt handlers
won't be invoked going forward.

Second, modify note_interrupt() to disable misbehaving IRQs faster if
spuroius interrupts occur during system suspend/resume (that is, for
an irq_desc with IRQS_SUSPENDED set).

Finally, make resume_device_irqs() move irqactions from the list of
"suspended actions" created by suspend_device_irqs() back to the
regular "action" list.  It also will clear the IRQS_SPURIOUS_DISABLED
flag for IRQS_SUSPENDED IRQs that were emergency disabled after
suspend_device_irqs() had returned and will log warning messages for
them.

This should address the problematic case (when IRQF_NO_SUSPEND is set
for at least one and unset for at least one irqaction sharing one
irq_desc) and that case only without changing behavior in any other
cases.

It also effectively reverts a previous change to refuse interrupt
requests with IRQF_NO_SUSPEND settings that do not match the
existing ones (as it is not needed any more) and a previous change
that added a IRQD_WAKEUP_STATE check to __disable_irqs() (as that
mechanism has the same problem with shared interrupts as described
above).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/irqdesc.h |    2 +
 kernel/irq/internals.h  |    4 +-
 kernel/irq/manage.c     |   31 +++--------------
 kernel/irq/pm.c         |   86 ++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/irq/spurious.c   |   14 ++++++-
 5 files changed, 106 insertions(+), 31 deletions(-)

Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -382,15 +382,8 @@ setup_affinity(unsigned int irq, struct
 }
 #endif
 
-void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
+void __disable_irq(struct irq_desc *desc, unsigned int irq)
 {
-	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)
-		    || irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
-			return;
-		desc->istate |= IRQS_SUSPENDED;
-	}
-
 	if (!desc->depth++)
 		irq_disable(desc);
 }
@@ -402,7 +395,7 @@ static int __disable_irq_nosync(unsigned
 
 	if (!desc)
 		return -EINVAL;
-	__disable_irq(desc, irq, false);
+	__disable_irq(desc, irq);
 	irq_put_desc_busunlock(desc, flags);
 	return 0;
 }
@@ -443,20 +436,8 @@ void disable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(disable_irq);
 
-void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
+void __enable_irq(struct irq_desc *desc, unsigned int irq)
 {
-	if (resume) {
-		if (!(desc->istate & IRQS_SUSPENDED)) {
-			if (!desc->action)
-				return;
-			if (!(desc->action->flags & IRQF_FORCE_RESUME))
-				return;
-			/* Pretend that it got disabled ! */
-			desc->depth++;
-		}
-		desc->istate &= ~IRQS_SUSPENDED;
-	}
-
 	switch (desc->depth) {
 	case 0:
  err_out:
@@ -498,7 +479,7 @@ void enable_irq(unsigned int irq)
 		 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
 		goto out;
 
-	__enable_irq(desc, irq, false);
+	__enable_irq(desc, irq);
 out:
 	irq_put_desc_busunlock(desc, flags);
 }
@@ -1079,7 +1060,7 @@ __setup_irq(unsigned int irq, struct irq
 		 */
 
 #define IRQF_MISMATCH \
-	(IRQF_TRIGGER_MASK | IRQF_ONESHOT | IRQF_NO_SUSPEND)
+	(IRQF_TRIGGER_MASK | IRQF_ONESHOT)
 
 		if (!((old->flags & new->flags) & IRQF_SHARED) ||
 		    ((old->flags ^ new->flags) & IRQF_MISMATCH))
@@ -1232,7 +1213,7 @@ __setup_irq(unsigned int irq, struct irq
 	 */
 	if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) {
 		desc->istate &= ~IRQS_SPURIOUS_DISABLED;
-		__enable_irq(desc, irq, false);
+		__enable_irq(desc, irq);
 	}
 
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
Index: linux-pm/kernel/irq/spurious.c
===================================================================
--- linux-pm.orig/kernel/irq/spurious.c
+++ linux-pm/kernel/irq/spurious.c
@@ -391,10 +391,20 @@ void note_interrupt(unsigned int irq, st
 		 * otherwise the counter becomes a doomsday timer for otherwise
 		 * working systems
 		 */
-		if (time_after(jiffies, desc->last_unhandled + HZ/10))
+		if (time_after(jiffies, desc->last_unhandled + HZ/10)) {
 			desc->irqs_unhandled = 1;
-		else
+		} else {
 			desc->irqs_unhandled++;
+			if (unlikely(desc->istate & IRQS_SUSPENDED)) {
+				/*
+				 * That shouldn't happen.  It means IRQs from
+				 * a device that is supposed to be suspended at
+				 * this point.  Decay faster.
+				 */
+				desc->irqs_unhandled += 999;
+				desc->irq_count += 999;
+			}
+		}
 		desc->last_unhandled = jiffies;
 	}
 
Index: linux-pm/include/linux/irqdesc.h
===================================================================
--- linux-pm.orig/include/linux/irqdesc.h
+++ linux-pm/include/linux/irqdesc.h
@@ -20,6 +20,7 @@ struct irq_desc;
  * @handle_irq:		highlevel irq-events handler
  * @preflow_handler:	handler called before the flow handler (currently used by sparc)
  * @action:		the irq action chain
+ * @action_suspended:	suspended irq action chain
  * @status:		status information
  * @core_internal_state__do_not_mess_with_it: core internal status information
  * @depth:		disable-depth, for nested irq_disable() calls
@@ -47,6 +48,7 @@ struct irq_desc {
 	irq_preflow_handler_t	preflow_handler;
 #endif
 	struct irqaction	*action;	/* IRQ action list */
+	struct irqaction	*action_suspended;
 	unsigned int		status_use_accessors;
 	unsigned int		core_internal_state__do_not_mess_with_it;
 	unsigned int		depth;		/* nested irq disables */
Index: linux-pm/kernel/irq/internals.h
===================================================================
--- linux-pm.orig/kernel/irq/internals.h
+++ linux-pm/kernel/irq/internals.h
@@ -63,8 +63,8 @@ enum {
 
 extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags);
-extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
-extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
+extern void __disable_irq(struct irq_desc *desc, unsigned int irq);
+extern void __enable_irq(struct irq_desc *desc, unsigned int irq);
 
 extern int irq_startup(struct irq_desc *desc, bool resend);
 extern void irq_shutdown(struct irq_desc *desc);
Index: linux-pm/kernel/irq/pm.c
===================================================================
--- linux-pm.orig/kernel/irq/pm.c
+++ linux-pm/kernel/irq/pm.c
@@ -13,6 +13,47 @@
 
 #include "internals.h"
 
+static bool suspend_irq(struct irq_desc *desc)
+{
+	struct irqaction *action = desc->action;
+	unsigned int no_suspend, flags;
+
+	if (!action)
+		return false;
+
+	no_suspend = IRQF_NO_SUSPEND;
+	flags = 0;
+	do {
+		no_suspend &= action->flags;
+		flags |= action->flags;
+		action = action->next;
+	} while (action);
+	if (no_suspend)
+		return false;
+
+	desc->istate |= IRQS_SUSPENDED;
+
+	if ((flags & IRQF_NO_SUSPEND) &&
+	    !(desc->istate & IRQS_SPURIOUS_DISABLED)) {
+		struct irqaction *no_suspend = NULL;
+
+		do {
+			action = desc->action;
+			desc->action = action->next;
+			if (action->flags & IRQF_NO_SUSPEND) {
+				action->next = no_suspend;
+				no_suspend = action;
+			} else {
+				action->next = desc->action_suspended;
+				desc->action_suspended = action;
+			}
+		} while (desc->action);
+		desc->action = no_suspend;
+		return false;
+	}
+	return true;
+}
+
 /**
  * suspend_device_irqs - disable all currently enabled interrupt lines
  *
@@ -30,7 +71,10 @@ void suspend_device_irqs(void)
 		unsigned long flags;
 
 		raw_spin_lock_irqsave(&desc->lock, flags);
-		__disable_irq(desc, irq, true);
+
+		if (suspend_irq(desc))
+			__disable_irq(desc, irq);
+
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 
@@ -40,6 +84,41 @@ void suspend_device_irqs(void)
 }
 EXPORT_SYMBOL_GPL(suspend_device_irqs);
 
+static bool resume_irq(struct irq_desc *desc, int irq)
+{
+	if (desc->istate & IRQS_SUSPENDED) {
+		desc->istate &= ~IRQS_SUSPENDED;
+		if (desc->action_suspended) {
+			struct irqaction *action;
+
+			do {
+				action = desc->action_suspended;
+				desc->action_suspended = action->next;
+				action->next = desc->action;
+				desc->action = action;
+			} while (desc->action_suspended);
+
+			if (desc->istate & IRQS_SPURIOUS_DISABLED) {
+				pr_err("Re-enabling emergency disabled IRQ %d\n",
+				       irq);
+				desc->istate &= ~IRQS_SPURIOUS_DISABLED;
+			} else {
+				return false;
+			}
+		}
+	} else {
+		if (!desc->action)
+			return false;
+
+		if (!(desc->action->flags & IRQF_FORCE_RESUME))
+			return false;
+
+		/* Pretend that it got disabled ! */
+		desc->depth++;
+	}
+	return true;
+}
+
 static void resume_irqs(bool want_early)
 {
 	struct irq_desc *desc;
@@ -54,7 +133,10 @@ static void resume_irqs(bool want_early)
 			continue;
 
 		raw_spin_lock_irqsave(&desc->lock, flags);
-		__enable_irq(desc, irq, true);
+
+		if (resume_irq(desc, irq))
+			__enable_irq(desc, irq);
+
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-28  6:49               ` Peter Zijlstra
  2014-07-28 12:33                 ` Thomas Gleixner
@ 2014-07-28 21:27                 ` Rafael J. Wysocki
  1 sibling, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-28 21:27 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Thomas Gleixner, linux-kernel, Linux PM list

On Monday, July 28, 2014 08:49:13 AM Peter Zijlstra wrote:
> On Sat, Jul 26, 2014 at 01:49:17PM +0200, Rafael J. Wysocki wrote:
> > One more idea, on top of the prototype patch that I posted
> > (https://patchwork.kernel.org/patch/4625921/).
> > 
> > The problem with enable_irq_wake() is that it only takes one argument, so
> > if that's a shared interrupt, we can't really say which irqaction is supposed
> > to handle wakeup interrupts should they occur.
> 
> Right.
> 
> > To address this we can introduce enable_device_irq_wake() that will take
> > an additional dev_id argument (that must be the one passed to request_irq() or
> > the operation will fail) that can be used to identify the irqaction for
> > handling the wakeup interrupts.  It can work by setting IRQF_NO_SUSPEND
> > for the irqaction in question and doing the rest along the lines of
> > irq_set_irq_wake(irq, 1).  disable_device_irq_wake() will then clear
> > IRQF_NO_SUSPEND (it also has to be passed the dev_id argument).
> > 
> > If we have that, the guys who need to set up device interrupts (ie. interrupts
> > normally used for signaling input events etc) for system wakeup will need to
> > use enable_device_irq_wake() and that should just work.
> 
> So in the patch I posted I described why NO_SUSPEND is useful, I still
> have to hear a solid reason for why we also need enable_irq_wake()? What
> does it do that cannot be achieved with NO_SUSPEND?
> 
> I realize its dynamic, but that's crap, at device registration time it
> pretty much already knows if its a wakeup source or not, right?

It knows that it can be a wakeup source, but it doesn't know if it will be
use that way (user space may not want that, for example).

It still makes sense to use IRQF_NO_SUSPEND for it, but people were complaining
about having to do that in addition to using enable_irq_wake().  Quite
understandably, because usually you want both or at least "wakeup" should
imply "no suspend".

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-28 12:33                 ` Thomas Gleixner
  2014-07-28 13:04                   ` Peter Zijlstra
@ 2014-07-28 21:53                   ` Rafael J. Wysocki
  2014-07-28 23:01                     ` Rafael J. Wysocki
  1 sibling, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-28 21:53 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Monday, July 28, 2014 02:33:41 PM Thomas Gleixner wrote:
> On Mon, 28 Jul 2014, Peter Zijlstra wrote:
> > On Sat, Jul 26, 2014 at 01:49:17PM +0200, Rafael J. Wysocki wrote:
> > > One more idea, on top of the prototype patch that I posted
> > > (https://patchwork.kernel.org/patch/4625921/).
> > > 
> > > The problem with enable_irq_wake() is that it only takes one argument, so
> > > if that's a shared interrupt, we can't really say which irqaction is supposed
> > > to handle wakeup interrupts should they occur.
> > 
> > Right.
> 
> Historically no hardware manufacturer was so stupid to have wakeup
> sources on shared interrupts. Just because x86 is brain damaged is no
> reason to claim that stuff that worked fine for years is crap.

I don't honestly think that's x86 only.  The PCIe PME interrupt has always been
designed as shareable and that's not limited to x86.

Also enable_irq_wake() has the problem that on some platforms it makes
the interrupt wakeup-only, so it stops signaling input events after that.
AT91 seems to be one of these platforms (you may argue that this is a platform
bug, but still).  So if it is called in a driver's .suspend() callback,
it will create a "blind" period for potential wakeup events between
that point and when the platform is eventually turned off.

> We never needed this until now, so we simply go and do what we've done
> always in such situations. We look for a solution which copes with the
> new hardware brain farts and keeps the existing stuff working. It's
> that simple.
> 
> > > To address this we can introduce enable_device_irq_wake() that will take
> > > an additional dev_id argument (that must be the one passed to request_irq() or
> > > the operation will fail) that can be used to identify the irqaction for
> > > handling the wakeup interrupts.  It can work by setting IRQF_NO_SUSPEND
> > > for the irqaction in question and doing the rest along the lines of
> > > irq_set_irq_wake(irq, 1).  disable_device_irq_wake() will then clear
> > > IRQF_NO_SUSPEND (it also has to be passed the dev_id argument).
> > > 
> > > If we have that, the guys who need to set up device interrupts (ie. interrupts
> > > normally used for signaling input events etc) for system wakeup will need to
> > > use enable_device_irq_wake() and that should just work.
> > 
> > So in the patch I posted I described why NO_SUSPEND is useful, I still
> > have to hear a solid reason for why we also need enable_irq_wake()? What
> > does it do that cannot be achieved with NO_SUSPEND?
> > 
> > I realize its dynamic, but that's crap, at device registration time it
> > pretty much already knows if its a wakeup source or not, right?
> 
> It does, but that doesn't make it crap. There are situations where you
> want certain wakeup sources enabled or disabled and you can't do that
> with IRQF_NO_SUSPEND. And to support this, you need the wake_depth
> counter as well. And that's what we always had.
> 
> I'd rather say, that the "I can wakeup the machine and will do so no
> matter what flag" is more stupid than the wake mechanism.
> 
> It was added to support XEN wreckage and I wish we've never done that
> in the first place.

We needed to do that for timers to start with.  We also need it for ACPI
and pretty much everything that needs to react to events during system
suspend/resume too.  I would argue for limiting its use to things that
need their interrupts to be always enabled, though.

> So we are not going to make everything a single stupid flag and limit
> the usability of existing code. We rather go and try to remove the
> stupid flag before it becomes more wide spread.
> 
> And we cannot treat the wakeup thing the same way as the
> IRQF_NO_SUSPEND flag, because there is hardware where the irq line
> must be disabled at the normal (non suspend) interrupt controller, and
> the wake mechanism tells the PM microcontroller to monitor the
> interrupt line and kick the machine back to life.
> 
> So we need to very carefully look at all the existing cases instead of
> yelling crap and inflicting x86 specific horror on everyone. I said on
> friday, that I need to look at ALL use cases first and I meant it.

Regardless of the use case, I don't think it is necessary to manipulate
the interrupt controller settings before the syscore_suspend stage, because
if an interrupt happens earlier, we need to handle it pretty much in a normal
way, unless it has been suspended.

So I'd argue for not using anything like enable_irq_wake() that goes all
the way to the hardware in drivers.  Instead, we could allow drivers to
mark interrupts as "set this up for system wakeup" and really do the setup
right before putting the platform into the final "suspended" state.  And that
is totally independend of the IRQF_NO_SUSPEND thing.

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-28 21:53                   ` Rafael J. Wysocki
@ 2014-07-28 23:01                     ` Rafael J. Wysocki
  2014-07-29 12:46                       ` Thomas Gleixner
  0 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-28 23:01 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Monday, July 28, 2014 11:53:15 PM Rafael J. Wysocki wrote:
> On Monday, July 28, 2014 02:33:41 PM Thomas Gleixner wrote:
> > On Mon, 28 Jul 2014, Peter Zijlstra wrote:
> > > On Sat, Jul 26, 2014 at 01:49:17PM +0200, Rafael J. Wysocki wrote:

[cut]

> > So we are not going to make everything a single stupid flag and limit
> > the usability of existing code. We rather go and try to remove the
> > stupid flag before it becomes more wide spread.
> > 
> > And we cannot treat the wakeup thing the same way as the
> > IRQF_NO_SUSPEND flag, because there is hardware where the irq line
> > must be disabled at the normal (non suspend) interrupt controller, and
> > the wake mechanism tells the PM microcontroller to monitor the
> > interrupt line and kick the machine back to life.
> > 
> > So we need to very carefully look at all the existing cases instead of
> > yelling crap and inflicting x86 specific horror on everyone. I said on
> > friday, that I need to look at ALL use cases first and I meant it.
> 
> Regardless of the use case, I don't think it is necessary to manipulate
> the interrupt controller settings before the syscore_suspend stage, because
> if an interrupt happens earlier, we need to handle it pretty much in a normal
> way, unless it has been suspended.
> 
> So I'd argue for not using anything like enable_irq_wake() that goes all
> the way to the hardware in drivers.  Instead, we could allow drivers to
> mark interrupts as "set this up for system wakeup" and really do the setup
> right before putting the platform into the final "suspended" state.  And that
> is totally independend of the IRQF_NO_SUSPEND thing.

In addition to that we need the interrupt handler of the driver that requested
the irq to be set up for system wakeup to be invoked after suspend_device_irqs()
in case there are interrupts that should abort the suspend transition or we
can lose a wakeup event.  So whatever interface we decide to use it has to
affect suspend/resume_device_irqs() pretty much in the same way as the
IRQF_NO_SUSPEND flag.

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH, v4] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts
  2014-07-28 21:17                   ` [PATCH, v3] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts (was: Re: [PATCH, v2]) Rafael J. Wysocki
@ 2014-07-29  7:28                     ` Rafael J. Wysocki
  2014-07-29 13:46                       ` [PATCH, v5] " Rafael J. Wysocki
  0 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-29  7:28 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Since __disable_irq() only checks IRQF_NO_SUSPEND for the first
irqaction in a given irq_desc, that value of that bit for the
first irqaction affects all of the other irqactions sharing the
interrupt with it.  This is problematic in two cases.

First, if IRQF_NO_SUSPEND is set in the first irqaction and unset
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
unset will be invoked after suspend_device_irqs() has returned even
though they are not supposed to.  That shouldn't be a problem if
those interrupt handlers are implemented correctly and the
corresponding devices are properly suspended, but it may lead to
functional issues otherwise.

Second, if IRQF_NO_SUSPEND is unset in the first irqaction and set
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
set will not be invoked after suspend_device_irqs() has returned, but
they are supposed to be invoked then.  That may be a problem if the
corresponding devices are wakeup devices or if their interrupts have
to be properly handled during system suspend/resume too for other
reasons, which is the case for timer interrupts or the ACPI SCI for
example.

To address this, modify the handling of IRQF_NO_SUSPEND by
suspend_device_irqs() and resume_device_irqs() and make
note_interrupt() handle suspended interrupts.

First of all, make suspend_device_irqs() check IRQF_NO_SUSPEND for
all irqactions sharing the same irq_desc and avoid disabling the IRQ
if at least one of them has IRQF_NO_SUSPEND set.  If that flag is
also unset for at least one irqaction in the given irq_desc, however,
suspend_device_irqs() will move the irqactions with IRQF_NO_SUSPEND
unset over to a list of "suspended actions" whose interrupt handlers
won't be invoked going forward.

Second, modify note_interrupt() to disable misbehaving IRQs faster if
spuroius interrupts occur during system suspend/resume (that is, for
an irq_desc with IRQS_SUSPENDED set).

Finally, make resume_device_irqs() move irqactions from the list of
"suspended actions" created by suspend_device_irqs() back to the
regular "action" list.  It also will clear the IRQS_SPURIOUS_DISABLED
flag for IRQS_SUSPENDED IRQs that were emergency disabled after
suspend_device_irqs() had returned and will log warning messages for
them.

This should address the problematic case (when IRQF_NO_SUSPEND is set
for at least one and unset for at least one irqaction sharing one
irq_desc) and that case only without changing behavior in any other
cases.

It also effectively reverts a previous change to refuse interrupt
requests with IRQF_NO_SUSPEND settings that do not match the
existing ones (as it is not needed any more) and a previous change
that added a IRQD_WAKEUP_STATE check to __disable_irqs() (as that
mechanism has the same problem with shared interrupts as described
above).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

I'm not sure why I make suspend_irq() and resume_irq() return bool
instead of just calling __disable_irq() and __enable_irq() from them
directly in the previous version.  Do it better this time.

Rafael

---
 include/linux/irqdesc.h |    2 +
 kernel/irq/internals.h  |    4 +-
 kernel/irq/manage.c     |   31 +++---------------
 kernel/irq/pm.c         |   80 ++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/irq/spurious.c   |   14 +++++++-
 5 files changed, 100 insertions(+), 31 deletions(-)

Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -382,15 +382,8 @@ setup_affinity(unsigned int irq, struct
 }
 #endif
 
-void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
+void __disable_irq(struct irq_desc *desc, unsigned int irq)
 {
-	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)
-		    || irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
-			return;
-		desc->istate |= IRQS_SUSPENDED;
-	}
-
 	if (!desc->depth++)
 		irq_disable(desc);
 }
@@ -402,7 +395,7 @@ static int __disable_irq_nosync(unsigned
 
 	if (!desc)
 		return -EINVAL;
-	__disable_irq(desc, irq, false);
+	__disable_irq(desc, irq);
 	irq_put_desc_busunlock(desc, flags);
 	return 0;
 }
@@ -443,20 +436,8 @@ void disable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(disable_irq);
 
-void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
+void __enable_irq(struct irq_desc *desc, unsigned int irq)
 {
-	if (resume) {
-		if (!(desc->istate & IRQS_SUSPENDED)) {
-			if (!desc->action)
-				return;
-			if (!(desc->action->flags & IRQF_FORCE_RESUME))
-				return;
-			/* Pretend that it got disabled ! */
-			desc->depth++;
-		}
-		desc->istate &= ~IRQS_SUSPENDED;
-	}
-
 	switch (desc->depth) {
 	case 0:
  err_out:
@@ -498,7 +479,7 @@ void enable_irq(unsigned int irq)
 		 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
 		goto out;
 
-	__enable_irq(desc, irq, false);
+	__enable_irq(desc, irq);
 out:
 	irq_put_desc_busunlock(desc, flags);
 }
@@ -1079,7 +1060,7 @@ __setup_irq(unsigned int irq, struct irq
 		 */
 
 #define IRQF_MISMATCH \
-	(IRQF_TRIGGER_MASK | IRQF_ONESHOT | IRQF_NO_SUSPEND)
+	(IRQF_TRIGGER_MASK | IRQF_ONESHOT)
 
 		if (!((old->flags & new->flags) & IRQF_SHARED) ||
 		    ((old->flags ^ new->flags) & IRQF_MISMATCH))
@@ -1232,7 +1213,7 @@ __setup_irq(unsigned int irq, struct irq
 	 */
 	if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) {
 		desc->istate &= ~IRQS_SPURIOUS_DISABLED;
-		__enable_irq(desc, irq, false);
+		__enable_irq(desc, irq);
 	}
 
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
Index: linux-pm/kernel/irq/spurious.c
===================================================================
--- linux-pm.orig/kernel/irq/spurious.c
+++ linux-pm/kernel/irq/spurious.c
@@ -391,10 +391,20 @@ void note_interrupt(unsigned int irq, st
 		 * otherwise the counter becomes a doomsday timer for otherwise
 		 * working systems
 		 */
-		if (time_after(jiffies, desc->last_unhandled + HZ/10))
+		if (time_after(jiffies, desc->last_unhandled + HZ/10)) {
 			desc->irqs_unhandled = 1;
-		else
+		} else {
 			desc->irqs_unhandled++;
+			if (unlikely(desc->istate & IRQS_SUSPENDED)) {
+				/*
+				 * That shouldn't happen.  It means IRQs from
+				 * a device that is supposed to be suspended at
+				 * this point.  Decay faster.
+				 */
+				desc->irqs_unhandled += 999;
+				desc->irq_count += 999;
+			}
+		}
 		desc->last_unhandled = jiffies;
 	}
 
Index: linux-pm/include/linux/irqdesc.h
===================================================================
--- linux-pm.orig/include/linux/irqdesc.h
+++ linux-pm/include/linux/irqdesc.h
@@ -20,6 +20,7 @@ struct irq_desc;
  * @handle_irq:		highlevel irq-events handler
  * @preflow_handler:	handler called before the flow handler (currently used by sparc)
  * @action:		the irq action chain
+ * @action_suspended:	suspended irq action chain
  * @status:		status information
  * @core_internal_state__do_not_mess_with_it: core internal status information
  * @depth:		disable-depth, for nested irq_disable() calls
@@ -47,6 +48,7 @@ struct irq_desc {
 	irq_preflow_handler_t	preflow_handler;
 #endif
 	struct irqaction	*action;	/* IRQ action list */
+	struct irqaction	*action_suspended;
 	unsigned int		status_use_accessors;
 	unsigned int		core_internal_state__do_not_mess_with_it;
 	unsigned int		depth;		/* nested irq disables */
Index: linux-pm/kernel/irq/internals.h
===================================================================
--- linux-pm.orig/kernel/irq/internals.h
+++ linux-pm/kernel/irq/internals.h
@@ -63,8 +63,8 @@ enum {
 
 extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags);
-extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
-extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
+extern void __disable_irq(struct irq_desc *desc, unsigned int irq);
+extern void __enable_irq(struct irq_desc *desc, unsigned int irq);
 
 extern int irq_startup(struct irq_desc *desc, bool resend);
 extern void irq_shutdown(struct irq_desc *desc);
Index: linux-pm/kernel/irq/pm.c
===================================================================
--- linux-pm.orig/kernel/irq/pm.c
+++ linux-pm/kernel/irq/pm.c
@@ -13,6 +13,47 @@
 
 #include "internals.h"
 
+static void suspend_irq(struct irq_desc *desc, int irq)
+{
+	struct irqaction *action = desc->action;
+	unsigned int no_suspend, flags;
+
+	if (!action)
+		return;
+
+	no_suspend = IRQF_NO_SUSPEND;
+	flags = 0;
+	do {
+		no_suspend &= action->flags;
+		flags |= action->flags;
+		action = action->next;
+	} while (action);
+	if (no_suspend)
+		return;
+
+	desc->istate |= IRQS_SUSPENDED;
+
+	if ((flags & IRQF_NO_SUSPEND) &&
+	    !(desc->istate & IRQS_SPURIOUS_DISABLED)) {
+		struct irqaction *no_suspend = NULL;
+
+		do {
+			action = desc->action;
+			desc->action = action->next;
+			if (action->flags & IRQF_NO_SUSPEND) {
+				action->next = no_suspend;
+				no_suspend = action;
+			} else {
+				action->next = desc->action_suspended;
+				desc->action_suspended = action;
+			}
+		} while (desc->action);
+		desc->action = no_suspend;
+		return;
+	}
+	__disable_irq(desc, irq);
+}
+
 /**
  * suspend_device_irqs - disable all currently enabled interrupt lines
  *
@@ -30,7 +71,7 @@ void suspend_device_irqs(void)
 		unsigned long flags;
 
 		raw_spin_lock_irqsave(&desc->lock, flags);
-		__disable_irq(desc, irq, true);
+		suspend_irq(desc, irq);
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 
@@ -40,6 +81,41 @@ void suspend_device_irqs(void)
 }
 EXPORT_SYMBOL_GPL(suspend_device_irqs);
 
+static void resume_irq(struct irq_desc *desc, int irq)
+{
+	if (desc->istate & IRQS_SUSPENDED) {
+		desc->istate &= ~IRQS_SUSPENDED;
+		if (desc->action_suspended) {
+			struct irqaction *action;
+
+			do {
+				action = desc->action_suspended;
+				desc->action_suspended = action->next;
+				action->next = desc->action;
+				desc->action = action;
+			} while (desc->action_suspended);
+
+			if (desc->istate & IRQS_SPURIOUS_DISABLED) {
+				pr_err("Re-enabling emergency disabled IRQ %d\n",
+				       irq);
+				desc->istate &= ~IRQS_SPURIOUS_DISABLED;
+			} else {
+				return;
+			}
+		}
+	} else {
+		if (!desc->action)
+			return;
+
+		if (!(desc->action->flags & IRQF_FORCE_RESUME))
+			return;
+
+		/* Pretend that it got disabled ! */
+		desc->depth++;
+	}
+	__enable_irq(desc, irq);
+}
+
 static void resume_irqs(bool want_early)
 {
 	struct irq_desc *desc;
@@ -54,7 +130,7 @@ static void resume_irqs(bool want_early)
 			continue;
 
 		raw_spin_lock_irqsave(&desc->lock, flags);
-		__enable_irq(desc, irq, true);
+		resume_irq(desc, irq);
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-28 23:01                     ` Rafael J. Wysocki
@ 2014-07-29 12:46                       ` Thomas Gleixner
  2014-07-29 13:33                         ` Rafael J. Wysocki
  0 siblings, 1 reply; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-29 12:46 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Tue, 29 Jul 2014, Rafael J. Wysocki wrote:

> On Monday, July 28, 2014 11:53:15 PM Rafael J. Wysocki wrote:
> > On Monday, July 28, 2014 02:33:41 PM Thomas Gleixner wrote:
> > > On Mon, 28 Jul 2014, Peter Zijlstra wrote:
> > > > On Sat, Jul 26, 2014 at 01:49:17PM +0200, Rafael J. Wysocki wrote:
> 
> [cut]
> 
> > > So we are not going to make everything a single stupid flag and limit
> > > the usability of existing code. We rather go and try to remove the
> > > stupid flag before it becomes more wide spread.
> > > 
> > > And we cannot treat the wakeup thing the same way as the
> > > IRQF_NO_SUSPEND flag, because there is hardware where the irq line
> > > must be disabled at the normal (non suspend) interrupt controller, and
> > > the wake mechanism tells the PM microcontroller to monitor the
> > > interrupt line and kick the machine back to life.
> > > 
> > > So we need to very carefully look at all the existing cases instead of
> > > yelling crap and inflicting x86 specific horror on everyone. I said on
> > > friday, that I need to look at ALL use cases first and I meant it.
> > 
> > Regardless of the use case, I don't think it is necessary to manipulate
> > the interrupt controller settings before the syscore_suspend stage, because
> > if an interrupt happens earlier, we need to handle it pretty much in a normal
> > way, unless it has been suspended.
> > 
> > So I'd argue for not using anything like enable_irq_wake() that goes all
> > the way to the hardware in drivers.  Instead, we could allow drivers to
> > mark interrupts as "set this up for system wakeup" and really do the setup
> > right before putting the platform into the final "suspended" state.  And that
> > is totally independend of the IRQF_NO_SUSPEND thing.
> 
> In addition to that we need the interrupt handler of the driver that requested
> the irq to be set up for system wakeup to be invoked after suspend_device_irqs()
> in case there are interrupts that should abort the suspend transition or we
> can lose a wakeup event.  So whatever interface we decide to use it has to
> affect suspend/resume_device_irqs() pretty much in the same way as the
> IRQF_NO_SUSPEND flag.

Right, that's a different issue. We probably want that even for the
existing irq_wake() users.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-29 12:46                       ` Thomas Gleixner
@ 2014-07-29 13:33                         ` Rafael J. Wysocki
  2014-07-30 21:46                           ` [PATCH 0/3] irq / PM: wakeup interrupt interface for drivers (was: Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED) Rafael J. Wysocki
  0 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-29 13:33 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

On Tuesday, July 29, 2014 02:46:41 PM Thomas Gleixner wrote:
> On Tue, 29 Jul 2014, Rafael J. Wysocki wrote:
> 
> > On Monday, July 28, 2014 11:53:15 PM Rafael J. Wysocki wrote:
> > > On Monday, July 28, 2014 02:33:41 PM Thomas Gleixner wrote:
> > > > On Mon, 28 Jul 2014, Peter Zijlstra wrote:
> > > > > On Sat, Jul 26, 2014 at 01:49:17PM +0200, Rafael J. Wysocki wrote:
> > 
> > [cut]
> > 
> > > > So we are not going to make everything a single stupid flag and limit
> > > > the usability of existing code. We rather go and try to remove the
> > > > stupid flag before it becomes more wide spread.
> > > > 
> > > > And we cannot treat the wakeup thing the same way as the
> > > > IRQF_NO_SUSPEND flag, because there is hardware where the irq line
> > > > must be disabled at the normal (non suspend) interrupt controller, and
> > > > the wake mechanism tells the PM microcontroller to monitor the
> > > > interrupt line and kick the machine back to life.
> > > > 
> > > > So we need to very carefully look at all the existing cases instead of
> > > > yelling crap and inflicting x86 specific horror on everyone. I said on
> > > > friday, that I need to look at ALL use cases first and I meant it.
> > > 
> > > Regardless of the use case, I don't think it is necessary to manipulate
> > > the interrupt controller settings before the syscore_suspend stage, because
> > > if an interrupt happens earlier, we need to handle it pretty much in a normal
> > > way, unless it has been suspended.
> > > 
> > > So I'd argue for not using anything like enable_irq_wake() that goes all
> > > the way to the hardware in drivers.  Instead, we could allow drivers to
> > > mark interrupts as "set this up for system wakeup" and really do the setup
> > > right before putting the platform into the final "suspended" state.  And that
> > > is totally independend of the IRQF_NO_SUSPEND thing.
> > 
> > In addition to that we need the interrupt handler of the driver that requested
> > the irq to be set up for system wakeup to be invoked after suspend_device_irqs()
> > in case there are interrupts that should abort the suspend transition or we
> > can lose a wakeup event.  So whatever interface we decide to use it has to
> > affect suspend/resume_device_irqs() pretty much in the same way as the
> > IRQF_NO_SUSPEND flag.
> 
> Right, that's a different issue. We probably want that even for the
> existing irq_wake() users.

I agree.

There's one more thing to consider here.  Going forward we'll want to avoid
touching runtime-suspended devices during system suspend.  Then, system wakeup
devices will need to mark their IRQs for system wakeup at the runtime suspend
time and I'm not sure if that's the right time for calling enable_irq_wake().

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH, v5] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts
  2014-07-29  7:28                     ` [PATCH, v4] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts Rafael J. Wysocki
@ 2014-07-29 13:46                       ` Rafael J. Wysocki
  2014-07-30  0:54                         ` [PATCH, v6] " Rafael J. Wysocki
  0 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-29 13:46 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Since __disable_irq() only checks IRQF_NO_SUSPEND for the first
irqaction in a given irq_desc, that value of that bit for the
first irqaction affects all of the other irqactions sharing the
interrupt with it.  This is problematic in two cases.

First, if IRQF_NO_SUSPEND is set in the first irqaction and unset
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
unset will be invoked after suspend_device_irqs() has returned even
though they are not supposed to.  That shouldn't be a problem if
those interrupt handlers are implemented correctly and the
corresponding devices are properly suspended, but it may lead to
functional issues otherwise.

Second, if IRQF_NO_SUSPEND is unset in the first irqaction and set
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
set will not be invoked after suspend_device_irqs() has returned, but
they are supposed to be invoked then.  That may be a problem if the
corresponding devices are wakeup devices or if their interrupts have
to be properly handled during system suspend/resume too for other
reasons, which is the case for timer interrupts or the ACPI SCI for
example.

To address this, modify the handling of IRQF_NO_SUSPEND by
suspend_device_irqs() and resume_device_irqs() and make
note_interrupt() handle suspended interrupts.

First of all, make suspend_device_irqs() check IRQF_NO_SUSPEND for
all irqactions sharing the same irq_desc and avoid disabling the IRQ
if at least one of them has IRQF_NO_SUSPEND set.  If that flag is
also unset for at least one irqaction in the given irq_desc, however,
suspend_device_irqs() will move the irqactions with IRQF_NO_SUSPEND
unset over to a list of "suspended actions" whose interrupt handlers
won't be invoked going forward.

Second, modify note_interrupt() to disable misbehaving IRQs faster if
spuroius interrupts occur during system suspend/resume (that is, for
an irq_desc with IRQS_SUSPENDED set).

Finally, make resume_device_irqs() move irqactions from the list of
"suspended actions" created by suspend_device_irqs() back to the
regular "action" list.  It also will clear the IRQS_SPURIOUS_DISABLED
flag for IRQS_SUSPENDED IRQs that were emergency disabled after
suspend_device_irqs() had returned and will log warning messages for
them.

This should address the problematic case (when IRQF_NO_SUSPEND is set
for at least one and unset for at least one irqaction sharing one
irq_desc) and that case only without changing behavior in any other
cases.

It also effectively reverts a previous change to refuse interrupt
requests with IRQF_NO_SUSPEND settings that do not match the
existing ones (as it is not needed any more) and a previous change
that added a IRQD_WAKEUP_STATE check to __disable_irqs() (as that
mechanism has the same problem with shared interrupts as described
above).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

This version has a reduced number of field references in the irqaction
list manipulations in suspend_irq() and resume_irq().  No other changes
with respect to v4.

Rafael

---
 include/linux/irqdesc.h |    2 +
 kernel/irq/internals.h  |    4 +-
 kernel/irq/manage.c     |   31 +++---------------
 kernel/irq/pm.c         |   82 ++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/irq/spurious.c   |   14 +++++++-
 5 files changed, 102 insertions(+), 31 deletions(-)

Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -382,15 +382,8 @@ setup_affinity(unsigned int irq, struct
 }
 #endif
 
-void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
+void __disable_irq(struct irq_desc *desc, unsigned int irq)
 {
-	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)
-		    || irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
-			return;
-		desc->istate |= IRQS_SUSPENDED;
-	}
-
 	if (!desc->depth++)
 		irq_disable(desc);
 }
@@ -402,7 +395,7 @@ static int __disable_irq_nosync(unsigned
 
 	if (!desc)
 		return -EINVAL;
-	__disable_irq(desc, irq, false);
+	__disable_irq(desc, irq);
 	irq_put_desc_busunlock(desc, flags);
 	return 0;
 }
@@ -443,20 +436,8 @@ void disable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(disable_irq);
 
-void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
+void __enable_irq(struct irq_desc *desc, unsigned int irq)
 {
-	if (resume) {
-		if (!(desc->istate & IRQS_SUSPENDED)) {
-			if (!desc->action)
-				return;
-			if (!(desc->action->flags & IRQF_FORCE_RESUME))
-				return;
-			/* Pretend that it got disabled ! */
-			desc->depth++;
-		}
-		desc->istate &= ~IRQS_SUSPENDED;
-	}
-
 	switch (desc->depth) {
 	case 0:
  err_out:
@@ -498,7 +479,7 @@ void enable_irq(unsigned int irq)
 		 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
 		goto out;
 
-	__enable_irq(desc, irq, false);
+	__enable_irq(desc, irq);
 out:
 	irq_put_desc_busunlock(desc, flags);
 }
@@ -1079,7 +1060,7 @@ __setup_irq(unsigned int irq, struct irq
 		 */
 
 #define IRQF_MISMATCH \
-	(IRQF_TRIGGER_MASK | IRQF_ONESHOT | IRQF_NO_SUSPEND)
+	(IRQF_TRIGGER_MASK | IRQF_ONESHOT)
 
 		if (!((old->flags & new->flags) & IRQF_SHARED) ||
 		    ((old->flags ^ new->flags) & IRQF_MISMATCH))
@@ -1232,7 +1213,7 @@ __setup_irq(unsigned int irq, struct irq
 	 */
 	if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) {
 		desc->istate &= ~IRQS_SPURIOUS_DISABLED;
-		__enable_irq(desc, irq, false);
+		__enable_irq(desc, irq);
 	}
 
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
Index: linux-pm/kernel/irq/spurious.c
===================================================================
--- linux-pm.orig/kernel/irq/spurious.c
+++ linux-pm/kernel/irq/spurious.c
@@ -391,10 +391,20 @@ void note_interrupt(unsigned int irq, st
 		 * otherwise the counter becomes a doomsday timer for otherwise
 		 * working systems
 		 */
-		if (time_after(jiffies, desc->last_unhandled + HZ/10))
+		if (time_after(jiffies, desc->last_unhandled + HZ/10)) {
 			desc->irqs_unhandled = 1;
-		else
+		} else {
 			desc->irqs_unhandled++;
+			if (unlikely(desc->istate & IRQS_SUSPENDED)) {
+				/*
+				 * That shouldn't happen.  It means IRQs from
+				 * a device that is supposed to be suspended at
+				 * this point.  Decay faster.
+				 */
+				desc->irqs_unhandled += 999;
+				desc->irq_count += 999;
+			}
+		}
 		desc->last_unhandled = jiffies;
 	}
 
Index: linux-pm/include/linux/irqdesc.h
===================================================================
--- linux-pm.orig/include/linux/irqdesc.h
+++ linux-pm/include/linux/irqdesc.h
@@ -20,6 +20,7 @@ struct irq_desc;
  * @handle_irq:		highlevel irq-events handler
  * @preflow_handler:	handler called before the flow handler (currently used by sparc)
  * @action:		the irq action chain
+ * @action_suspended:	suspended irq action chain
  * @status:		status information
  * @core_internal_state__do_not_mess_with_it: core internal status information
  * @depth:		disable-depth, for nested irq_disable() calls
@@ -47,6 +48,7 @@ struct irq_desc {
 	irq_preflow_handler_t	preflow_handler;
 #endif
 	struct irqaction	*action;	/* IRQ action list */
+	struct irqaction	*action_suspended;
 	unsigned int		status_use_accessors;
 	unsigned int		core_internal_state__do_not_mess_with_it;
 	unsigned int		depth;		/* nested irq disables */
Index: linux-pm/kernel/irq/internals.h
===================================================================
--- linux-pm.orig/kernel/irq/internals.h
+++ linux-pm/kernel/irq/internals.h
@@ -63,8 +63,8 @@ enum {
 
 extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags);
-extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
-extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
+extern void __disable_irq(struct irq_desc *desc, unsigned int irq);
+extern void __enable_irq(struct irq_desc *desc, unsigned int irq);
 
 extern int irq_startup(struct irq_desc *desc, bool resend);
 extern void irq_shutdown(struct irq_desc *desc);
Index: linux-pm/kernel/irq/pm.c
===================================================================
--- linux-pm.orig/kernel/irq/pm.c
+++ linux-pm/kernel/irq/pm.c
@@ -13,6 +13,50 @@
 
 #include "internals.h"
 
+static void suspend_irq(struct irq_desc *desc, int irq)
+{
+	struct irqaction *action = desc->action;
+	unsigned int no_suspend, flags;
+
+	if (!action)
+		return;
+
+	no_suspend = IRQF_NO_SUSPEND;
+	flags = 0;
+	do {
+		no_suspend &= action->flags;
+		flags |= action->flags;
+		action = action->next;
+	} while (action);
+	if (no_suspend)
+		return;
+
+	desc->istate |= IRQS_SUSPENDED;
+
+	if ((flags & IRQF_NO_SUSPEND) &&
+	    !(desc->istate & IRQS_SPURIOUS_DISABLED)) {
+		struct irqaction *no_suspend = NULL;
+		struct irqaction *suspended = NULL;
+		struct irqaction *head = desc->action;
+
+		do {
+			action = head;
+			head = action->next;
+			if (action->flags & IRQF_NO_SUSPEND) {
+				action->next = no_suspend;
+				no_suspend = action;
+			} else {
+				action->next = suspended;
+				suspended = action;
+			}
+		} while (head);
+		desc->action = no_suspend;
+		desc->action_suspended = suspended;
+		return;
+	}
+	__disable_irq(desc, irq);
+}
+
 /**
  * suspend_device_irqs - disable all currently enabled interrupt lines
  *
@@ -30,7 +74,7 @@ void suspend_device_irqs(void)
 		unsigned long flags;
 
 		raw_spin_lock_irqsave(&desc->lock, flags);
-		__disable_irq(desc, irq, true);
+		suspend_irq(desc, irq);
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 
@@ -40,6 +84,40 @@ void suspend_device_irqs(void)
 }
 EXPORT_SYMBOL_GPL(suspend_device_irqs);
 
+static void resume_irq(struct irq_desc *desc, int irq)
+{
+	if (desc->istate & IRQS_SUSPENDED) {
+		desc->istate &= ~IRQS_SUSPENDED;
+		if (desc->action_suspended) {
+			struct irqaction *action = desc->action;
+
+			while (action->next)
+				action = action->next;
+
+			action->next = desc->action_suspended;
+			desc->action_suspended = NULL;
+
+			if (desc->istate & IRQS_SPURIOUS_DISABLED) {
+				pr_err("Re-enabling emergency disabled IRQ %d\n",
+				       irq);
+				desc->istate &= ~IRQS_SPURIOUS_DISABLED;
+			} else {
+				return;
+			}
+		}
+	} else {
+		if (!desc->action)
+			return;
+
+		if (!(desc->action->flags & IRQF_FORCE_RESUME))
+			return;
+
+		/* Pretend that it got disabled ! */
+		desc->depth++;
+	}
+	__enable_irq(desc, irq);
+}
+
 static void resume_irqs(bool want_early)
 {
 	struct irq_desc *desc;
@@ -54,7 +132,7 @@ static void resume_irqs(bool want_early)
 			continue;
 
 		raw_spin_lock_irqsave(&desc->lock, flags);
-		__enable_irq(desc, irq, true);
+		resume_irq(desc, irq);
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-25  5:58   ` Peter Zijlstra
@ 2014-07-29 19:20     ` Brian Norris
  2014-07-29 19:28       ` Peter Zijlstra
  0 siblings, 1 reply; 75+ messages in thread
From: Brian Norris @ 2014-07-29 19:20 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Rafael J. Wysocki, Thomas Gleixner, linux-kernel,
	Dmitry Torokhov, Linux PM list, Dmitry Torokhov

Hi Peter,

On Fri, Jul 25, 2014 at 07:58:47AM +0200, Peter Zijlstra wrote:
> On Fri, Jul 25, 2014 at 01:10:36AM +0200, Rafael J. Wysocki wrote:
> > Yes, drivers using enable_irq_wake() will likely want IRQF_NO_SUSPEND to
> > be set for their irqactions, but that should not imply "no suspend" for
> > all irqactions sharing the same desc.  So I guess it may be better to go
> > forth and use a global "interrupts suspended" state variable instead of the
> > IRQS_SUSPENDED flag for each desc and throw away the IRQD_WAKEUP_STATE
> > check from suspend_device_irqs() entirely.
> > 
> > Peter, it looks like you'd prefer that?
> 
> My preference would be to shoot enable_irq_wake() in the head, its
> fundamentally broken.

I'm curious what you mean. Are you referring to the fact that its input
is simply an IRQ number (regardless of whether the IRQ is shared), not
something that identifies the particular handler (e.g., struct
irqaction)?

Brian

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-29 19:20     ` Brian Norris
@ 2014-07-29 19:28       ` Peter Zijlstra
  2014-07-29 20:41         ` Brian Norris
  0 siblings, 1 reply; 75+ messages in thread
From: Peter Zijlstra @ 2014-07-29 19:28 UTC (permalink / raw)
  To: Brian Norris
  Cc: Rafael J. Wysocki, Thomas Gleixner, linux-kernel,
	Dmitry Torokhov, Linux PM list, Dmitry Torokhov

On Tue, Jul 29, 2014 at 12:20:08PM -0700, Brian Norris wrote:

> I'm curious what you mean. Are you referring to the fact that its input
> is simply an IRQ number (regardless of whether the IRQ is shared), not
> something that identifies the particular handler (e.g., struct
> irqaction)?

Yes. I know that shared stuff is a massive head-ache, but I feel we
should not introduce primitives that do not work with it.

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED
  2014-07-29 19:28       ` Peter Zijlstra
@ 2014-07-29 20:41         ` Brian Norris
  0 siblings, 0 replies; 75+ messages in thread
From: Brian Norris @ 2014-07-29 20:41 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Rafael J. Wysocki, Thomas Gleixner, linux-kernel,
	Dmitry Torokhov, Linux PM list, Dmitry Torokhov

On Tue, Jul 29, 2014 at 09:28:43PM +0200, Peter Zijlstra wrote:
> On Tue, Jul 29, 2014 at 12:20:08PM -0700, Brian Norris wrote:
> 
> > I'm curious what you mean. Are you referring to the fact that its input
> > is simply an IRQ number (regardless of whether the IRQ is shared), not
> > something that identifies the particular handler (e.g., struct
> > irqaction)?
> 
> Yes. I know that shared stuff is a massive head-ache, but I feel we
> should not introduce primitives that do not work with it.

Thanks for the reply. I could also have just read the rest of the thread
[1], in which this very question was addressed already. Shame on me.

Brian

[1] https://lkml.org/lkml/2014/7/28/60

^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH, v6] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts
  2014-07-29 13:46                       ` [PATCH, v5] " Rafael J. Wysocki
@ 2014-07-30  0:54                         ` Rafael J. Wysocki
  0 siblings, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-30  0:54 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, linux-kernel, Linux PM list

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Since __disable_irq() only checks IRQF_NO_SUSPEND for the first
irqaction in a given irq_desc, that value of that bit for the
first irqaction affects all of the other irqactions sharing the
interrupt with it.  This is problematic in two cases.

First, if IRQF_NO_SUSPEND is set in the first irqaction and unset
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
unset will be invoked after suspend_device_irqs() has returned even
though they are not supposed to.  That shouldn't be a problem if
those interrupt handlers are implemented correctly and the
corresponding devices are properly suspended, but it may lead to
functional issues otherwise.

Second, if IRQF_NO_SUSPEND is unset in the first irqaction and set
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
set will not be invoked after suspend_device_irqs() has returned, but
they are supposed to be invoked then.  That may be a problem if the
corresponding devices are wakeup devices or if their interrupts have
to be properly handled during system suspend/resume too for other
reasons, which is the case for timer interrupts or the ACPI SCI for
example.

To address this, modify the handling of IRQF_NO_SUSPEND by
suspend_device_irqs() and resume_device_irqs() and make
note_interrupt() handle suspended interrupts.

First of all, make suspend_device_irqs() check IRQF_NO_SUSPEND for
all irqactions sharing the same irq_desc and avoid disabling the IRQ
if at least one of them has IRQF_NO_SUSPEND set.  If that flag is
also unset for at least one irqaction in the given irq_desc, however,
suspend_device_irqs() will move the irqactions with IRQF_NO_SUSPEND
unset over to a list of "suspended actions" whose interrupt handlers
won't be invoked going forward.

Second, modify note_interrupt() to disable misbehaving IRQs faster if
spuroius interrupts occur during system suspend/resume (that is, for
an irq_desc with IRQS_SUSPENDED set).

Finally, make resume_device_irqs() move irqactions from the list of
"suspended actions" created by suspend_device_irqs() back to the
regular "action" list.  It also will clear the IRQS_SPURIOUS_DISABLED
flag for IRQS_SUSPENDED IRQs that were emergency disabled after
suspend_device_irqs() had returned and will log warning messages for
them.

This should address the problematic case (when IRQF_NO_SUSPEND is set
for at least one and unset for at least one irqaction sharing one
irq_desc) and that case only without changing behavior in any other
cases.

It also effectively reverts a previous change to refuse interrupt
requests with IRQF_NO_SUSPEND settings that do not match the
existing ones (as it is not needed any more) and a previous change
that added a IRQD_WAKEUP_STATE check to __disable_irqs() (as that
mechanism has the same problem with shared interrupts as described
above).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

One more minor update, sorry about that.

I've just noticed that in v5 I used the same name (no_suspend) for two different
variables in the same function (in different scopes, but still confusing), so
this is fixed here.

Rafael

---
 include/linux/irqdesc.h |    2 +
 kernel/irq/internals.h  |    4 +-
 kernel/irq/manage.c     |   31 +++---------------
 kernel/irq/pm.c         |   82 ++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/irq/spurious.c   |   14 +++++++-
 5 files changed, 102 insertions(+), 31 deletions(-)

Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -382,15 +382,8 @@ setup_affinity(unsigned int irq, struct
 }
 #endif
 
-void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
+void __disable_irq(struct irq_desc *desc, unsigned int irq)
 {
-	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)
-		    || irqd_has_set(&desc->irq_data, IRQD_WAKEUP_STATE))
-			return;
-		desc->istate |= IRQS_SUSPENDED;
-	}
-
 	if (!desc->depth++)
 		irq_disable(desc);
 }
@@ -402,7 +395,7 @@ static int __disable_irq_nosync(unsigned
 
 	if (!desc)
 		return -EINVAL;
-	__disable_irq(desc, irq, false);
+	__disable_irq(desc, irq);
 	irq_put_desc_busunlock(desc, flags);
 	return 0;
 }
@@ -443,20 +436,8 @@ void disable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(disable_irq);
 
-void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
+void __enable_irq(struct irq_desc *desc, unsigned int irq)
 {
-	if (resume) {
-		if (!(desc->istate & IRQS_SUSPENDED)) {
-			if (!desc->action)
-				return;
-			if (!(desc->action->flags & IRQF_FORCE_RESUME))
-				return;
-			/* Pretend that it got disabled ! */
-			desc->depth++;
-		}
-		desc->istate &= ~IRQS_SUSPENDED;
-	}
-
 	switch (desc->depth) {
 	case 0:
  err_out:
@@ -498,7 +479,7 @@ void enable_irq(unsigned int irq)
 		 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
 		goto out;
 
-	__enable_irq(desc, irq, false);
+	__enable_irq(desc, irq);
 out:
 	irq_put_desc_busunlock(desc, flags);
 }
@@ -1079,7 +1060,7 @@ __setup_irq(unsigned int irq, struct irq
 		 */
 
 #define IRQF_MISMATCH \
-	(IRQF_TRIGGER_MASK | IRQF_ONESHOT | IRQF_NO_SUSPEND)
+	(IRQF_TRIGGER_MASK | IRQF_ONESHOT)
 
 		if (!((old->flags & new->flags) & IRQF_SHARED) ||
 		    ((old->flags ^ new->flags) & IRQF_MISMATCH))
@@ -1232,7 +1213,7 @@ __setup_irq(unsigned int irq, struct irq
 	 */
 	if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) {
 		desc->istate &= ~IRQS_SPURIOUS_DISABLED;
-		__enable_irq(desc, irq, false);
+		__enable_irq(desc, irq);
 	}
 
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
Index: linux-pm/kernel/irq/spurious.c
===================================================================
--- linux-pm.orig/kernel/irq/spurious.c
+++ linux-pm/kernel/irq/spurious.c
@@ -391,10 +391,20 @@ void note_interrupt(unsigned int irq, st
 		 * otherwise the counter becomes a doomsday timer for otherwise
 		 * working systems
 		 */
-		if (time_after(jiffies, desc->last_unhandled + HZ/10))
+		if (time_after(jiffies, desc->last_unhandled + HZ/10)) {
 			desc->irqs_unhandled = 1;
-		else
+		} else {
 			desc->irqs_unhandled++;
+			if (unlikely(desc->istate & IRQS_SUSPENDED)) {
+				/*
+				 * That shouldn't happen.  It means IRQs from
+				 * a device that is supposed to be suspended at
+				 * this point.  Decay faster.
+				 */
+				desc->irqs_unhandled += 999;
+				desc->irq_count += 999;
+			}
+		}
 		desc->last_unhandled = jiffies;
 	}
 
Index: linux-pm/include/linux/irqdesc.h
===================================================================
--- linux-pm.orig/include/linux/irqdesc.h
+++ linux-pm/include/linux/irqdesc.h
@@ -20,6 +20,7 @@ struct irq_desc;
  * @handle_irq:		highlevel irq-events handler
  * @preflow_handler:	handler called before the flow handler (currently used by sparc)
  * @action:		the irq action chain
+ * @action_suspended:	suspended irq action chain
  * @status:		status information
  * @core_internal_state__do_not_mess_with_it: core internal status information
  * @depth:		disable-depth, for nested irq_disable() calls
@@ -47,6 +48,7 @@ struct irq_desc {
 	irq_preflow_handler_t	preflow_handler;
 #endif
 	struct irqaction	*action;	/* IRQ action list */
+	struct irqaction	*action_suspended;
 	unsigned int		status_use_accessors;
 	unsigned int		core_internal_state__do_not_mess_with_it;
 	unsigned int		depth;		/* nested irq disables */
Index: linux-pm/kernel/irq/internals.h
===================================================================
--- linux-pm.orig/kernel/irq/internals.h
+++ linux-pm/kernel/irq/internals.h
@@ -63,8 +63,8 @@ enum {
 
 extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags);
-extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
-extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
+extern void __disable_irq(struct irq_desc *desc, unsigned int irq);
+extern void __enable_irq(struct irq_desc *desc, unsigned int irq);
 
 extern int irq_startup(struct irq_desc *desc, bool resend);
 extern void irq_shutdown(struct irq_desc *desc);
Index: linux-pm/kernel/irq/pm.c
===================================================================
--- linux-pm.orig/kernel/irq/pm.c
+++ linux-pm/kernel/irq/pm.c
@@ -13,6 +13,50 @@
 
 #include "internals.h"
 
+static void suspend_irq(struct irq_desc *desc, int irq)
+{
+	struct irqaction *action = desc->action;
+	unsigned int no_suspend, flags;
+
+	if (!action)
+		return;
+
+	no_suspend = IRQF_NO_SUSPEND;
+	flags = 0;
+	do {
+		no_suspend &= action->flags;
+		flags |= action->flags;
+		action = action->next;
+	} while (action);
+	if (no_suspend)
+		return;
+
+	desc->istate |= IRQS_SUSPENDED;
+
+	if ((flags & IRQF_NO_SUSPEND) &&
+	    !(desc->istate & IRQS_SPURIOUS_DISABLED)) {
+		struct irqaction *active = NULL;
+		struct irqaction *suspended = NULL;
+		struct irqaction *head = desc->action;
+
+		do {
+			action = head;
+			head = action->next;
+			if (action->flags & IRQF_NO_SUSPEND) {
+				action->next = active;
+				active = action;
+			} else {
+				action->next = suspended;
+				suspended = action;
+			}
+		} while (head);
+		desc->action = active;
+		desc->action_suspended = suspended;
+		return;
+	}
+	__disable_irq(desc, irq);
+}
+
 /**
  * suspend_device_irqs - disable all currently enabled interrupt lines
  *
@@ -30,7 +74,7 @@ void suspend_device_irqs(void)
 		unsigned long flags;
 
 		raw_spin_lock_irqsave(&desc->lock, flags);
-		__disable_irq(desc, irq, true);
+		suspend_irq(desc, irq);
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 
@@ -40,6 +84,40 @@ void suspend_device_irqs(void)
 }
 EXPORT_SYMBOL_GPL(suspend_device_irqs);
 
+static void resume_irq(struct irq_desc *desc, int irq)
+{
+	if (desc->istate & IRQS_SUSPENDED) {
+		desc->istate &= ~IRQS_SUSPENDED;
+		if (desc->action_suspended) {
+			struct irqaction *action = desc->action;
+
+			while (action->next)
+				action = action->next;
+
+			action->next = desc->action_suspended;
+			desc->action_suspended = NULL;
+
+			if (desc->istate & IRQS_SPURIOUS_DISABLED) {
+				pr_err("Re-enabling emergency disabled IRQ %d\n",
+				       irq);
+				desc->istate &= ~IRQS_SPURIOUS_DISABLED;
+			} else {
+				return;
+			}
+		}
+	} else {
+		if (!desc->action)
+			return;
+
+		if (!(desc->action->flags & IRQF_FORCE_RESUME))
+			return;
+
+		/* Pretend that it got disabled ! */
+		desc->depth++;
+	}
+	__enable_irq(desc, irq);
+}
+
 static void resume_irqs(bool want_early)
 {
 	struct irq_desc *desc;
@@ -54,7 +132,7 @@ static void resume_irqs(bool want_early)
 			continue;
 
 		raw_spin_lock_irqsave(&desc->lock, flags);
-		__enable_irq(desc, irq, true);
+		resume_irq(desc, irq);
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH 0/3] irq / PM: wakeup interrupt interface for drivers (was: Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED)
  2014-07-29 13:33                         ` Rafael J. Wysocki
@ 2014-07-30 21:46                           ` Rafael J. Wysocki
  2014-07-30 21:51                             ` [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts Rafael J. Wysocki
                                               ` (2 more replies)
  0 siblings, 3 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-30 21:46 UTC (permalink / raw)
  To: Thomas Gleixner, Peter Zijlstra
  Cc: linux-kernel, Linux PM list, Dmitry Torokhov

On Tuesday, July 29, 2014 03:33:23 PM Rafael J. Wysocki wrote:
> On Tuesday, July 29, 2014 02:46:41 PM Thomas Gleixner wrote:
> > On Tue, 29 Jul 2014, Rafael J. Wysocki wrote:
> > 
> > > On Monday, July 28, 2014 11:53:15 PM Rafael J. Wysocki wrote:
> > > > On Monday, July 28, 2014 02:33:41 PM Thomas Gleixner wrote:
> > > > > On Mon, 28 Jul 2014, Peter Zijlstra wrote:
> > > > > > On Sat, Jul 26, 2014 at 01:49:17PM +0200, Rafael J. Wysocki wrote:
> > > 
> > > [cut]
> > > 
> > > > > So we are not going to make everything a single stupid flag and limit
> > > > > the usability of existing code. We rather go and try to remove the
> > > > > stupid flag before it becomes more wide spread.
> > > > > 
> > > > > And we cannot treat the wakeup thing the same way as the
> > > > > IRQF_NO_SUSPEND flag, because there is hardware where the irq line
> > > > > must be disabled at the normal (non suspend) interrupt controller, and
> > > > > the wake mechanism tells the PM microcontroller to monitor the
> > > > > interrupt line and kick the machine back to life.
> > > > > 
> > > > > So we need to very carefully look at all the existing cases instead of
> > > > > yelling crap and inflicting x86 specific horror on everyone. I said on
> > > > > friday, that I need to look at ALL use cases first and I meant it.
> > > > 
> > > > Regardless of the use case, I don't think it is necessary to manipulate
> > > > the interrupt controller settings before the syscore_suspend stage, because
> > > > if an interrupt happens earlier, we need to handle it pretty much in a normal
> > > > way, unless it has been suspended.
> > > > 
> > > > So I'd argue for not using anything like enable_irq_wake() that goes all
> > > > the way to the hardware in drivers.  Instead, we could allow drivers to
> > > > mark interrupts as "set this up for system wakeup" and really do the setup
> > > > right before putting the platform into the final "suspended" state.  And that
> > > > is totally independend of the IRQF_NO_SUSPEND thing.
> > > 
> > > In addition to that we need the interrupt handler of the driver that requested
> > > the irq to be set up for system wakeup to be invoked after suspend_device_irqs()
> > > in case there are interrupts that should abort the suspend transition or we
> > > can lose a wakeup event.  So whatever interface we decide to use it has to
> > > affect suspend/resume_device_irqs() pretty much in the same way as the
> > > IRQF_NO_SUSPEND flag.
> > 
> > Right, that's a different issue. We probably want that even for the
> > existing irq_wake() users.
> 
> I agree.
> 
> There's one more thing to consider here.  Going forward we'll want to avoid
> touching runtime-suspended devices during system suspend.  Then, system wakeup
> devices will need to mark their IRQs for system wakeup at the runtime suspend
> time and I'm not sure if that's the right time for calling enable_irq_wake().

Taking all of the above into consideration I prepared a prototype that will
follow.  Patch [1/3] is the actual prototype of the core changes, patch [2/3]
uses that to implement suspend-to-idle wakeup for PME and patch [3/3] illustrates
how an existing user of enable_irq_wake() can be modified to use the new stuff.

All is on top of https://patchwork.kernel.org/patch/4643871/ which should apply
on top of -tip (if I'm not mistaken).

I've tested patches [1-2/3] with PME on my MSI Wind.

Comments welcome.

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-30 21:46                           ` [PATCH 0/3] irq / PM: wakeup interrupt interface for drivers (was: Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED) Rafael J. Wysocki
@ 2014-07-30 21:51                             ` Rafael J. Wysocki
  2014-07-30 22:56                               ` Thomas Gleixner
  2014-07-30 21:51                             ` [PATCH 2/3] PCI / PM: Make PCIe PME interrupts wake up from "freeze" sleep state Rafael J. Wysocki
  2014-07-30 21:52                             ` [PATCH 3/3] gpio-keys / PM: use enable/disable_device_irq_wake() Rafael J. Wysocki
  2 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-30 21:51 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Device drivers currently use enable_irq_wake() to configure their
interrupts for system wakeup, but that API is not particularly
well suited for this purpose, because it goes directly all the
way to the hardware and attempts to change the IRQ configuration
at the chip level.

The first problem with this approach is that the IRQ subsystem
is not told which interrupt handler is supposed to handle
interrupts from the wakeup line should they occur during system
suspend or resume.  That is problematic if the IRQ is shared
and the other devices sharing it with the wakeup device in question
are not wakeup devices.  In that case their drivers may not be
prepared to handle interrupts after the devices have been powered
down and they may expect suspend_device_irqs() to disable the
interrupt.  For this reason, the IRQ should not be left enabled
by suspend_device_irqs() in that case.  On the other hand, though,
it needs to be left enabled to prevent wakeup events occuring
after suspend_device_irqs() has returned from being lost.

The second problem is that on some platforms enable_irq_wake()
results in moving the IRQ over to a special interrupt controller
whose voltage is not removed in the final platform state.  That
allows the platform to react to wakeup signals from the IRQ while
suspended, but the IRQ stops generating regular interrupts at that
point.  That may lead to the loss of wakeup interrupts if they
come in after calling enable_irq_wake() and before the platform
is put into the final state.  Moreover, if the IRQ is shared
and enable_irq_wake() is called from a device driver's .suspend()
callback, for example, it may prevent interrupts generated by
the other devices sharing the line from being handled.

To address the above issues introduce a new interface that can be
used by drivers to request that IRQs be configured for system
wakeup.  That interface doesn't actually change the hardware
state, but tells the IRQ subsystem that the given interrupt should
or should not be configured for system wakeup at the right time.

First, enable_device_irq_wake() takes two arguments, the IRQ
number and the device cookie used when requesting the IRQ.  The
cookie is used to identify the irqaction that should be used for
handling interrups during system suspend and resume.  Namely,
the (new) IRQF_WAKEUP flag is set for that irqaction (if not
set already) and the (new) wake_needed field of the irq_desc
identified by the first argument is incremented.

Second, suspend_device_irqs() is modified to treat irqactions with
IRQF_WAKEUP set in the same way as irqactions with IRQF_NO_SUSPEND
set.  That is, the handlers of those irqactions are left enabled
for the entire duration of system suspend/resume.

Next, the (new) syscore suspend routine for the IRQ subsystem,
irq_pm_syscore_suspend(), browses all irq_descs and calls
enable_irq_wake() for the ones with wake_needed set.  If that is
successful, the (new) IRQS_WAKE_READY flag is set for the given
irq_desc to indicate that the IRQ should be switched back from
the wakeup mode during resume.

The IRQ subsystem's syscore resume routine, irq_pm_syscore_resume(),
is modified to call disable_irq_wake() for each irq_desc with
IRQS_WAKE_READY set and clears that flag for all of them.

Finally, disable_device_irq_wake() takes the same arguments as
enable_device_irq_wake(), finds the irqaction identified by the
second argument, clears IRQF_WAKEUP for it and decrements
wake_needed for the irq_desc identified by the first argument.

This organization of code guarantees that suspend_device_irqs()
will leave wakeup IRQs enabled, but also will block the execution
of interrupt handlers that should not be invoked going forward,
which allows wakeup interrupts to be handled and prevents possible
driver bugs from being tripped over at the same time.  It also
ensures that IRQs will be reconfigured for wakeup at the point
where that should not disturb any legitimate functionality.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/interrupt.h |   12 ++++++++++
 include/linux/irqdesc.h   |    1 
 kernel/irq/internals.h    |    1 
 kernel/irq/manage.c       |   53 ++++++++++++++++++++++++++++++++++++++++++++++
 kernel/irq/pm.c           |   47 ++++++++++++++++++++++++++++++++++++----
 5 files changed, 109 insertions(+), 5 deletions(-)

Index: linux-pm/include/linux/interrupt.h
===================================================================
--- linux-pm.orig/include/linux/interrupt.h
+++ linux-pm/include/linux/interrupt.h
@@ -70,8 +70,10 @@
 #define IRQF_FORCE_RESUME	0x00008000
 #define IRQF_NO_THREAD		0x00010000
 #define IRQF_EARLY_RESUME	0x00020000
+#define IRQF_WAKEUP		0x00040000
 
 #define IRQF_TIMER		(__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
+#define IRQF_INHIBIT_SUSPEND	(IRQF_NO_SUSPEND | IRQF_WAKEUP)
 
 /*
  * These values can be returned by request_any_context_irq() and
@@ -350,6 +352,7 @@ static inline void enable_irq_lockdep_ir
 
 /* IRQ wakeup (PM) control: */
 extern int irq_set_irq_wake(unsigned int irq, unsigned int on);
+extern int device_irq_wake(unsigned int irq, void *dev_id, bool enable);
 
 static inline int enable_irq_wake(unsigned int irq)
 {
@@ -361,6 +364,15 @@ static inline int disable_irq_wake(unsig
 	return irq_set_irq_wake(irq, 0);
 }
 
+static inline int enable_device_irq_wake(unsigned int irq, void *dev_id)
+{
+	return device_irq_wake(irq, dev_id, true);
+}
+
+static inline int disable_device_irq_wake(unsigned int irq, void *dev_id)
+{
+	return device_irq_wake(irq, dev_id, false);
+}
 
 #ifdef CONFIG_IRQ_FORCED_THREADING
 extern bool force_irqthreads;
Index: linux-pm/include/linux/irqdesc.h
===================================================================
--- linux-pm.orig/include/linux/irqdesc.h
+++ linux-pm/include/linux/irqdesc.h
@@ -53,6 +53,7 @@ struct irq_desc {
 	unsigned int		core_internal_state__do_not_mess_with_it;
 	unsigned int		depth;		/* nested irq disables */
 	unsigned int		wake_depth;	/* nested wake enables */
+	unsigned int		wake_needed;
 	unsigned int		irq_count;	/* For detecting broken IRQs */
 	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
 	unsigned int		irqs_unhandled;
Index: linux-pm/kernel/irq/internals.h
===================================================================
--- linux-pm.orig/kernel/irq/internals.h
+++ linux-pm/kernel/irq/internals.h
@@ -53,6 +53,7 @@ enum {
 	IRQS_REPLAY		= 0x00000040,
 	IRQS_WAITING		= 0x00000080,
 	IRQS_PENDING		= 0x00000200,
+	IRQS_WAKE_READY		= 0x00000400,
 	IRQS_SUSPENDED		= 0x00000800,
 };
 
Index: linux-pm/kernel/irq/pm.c
===================================================================
--- linux-pm.orig/kernel/irq/pm.c
+++ linux-pm/kernel/irq/pm.c
@@ -21,7 +21,7 @@ static void suspend_irq(struct irq_desc
 	if (!action)
 		return;
 
-	no_suspend = IRQF_NO_SUSPEND;
+	no_suspend = IRQF_INHIBIT_SUSPEND;
 	flags = 0;
 	do {
 		no_suspend &= action->flags;
@@ -33,7 +33,7 @@ static void suspend_irq(struct irq_desc
 
 	desc->istate |= IRQS_SUSPENDED;
 
-	if ((flags & IRQF_NO_SUSPEND) &&
+	if ((flags & IRQF_INHIBIT_SUSPEND) &&
 	    !(desc->istate & IRQS_SPURIOUS_DISABLED)) {
 		struct irqaction *active = NULL;
 		struct irqaction *suspended = NULL;
@@ -42,7 +42,7 @@ static void suspend_irq(struct irq_desc
 		do {
 			action = head;
 			head = action->next;
-			if (action->flags & IRQF_NO_SUSPEND) {
+			if (action->flags & IRQF_INHIBIT_SUSPEND) {
 				action->next = active;
 				active = action;
 			} else {
@@ -138,16 +138,53 @@ static void resume_irqs(bool want_early)
 }
 
 /**
- * irq_pm_syscore_ops - enable interrupt lines early
+ * irq_pm_syscore_suspend - configure interrupts for system wakeup
  *
- * Enable all interrupt lines with %IRQF_EARLY_RESUME set.
+ * Configure all interrupt lines with %wake_needed set for system wakeup.
+ */
+static int irq_pm_syscore_suspend(void)
+{
+	struct irq_desc *desc;
+	int irq;
+
+	for_each_irq_desc(irq, desc)
+		if (desc->wake_needed) {
+			int error = enable_irq_wake(irq);
+
+			if (error) {
+				/* Ignore missing callbacks. */
+				if (error != -ENXIO)
+					return error;
+			} else {
+				desc->istate |= IRQS_WAKE_READY;
+			}
+		}
+
+	return 0;
+}
+
+/**
+ * irq_pm_syscore_resume - enable interrupt lines early
+ *
+ * Switch wakeup interrupt lines back to the normal mode of operation and
+ * enable all interrupt lines with %IRQF_EARLY_RESUME set.
  */
 static void irq_pm_syscore_resume(void)
 {
+	struct irq_desc *desc;
+	int irq;
+
+	for_each_irq_desc(irq, desc)
+		if (desc->istate & IRQS_WAKE_READY) {
+			disable_irq_wake(irq);
+			desc->istate &= ~IRQS_WAKE_READY;
+		}
+
 	resume_irqs(true);
 }
 
 static struct syscore_ops irq_pm_syscore_ops = {
+	.suspend	= irq_pm_syscore_suspend,
 	.resume		= irq_pm_syscore_resume,
 };
 
Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -547,6 +547,59 @@ int irq_set_irq_wake(unsigned int irq, u
 }
 EXPORT_SYMBOL(irq_set_irq_wake);
 
+/**
+ *	device_irq_wake - set/unset device irq PM wakeup requirement
+ *	@irq:		interrupt to control
+ *	@dev_id:	interrupt handler device cookie
+ *	@enable:	whether or not the interrupt should wake up the system
+ *
+ *	Tell the IRQ subsystem whether or not the given interrupt should be used
+ *	for system wakeup from sleep states (like suspend-to-RAM).  This doesn't
+ *	change the hardware configuration, but notes whether or not to change it
+ *	at the syscore stage of system suspend and resume.
+ *
+ *	@dev_id may be NULL if the IRQ has not been requested as a shared one.
+ *	Otherwise, it must be the same as the one used when requesting the IRQ.
+ */
+int device_irq_wake(unsigned int irq, void *dev_id, bool enable)
+{
+	unsigned long flags;
+	struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
+	struct irqaction *action;
+
+	if (!desc || !desc->action)
+		return -EINVAL;
+
+	if (dev_id) {
+		for (action = desc->action; action; action = action->next)
+			if (action->dev_id == dev_id)
+				break;
+	} else {
+		action = desc->action;
+		if (action->flags & IRQF_SHARED)
+			action = NULL;
+	}
+	if (!action) {
+		irq_put_desc_busunlock(desc, flags);
+		return -ENODEV;
+	}
+	if (enable) {
+		if (!(action->flags & IRQF_WAKEUP)) {
+			action->flags |= IRQF_WAKEUP;
+			desc->wake_needed++;
+		}
+	} else {
+		if (action->flags & IRQF_WAKEUP) {
+			action->flags &= ~IRQF_WAKEUP;
+			desc->wake_needed--;
+		}
+	}
+
+	irq_put_desc_busunlock(desc, flags);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(device_irq_wake);
+
 /*
  * Internal function that tells the architecture code whether a
  * particular irq has been exclusively allocated or is available


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH 2/3] PCI / PM: Make PCIe PME interrupts wake up from "freeze" sleep state
  2014-07-30 21:46                           ` [PATCH 0/3] irq / PM: wakeup interrupt interface for drivers (was: Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED) Rafael J. Wysocki
  2014-07-30 21:51                             ` [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts Rafael J. Wysocki
@ 2014-07-30 21:51                             ` Rafael J. Wysocki
  2014-07-30 21:52                             ` [PATCH 3/3] gpio-keys / PM: use enable/disable_device_irq_wake() Rafael J. Wysocki
  2 siblings, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-30 21:51 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

The "freeze" sleep state, also known as suspend-to-idle, is entered
without taking nonboot CPUs offline, right after devices have been
suspended.  It works by waiting for at least one wakeup source object
to become "active" as a result of handling a hardware interrupt.

Of course, interrupts supposed to be able to wake up the system from
suspend-to-idle cannot be disabled by suspend_device_irqs() and their
interrupt handlers must be able to cope with interrupts coming after
all devices have been suspended.  In that case, they only need to
call __pm_wakeup_event() for a single wakeup source object without
trying to access hardware (that will be resumed later as part of
the subsequent system resume).

Make PCIe PME interrupts work this way.

Register an additional wakeup source object for each PCIe PME
service device.  That object will be used to generate wakeups from
suspend-to-idle.

During system suspend, while suspending the PME service for given
port, walk the bus below that port and see if any devices on that
bus are configured for wakeup.  If so, do not disable the PME
interrupt, but call enable_device_irq_wake() for it, set a "suspend
level" variable for the service to "wakeup" and make the interrupt
handler behave in a special way, which is to call __pm_wakeup_event()
with the service's wakeup source object as the first argument and
switch the "suspend level" over to "noirq" when the interrupt is
triggered.

The "suspend level" is cleared and disable_device_irq_wake() is
called for the PME interrupt while resuming the PME service.

This change allows Wake-on-LAN to be used for wakeup from
suspend-to-idle on my MSI Wind tesbed netbook.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/pcie/pme.c |   74 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 63 insertions(+), 11 deletions(-)

Index: linux-pm/drivers/pci/pcie/pme.c
===================================================================
--- linux-pm.orig/drivers/pci/pcie/pme.c
+++ linux-pm/drivers/pci/pcie/pme.c
@@ -20,6 +20,7 @@
 #include <linux/device.h>
 #include <linux/pcieport_if.h>
 #include <linux/pm_runtime.h>
+#include <linux/pm_wakeup.h>
 
 #include "../pci.h"
 #include "portdrv.h"
@@ -41,11 +42,18 @@ static int __init pcie_pme_setup(char *s
 }
 __setup("pcie_pme=", pcie_pme_setup);
 
+enum pme_suspend_level {
+	PME_SUSPEND_NONE = 0,
+	PME_SUSPEND_WAKEUP,
+	PME_SUSPEND_NOIRQ,
+};
+
 struct pcie_pme_service_data {
 	spinlock_t lock;
 	struct pcie_device *srv;
 	struct work_struct work;
-	bool noirq; /* Don't enable the PME interrupt used by this service. */
+	struct wakeup_source *ws;
+	enum pme_suspend_level suspend_level;
 };
 
 /**
@@ -223,7 +231,7 @@ static void pcie_pme_work_fn(struct work
 	spin_lock_irq(&data->lock);
 
 	for (;;) {
-		if (data->noirq)
+		if (data->suspend_level == PME_SUSPEND_NOIRQ)
 			break;
 
 		pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta);
@@ -234,6 +242,12 @@ static void pcie_pme_work_fn(struct work
 			 */
 			pcie_clear_root_pme_status(port);
 
+			if (data->suspend_level == PME_SUSPEND_WAKEUP) {
+				__pm_wakeup_event(data->ws, 0);
+				data->suspend_level = PME_SUSPEND_NOIRQ;
+				break;
+			}
+
 			spin_unlock_irq(&data->lock);
 			pcie_pme_handle_request(port, rtsta & 0xffff);
 			spin_lock_irq(&data->lock);
@@ -250,7 +264,7 @@ static void pcie_pme_work_fn(struct work
 		spin_lock_irq(&data->lock);
 	}
 
-	if (!data->noirq)
+	if (data->suspend_level != PME_SUSPEND_NOIRQ)
 		pcie_pme_interrupt_enable(port, true);
 
 	spin_unlock_irq(&data->lock);
@@ -360,6 +374,7 @@ static int pcie_pme_probe(struct pcie_de
 	if (ret) {
 		kfree(data);
 	} else {
+		data->ws = wakeup_source_register(dev_name(&srv->device));
 		pcie_pme_mark_devices(port);
 		pcie_pme_interrupt_enable(port, true);
 	}
@@ -367,6 +382,21 @@ static int pcie_pme_probe(struct pcie_de
 	return ret;
 }
 
+static bool pcie_pme_check_wakeup(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	if (!bus)
+		return false;
+
+	list_for_each_entry(dev, &bus->devices, bus_list)
+		if (device_may_wakeup(&dev->dev)
+		    || pcie_pme_check_wakeup(dev->subordinate))
+			return true;
+
+	return false;
+}
+
 /**
  * pcie_pme_suspend - Suspend PCIe PME service device.
  * @srv: PCIe service device to suspend.
@@ -375,11 +405,26 @@ static int pcie_pme_suspend(struct pcie_
 {
 	struct pcie_pme_service_data *data = get_service_data(srv);
 	struct pci_dev *port = srv->port;
+	bool wakeup;
 
+	if (device_may_wakeup(&port->dev)) {
+		wakeup = true;
+	} else {
+		down_read(&pci_bus_sem);
+		wakeup = pcie_pme_check_wakeup(port->subordinate);
+		up_read(&pci_bus_sem);
+	}
 	spin_lock_irq(&data->lock);
-	pcie_pme_interrupt_enable(port, false);
-	pcie_clear_root_pme_status(port);
-	data->noirq = true;
+	if (wakeup) {
+		enable_device_irq_wake(srv->irq, srv);
+		data->suspend_level = PME_SUSPEND_WAKEUP;
+	} else {
+		struct pci_dev *port = srv->port;
+
+		pcie_pme_interrupt_enable(port, false);
+		pcie_clear_root_pme_status(port);
+		data->suspend_level = PME_SUSPEND_NOIRQ;
+	}
 	spin_unlock_irq(&data->lock);
 
 	synchronize_irq(srv->irq);
@@ -394,12 +439,16 @@ static int pcie_pme_suspend(struct pcie_
 static int pcie_pme_resume(struct pcie_device *srv)
 {
 	struct pcie_pme_service_data *data = get_service_data(srv);
-	struct pci_dev *port = srv->port;
 
 	spin_lock_irq(&data->lock);
-	data->noirq = false;
-	pcie_clear_root_pme_status(port);
-	pcie_pme_interrupt_enable(port, true);
+	if (data->suspend_level == PME_SUSPEND_NOIRQ) {
+		struct pci_dev *port = srv->port;
+
+		pcie_clear_root_pme_status(port);
+		pcie_pme_interrupt_enable(port, true);
+	}
+	disable_device_irq_wake(srv->irq, srv);
+	data->suspend_level = PME_SUSPEND_NONE;
 	spin_unlock_irq(&data->lock);
 
 	return 0;
@@ -411,9 +460,12 @@ static int pcie_pme_resume(struct pcie_d
  */
 static void pcie_pme_remove(struct pcie_device *srv)
 {
+	struct pcie_pme_service_data *data = get_service_data(srv);
+
 	pcie_pme_suspend(srv);
 	free_irq(srv->irq, srv);
-	kfree(get_service_data(srv));
+	wakeup_source_unregister(data->ws);
+	kfree(data);
 }
 
 static struct pcie_port_service_driver pcie_pme_driver = {


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH 3/3] gpio-keys / PM: use enable/disable_device_irq_wake()
  2014-07-30 21:46                           ` [PATCH 0/3] irq / PM: wakeup interrupt interface for drivers (was: Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED) Rafael J. Wysocki
  2014-07-30 21:51                             ` [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts Rafael J. Wysocki
  2014-07-30 21:51                             ` [PATCH 2/3] PCI / PM: Make PCIe PME interrupts wake up from "freeze" sleep state Rafael J. Wysocki
@ 2014-07-30 21:52                             ` Rafael J. Wysocki
  2 siblings, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-30 21:52 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Use enable/disable_device_irq_wake() instead of enable/disable_irq_wake(),
respectively, to prepare the gpio-keys interrupt for waking up the
system from sleep states.

That is safe with respect to shared interrupts and allows the IRQ
subsystem to take care of IRQ configuration at the right time
instead of going all the way to the hardware and reconfiguring it
right away.  It also allows gpio-keys to wake up the system from
the "freeze" sleep state (suspend-to-idle).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/input/keyboard/gpio_keys.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Index: linux-pm/drivers/input/keyboard/gpio_keys.c
===================================================================
--- linux-pm.orig/drivers/input/keyboard/gpio_keys.c
+++ linux-pm/drivers/input/keyboard/gpio_keys.c
@@ -788,7 +788,7 @@ static int gpio_keys_suspend(struct devi
 		for (i = 0; i < ddata->pdata->nbuttons; i++) {
 			struct gpio_button_data *bdata = &ddata->data[i];
 			if (bdata->button->wakeup)
-				enable_irq_wake(bdata->irq);
+				enable_device_irq_wake(bdata->irq, bdata);
 		}
 	} else {
 		mutex_lock(&input->mutex);
@@ -811,7 +811,7 @@ static int gpio_keys_resume(struct devic
 		for (i = 0; i < ddata->pdata->nbuttons; i++) {
 			struct gpio_button_data *bdata = &ddata->data[i];
 			if (bdata->button->wakeup)
-				disable_irq_wake(bdata->irq);
+				disable_device_irq_wake(bdata->irq, bdata);
 		}
 	} else {
 		mutex_lock(&input->mutex);


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-30 21:51                             ` [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts Rafael J. Wysocki
@ 2014-07-30 22:56                               ` Thomas Gleixner
  2014-07-31  0:12                                 ` Thomas Gleixner
  0 siblings, 1 reply; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-30 22:56 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Wed, 30 Jul 2014, Rafael J. Wysocki wrote:

> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> Device drivers currently use enable_irq_wake() to configure their
> interrupts for system wakeup, but that API is not particularly
> well suited for this purpose, because it goes directly all the
> way to the hardware and attempts to change the IRQ configuration
> at the chip level.
>
> The first problem with this approach is that the IRQ subsystem
> is not told which interrupt handler is supposed to handle
> interrupts from the wakeup line should they occur during system
> suspend or resume.  That is problematic if the IRQ is shared
> and the other devices sharing it with the wakeup device in question
> are not wakeup devices.  In that case their drivers may not be
> prepared to handle interrupts after the devices have been powered
> down and they may expect suspend_device_irqs() to disable the
> interrupt.  For this reason, the IRQ should not be left enabled
> by suspend_device_irqs() in that case.  On the other hand, though,
> it needs to be left enabled to prevent wakeup events occuring
> after suspend_device_irqs() has returned from being lost.

I really disagree here. The API was designed at a point where it was
very well suited for the purpose. At least from the POV of the
hardware which caused that infrastructure to be built. Looking at it
10 years later with a different set of hardware requirements in mind
does not make it invalid.

That's really not the way it works. x86 didn't give a rats ass 10
years ago when this was introduced, simply because there was no x86
hardware which could support this or if there was hardware nobody was
interested to do so. Coming in 10 years after the fact and telling
those who designed and used this for 10 years, that it's a design
failure is more than inappropriate.

There is nothing wrong to point out that existing infrastructure is
not able to handle the new requirements of differently (and partially
ass backwards) designed hardware. But that's different from stating:

   "that API is not particularly well suited for the purpose"

What's worse is that you are merily fiddling around in the existing
code without doing a proper analysis of the existing semantics and a
proper description of your new semantics.

Before this code changes in any way I want to see:

 1) a description of the existing semantics and their background

 2) a description of the short comings of the existing semantics w/o
    considering the new fangled (hardware) use cases

 3) a description how to mitigate the short comings described in #2
    w/o breaking the world and some more and introducing hard to
    decode regressions

 4) a description why the improvements introduced by #3 are not
    sufficient for the new fangled (hardware) use cases

 5) a description how to mitigate the short comings described in #4
    w/o breaking the world and some more and introducing hard to
    decode regressions

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-30 22:56                               ` Thomas Gleixner
@ 2014-07-31  0:12                                 ` Thomas Gleixner
  2014-07-31  2:14                                   ` Rafael J. Wysocki
  0 siblings, 1 reply; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-31  0:12 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Thu, 31 Jul 2014, Thomas Gleixner wrote:
> On Wed, 30 Jul 2014, Rafael J. Wysocki wrote:
> Before this code changes in any way I want to see:
> 
>  1) a description of the existing semantics and their background
> 
>  2) a description of the short comings of the existing semantics w/o
>     considering the new fangled (hardware) use cases
> 
>  3) a description how to mitigate the short comings described in #2
>     w/o breaking the world and some more and introducing hard to
>     decode regressions
> 
>  4) a description why the improvements introduced by #3 are not
>     sufficient for the new fangled (hardware) use cases
> 
>  5) a description how to mitigate the short comings described in #4
>     w/o breaking the world and some more and introducing hard to
>     decode regressions

Aside of that I want to see a coherent explanation why a shared MSI
interrupt makes any sense at all.

25:  1 <....> 0   PCI-MSI-edge      aerdrv, PCIe PME

AFAICT, that's the primary reason why you insist to support wakeup
sources on shared irq lines. And to be honest, it's utter bullshit.

The whole purpose of MSI is to avoid interrupt sharing, but of course
if that particular interrupt source has two potential causes, i.e. the
AER and the PME one and the stupid hardware does not support different
vectors or the drivers are not able to do so, it's conveniant to make
them shared instead of thinking about them what they really are:

 Separate interrupts on a secondary interrupt controller connected to
 the primary (MSI) one.

That's what is in fact. Simply because you can enable/disable them at
the same IP block quite contrary to stuff which is physically shared
by hard wired electrical connections.

Slapping IRQF_SHARED on that and then whining about shortcomings of
the core code is way simpler than sitting down and think about
actual topologies, right?

Most architectures aside of x86 have long ago realized that and the
core has all infrastructure available to deal with irq topologies. You
just have to utilize it.

Thanks,

	tglx





^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-31  0:12                                 ` Thomas Gleixner
@ 2014-07-31  2:14                                   ` Rafael J. Wysocki
  2014-07-31 10:44                                     ` Thomas Gleixner
  0 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-31  2:14 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Thursday, July 31, 2014 02:12:11 AM Thomas Gleixner wrote:
> On Thu, 31 Jul 2014, Thomas Gleixner wrote:
> > On Wed, 30 Jul 2014, Rafael J. Wysocki wrote:
> > Before this code changes in any way I want to see:
> > 
> >  1) a description of the existing semantics and their background

On that one, the meaning of IRQF_NO_SUSPEND is quite simple to me.

If it is set (for the first irqaction in a given irq_desc)
suspend_device_irqs() will leave that IRQ alone (that is, will not
disable it and will not mark it as IRQS_SUSPENDED).

As a result, if an interrupt for that irq_desc happens after
suspend_device_irqs(), the interrupt handlers from all of its irqactions
will be invoked.

So this basically means "ignore that IRQ" to suspend_device_irqs() and
that's its *only* meaning.

It's primary users are timers, per-CPU IRQs, ACPI SCI, via-pmu, Xen.
There also is a bunch of drivers that use it for wakeup stuff, but they
shouldn't in my opinion.

The background I'm aware of was primarily timers (we wanted to be able
to msleep() during PCI PM transitions in the noirq phases of system suspend
and resume among other things), and we want per-CPU stuff to work all the
time too.  ACPI uses it for signaling various types of events (including
battery and thermal) that need to be handled all the time.  I'm not sure
why Xen needs it exactly, but that seems to be IPI-related.

The current handling of IRQF_NO_SUSPEND has a problem vs shared interrupts
which I've tried to address by https://patchwork.kernel.org/patch/4643871/
and which is described in the changelog of that patch.  Unfortunately, some
existing users pass IRQF_SHARED along with IRQF_NO_SUSPEND which is the main
motivation for that.  Many of them use it for wakeup purposes, but for one
example (that doesn't use it for wakeup only) the ACPI SCI is shareable by
design.

To be continued.


> >  2) a description of the short comings of the existing semantics w/o
> >     considering the new fangled (hardware) use cases
> > 
> >  3) a description how to mitigate the short comings described in #2
> >     w/o breaking the world and some more and introducing hard to
> >     decode regressions
> > 
> >  4) a description why the improvements introduced by #3 are not
> >     sufficient for the new fangled (hardware) use cases
> > 
> >  5) a description how to mitigate the short comings described in #4
> >     w/o breaking the world and some more and introducing hard to
> >     decode regressions
> 
> Aside of that I want to see a coherent explanation why a shared MSI
> interrupt makes any sense at all.
> 
> 25:  1 <....> 0   PCI-MSI-edge      aerdrv, PCIe PME
>
> AFAICT, that's the primary reason why you insist to support wakeup
> sources on shared irq lines. And to be honest, it's utter bullshit.

No, this isn't the primary reason.

Here's /proc/interrupts from my MSI Wind system and, as you can see,
PCIe PME are (a) not MSI and (b) shared with some interesting things
(USB, WiFi and the GPU).

           CPU0       CPU1       
  0:      26321          0   IO-APIC-edge      timer
  1:        379          0   IO-APIC-edge      i8042
  7:          6          0   IO-APIC-edge    
  9:         59          0   IO-APIC-fasteoi   acpi
 12:       2824          0   IO-APIC-edge      i8042
 14:       9074          0   IO-APIC-edge      ata_piix
 15:          0          0   IO-APIC-edge      ata_piix
 16:       5217          0   IO-APIC-fasteoi   PCIe PME, uhci_hcd:usb4, i915
 17:      13964          0   IO-APIC-fasteoi   PCIe PME, rtl818x_pci
 18:          0          0   IO-APIC-fasteoi   uhci_hcd:usb3
 19:          0          0   IO-APIC-fasteoi   uhci_hcd:usb2
 23:       9402          0   IO-APIC-fasteoi   uhci_hcd:usb1, ehci_hcd:usb5
 40:         61          0   PCI-MSI-edge      eth0
 41:        102          0   PCI-MSI-edge      snd_hda_intel
NMI:          0          0   Non-maskable interrupts
LOC:      18538      30831   Local timer interrupts
SPU:          0          0   Spurious interrupts
PMI:          0          0   Performance monitoring interrupts
IWI:          6          0   IRQ work interrupts
RTR:          0          0   APIC ICR read retries
RES:       5277       6121   Rescheduling interrupts
CAL:         92        106   Function call interrupts
TLB:        317        302   TLB shootdowns
TRM:          0          0   Thermal event interrupts
THR:          0          0   Threshold APIC interrupts
MCE:          0          0   Machine check exceptions
MCP:          5          5   Machine check polls
ERR:          6
MIS:          0


> The whole purpose of MSI is to avoid interrupt sharing, but of course
> if that particular interrupt source has two potential causes, i.e. the
> AER and the PME one and the stupid hardware does not support different
> vectors or the drivers are not able to do so, it's conveniant to make
> them shared instead of thinking about them what they really are:
> 
>  Separate interrupts on a secondary interrupt controller connected to
>  the primary (MSI) one.
> 
> That's what is in fact. Simply because you can enable/disable them at
> the same IP block quite contrary to stuff which is physically shared
> by hard wired electrical connections.

I agree with the above.

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-31  2:14                                   ` Rafael J. Wysocki
@ 2014-07-31 10:44                                     ` Thomas Gleixner
  2014-07-31 18:36                                       ` Rafael J. Wysocki
  0 siblings, 1 reply; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-31 10:44 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Thu, 31 Jul 2014, Rafael J. Wysocki wrote:

> On Thursday, July 31, 2014 02:12:11 AM Thomas Gleixner wrote:
> > On Thu, 31 Jul 2014, Thomas Gleixner wrote:
> > > On Wed, 30 Jul 2014, Rafael J. Wysocki wrote:
> > > Before this code changes in any way I want to see:
> > > 
> > >  1) a description of the existing semantics and their background
> 
> On that one, the meaning of IRQF_NO_SUSPEND is quite simple to me.
> 
> If it is set (for the first irqaction in a given irq_desc)
> suspend_device_irqs() will leave that IRQ alone (that is, will not
> disable it and will not mark it as IRQS_SUSPENDED).
> 
> As a result, if an interrupt for that irq_desc happens after
> suspend_device_irqs(), the interrupt handlers from all of its irqactions
> will be invoked.
> 
> So this basically means "ignore that IRQ" to suspend_device_irqs() and
> that's its *only* meaning.
> 
> It's primary users are timers, per-CPU IRQs, ACPI SCI, via-pmu, Xen.
> There also is a bunch of drivers that use it for wakeup stuff, but they
> shouldn't in my opinion.

Indeed.

> The background I'm aware of was primarily timers (we wanted to be able
> to msleep() during PCI PM transitions in the noirq phases of system suspend
> and resume among other things), and we want per-CPU stuff to work all the
> time too.  ACPI uses it for signaling various types of events (including
> battery and thermal) that need to be handled all the time.  I'm not sure
> why Xen needs it exactly, but that seems to be IPI-related.

So none of these interrupts is used to abort suspend or wakeup. They
are kept functional because they are required for the suspend/resume
transitions itself.

What's this PCIe PME handler doing? Is it required functionality for
the suspend/resume path or is it a wakeup/abort mechanism.

> The current handling of IRQF_NO_SUSPEND has a problem vs shared interrupts
> which I've tried to address by https://patchwork.kernel.org/patch/4643871/
> and which is described in the changelog of that patch.  Unfortunately, some
> existing users pass IRQF_SHARED along with IRQF_NO_SUSPEND which is the main
> motivation for that.  Many of them use it for wakeup purposes, but for one
> example (that doesn't use it for wakeup only) the ACPI SCI is shareable by
> design.

So many of them use it for wakeup purposes. Why so and how is that
supposed to work?

The mechanism for wakeup sources is:

    1) Lazy disable the interrupt

    2) Do the transition into suspend with interrupts enabled

    3) Check whether one of the wakeup sources has triggered. If yes,
       abort. Otherwise suspend.

The ones marked IRQF_NO_SUSPEND are not part of the above scheme,
because they are not checked. So they use different mechanisms to
abort the suspend?

> > Aside of that I want to see a coherent explanation why a shared MSI
> > interrupt makes any sense at all.
> > 
> > 25:  1 <....> 0   PCI-MSI-edge      aerdrv, PCIe PME
> >
> > AFAICT, that's the primary reason why you insist to support wakeup
> > sources on shared irq lines. And to be honest, it's utter bullshit.
> 
> No, this isn't the primary reason.
> 
> Here's /proc/interrupts from my MSI Wind system and, as you can see,
> PCIe PME are (a) not MSI and (b) shared with some interesting things
> (USB, WiFi and the GPU).

>  16:       5217          0   IO-APIC-fasteoi   PCIe PME, uhci_hcd:usb4, i915
>  17:      13964          0   IO-APIC-fasteoi   PCIe PME, rtl818x_pci

So the obvious question is: WHY are they not using MSI?

Just because MSI fucked up the MSI configuration of the device or is
there any sane explanation for it?
 
Thanks,

	tglx

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-31 10:44                                     ` Thomas Gleixner
@ 2014-07-31 18:36                                       ` Rafael J. Wysocki
  2014-07-31 20:12                                           ` Alan Stern
                                                           ` (2 more replies)
  0 siblings, 3 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-31 18:36 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Thursday, July 31, 2014 12:44:24 PM Thomas Gleixner wrote:
> On Thu, 31 Jul 2014, Rafael J. Wysocki wrote:
> 
> > On Thursday, July 31, 2014 02:12:11 AM Thomas Gleixner wrote:
> > > On Thu, 31 Jul 2014, Thomas Gleixner wrote:
> > > > On Wed, 30 Jul 2014, Rafael J. Wysocki wrote:
> > > > Before this code changes in any way I want to see:
> > > > 
> > > >  1) a description of the existing semantics and their background
> > 
> > On that one, the meaning of IRQF_NO_SUSPEND is quite simple to me.
> > 
> > If it is set (for the first irqaction in a given irq_desc)
> > suspend_device_irqs() will leave that IRQ alone (that is, will not
> > disable it and will not mark it as IRQS_SUSPENDED).
> > 
> > As a result, if an interrupt for that irq_desc happens after
> > suspend_device_irqs(), the interrupt handlers from all of its irqactions
> > will be invoked.
> > 
> > So this basically means "ignore that IRQ" to suspend_device_irqs() and
> > that's its *only* meaning.
> > 
> > It's primary users are timers, per-CPU IRQs, ACPI SCI, via-pmu, Xen.
> > There also is a bunch of drivers that use it for wakeup stuff, but they
> > shouldn't in my opinion.
> 
> Indeed.
> 
> > The background I'm aware of was primarily timers (we wanted to be able
> > to msleep() during PCI PM transitions in the noirq phases of system suspend
> > and resume among other things), and we want per-CPU stuff to work all the
> > time too.  ACPI uses it for signaling various types of events (including
> > battery and thermal) that need to be handled all the time.  I'm not sure
> > why Xen needs it exactly, but that seems to be IPI-related.
> 
> So none of these interrupts is used to abort suspend or wakeup.

ACPI can do that too, but it's just one of its functions.

> They are kept functional because they are required for the suspend/resume
> transitions itself.

They are for things that need to work throughout system suspend/resume and
which are not wakeup.

> What's this PCIe PME handler doing? Is it required functionality for
> the suspend/resume path or is it a wakeup/abort mechanism.

It is a wakeup/abort mechanism.

> > The current handling of IRQF_NO_SUSPEND has a problem vs shared interrupts
> > which I've tried to address by https://patchwork.kernel.org/patch/4643871/
> > and which is described in the changelog of that patch.

And before we enter the wakeup handling slippery slope, let me make a note
that this problem is bothering me quite a bit at the moment.  In my opinion
we need to address it somehow regardless of the wakeup issues and I'm not sure
if failing __setup_irq() when there's a mismatch (that is, there are existing
actions for the given irq_desc and their IRQF_NO_SUSPEND settings are not
consistent with the new one) is the right way to do that, because it may make
things behave a bit randomly (it will always fail the second guy, but that need
not be the one who's requested IRQF_NO_SUSPEND and it depends on the ordering
between them).

I had a couple of ideas, but none of them was particularly clean.  Ideally,
IRQF_NO_SUSPEND should always be requested without IRQF_SHARED, but I'm
afraid that we can't really do that for the ACPI SCI, because that may
cause problems to happen on some older systems where that interrupt is
actually shared.  On all systems I have immediate access to it isn't shared,
but I remember seeing some where it was.  On those systems the ACPI SCI itself
would not be affected, because it is requested quite early during system init,
but the other guys wanting to share the line with it would take a hit.

One thing I was thinking about was to return an error from suspend_device_irqs()
if there was a mismatch between IRQF_NO_SUSPEND settings for different irqactions
in the same irq_desc.  That would make system suspend fail on systems where it
is potentially unsafe, but at least any other functionality would not be affected.

> > Unfortunately, some
> > existing users pass IRQF_SHARED along with IRQF_NO_SUSPEND which is the main
> > motivation for that.  Many of them use it for wakeup purposes, but for one
> > example (that doesn't use it for wakeup only) the ACPI SCI is shareable by
> > design.
> 
> So many of them use it for wakeup purposes. Why so and how is that
> supposed to work?

Quite frankly, I'm not sure why they use it.  These are mostly drivers I'm not
familiar with on platforms I'm not familiar with.  My guess is that the lazy
disable mechanism is not sufficient for them for some reason.

> The mechanism for wakeup sources is:
> 
>     1) Lazy disable the interrupt
> 
>     2) Do the transition into suspend with interrupts enabled
> 
>     3) Check whether one of the wakeup sources has triggered. If yes,
>        abort. Otherwise suspend.
> 
> The ones marked IRQF_NO_SUSPEND are not part of the above scheme,
> because they are not checked. So they use different mechanisms to
> abort the suspend?

Well, if you look at the tegra_kbc driver, for example, it uses both
enable_irq_wake() and IRQF_NO_SUSPEND.  Why it does that, I don't know.

Other ones seem to be using pm_wakeup_event(), but that will only abort
suspend when it is enabled upfront (it need not be).  Moreover, it wasn't
intended to be used that way.

It generally looks like things are used not as intended in the wakeup
area, sadly.  Perhaps that's my fault, because I wasn't looking carefully
enough every time, but I wasn't directly involved in any of them IIRC.

I guess that's an opportunity to clean that up ...

And now there's one more piece of it which is suspend-to-idle (aka "freeze").
That doesn't go all the way to syscore_suspend(), but basically stops after
finishing the "noirq" phase of suspending devices.  Then, it idles the CPUs
and waits for interrupts that will take them out of idle.  Only some of those
interrupts are wakeup events, so it only resumes when __pm_wakeup_event() or
__pm_relax() is called in the process of handling the interrupts.

Of course, it could be implemented differently, but that was the simplest
way to do that.  It still can be changed, but I'd really like it not to have
to go through all of the disabling nonboot CPUs and sysfore_suspend(), because
that simply isn't necessary (and it avoids a metric ton of problems with CPU
offline/online).  And of course, it has to work on x86 too.

On x86, however, enable_irq_wake() always returns -ENXIO and nothing happens,
because the chip doesn't have an ->irq_set_wake callback and doesn't flag
itself as IRQCHIP_SKIP_SET_WAKE, so even if we found a way to do something
equivalent to check_wakeup_irqs() for suspend-to-idle, it still wouldn't work
on x86 for that reason.

Also, I wouldn't like to make suspend-to-idle more special than it really has
to be, so it would be ideal if it could use as much of the same mechanics as
regular platform-supported suspend as reasonably possible.  The handling of
wakeup events is crucial here, because it's needed to make suspend-to-idle
really work and I'd like to make it as consistent as possible with the
"regular" suspend.  Now, that can be done in a couple of ways and some of
them may be better than others for reasons that aren't known to me.

My current case at hand is to make PCIe MSI wake systems up from suspend-to-idle
(it actually works for the "regular" suspend most of the time), but that's part
of a bigger picture, of course.

> > > Aside of that I want to see a coherent explanation why a shared MSI
> > > interrupt makes any sense at all.
> > > 
> > > 25:  1 <....> 0   PCI-MSI-edge      aerdrv, PCIe PME
> > >
> > > AFAICT, that's the primary reason why you insist to support wakeup
> > > sources on shared irq lines. And to be honest, it's utter bullshit.
> > 
> > No, this isn't the primary reason.
> > 
> > Here's /proc/interrupts from my MSI Wind system and, as you can see,
> > PCIe PME are (a) not MSI and (b) shared with some interesting things
> > (USB, WiFi and the GPU).
> 
> >  16:       5217          0   IO-APIC-fasteoi   PCIe PME, uhci_hcd:usb4, i915
> >  17:      13964          0   IO-APIC-fasteoi   PCIe PME, rtl818x_pci
> 
> So the obvious question is: WHY are they not using MSI?
> 
> Just because MSI fucked up the MSI configuration of the device or is
> there any sane explanation for it?

It looks like they don't use MSI on that machine at all, so perhaps the chipset
is not capable of that or similar.  I'm not sure why it matters, though.  The
system shipped like that and with Linux on it, so we should be able to
handle it regardless.

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-31 18:36                                       ` Rafael J. Wysocki
@ 2014-07-31 20:12                                           ` Alan Stern
  2014-07-31 22:16                                         ` Thomas Gleixner
  2014-07-31 22:54                                         ` [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts Thomas Gleixner
  2 siblings, 0 replies; 75+ messages in thread
From: Alan Stern @ 2014-07-31 20:12 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Thomas Gleixner, Peter Zijlstra, linux-kernel, Linux PM list,
	Dmitry Torokhov

On Thu, 31 Jul 2014, Rafael J. Wysocki wrote:

> And before we enter the wakeup handling slippery slope, let me make a note
> that this problem is bothering me quite a bit at the moment.  In my opinion
> we need to address it somehow regardless of the wakeup issues and I'm not sure
> if failing __setup_irq() when there's a mismatch (that is, there are existing
> actions for the given irq_desc and their IRQF_NO_SUSPEND settings are not
> consistent with the new one) is the right way to do that, because it may make
> things behave a bit randomly (it will always fail the second guy, but that need
> not be the one who's requested IRQF_NO_SUSPEND and it depends on the ordering
> between them).

Pardon me for sticking my nose into the middle of the conversation, but
here's what it looks like to me:

The entire no_irq phase of suspend/resume is starting to seem like a
mistake.  We should never have done it.  Alternatively, it might be
okay to disable _all_ interrupts during the no_irq phase provided they
are then _all_ enabled again before entering the sysdev and
platform-specific parts of suspend (or the final freeze).

As I understand it, the idea behind the no_irq phase was to make life 
easier for drivers.  They wouldn't have to worry about strange things 
cropping up while they're in the middle of powering down their devices 
or afterward.

Well, guess what?  It turns out that they do have to worry about it
after all.  Timers can still fire during suspend transitions, and if an
IRQ line is shared with a wakeup source then it won't be disabled.

The fact is, drivers should not rely on disabled interrupts to prevent
untimely interrupt requests or wakeups.  They should configure their
devices not to generate any interrupt requests at all, apart from
wakeup requests.  And their interrupt handlers shouldn't mind being
invoked for a shared IRQ, even after their devices are turned off.

Any driver that leaves its device capable of generating non-wakeup
interrupt requests during suspend, and relies on interrupts being
globally disabled to avoid problems, is most likely broken.  Yes, it
may be acceptable in cases where the IRQ line isn't shared, but it's
still a bad design.

Alan Stern


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
@ 2014-07-31 20:12                                           ` Alan Stern
  0 siblings, 0 replies; 75+ messages in thread
From: Alan Stern @ 2014-07-31 20:12 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Thomas Gleixner, Peter Zijlstra, linux-kernel, Linux PM list,
	Dmitry Torokhov

On Thu, 31 Jul 2014, Rafael J. Wysocki wrote:

> And before we enter the wakeup handling slippery slope, let me make a note
> that this problem is bothering me quite a bit at the moment.  In my opinion
> we need to address it somehow regardless of the wakeup issues and I'm not sure
> if failing __setup_irq() when there's a mismatch (that is, there are existing
> actions for the given irq_desc and their IRQF_NO_SUSPEND settings are not
> consistent with the new one) is the right way to do that, because it may make
> things behave a bit randomly (it will always fail the second guy, but that need
> not be the one who's requested IRQF_NO_SUSPEND and it depends on the ordering
> between them).

Pardon me for sticking my nose into the middle of the conversation, but
here's what it looks like to me:

The entire no_irq phase of suspend/resume is starting to seem like a
mistake.  We should never have done it.  Alternatively, it might be
okay to disable _all_ interrupts during the no_irq phase provided they
are then _all_ enabled again before entering the sysdev and
platform-specific parts of suspend (or the final freeze).

As I understand it, the idea behind the no_irq phase was to make life 
easier for drivers.  They wouldn't have to worry about strange things 
cropping up while they're in the middle of powering down their devices 
or afterward.

Well, guess what?  It turns out that they do have to worry about it
after all.  Timers can still fire during suspend transitions, and if an
IRQ line is shared with a wakeup source then it won't be disabled.

The fact is, drivers should not rely on disabled interrupts to prevent
untimely interrupt requests or wakeups.  They should configure their
devices not to generate any interrupt requests at all, apart from
wakeup requests.  And their interrupt handlers shouldn't mind being
invoked for a shared IRQ, even after their devices are turned off.

Any driver that leaves its device capable of generating non-wakeup
interrupt requests during suspend, and relies on interrupts being
globally disabled to avoid problems, is most likely broken.  Yes, it
may be acceptable in cases where the IRQ line isn't shared, but it's
still a bad design.

Alan Stern


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-31 20:12                                           ` Alan Stern
  (?)
@ 2014-07-31 21:04                                           ` Rafael J. Wysocki
  2014-07-31 23:41                                             ` Thomas Gleixner
  -1 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-07-31 21:04 UTC (permalink / raw)
  To: Alan Stern
  Cc: Thomas Gleixner, Peter Zijlstra, linux-kernel, Linux PM list,
	Dmitry Torokhov

On Thursday, July 31, 2014 04:12:55 PM Alan Stern wrote:
> On Thu, 31 Jul 2014, Rafael J. Wysocki wrote:
> 
> > And before we enter the wakeup handling slippery slope, let me make a note
> > that this problem is bothering me quite a bit at the moment.  In my opinion
> > we need to address it somehow regardless of the wakeup issues and I'm not sure
> > if failing __setup_irq() when there's a mismatch (that is, there are existing
> > actions for the given irq_desc and their IRQF_NO_SUSPEND settings are not
> > consistent with the new one) is the right way to do that, because it may make
> > things behave a bit randomly (it will always fail the second guy, but that need
> > not be the one who's requested IRQF_NO_SUSPEND and it depends on the ordering
> > between them).
> 
> Pardon me for sticking my nose into the middle of the conversation, but
> here's what it looks like to me:
> 
> The entire no_irq phase of suspend/resume is starting to seem like a
> mistake.  We should never have done it.

In hindsight, I totally agree.  Question is what we can do about it now.

> Alternatively, it might be
> okay to disable _all_ interrupts during the no_irq phase provided they
> are then _all_ enabled again before entering the sysdev and
> platform-specific parts of suspend (or the final freeze).
> 
> As I understand it, the idea behind the no_irq phase was to make life 
> easier for drivers.  They wouldn't have to worry about strange things 
> cropping up while they're in the middle of powering down their devices 
> or afterward.

Actually, it was done to prevent bugs in PCI drivers from crashing boxes
during suspend and resume IIRC.

When we were debugging PME with Peter a couple of days ago I asked him to
comment out suspend/resume_device_irqs() experimentally and see what
happens and it turned out that the system didn't resume any more.  It looks
like it still prevents problems from happening, then.

> Well, guess what?  It turns out that they do have to worry about it
> after all.  Timers can still fire during suspend transitions, and if an
> IRQ line is shared with a wakeup source then it won't be disabled.

OK, that's where it starts to get really messy.  If people were not using
IRQF_NO_SUSPEND excessively, the situation would be that almost all IRQ
lines, including the ones of wakeup sources, would be disabled by
suspend_device_irqs() (the exceptions being really low-level stuff that
needs to run during suspend/resume and not sharing IRQ lines).

The wakeup would then be handled with the help of the lazy disable mechanism,
that is, enable_irq_wake() from the driver and then check_wakeup_irqs() in
syscore_suspend().

However, what we have now is a mixture of different mechanisms often used
for things they had not been intended for.

But I agree that we'd probably have avoided it if suspend_device_irqs() hadn't
existed.

> The fact is, drivers should not rely on disabled interrupts to prevent
> untimely interrupt requests or wakeups.  They should configure their
> devices not to generate any interrupt requests at all, apart from
> wakeup requests.  And their interrupt handlers shouldn't mind being
> invoked for a shared IRQ, even after their devices are turned off.
> 
> Any driver that leaves its device capable of generating non-wakeup
> interrupt requests during suspend, and relies on interrupts being
> globally disabled to avoid problems, is most likely broken.  Yes, it
> may be acceptable in cases where the IRQ line isn't shared, but it's
> still a bad design.

Totally agreed.

So how can we eliminate the noirq phase in a workable way?

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-31 18:36                                       ` Rafael J. Wysocki
  2014-07-31 20:12                                           ` Alan Stern
@ 2014-07-31 22:16                                         ` Thomas Gleixner
  2014-08-01  0:08                                           ` Rafael J. Wysocki
  2014-07-31 22:54                                         ` [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts Thomas Gleixner
  2 siblings, 1 reply; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-31 22:16 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Thu, 31 Jul 2014, Rafael J. Wysocki wrote:
> On Thursday, July 31, 2014 12:44:24 PM Thomas Gleixner wrote:
> > What's this PCIe PME handler doing? Is it required functionality for
> > the suspend/resume path or is it a wakeup/abort mechanism.
> 
> It is a wakeup/abort mechanism.

So why is it using IRQF_NO_SUSPEND in the first place? Just because x86
does not have irq wake implemented or flagged it's irq chips with
IRQCHIP_SKIP_SET_WAKE. Right, so instead of thinking about a proper
solution driver folks just slap the next available thing on it w/o
thinking about the consequences. But, thats partly our own fault due
to lack of proper documentation.
 
> And before we enter the wakeup handling slippery slope, let me make a note
> that this problem is bothering me quite a bit at the moment.  In my opinion
> we need to address it somehow regardless of the wakeup issues and I'm not sure
> if failing __setup_irq() when there's a mismatch (that is, there are existing
> actions for the given irq_desc and their IRQF_NO_SUSPEND settings are not
> consistent with the new one) is the right way to do that, because it may make
> things behave a bit randomly (it will always fail the second guy, but that need
> not be the one who's requested IRQF_NO_SUSPEND and it depends on the ordering
> between them).

I totally agree that we want to fix it and I'm going to help. Though I
really wanted to have a clear picture of the stuff before making
decisions.
 
> I had a couple of ideas, but none of them was particularly clean.  Ideally,
> IRQF_NO_SUSPEND should always be requested without IRQF_SHARED, but I'm
> afraid that we can't really do that for the ACPI SCI, because that may
> cause problems to happen on some older systems where that interrupt is
> actually shared.  On all systems I have immediate access to it isn't shared,
> but I remember seeing some where it was.  On those systems the ACPI SCI itself
> would not be affected, because it is requested quite early during system init,
> but the other guys wanting to share the line with it would take a hit.
> 
> One thing I was thinking about was to return an error from suspend_device_irqs()
> if there was a mismatch between IRQF_NO_SUSPEND settings for different irqactions
> in the same irq_desc.  That would make system suspend fail on systems where it
> is potentially unsafe, but at least any other functionality would not be affected.
>

That's one possible solution. See below.

> > So many of them use it for wakeup purposes. Why so and how is that
> > supposed to work?
> 
> Quite frankly, I'm not sure why they use it.  These are mostly drivers I'm not
> familiar with on platforms I'm not familiar with.  My guess is that the lazy
> disable mechanism is not sufficient for them for some reason.

Looking at a few of them I fear the reason is that the developer did
not understand the wakeup mechanism at all. Again that's probably our
fault, because the whole business including the irq part lacks proper
in depth documentation.

> > The mechanism for wakeup sources is:
> > 
> >     1) Lazy disable the interrupt
> > 
> >     2) Do the transition into suspend with interrupts enabled
> > 
> >     3) Check whether one of the wakeup sources has triggered. If yes,
> >        abort. Otherwise suspend.
> > 
> > The ones marked IRQF_NO_SUSPEND are not part of the above scheme,
> > because they are not checked. So they use different mechanisms to
> > abort the suspend?
> 
> Well, if you look at the tegra_kbc driver, for example, it uses both
> enable_irq_wake() and IRQF_NO_SUSPEND.  Why it does that, I don't know.

That doesn't make sense at all.
 
> Other ones seem to be using pm_wakeup_event(), but that will only abort
> suspend when it is enabled upfront (it need not be).  Moreover, it wasn't
> intended to be used that way.

Right. We should kill that before more people copy it blindly.
 
> It generally looks like things are used not as intended in the wakeup
> area, sadly.  Perhaps that's my fault, because I wasn't looking carefully
> enough every time, but I wasn't directly involved in any of them IIRC.
> 
> I guess that's an opportunity to clean that up ...

Agreed. I'm not frightened to do a tree wide sweep. Seems to become a
habit :)
 
> And now there's one more piece of it which is suspend-to-idle (aka "freeze").
> That doesn't go all the way to syscore_suspend(), but basically stops after
> finishing the "noirq" phase of suspending devices.  Then, it idles the CPUs
> and waits for interrupts that will take them out of idle.  Only some of those
> interrupts are wakeup events, so it only resumes when __pm_wakeup_event() or
> __pm_relax() is called in the process of handling the interrupts.
>
> Of course, it could be implemented differently, but that was the simplest
> way to do that.  It still can be changed, but I'd really like it not to have
> to go through all of the disabling nonboot CPUs and sysfore_suspend(), because
> that simply isn't necessary (and it avoids a metric ton of problems with CPU
> offline/online).  And of course, it has to work on x86 too.

Right. So one thing which would make the situation more clear is that
the interrupt handler needs to tell the core code about this by
returning something like IRQ_HANDLED_PMWAKE and the core kicks the
suspend-to-idle logic back to life. I'm not sure whether the extra
return value is actually necessary, we might even map it to
IRQ_HANDLED in the core as we know that we are in the suspend
state.
 
> On x86, however, enable_irq_wake() always returns -ENXIO and nothing happens,
> because the chip doesn't have an ->irq_set_wake callback and doesn't flag
> itself as IRQCHIP_SKIP_SET_WAKE, so even if we found a way to do something
> equivalent to check_wakeup_irqs() for suspend-to-idle, it still wouldn't work
> on x86 for that reason.

There is no reason, why we can't flag the affected irq chips with
IRQCHIP_SKIP_SET_WAKE. That was introduced to handle irq controllers
which provide wakeup functionality but do not have this special extra
magic which is implemented in ->irq_set_wake, i.e. talking to some PM
controller directly.

> Also, I wouldn't like to make suspend-to-idle more special than it really has
> to be, so it would be ideal if it could use as much of the same mechanics as
> regular platform-supported suspend as reasonably possible.  The handling of
> wakeup events is crucial here, because it's needed to make suspend-to-idle
> really work and I'd like to make it as consistent as possible with the
> "regular" suspend.  Now, that can be done in a couple of ways and some of
> them may be better than others for reasons that aren't known to me.

Sure. From the recent discussion I can see the following things we
need to address:

1) IRQF_NO_SUSPEND:

    - Investigate the (ab)use
    - Fix the offending call sites 
    - Provide proper documentation when and why it is sane to use


2) check_wakeup_irqs()

   Call it in the suspend-to-idle case and deal with the detected
   suspend abort from there. If we don't do that, we basically force
   people to hack creative workarounds into their driver, be it
   abusing IRQF_NO_SUSPEND or other completely brainwrecked
   modifications.

   The approach of doing:

    1) lazy disable
    2) transition into suspend
    3) check abort conditions

   is the only sane way to keep the real suspend and the
   suspend-to-idle stuff in line.

   Any other approach will create major headache, simply because by
   avoiding the conditional call in the PM core you force the driver
   developers to work around it and have conditional handling for that
   case at the driver level without having proper information about
   the actual suspend target, i.e. idle vs. real. I can predict the
   mess caused by that halfways precisely without using a crystal
   ball: With the expected popularity of the suspend-to-idle approach
   this is going to approach infinite level rapidly.

   Seriously we cannot tell people to use A for real suspend and B for
   suspend-to-idle. It must be the same mechanism. If that costs the
   price of some core code restructuring, we better pay that than
   dealing with the fallout of the other variant.


3) IRQF_SHARED

   - The ideal solution for this would be to provide seperate virtual
     irq numbers for each device which sits on a shared interrupt line
     and have a pseudo interrupt chip handling the devices and the
     real interrupt line providing the "demux" function.

     That would make life way simpler as we'd always deal with a
     single interrupt number per device. So the whole chained action
     pain would simply go away and the wake/no suspend functionality
     would not have to care at all about this whole shared interrupt
     trainwreck. You'd get per device interrupt stats for free as a
     bonus.

     Unfortunately we can't pull that off w/o major surgery to a
     gazillion of device drivers, but I'm tempted to provide the
     infrastructure for this sooner than later.

     Though for the PCIe root hub thing which has a shared MSI handler
     !?! we really want to implement that at the device level
     today. Sharing a MSI interrupt is just ass backwards.


   - So we need to bite the bullet and handle the action chain as it
     is.

     And we need to support it for the IRQF_NO_SUSPEND and the wake
     functionality.

     For that we have two choices:

     1) Use the separate pointer in the irq desc which stores the
        actions which are either not flagged IRQF_NO_SUSPEND or not
        declared as actual wakeup sources.

     2) Have a separate handler function pointer in struct irqaction,
     	which is used to store the actual device handler and have a
     	core stub handler as the primary handler.

	That stub handler would basically do:

	if (!irqs_suspended)
	   return action->real_handler();
	return IRQ_NONE;

	The switch over of the handler can be done at
	setup/request_irq time when a shared non consistent flagged
	handler is detected or at enable_wake() time.

     There is an advantage to #2:

        The suspend/resume path does not care about any of this. It
	only penalizes the few shared handlers which are affected by
	this nonsense with the extra conditional in the hot handling
	path. Bad luck for the device which has to share the interrupt
	line: complain to the morons who still think that shared
	interrupt lines are a brilliant idea.

     Now for the enable/disable_wake() stuff we really need to have
     the mechanism which has the extra *dev_id argument to identify
     the proper handler. The existing interface should simply hand in
     NULL and if it's used on a shared interrupt line it should yell
     loudly and maybe even prevent suspend. Combined with the above we
     burden all the nonsense of handling shared interrupts in that
     context to the slow path.

     Of course we need handling for the stupid case where the non
     wakeup device decides to fire interrupts on the shared line.

     We certainly need a detection mechanism for this, but your
     current proposal of accelerating the spurious detection has a
     serious downside:

       If there is a spurious interrupt on a shared edge type
       interrupt, which is crap by definition, but unfortunately
       reality, we never trigger the spurious detector, but we might
       render the device useless in the following case:

         disk irq shares line with wakeup button irq

         suspend
		spurious interrupt caused by disk device

	 resume from a differnt wakeup source

	 Disk device interrupt is lost and therefor the disk is
	 disfunctional until someone presses the wakeup button.

       Admittedly a constructed scenario, but we had our share of
       pleasure to deal with lost edge type interrupts in the past and
       I can assure you it's not fun to debug.

       So we rather error out on the safe side and disable the
       interrupt line right away, mark it pending and resume or abort
       suspend and tell why. If we don't do that upfront we'll add
       that feature after a painful debug session which starts with
       this information: Sporadically after suspend I have to hard
       reset the machine by pressing the power button for 10 seconds
       or removing the battery....

       Note: This only applies to shared interrupt lines. We still
       have a sane spurious handling for proper designed systems which
       avoid the shared brainfarts.


4) Move the handling of resume/abort to the core

   We really want either an explicit return value from the device
   interrupt handler which indicates that we should resume or abort
   the suspend transition or handle IRQ_HANDLED automatically at the
   core level in case of suspend.

   If you let drivers do that, they will create a gazillion of
   solutions to abort/resume which will kill your ability to change
   any of the PM core internal mechanisms w/o doing a tree wide
   cleanup. Let's do it now before we get into the unavoidable
   exponential copy and paste phase.


5) Documentation
   
   Add a metric ton of it!

Thoughts?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-31 18:36                                       ` Rafael J. Wysocki
  2014-07-31 20:12                                           ` Alan Stern
  2014-07-31 22:16                                         ` Thomas Gleixner
@ 2014-07-31 22:54                                         ` Thomas Gleixner
  2 siblings, 0 replies; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-31 22:54 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Thu, 31 Jul 2014, Rafael J. Wysocki wrote:
> On Thursday, July 31, 2014 12:44:24 PM Thomas Gleixner wrote:
> > > > Aside of that I want to see a coherent explanation why a shared MSI
> > > > interrupt makes any sense at all.
> > > > 
> > > > 25:  1 <....> 0   PCI-MSI-edge      aerdrv, PCIe PME
> > > >
> > > > AFAICT, that's the primary reason why you insist to support wakeup
> > > > sources on shared irq lines. And to be honest, it's utter bullshit.
> > > 
> > > No, this isn't the primary reason.
> > > 
> > > Here's /proc/interrupts from my MSI Wind system and, as you can see,
> > > PCIe PME are (a) not MSI and (b) shared with some interesting things
> > > (USB, WiFi and the GPU).
> > 
> > >  16:       5217          0   IO-APIC-fasteoi   PCIe PME, uhci_hcd:usb4, i915
> > >  17:      13964          0   IO-APIC-fasteoi   PCIe PME, rtl818x_pci
> > 
> > So the obvious question is: WHY are they not using MSI?
> > 
> > Just because MSI fucked up the MSI configuration of the device or is
> > there any sane explanation for it?
> 
> It looks like they don't use MSI on that machine at all, so perhaps the chipset
> is not capable of that or similar.  I'm not sure why it matters, though.  The
> system shipped like that and with Linux on it, so we should be able to
> handle it regardless.

Sorry, but this is outright hillarious.

#1 "so perhaps the chipset is not capable of that or similar."

   This is a fricking Intel chipset. All Intel chipsets which have a
   PCIe root hub are MSI capable at least to my restricted knowledge.

   As an Intel employee you could have had the decency to RTFM of the
   chipset which is in the machine you care about.

#2 "I'm not sure why it matters"

   It matters a lot.
   
   You're just fostering the industry mentality of "it works for us,
   lets ship it" simply because you tell them:

     No matter what brainfarts you have or what level of complete
     ignorance and incompetence you have, we're going to cope with it.

   You are sending out the wrong message. The message needs to be:

     Despite your completely idiotic implementation which renders a
     perfectly well designed piece of modern hardware into a last
     century piece of shit, we went out on a limb to provide the
     victims of your incompetence, i.e. your paying customers, a state
     of the art user experience.

  See the difference ?

The only point where I agree is that we want to handle it, but not
without pointing out that this stuff is a major piece of shite, which
should not ever had been offered to customers who simply are not able
to judge the shit level before spending their hard earned bucks for
it.

I'm well aware that neither Intel nor any other silicon vendor gives a
rats ass about this as long as they sell enough chips. But that's no
excuse for wasting the brains of community developers for no value,
simply because that could have been avoided by applying a fraction of
the brain power, which is necessary to cope with the damage, in the
first place.

I don't care how you feel about that, but I very much care about the
waste of my own precious time.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-31 21:04                                           ` Rafael J. Wysocki
@ 2014-07-31 23:41                                             ` Thomas Gleixner
  2014-08-01  0:51                                               ` Rafael J. Wysocki
  2014-08-01 14:41                                                 ` Alan Stern
  0 siblings, 2 replies; 75+ messages in thread
From: Thomas Gleixner @ 2014-07-31 23:41 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Alan Stern, Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Thu, 31 Jul 2014, Rafael J. Wysocki wrote:
> On Thursday, July 31, 2014 04:12:55 PM Alan Stern wrote:
> > Pardon me for sticking my nose into the middle of the conversation, but
> > here's what it looks like to me:
> > 
> > The entire no_irq phase of suspend/resume is starting to seem like a
> > mistake.  We should never have done it.
> 
> In hindsight, I totally agree.  Question is what we can do about it now.

<SNIP>

> So how can we eliminate the noirq phase in a workable way?

The straight way to do that is breaking the world and some more and
then fix up a gazillion of device drivers by doing a massive voodoo
debugging effort simply because in most cases we do not get any useful
information out of the system once the shit hits the fan.

We could add instrumentation to the core code about interrupts which
are coming in unexpectedly during suspend, but that does not solve
anything.

We really cannot call any device handler at that point as clocks might
be turned off already and any access to a device register might simply
cause a full undebuggable stall of the CPU.

And there is no way to prove that there is no chance of a spurious
interrupt for a given device. 

So if we cannot handle it at the infrastructure level, we need to make
sure that every fricking device driver interrupt handler has a 

     if (dev->suspended)
     	return CRAP;

conditional as the first line of code in it.

What is that buying us? 

Nothing than a shitload of hard to understand problems, really. The
only sensible way to handle this is at the core level.

#1 There is no way that you can rely on random drivers to do the Right
   Thing. 

#2 There is no way that all hardware is implemented in a sane way.

#3 You CANNOT educate the people who are tasked to implement something
   which "does the job" to understand all the subtle details of
   suspend/resume or whatever.

In fact such an approach would take the general aims of consolidating
repeating patterns into core infrastructure and hiding complexity from
the driver developers ad absurdum. No thanks. We have enough
uncomprehensible shite in drivers/* already. We really can do without
adding more reasons for voodoo programming.

This is a classic core infrastructure problem and we need to get the
semantics and the implementation straight by considering the
challenges of new fangled hardware and the incompentent usage of
that. Once we have that we need to fix the few offending drivers, but
that's a task which can be handled with grep and some brain applied.

Anyone who thinks that this can and should be solved at the driver
level is simply taking the wrong drugs or ran out of supply of the
proper ones. Either call your shrink or your drug dealer to get out of
that.

Thanks,

	tglx





^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-31 22:16                                         ` Thomas Gleixner
@ 2014-08-01  0:08                                           ` Rafael J. Wysocki
  2014-08-01  1:24                                             ` Rafael J. Wysocki
  2014-08-01  9:40                                             ` [PATCH 1/3] irq / PM: New driver interface for wakeup interruptsn Thomas Gleixner
  0 siblings, 2 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-01  0:08 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Friday, August 01, 2014 12:16:23 AM Thomas Gleixner wrote:
> On Thu, 31 Jul 2014, Rafael J. Wysocki wrote:
> > On Thursday, July 31, 2014 12:44:24 PM Thomas Gleixner wrote:
> > > What's this PCIe PME handler doing? Is it required functionality for
> > > the suspend/resume path or is it a wakeup/abort mechanism.
> > 
> > It is a wakeup/abort mechanism.
> 
> So why is it using IRQF_NO_SUSPEND in the first place?

It isn't in the current code.

It did in *some* of the prototype patches floating around, but they were
withdrawn.

In the last one ([2/3] in this series) it doesn't any more.

> Just because x86 does not have irq wake implemented or flagged it's irq
> chips with IRQCHIP_SKIP_SET_WAKE.

The reason for using IRQF_NO_SUSPEND was to make it wake up from suspend-to-idle,
but that was a sledgehammer approach.

> Right, so instead of thinking about a proper solution driver folks just slap
> the next available thing on it w/o thinking about the consequences. But,
> thats partly our own fault due to lack of proper documentation.

Prototyping with the next available thing is not generally wrong in my view
as long as this doesn't get to the code base without consideration.

> > And before we enter the wakeup handling slippery slope, let me make a note
> > that this problem is bothering me quite a bit at the moment.  In my opinion
> > we need to address it somehow regardless of the wakeup issues and I'm not sure
> > if failing __setup_irq() when there's a mismatch (that is, there are existing
> > actions for the given irq_desc and their IRQF_NO_SUSPEND settings are not
> > consistent with the new one) is the right way to do that, because it may make
> > things behave a bit randomly (it will always fail the second guy, but that need
> > not be the one who's requested IRQF_NO_SUSPEND and it depends on the ordering
> > between them).
> 
> I totally agree that we want to fix it and I'm going to help. Though I
> really wanted to have a clear picture of the stuff before making
> decisions.
>  
> > I had a couple of ideas, but none of them was particularly clean.  Ideally,
> > IRQF_NO_SUSPEND should always be requested without IRQF_SHARED, but I'm
> > afraid that we can't really do that for the ACPI SCI, because that may
> > cause problems to happen on some older systems where that interrupt is
> > actually shared.  On all systems I have immediate access to it isn't shared,
> > but I remember seeing some where it was.  On those systems the ACPI SCI itself
> > would not be affected, because it is requested quite early during system init,
> > but the other guys wanting to share the line with it would take a hit.
> > 
> > One thing I was thinking about was to return an error from suspend_device_irqs()
> > if there was a mismatch between IRQF_NO_SUSPEND settings for different irqactions
> > in the same irq_desc.  That would make system suspend fail on systems where it
> > is potentially unsafe, but at least any other functionality would not be affected.
> >
> 
> That's one possible solution. See below.
> 
> > > So many of them use it for wakeup purposes. Why so and how is that
> > > supposed to work?
> > 
> > Quite frankly, I'm not sure why they use it.  These are mostly drivers I'm not
> > familiar with on platforms I'm not familiar with.  My guess is that the lazy
> > disable mechanism is not sufficient for them for some reason.
> 
> Looking at a few of them I fear the reason is that the developer did
> not understand the wakeup mechanism at all. Again that's probably our
> fault, because the whole business including the irq part lacks proper
> in depth documentation.
> 
> > > The mechanism for wakeup sources is:
> > > 
> > >     1) Lazy disable the interrupt
> > > 
> > >     2) Do the transition into suspend with interrupts enabled
> > > 
> > >     3) Check whether one of the wakeup sources has triggered. If yes,
> > >        abort. Otherwise suspend.
> > > 
> > > The ones marked IRQF_NO_SUSPEND are not part of the above scheme,
> > > because they are not checked. So they use different mechanisms to
> > > abort the suspend?
> > 
> > Well, if you look at the tegra_kbc driver, for example, it uses both
> > enable_irq_wake() and IRQF_NO_SUSPEND.  Why it does that, I don't know.
> 
> That doesn't make sense at all.
>  
> > Other ones seem to be using pm_wakeup_event(), but that will only abort
> > suspend when it is enabled upfront (it need not be).  Moreover, it wasn't
> > intended to be used that way.
> 
> Right. We should kill that before more people copy it blindly.
>  
> > It generally looks like things are used not as intended in the wakeup
> > area, sadly.  Perhaps that's my fault, because I wasn't looking carefully
> > enough every time, but I wasn't directly involved in any of them IIRC.
> > 
> > I guess that's an opportunity to clean that up ...
> 
> Agreed. I'm not frightened to do a tree wide sweep. Seems to become a
> habit :)
>  
> > And now there's one more piece of it which is suspend-to-idle (aka "freeze").
> > That doesn't go all the way to syscore_suspend(), but basically stops after
> > finishing the "noirq" phase of suspending devices.  Then, it idles the CPUs
> > and waits for interrupts that will take them out of idle.  Only some of those
> > interrupts are wakeup events, so it only resumes when __pm_wakeup_event() or
> > __pm_relax() is called in the process of handling the interrupts.
> >
> > Of course, it could be implemented differently, but that was the simplest
> > way to do that.  It still can be changed, but I'd really like it not to have
> > to go through all of the disabling nonboot CPUs and sysfore_suspend(), because
> > that simply isn't necessary (and it avoids a metric ton of problems with CPU
> > offline/online).  And of course, it has to work on x86 too.
> 
> Right. So one thing which would make the situation more clear is that
> the interrupt handler needs to tell the core code about this by
> returning something like IRQ_HANDLED_PMWAKE and the core kicks the
> suspend-to-idle logic back to life. I'm not sure whether the extra
> return value is actually necessary, we might even map it to
> IRQ_HANDLED in the core as we know that we are in the suspend
> state.

I'm not sure I'm following you here.  Do you mean that upon receiving
IRQ_HANDLED_PMWAKE from an interrupt handler the core will know that
this was a wakeup event and will trigger a resume from suspend-to-idle?

> > On x86, however, enable_irq_wake() always returns -ENXIO and nothing happens,
> > because the chip doesn't have an ->irq_set_wake callback and doesn't flag
> > itself as IRQCHIP_SKIP_SET_WAKE, so even if we found a way to do something
> > equivalent to check_wakeup_irqs() for suspend-to-idle, it still wouldn't work
> > on x86 for that reason.
> 
> There is no reason, why we can't flag the affected irq chips with
> IRQCHIP_SKIP_SET_WAKE. That was introduced to handle irq controllers
> which provide wakeup functionality but do not have this special extra
> magic which is implemented in ->irq_set_wake, i.e. talking to some PM
> controller directly.

That may help, but then it won't prevent wakeup IRQs from being disabled by
suspend_device_irqs().  Do I think correctly that we can re-enable them
in the suspend-to-idle loop, before idling the CPUs?

> > Also, I wouldn't like to make suspend-to-idle more special than it really has
> > to be, so it would be ideal if it could use as much of the same mechanics as
> > regular platform-supported suspend as reasonably possible.  The handling of
> > wakeup events is crucial here, because it's needed to make suspend-to-idle
> > really work and I'd like to make it as consistent as possible with the
> > "regular" suspend.  Now, that can be done in a couple of ways and some of
> > them may be better than others for reasons that aren't known to me.
> 
> Sure. From the recent discussion I can see the following things we
> need to address:
> 
> 1) IRQF_NO_SUSPEND:
> 
>     - Investigate the (ab)use
>     - Fix the offending call sites 
>     - Provide proper documentation when and why it is sane to use

Totally agreed.

> 2) check_wakeup_irqs()
> 
>    Call it in the suspend-to-idle case and deal with the detected
>    suspend abort from there. If we don't do that, we basically force
>    people to hack creative workarounds into their driver, be it
>    abusing IRQF_NO_SUSPEND or other completely brainwrecked
>    modifications.
> 
>    The approach of doing:
> 
>     1) lazy disable
>     2) transition into suspend
>     3) check abort conditions
> 
>    is the only sane way to keep the real suspend and the
>    suspend-to-idle stuff in line.

Well, suspend-to-idle is a loop of the type

 (1) wait for an interrupt
 (2) iterrupt happens -> if wakeup, resume; else goto (1)

so I'm not sure how to match that with lazy disable?  We basically would need to
do something like:

 (1) enable all wakeup interrupts
 (2) wait for an interrupt
 (3) interrupt happens
 (4) disable all interrupts enabled in (1)
 (5) if wakeup, resume (we'll resume interrupts later); else goto (1)

Is that what you mean?

>    Any other approach will create major headache, simply because by
>    avoiding the conditional call in the PM core you force the driver
>    developers to work around it and have conditional handling for that
>    case at the driver level without having proper information about
>    the actual suspend target, i.e. idle vs. real. I can predict the
>    mess caused by that halfways precisely without using a crystal
>    ball: With the expected popularity of the suspend-to-idle approach
>    this is going to approach infinite level rapidly.

Yeah, exponential expansion.

>    Seriously we cannot tell people to use A for real suspend and B for
>    suspend-to-idle. It must be the same mechanism. If that costs the
>    price of some core code restructuring, we better pay that than
>    dealing with the fallout of the other variant.

Totally agreed.

> 3) IRQF_SHARED
> 
>    - The ideal solution for this would be to provide seperate virtual
>      irq numbers for each device which sits on a shared interrupt line
>      and have a pseudo interrupt chip handling the devices and the
>      real interrupt line providing the "demux" function.
> 
>      That would make life way simpler as we'd always deal with a
>      single interrupt number per device. So the whole chained action
>      pain would simply go away and the wake/no suspend functionality
>      would not have to care at all about this whole shared interrupt
>      trainwreck. You'd get per device interrupt stats for free as a
>      bonus.
> 
>      Unfortunately we can't pull that off w/o major surgery to a
>      gazillion of device drivers, but I'm tempted to provide the
>      infrastructure for this sooner than later.

Sounds interesting.

>      Though for the PCIe root hub thing which has a shared MSI handler
>      !?! we really want to implement that at the device level
>      today. Sharing a MSI interrupt is just ass backwards.

There are more reasons for doing that.  A PCIe port is really one device
with multiple things to handle and they really need more coordination with
each other than we have in the current code.  In my opinion we should
just use one PCIe port driver doing all of these things together instead of
three separate drivers handling multiple artificially cut out devices on a
virtual bus.

Problem is, people either have no time or are too scared to do that.

>    - So we need to bite the bullet and handle the action chain as it
>      is.
> 
>      And we need to support it for the IRQF_NO_SUSPEND and the wake
>      functionality.
> 
>      For that we have two choices:
> 
>      1) Use the separate pointer in the irq desc which stores the
>         actions which are either not flagged IRQF_NO_SUSPEND or not
>         declared as actual wakeup sources.
> 
>      2) Have a separate handler function pointer in struct irqaction,
>      	which is used to store the actual device handler and have a
>      	core stub handler as the primary handler.
> 
> 	That stub handler would basically do:
> 
> 	if (!irqs_suspended)
> 	   return action->real_handler();
> 	return IRQ_NONE;
> 
> 	The switch over of the handler can be done at
> 	setup/request_irq time when a shared non consistent flagged
> 	handler is detected or at enable_wake() time.

I had a similar idea at one point.

>      There is an advantage to #2:
> 
>         The suspend/resume path does not care about any of this. It
> 	only penalizes the few shared handlers which are affected by
> 	this nonsense with the extra conditional in the hot handling
> 	path. Bad luck for the device which has to share the interrupt
> 	line: complain to the morons who still think that shared
> 	interrupt lines are a brilliant idea.
> 
>      Now for the enable/disable_wake() stuff we really need to have
>      the mechanism which has the extra *dev_id argument to identify
>      the proper handler. The existing interface should simply hand in
>      NULL and if it's used on a shared interrupt line it should yell
>      loudly and maybe even prevent suspend. Combined with the above we
>      burden all the nonsense of handling shared interrupts in that
>      context to the slow path.
> 
>      Of course we need handling for the stupid case where the non
>      wakeup device decides to fire interrupts on the shared line.
> 
>      We certainly need a detection mechanism for this, but your
>      current proposal of accelerating the spurious detection has a
>      serious downside:
> 
>        If there is a spurious interrupt on a shared edge type
>        interrupt, which is crap by definition, but unfortunately
>        reality, we never trigger the spurious detector, but we might
>        render the device useless in the following case:
> 
>          disk irq shares line with wakeup button irq
> 
>          suspend
> 		spurious interrupt caused by disk device
> 
> 	 resume from a differnt wakeup source
> 
> 	 Disk device interrupt is lost and therefor the disk is
> 	 disfunctional until someone presses the wakeup button.

But the disk was supposed to be suspended, right?  So its driver should
not expect to receive any interrupts at that point anyway.  Or am I missing
anything?

>        Admittedly a constructed scenario, but we had our share of
>        pleasure to deal with lost edge type interrupts in the past and
>        I can assure you it's not fun to debug.
> 
>        So we rather error out on the safe side and disable the
>        interrupt line right away, mark it pending and resume or abort
>        suspend and tell why.

I thought about the same.

>        If we don't do that upfront we'll add
>        that feature after a painful debug session which starts with
>        this information: Sporadically after suspend I have to hard
>        reset the machine by pressing the power button for 10 seconds
>        or removing the battery....
>
>        Note: This only applies to shared interrupt lines. We still
>        have a sane spurious handling for proper designed systems which
>        avoid the shared brainfarts.
>

Agreed.

> 4) Move the handling of resume/abort to the core
> 
>    We really want either an explicit return value from the device
>    interrupt handler which indicates that we should resume or abort
>    the suspend transition or handle IRQ_HANDLED automatically at the
>    core level in case of suspend.
> 
>    If you let drivers do that, they will create a gazillion of
>    solutions to abort/resume which will kill your ability to change
>    any of the PM core internal mechanisms w/o doing a tree wide
>    cleanup. Let's do it now before we get into the unavoidable
>    exponential copy and paste phase.

OK

> 5) Documentation
>    
>    Add a metric ton of it!
> 
> Thoughts?

Except for a couple of points where I'm not sure I understand you correctly
(commented above), all of that sounds good to me.

I'm not sure about the ordering, though.  It would be good to have a working
replacement for the IRQF_NO_SUSPEND things that we'll be removing in 1, for
example.  So since we need to do 3) IRQF_SHARED for both IRQF_NO_SUSPEND and
wakeup, as you said, would it be practical to start with that one?

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-31 23:41                                             ` Thomas Gleixner
@ 2014-08-01  0:51                                               ` Rafael J. Wysocki
  2014-08-01 14:41                                                 ` Alan Stern
  1 sibling, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-01  0:51 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Alan Stern, Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Friday, August 01, 2014 01:41:31 AM Thomas Gleixner wrote:
> On Thu, 31 Jul 2014, Rafael J. Wysocki wrote:
> > On Thursday, July 31, 2014 04:12:55 PM Alan Stern wrote:
> > > Pardon me for sticking my nose into the middle of the conversation, but
> > > here's what it looks like to me:
> > > 
> > > The entire no_irq phase of suspend/resume is starting to seem like a
> > > mistake.  We should never have done it.
> > 
> > In hindsight, I totally agree.  Question is what we can do about it now.
> 
> <SNIP>
> 
> > So how can we eliminate the noirq phase in a workable way?
> 
> The straight way to do that is breaking the world and some more and
> then fix up a gazillion of device drivers by doing a massive voodoo
> debugging effort simply because in most cases we do not get any useful
> information out of the system once the shit hits the fan.
> 
> We could add instrumentation to the core code about interrupts which
> are coming in unexpectedly during suspend, but that does not solve
> anything.
> 
> We really cannot call any device handler at that point as clocks might
> be turned off already and any access to a device register might simply
> cause a full undebuggable stall of the CPU.
> 
> And there is no way to prove that there is no chance of a spurious
> interrupt for a given device. 
> 
> So if we cannot handle it at the infrastructure level, we need to make
> sure that every fricking device driver interrupt handler has a 
> 
>      if (dev->suspended)
>      	return CRAP;
> 
> conditional as the first line of code in it.
> 
> What is that buying us? 
> 
> Nothing than a shitload of hard to understand problems, really. The
> only sensible way to handle this is at the core level.
> 
> #1 There is no way that you can rely on random drivers to do the Right
>    Thing. 
> 
> #2 There is no way that all hardware is implemented in a sane way.
> 
> #3 You CANNOT educate the people who are tasked to implement something
>    which "does the job" to understand all the subtle details of
>    suspend/resume or whatever.

These are fair points.

However, if the driver implements ->runtime_suspend, it has to handle
the "my device is suspended" condition in its interrupt handler regardless.

For such a driver doing the same over system suspend/resume shouldn't
be a real problem.

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-08-01  0:08                                           ` Rafael J. Wysocki
@ 2014-08-01  1:24                                             ` Rafael J. Wysocki
  2014-08-01  9:40                                             ` [PATCH 1/3] irq / PM: New driver interface for wakeup interruptsn Thomas Gleixner
  1 sibling, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-01  1:24 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Friday, August 01, 2014 02:08:12 AM Rafael J. Wysocki wrote:
> On Friday, August 01, 2014 12:16:23 AM Thomas Gleixner wrote:
> > On Thu, 31 Jul 2014, Rafael J. Wysocki wrote:
> > > On Thursday, July 31, 2014 12:44:24 PM Thomas Gleixner wrote:

[cut]

> Except for a couple of points where I'm not sure I understand you correctly
> (commented above), all of that sounds good to me.
> 
> I'm not sure about the ordering, though.  It would be good to have a working
> replacement for the IRQF_NO_SUSPEND things that we'll be removing in 1, for
> example.  So since we need to do 3) IRQF_SHARED for both IRQF_NO_SUSPEND and
> wakeup, as you said, would it be practical to start with that one?

I forgot about one case which in my opinion would be good to take into account
from the outset.  That is the case of runtime-suspended devices that we don't
want to touch during system suspend/resume.  If those are system wakeup
devices, their drivers should be able to configure them for system wakeup
at the runtime suspend time rather than during system suspend.  Also if their
interrupts are going to be used as system wakeup interrupts, the interface
that we're going to provide needs to handle that case.  That is, it should
be possible to use that interface at the runtime suspend time and it should
take care of all things going forward.

Triggering a lazy disable right at the runtime suspend time may not work,
because the interrupt will also be used for the device's runtime remote wakeup
in that case, so it has to be functional until system suspend is started and
the core decides to leave the device in its current state.  This means that
the core will need to trigger the lazy disable for it at one point during
system suspend.

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interruptsn
  2014-08-01  0:08                                           ` Rafael J. Wysocki
  2014-08-01  1:24                                             ` Rafael J. Wysocki
@ 2014-08-01  9:40                                             ` Thomas Gleixner
  2014-08-01 13:45                                               ` Rafael J. Wysocki
  1 sibling, 1 reply; 75+ messages in thread
From: Thomas Gleixner @ 2014-08-01  9:40 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Fri, 1 Aug 2014, Rafael J. Wysocki wrote:
> On Friday, August 01, 2014 12:16:23 AM Thomas Gleixner wrote:
> > Right, so instead of thinking about a proper solution driver folks just slap
> > the next available thing on it w/o thinking about the consequences. But,
> > thats partly our own fault due to lack of proper documentation.
> 
> Prototyping with the next available thing is not generally wrong in my view
> as long as this doesn't get to the code base without consideration.

Prototyping is one thing, but we have real users which are simply wrong
AFAICT.
 
> > > And now there's one more piece of it which is suspend-to-idle (aka "freeze").
> > > That doesn't go all the way to syscore_suspend(), but basically stops after
> > > finishing the "noirq" phase of suspending devices.  Then, it idles the CPUs
> > > and waits for interrupts that will take them out of idle.  Only some of those
> > > interrupts are wakeup events, so it only resumes when __pm_wakeup_event() or
> > > __pm_relax() is called in the process of handling the interrupts.
> > >
> > > Of course, it could be implemented differently, but that was the simplest
> > > way to do that.  It still can be changed, but I'd really like it not to have
> > > to go through all of the disabling nonboot CPUs and sysfore_suspend(), because
> > > that simply isn't necessary (and it avoids a metric ton of problems with CPU
> > > offline/online).  And of course, it has to work on x86 too.
> > 
> > Right. So one thing which would make the situation more clear is that
> > the interrupt handler needs to tell the core code about this by
> > returning something like IRQ_HANDLED_PMWAKE and the core kicks the
> > suspend-to-idle logic back to life. I'm not sure whether the extra
> > return value is actually necessary, we might even map it to
> > IRQ_HANDLED in the core as we know that we are in the suspend
> > state.
> 
> I'm not sure I'm following you here.  Do you mean that upon receiving
> IRQ_HANDLED_PMWAKE from an interrupt handler the core will know that
> this was a wakeup event and will trigger a resume from suspend-to-idle?

Correct. Whether we need the extra return code is debatable. But yes,
we want to talk to the PM/suspend/resume thing at core level instead
of letting drivers use random interfaces which happen to be exported
for one reason or the other, but definitely not for the purpose of
random driver.

> > There is no reason, why we can't flag the affected irq chips with
> > IRQCHIP_SKIP_SET_WAKE. That was introduced to handle irq controllers
> > which provide wakeup functionality but do not have this special extra
> > magic which is implemented in ->irq_set_wake, i.e. talking to some PM
> > controller directly.
> 
> That may help, but then it won't prevent wakeup IRQs from being disabled by
> suspend_device_irqs().  Do I think correctly that we can re-enable them
> in the suspend-to-idle loop, before idling the CPUs?

Yes.
 
> > 2) check_wakeup_irqs()
> > 
> >    Call it in the suspend-to-idle case and deal with the detected
> >    suspend abort from there. If we don't do that, we basically force
> >    people to hack creative workarounds into their driver, be it
> >    abusing IRQF_NO_SUSPEND or other completely brainwrecked
> >    modifications.
> > 
> >    The approach of doing:
> > 
> >     1) lazy disable
> >     2) transition into suspend
> >     3) check abort conditions
> > 
> >    is the only sane way to keep the real suspend and the
> >    suspend-to-idle stuff in line.
> 
> Well, suspend-to-idle is a loop of the type
> 
>  (1) wait for an interrupt
>  (2) iterrupt happens -> if wakeup, resume; else goto (1)
> 
> so I'm not sure how to match that with lazy disable?  We basically would need to
> do something like:
> 
>  (1) enable all wakeup interrupts
>  (2) wait for an interrupt
>  (3) interrupt happens
>  (4) disable all interrupts enabled in (1)
>  (5) if wakeup, resume (we'll resume interrupts later); else goto (1)
> 
> Is that what you mean?

More or less, yes. Whether we do the explicit enable or the implicit
leave it enabled thing is an implementation detail. We just need to
chose one.

> >      Though for the PCIe root hub thing which has a shared MSI handler
> >      !?! we really want to implement that at the device level
> >      today. Sharing a MSI interrupt is just ass backwards.
> 
> There are more reasons for doing that.  A PCIe port is really one device
> with multiple things to handle and they really need more coordination with
> each other than we have in the current code.  In my opinion we should
> just use one PCIe port driver doing all of these things together instead of
> three separate drivers handling multiple artificially cut out devices on a
> virtual bus.
> 
> Problem is, people either have no time or are too scared to do that.

Well, if we don't want to end up with a complete unmaintainable mess,
we need to find the time and the people who have enough brains to do
so.
 
> >        If there is a spurious interrupt on a shared edge type
> >        interrupt, which is crap by definition, but unfortunately
> >        reality, we never trigger the spurious detector, but we might
> >        render the device useless in the following case:
> > 
> >          disk irq shares line with wakeup button irq
> > 
> >          suspend
> > 		spurious interrupt caused by disk device
> > 
> > 	 resume from a differnt wakeup source
> > 
> > 	 Disk device interrupt is lost and therefor the disk is
> > 	 disfunctional until someone presses the wakeup button.
> 
> But the disk was supposed to be suspended, right?  So its driver should
> not expect to receive any interrupts at that point anyway.  Or am I missing
> anything?

As I said it's a conctructed scenario, but we need to deal with that
kind of malfunction in devices. Right now if the driver disabled
interrupts at the device level and calls disable_irq() the spurious
edge from the broken IP block will be recorded and the interrupt
reinjected at enable_irq() time. I'm aware of at least two devices
which rely on that behaviour.
 
> I'm not sure about the ordering, though.  It would be good to have a working
> replacement for the IRQF_NO_SUSPEND things that we'll be removing in 1, for
> example.  So since we need to do 3) IRQF_SHARED for both IRQF_NO_SUSPEND and
> wakeup, as you said, would it be practical to start with that one?

The numbering was not meant as ordering, it was just to seperate the
various issues which we need to look at.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interruptsn
  2014-08-01 13:45                                               ` Rafael J. Wysocki
@ 2014-08-01 13:43                                                 ` Thomas Gleixner
  2014-08-01 14:29                                                   ` Rafael J. Wysocki
  0 siblings, 1 reply; 75+ messages in thread
From: Thomas Gleixner @ 2014-08-01 13:43 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Fri, 1 Aug 2014, Rafael J. Wysocki wrote:
> OK, I guess "IRQ_HANDLED from a wakeup interrupt" may be interpreted as
> IRQ_HANDLED_PMWAKE.  On the other hand, if that's going to be handled in
> handle_irq_event_percpu(), then using a special return code would save us
> a brach for IRQ_HANDLED interrupts.  We could convert it to IRQ_HANDLED
> immediately then.

We can handle it at the end of the function by calling
note_interrupt() unconditionally do the following there:

      if (suspended) {
      	 if (ret == IRQ_NONE) {
	    if (shared)
	       yell_and_abort_or_resume();
         } else {
	    abort_or_resume();
         }
      }
      if (noirqdebug)
      	 return;
 
> OK, I'll take a stab at the IRQF_SHARED thing if you don't mind.

Definitely not :)

> Here's my current understanding of what can be done for IRQF_NO_SUSPEND.
> 
> In suspend_device_irqs():
> 
> (1) If all actions in the list have the same setting (eg. IRQF_NO_SUSPEND unset),
>     keep the current behavior.
> (2) If the actions have different settings:
>     - Actions with IRQF_NO_SUSPEND set are not modified.
>     - Actions with IRQF_NO_SUSPEND unset are switched over to a stub handler.
>     - IRQS_SUSPEND_MODE (new flag) is set for the IRQ.

Can we please do that in setup_irq() and let the shared ones always
run through the stub? That keeps suspend/resume_device_irqs() simple.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interruptsn
  2014-08-01  9:40                                             ` [PATCH 1/3] irq / PM: New driver interface for wakeup interruptsn Thomas Gleixner
@ 2014-08-01 13:45                                               ` Rafael J. Wysocki
  2014-08-01 13:43                                                 ` Thomas Gleixner
  0 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-01 13:45 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Friday, August 01, 2014 11:40:55 AM Thomas Gleixner wrote:
> On Fri, 1 Aug 2014, Rafael J. Wysocki wrote:
> > On Friday, August 01, 2014 12:16:23 AM Thomas Gleixner wrote:

[cut]

> > > > And now there's one more piece of it which is suspend-to-idle (aka "freeze").
> > > > That doesn't go all the way to syscore_suspend(), but basically stops after
> > > > finishing the "noirq" phase of suspending devices.  Then, it idles the CPUs
> > > > and waits for interrupts that will take them out of idle.  Only some of those
> > > > interrupts are wakeup events, so it only resumes when __pm_wakeup_event() or
> > > > __pm_relax() is called in the process of handling the interrupts.
> > > >
> > > > Of course, it could be implemented differently, but that was the simplest
> > > > way to do that.  It still can be changed, but I'd really like it not to have
> > > > to go through all of the disabling nonboot CPUs and sysfore_suspend(), because
> > > > that simply isn't necessary (and it avoids a metric ton of problems with CPU
> > > > offline/online).  And of course, it has to work on x86 too.
> > > 
> > > Right. So one thing which would make the situation more clear is that
> > > the interrupt handler needs to tell the core code about this by
> > > returning something like IRQ_HANDLED_PMWAKE and the core kicks the
> > > suspend-to-idle logic back to life. I'm not sure whether the extra
> > > return value is actually necessary, we might even map it to
> > > IRQ_HANDLED in the core as we know that we are in the suspend
> > > state.
> > 
> > I'm not sure I'm following you here.  Do you mean that upon receiving
> > IRQ_HANDLED_PMWAKE from an interrupt handler the core will know that
> > this was a wakeup event and will trigger a resume from suspend-to-idle?
> 
> Correct. Whether we need the extra return code is debatable. But yes,
> we want to talk to the PM/suspend/resume thing at core level instead
> of letting drivers use random interfaces which happen to be exported
> for one reason or the other, but definitely not for the purpose of
> random driver.

OK, I guess "IRQ_HANDLED from a wakeup interrupt" may be interpreted as
IRQ_HANDLED_PMWAKE.  On the other hand, if that's going to be handled in
handle_irq_event_percpu(), then using a special return code would save us
a brach for IRQ_HANDLED interrupts.  We could convert it to IRQ_HANDLED
immediately then.

[cut]

> > I'm not sure about the ordering, though.  It would be good to have a working
> > replacement for the IRQF_NO_SUSPEND things that we'll be removing in 1, for
> > example.  So since we need to do 3) IRQF_SHARED for both IRQF_NO_SUSPEND and
> > wakeup, as you said, would it be practical to start with that one?
> 
> The numbering was not meant as ordering, it was just to seperate the
> various issues which we need to look at.

OK, I'll take a stab at the IRQF_SHARED thing if you don't mind.

Here's my current understanding of what can be done for IRQF_NO_SUSPEND.

In suspend_device_irqs():

(1) If all actions in the list have the same setting (eg. IRQF_NO_SUSPEND unset),
    keep the current behavior.
(2) If the actions have different settings:
    - Actions with IRQF_NO_SUSPEND set are not modified.
    - Actions with IRQF_NO_SUSPEND unset are switched over to a stub handler.
    - IRQS_SUSPEND_MODE (new flag) is set for the IRQ.

In note_interrupt():

  If action_ret is IRQ_NONE and IRQS_SUSPEND_MODE is set for the IRQ, disable the
  IRQ, set IRQS_SUSPENDED for it and call system_wakeup(BAD_INTERRUPT) (that will
  abort suspend if still in progress or break the suspend-to-idle loop).

In resume_device_irqs():

(1) If IRQS_SUSPEND_MODE is set, switch over actions with IRQF_NO_SUSPEND unset
    to their original handlers and clear the flag.  Fall through.
(2) If IRQS_SUSPENDED is set, clear the flag and enable the interrupt.

The stub handler only needs to return IRQ_NONE unconditionally in that case.

Does that make sense?

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interruptsn
  2014-08-01 13:43                                                 ` Thomas Gleixner
@ 2014-08-01 14:29                                                   ` Rafael J. Wysocki
  2014-08-02  1:31                                                     ` Rafael J. Wysocki
  2014-08-05 15:22                                                     ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Rafael J. Wysocki
  0 siblings, 2 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-01 14:29 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Friday, August 01, 2014 03:43:21 PM Thomas Gleixner wrote:
> On Fri, 1 Aug 2014, Rafael J. Wysocki wrote:
> > OK, I guess "IRQ_HANDLED from a wakeup interrupt" may be interpreted as
> > IRQ_HANDLED_PMWAKE.  On the other hand, if that's going to be handled in
> > handle_irq_event_percpu(), then using a special return code would save us
> > a brach for IRQ_HANDLED interrupts.  We could convert it to IRQ_HANDLED
> > immediately then.
> 
> We can handle it at the end of the function by calling
> note_interrupt() unconditionally do the following there:
> 
>       if (suspended) {
>       	 if (ret == IRQ_NONE) {
> 	    if (shared)
> 	       yell_and_abort_or_resume();
>          } else {
> 	    abort_or_resume();
>          }
>       }
>       if (noirqdebug)
>       	 return;

I see.

> > OK, I'll take a stab at the IRQF_SHARED thing if you don't mind.
> 
> Definitely not :)
> 
> > Here's my current understanding of what can be done for IRQF_NO_SUSPEND.
> > 
> > In suspend_device_irqs():
> > 
> > (1) If all actions in the list have the same setting (eg. IRQF_NO_SUSPEND unset),
> >     keep the current behavior.
> > (2) If the actions have different settings:
> >     - Actions with IRQF_NO_SUSPEND set are not modified.
> >     - Actions with IRQF_NO_SUSPEND unset are switched over to a stub handler.
> >     - IRQS_SUSPEND_MODE (new flag) is set for the IRQ.
> 
> Can we please do that in setup_irq() and let the shared ones always
> run through the stub? That keeps suspend/resume_device_irqs() simple.

OK

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
  2014-07-31 23:41                                             ` Thomas Gleixner
@ 2014-08-01 14:41                                                 ` Alan Stern
  2014-08-01 14:41                                                 ` Alan Stern
  1 sibling, 0 replies; 75+ messages in thread
From: Alan Stern @ 2014-08-01 14:41 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Rafael J. Wysocki, Peter Zijlstra, linux-kernel, Linux PM list,
	Dmitry Torokhov

On Fri, 1 Aug 2014, Thomas Gleixner wrote:

> Anyone who thinks that this can and should be solved at the driver
> level is simply taking the wrong drugs or ran out of supply of the
> proper ones. Either call your shrink or your drug dealer to get out of
> that.

That's absolutely true, but there is a counterpoint: Some of the 
problems in this area _must_ be fixed at the driver level.

In particular, any driver that:

	doesn't prevent its device from generating interrupt requests 
	while the system is suspended

	and allows its device to share an IRQ line with a wakeup source

has to be fixed.  The situation is even worse if the driver's interrupt 
handler blows up when called at an inopportune time (such as when 
clocks or power supplies are off).

Unfortunately, the only reasonable way to find such drivers is to wait
and see when the bad combination of events happens.

Alan Stern


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts
@ 2014-08-01 14:41                                                 ` Alan Stern
  0 siblings, 0 replies; 75+ messages in thread
From: Alan Stern @ 2014-08-01 14:41 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Rafael J. Wysocki, Peter Zijlstra, linux-kernel, Linux PM list,
	Dmitry Torokhov

On Fri, 1 Aug 2014, Thomas Gleixner wrote:

> Anyone who thinks that this can and should be solved at the driver
> level is simply taking the wrong drugs or ran out of supply of the
> proper ones. Either call your shrink or your drug dealer to get out of
> that.

That's absolutely true, but there is a counterpoint: Some of the 
problems in this area _must_ be fixed at the driver level.

In particular, any driver that:

	doesn't prevent its device from generating interrupt requests 
	while the system is suspended

	and allows its device to share an IRQ line with a wakeup source

has to be fixed.  The situation is even worse if the driver's interrupt 
handler blows up when called at an inopportune time (such as when 
clocks or power supplies are off).

Unfortunately, the only reasonable way to find such drivers is to wait
and see when the bad combination of events happens.

Alan Stern

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interruptsn
  2014-08-01 14:29                                                   ` Rafael J. Wysocki
@ 2014-08-02  1:31                                                     ` Rafael J. Wysocki
  2014-08-03 13:42                                                       ` Rafael J. Wysocki
  2014-08-05 15:22                                                     ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Rafael J. Wysocki
  1 sibling, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-02  1:31 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Friday, August 01, 2014 04:29:40 PM Rafael J. Wysocki wrote:
> On Friday, August 01, 2014 03:43:21 PM Thomas Gleixner wrote:
> > On Fri, 1 Aug 2014, Rafael J. Wysocki wrote:
> > > OK, I guess "IRQ_HANDLED from a wakeup interrupt" may be interpreted as
> > > IRQ_HANDLED_PMWAKE.  On the other hand, if that's going to be handled in
> > > handle_irq_event_percpu(), then using a special return code would save us
> > > a brach for IRQ_HANDLED interrupts.  We could convert it to IRQ_HANDLED
> > > immediately then.
> > 
> > We can handle it at the end of the function by calling
> > note_interrupt() unconditionally do the following there:
> > 
> >       if (suspended) {
> >       	 if (ret == IRQ_NONE) {
> > 	    if (shared)
> > 	       yell_and_abort_or_resume();
> >          } else {
> > 	    abort_or_resume();
> >          }
> >       }
> >       if (noirqdebug)
> >       	 return;
> 
> I see.
> 
> > > OK, I'll take a stab at the IRQF_SHARED thing if you don't mind.
> > 
> > Definitely not :)
> > 
> > > Here's my current understanding of what can be done for IRQF_NO_SUSPEND.
> > > 
> > > In suspend_device_irqs():
> > > 
> > > (1) If all actions in the list have the same setting (eg. IRQF_NO_SUSPEND unset),
> > >     keep the current behavior.
> > > (2) If the actions have different settings:
> > >     - Actions with IRQF_NO_SUSPEND set are not modified.
> > >     - Actions with IRQF_NO_SUSPEND unset are switched over to a stub handler.
> > >     - IRQS_SUSPEND_MODE (new flag) is set for the IRQ.
> > 
> > Can we please do that in setup_irq() and let the shared ones always
> > run through the stub? That keeps suspend/resume_device_irqs() simple.
> 
> OK

I've tried to do that, but encountered a problem.  The stub handler is called
with irq, dev_id as arguments and for the "interrupts are not suspended" case
(common) it has to use irq to find the irq_desc and then it has to use dev_id
to find the irqaction to call the original handler from there.  That seemed to
be a bit too much of a penalty to me.  Especially for people who never suspend
their machines. :-)

For this reason, I went for changing handlers in suspend_device_irqs() and
back in resume_device_irqs().  That's not terribly complicated (the restoration
in particular is pretty simple) and it should be easily reusable for the wakeup
interrupts case.  resume_device_irqs() wouldn't need any more changes for that,
for example.  It minimally affects systems that don't suspend too.

I also ended up adding a new interrupt handler return code (IRQ_SUSPENDED).
I could add a new irq_desc flag instead, but then the new code in suspend_device_irqs()
and the new check in note_interrupt() would need to be slightly more complicated.

Below is my current prototype.  Please have a look and let me know what you
think.

I gave it a little bit of testing with success.

Of course, I'd prefer to move the IRQ suspend/resume code to pm.c and get rid
of the third argument in __disable_irq() and __enable_irq() before making these
changes for real.

Rafael


---
 drivers/base/power/main.c   |    1 +
 drivers/base/power/wakeup.c |   20 ++++++++++++++++++++
 include/linux/interrupt.h   |    2 ++
 include/linux/irqreturn.h   |    2 ++
 include/linux/suspend.h     |    3 +++
 kernel/irq/handle.c         |    3 +--
 kernel/irq/manage.c         |   35 ++++++++++++++++++++++++++++++++++-
 kernel/irq/pm.c             |    7 +++++--
 kernel/irq/spurious.c       |   16 ++++++++++++++++
 9 files changed, 84 insertions(+), 5 deletions(-)

Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -382,11 +382,36 @@ setup_affinity(unsigned int irq, struct
 }
 #endif
 
+/*
+ * Dummy handler for shared interrupts to use during system suspend.
+ */
+static irqreturn_t irq_pm_dummy_handler(int irq, void *dev_id)
+{
+	return IRQ_SUSPENDED;
+}
+
 void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
 {
 	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND))
+		struct irqaction *action;
+
+		if (!desc->action)
 			return;
+
+		for (action = desc->action; action; action = action->next) {
+			if (action->flags & IRQF_NO_SUSPEND)
+				break;
+		}
+		if (action) {
+			for (action = desc->action; action; action = action->next) {
+				if (!(action->flags & IRQF_NO_SUSPEND)) {
+					action->saved_handler = action->handler;
+					action->handler = irq_pm_dummy_handler;
+				}
+			}
+			return;
+		}
+
 		desc->istate |= IRQS_SUSPENDED;
 	}
 
@@ -445,6 +470,14 @@ EXPORT_SYMBOL(disable_irq);
 void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
 {
 	if (resume) {
+		struct irqaction *action;
+
+		for (action = desc->action; action; action = action->next) {
+			if (action->handler == irq_pm_dummy_handler) {
+				action->handler = action->saved_handler;
+				action->saved_handler = NULL;
+			}
+		}
 		if (!(desc->istate & IRQS_SUSPENDED)) {
 			if (!desc->action)
 				return;
Index: linux-pm/include/linux/interrupt.h
===================================================================
--- linux-pm.orig/include/linux/interrupt.h
+++ linux-pm/include/linux/interrupt.h
@@ -90,6 +90,7 @@ typedef irqreturn_t (*irq_handler_t)(int
 /**
  * struct irqaction - per interrupt action descriptor
  * @handler:	interrupt handler function
+ * @saved_handler:	saved pointer to interrupt handler function
  * @name:	name of the device
  * @dev_id:	cookie to identify the device
  * @percpu_dev_id:	cookie to identify the device
@@ -104,6 +105,7 @@ typedef irqreturn_t (*irq_handler_t)(int
  */
 struct irqaction {
 	irq_handler_t		handler;
+	irq_handler_t		saved_handler;
 	void			*dev_id;
 	void __percpu		*percpu_dev_id;
 	struct irqaction	*next;
Index: linux-pm/kernel/irq/handle.c
===================================================================
--- linux-pm.orig/kernel/irq/handle.c
+++ linux-pm/kernel/irq/handle.c
@@ -175,8 +175,7 @@ handle_irq_event_percpu(struct irq_desc
 
 	add_interrupt_randomness(irq, flags);
 
-	if (!noirqdebug)
-		note_interrupt(irq, desc, retval);
+	note_interrupt(irq, desc, retval);
 	return retval;
 }
 
Index: linux-pm/kernel/irq/spurious.c
===================================================================
--- linux-pm.orig/kernel/irq/spurious.c
+++ linux-pm/kernel/irq/spurious.c
@@ -13,6 +13,7 @@
 #include <linux/interrupt.h>
 #include <linux/moduleparam.h>
 #include <linux/timer.h>
+#include <linux/suspend.h>
 
 #include "internals.h"
 
@@ -275,10 +276,25 @@ try_misrouted_irq(unsigned int irq, stru
 void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		    irqreturn_t action_ret)
 {
+	if (unlikely(action_ret == IRQ_SUSPENDED)) {
+		pr_err("IRQ %d: Unhandled interrupt from suspended device\n", irq);
+
+		desc->istate |= IRQS_SUSPENDED;
+		desc->depth++;
+		irq_disable(desc);
+		pm_system_wakeup();
+		return;
+	}
+
+	if (noirqdebug)
+		return;
+
 	if (desc->istate & IRQS_POLL_INPROGRESS ||
 	    irq_settings_is_polled(desc))
 		return;
 
+	action_ret &= ~IRQ_SUSPENDED;
+
 	if (bad_action_ret(action_ret)) {
 		report_bad_irq(irq, desc, action_ret);
 		return;
Index: linux-pm/include/linux/suspend.h
===================================================================
--- linux-pm.orig/include/linux/suspend.h
+++ linux-pm/include/linux/suspend.h
@@ -371,6 +371,8 @@ extern int unregister_pm_notifier(struct
 extern bool events_check_enabled;
 
 extern bool pm_wakeup_pending(void);
+extern void pm_system_wakeup(void);
+extern void pm_wakeup_clear(void);
 extern bool pm_get_wakeup_count(unsigned int *count, bool block);
 extern bool pm_save_wakeup_count(unsigned int count);
 extern void pm_wakep_autosleep_enabled(bool set);
@@ -418,6 +420,7 @@ static inline int unregister_pm_notifier
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
 
 static inline bool pm_wakeup_pending(void) { return false; }
+static inline void pm_system_wakeup(void) {}
 
 static inline void lock_system_sleep(void) {}
 static inline void unlock_system_sleep(void) {}
Index: linux-pm/drivers/base/power/wakeup.c
===================================================================
--- linux-pm.orig/drivers/base/power/wakeup.c
+++ linux-pm/drivers/base/power/wakeup.c
@@ -691,6 +691,8 @@ void pm_print_active_wakeup_sources(void
 }
 EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources);
 
+static bool abort_suspend;
+
 /**
  * pm_wakeup_pending - Check if power transition in progress should be aborted.
  *
@@ -712,6 +714,7 @@ bool pm_wakeup_pending(void)
 		ret = (cnt != saved_count || inpr > 0);
 		events_check_enabled = !ret;
 	}
+	ret = ret || abort_suspend;
 	spin_unlock_irqrestore(&events_lock, flags);
 
 	if (ret) {
@@ -722,6 +725,23 @@ bool pm_wakeup_pending(void)
 	return ret;
 }
 
+void pm_system_wakeup(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&events_lock, flags);
+	abort_suspend = true;
+	spin_unlock_irqrestore(&events_lock, flags);
+	freeze_wake();
+}
+
+void pm_wakeup_clear(void)
+{
+	spin_lock_irq(&events_lock);
+	abort_suspend = false;
+	spin_unlock_irq(&events_lock);
+}
+
 /**
  * pm_get_wakeup_count - Read the number of registered wakeup events.
  * @count: Address to store the value at.
Index: linux-pm/drivers/base/power/main.c
===================================================================
--- linux-pm.orig/drivers/base/power/main.c
+++ linux-pm/drivers/base/power/main.c
@@ -1648,6 +1648,7 @@ int dpm_suspend_start(pm_message_t state
 {
 	int error;
 
+	pm_wakeup_clear();
 	error = dpm_prepare(state);
 	if (error) {
 		suspend_stats.failed_prepare++;
Index: linux-pm/kernel/irq/pm.c
===================================================================
--- linux-pm.orig/kernel/irq/pm.c
+++ linux-pm/kernel/irq/pm.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/syscore_ops.h>
+#include <linux/suspend.h>
 
 #include "internals.h"
 
@@ -35,8 +36,7 @@ void suspend_device_irqs(void)
 	}
 
 	for_each_irq_desc(irq, desc)
-		if (desc->istate & IRQS_SUSPENDED)
-			synchronize_irq(irq);
+		synchronize_irq(irq);
 }
 EXPORT_SYMBOL_GPL(suspend_device_irqs);
 
@@ -102,6 +102,9 @@ int check_wakeup_irqs(void)
 	struct irq_desc *desc;
 	int irq;
 
+	if (pm_wakeup_pending())
+		return -EBUSY;
+
 	for_each_irq_desc(irq, desc) {
 		/*
 		 * Only interrupts which are marked as wakeup source
Index: linux-pm/include/linux/irqreturn.h
===================================================================
--- linux-pm.orig/include/linux/irqreturn.h
+++ linux-pm/include/linux/irqreturn.h
@@ -6,11 +6,13 @@
  * @IRQ_NONE		interrupt was not from this device
  * @IRQ_HANDLED		interrupt was handled by this device
  * @IRQ_WAKE_THREAD	handler requests to wake the handler thread
+ * @IRQ_SUSPENDED	interrupt has been suspended for this device
  */
 enum irqreturn {
 	IRQ_NONE		= (0 << 0),
 	IRQ_HANDLED		= (1 << 0),
 	IRQ_WAKE_THREAD		= (1 << 1),
+	IRQ_SUSPENDED		= (1 << 2),
 };
 
 typedef enum irqreturn irqreturn_t;


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interruptsn
  2014-08-02  1:31                                                     ` Rafael J. Wysocki
@ 2014-08-03 13:42                                                       ` Rafael J. Wysocki
  2014-08-04  3:38                                                         ` Rafael J. Wysocki
  0 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-03 13:42 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Saturday, August 02, 2014 03:31:01 AM Rafael J. Wysocki wrote:
> On Friday, August 01, 2014 04:29:40 PM Rafael J. Wysocki wrote:
> > On Friday, August 01, 2014 03:43:21 PM Thomas Gleixner wrote:
> > > On Fri, 1 Aug 2014, Rafael J. Wysocki wrote:
> > > > OK, I guess "IRQ_HANDLED from a wakeup interrupt" may be interpreted as
> > > > IRQ_HANDLED_PMWAKE.  On the other hand, if that's going to be handled in
> > > > handle_irq_event_percpu(), then using a special return code would save us
> > > > a brach for IRQ_HANDLED interrupts.  We could convert it to IRQ_HANDLED
> > > > immediately then.
> > > 
> > > We can handle it at the end of the function by calling
> > > note_interrupt() unconditionally do the following there:
> > > 
> > >       if (suspended) {
> > >       	 if (ret == IRQ_NONE) {
> > > 	    if (shared)
> > > 	       yell_and_abort_or_resume();
> > >          } else {
> > > 	    abort_or_resume();
> > >          }
> > >       }
> > >       if (noirqdebug)
> > >       	 return;
> > 
> > I see.
> > 
> > > > OK, I'll take a stab at the IRQF_SHARED thing if you don't mind.
> > > 
> > > Definitely not :)
> > > 
> > > > Here's my current understanding of what can be done for IRQF_NO_SUSPEND.
> > > > 
> > > > In suspend_device_irqs():
> > > > 
> > > > (1) If all actions in the list have the same setting (eg. IRQF_NO_SUSPEND unset),
> > > >     keep the current behavior.
> > > > (2) If the actions have different settings:
> > > >     - Actions with IRQF_NO_SUSPEND set are not modified.
> > > >     - Actions with IRQF_NO_SUSPEND unset are switched over to a stub handler.
> > > >     - IRQS_SUSPEND_MODE (new flag) is set for the IRQ.
> > > 
> > > Can we please do that in setup_irq() and let the shared ones always
> > > run through the stub? That keeps suspend/resume_device_irqs() simple.
> > 
> > OK
> 
> I've tried to do that, but encountered a problem.  The stub handler is called
> with irq, dev_id as arguments and for the "interrupts are not suspended" case
> (common) it has to use irq to find the irq_desc and then it has to use dev_id
> to find the irqaction to call the original handler from there.  That seemed to
> be a bit too much of a penalty to me.  Especially for people who never suspend
> their machines. :-)
> 
> For this reason, I went for changing handlers in suspend_device_irqs() and
> back in resume_device_irqs().  That's not terribly complicated (the restoration
> in particular is pretty simple) and it should be easily reusable for the wakeup
> interrupts case.  resume_device_irqs() wouldn't need any more changes for that,
> for example.  It minimally affects systems that don't suspend too.
> 
> I also ended up adding a new interrupt handler return code (IRQ_SUSPENDED).
> I could add a new irq_desc flag instead, but then the new code in suspend_device_irqs()
> and the new check in note_interrupt() would need to be slightly more complicated.

Actually, if we have a global irqs_suspended state variable, it will be much
simpler to handle wakeup interrupts going forward, because in that case we'll
be able to keep their original interrupt handlers and IRQ_HANDLED from them
will be interpreted as a wakeup event automatically.

Updated prototype follows.

Rafael

---
 drivers/base/power/main.c   |    1 +
 drivers/base/power/wakeup.c |   20 ++++++++++++++++++++
 include/linux/interrupt.h   |    2 ++
 include/linux/suspend.h     |    3 +++
 kernel/irq/handle.c         |    3 +--
 kernel/irq/internals.h      |    2 ++
 kernel/irq/manage.c         |   35 ++++++++++++++++++++++++++++++++++-
 kernel/irq/pm.c             |   10 ++++++----
 kernel/irq/spurious.c       |   24 ++++++++++++++++++++++++
 9 files changed, 93 insertions(+), 7 deletions(-)

Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -382,11 +382,36 @@ setup_affinity(unsigned int irq, struct
 }
 #endif
 
+/*
+ * Dummy handler for shared interrupts to use during system suspend.
+ */
+static irqreturn_t irq_pm_dummy_handler(int irq, void *dev_id)
+{
+	return IRQ_NONE;
+}
+
 void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
 {
 	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND))
+		struct irqaction *action;
+
+		if (!desc->action)
 			return;
+
+		for (action = desc->action; action; action = action->next) {
+			if (action->flags & IRQF_NO_SUSPEND)
+				break;
+		}
+		if (action) {
+			for (action = desc->action; action; action = action->next) {
+				if (!(action->flags & IRQF_NO_SUSPEND)) {
+					action->saved_handler = action->handler;
+					action->handler = irq_pm_dummy_handler;
+				}
+			}
+			return;
+		}
+
 		desc->istate |= IRQS_SUSPENDED;
 	}
 
@@ -445,6 +470,14 @@ EXPORT_SYMBOL(disable_irq);
 void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
 {
 	if (resume) {
+		struct irqaction *action;
+
+		for (action = desc->action; action; action = action->next) {
+			if (action->handler == irq_pm_dummy_handler) {
+				action->handler = action->saved_handler;
+				action->saved_handler = NULL;
+			}
+		}
 		if (!(desc->istate & IRQS_SUSPENDED)) {
 			if (!desc->action)
 				return;
Index: linux-pm/include/linux/interrupt.h
===================================================================
--- linux-pm.orig/include/linux/interrupt.h
+++ linux-pm/include/linux/interrupt.h
@@ -90,6 +90,7 @@ typedef irqreturn_t (*irq_handler_t)(int
 /**
  * struct irqaction - per interrupt action descriptor
  * @handler:	interrupt handler function
+ * @saved_handler:	saved pointer to interrupt handler function
  * @name:	name of the device
  * @dev_id:	cookie to identify the device
  * @percpu_dev_id:	cookie to identify the device
@@ -104,6 +105,7 @@ typedef irqreturn_t (*irq_handler_t)(int
  */
 struct irqaction {
 	irq_handler_t		handler;
+	irq_handler_t		saved_handler;
 	void			*dev_id;
 	void __percpu		*percpu_dev_id;
 	struct irqaction	*next;
Index: linux-pm/kernel/irq/handle.c
===================================================================
--- linux-pm.orig/kernel/irq/handle.c
+++ linux-pm/kernel/irq/handle.c
@@ -175,8 +175,7 @@ handle_irq_event_percpu(struct irq_desc
 
 	add_interrupt_randomness(irq, flags);
 
-	if (!noirqdebug)
-		note_interrupt(irq, desc, retval);
+	note_interrupt(irq, desc, retval);
 	return retval;
 }
 
Index: linux-pm/kernel/irq/spurious.c
===================================================================
--- linux-pm.orig/kernel/irq/spurious.c
+++ linux-pm/kernel/irq/spurious.c
@@ -13,10 +13,22 @@
 #include <linux/interrupt.h>
 #include <linux/moduleparam.h>
 #include <linux/timer.h>
+#include <linux/suspend.h>
 
 #include "internals.h"
 
 static int irqfixup __read_mostly;
+static bool irqs_suspended __read_mostly;
+
+void set_irqs_suspended(bool on)
+{
+	struct irq_desc *desc;
+	int irq;
+
+	irqs_suspended = on;
+	for_each_irq_desc(irq, desc)
+		synchronize_irq(irq);
+}
 
 #define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
 static void poll_spurious_irqs(unsigned long dummy);
@@ -275,6 +287,18 @@ try_misrouted_irq(unsigned int irq, stru
 void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		    irqreturn_t action_ret)
 {
+	if (unlikely(irqs_suspended && action_ret == IRQ_NONE)) {
+		pr_err_ratelimited("IRQ %d: Unhandled while suspended\n", irq);
+		desc->istate |= IRQS_SUSPENDED;
+		desc->depth++;
+		irq_disable(desc);
+		pm_system_wakeup();
+		return;
+	}
+
+	if (noirqdebug)
+		return;
+
 	if (desc->istate & IRQS_POLL_INPROGRESS ||
 	    irq_settings_is_polled(desc))
 		return;
Index: linux-pm/include/linux/suspend.h
===================================================================
--- linux-pm.orig/include/linux/suspend.h
+++ linux-pm/include/linux/suspend.h
@@ -371,6 +371,8 @@ extern int unregister_pm_notifier(struct
 extern bool events_check_enabled;
 
 extern bool pm_wakeup_pending(void);
+extern void pm_system_wakeup(void);
+extern void pm_wakeup_clear(void);
 extern bool pm_get_wakeup_count(unsigned int *count, bool block);
 extern bool pm_save_wakeup_count(unsigned int count);
 extern void pm_wakep_autosleep_enabled(bool set);
@@ -418,6 +420,7 @@ static inline int unregister_pm_notifier
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
 
 static inline bool pm_wakeup_pending(void) { return false; }
+static inline void pm_system_wakeup(void) {}
 
 static inline void lock_system_sleep(void) {}
 static inline void unlock_system_sleep(void) {}
Index: linux-pm/drivers/base/power/wakeup.c
===================================================================
--- linux-pm.orig/drivers/base/power/wakeup.c
+++ linux-pm/drivers/base/power/wakeup.c
@@ -691,6 +691,8 @@ void pm_print_active_wakeup_sources(void
 }
 EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources);
 
+static bool abort_suspend;
+
 /**
  * pm_wakeup_pending - Check if power transition in progress should be aborted.
  *
@@ -712,6 +714,7 @@ bool pm_wakeup_pending(void)
 		ret = (cnt != saved_count || inpr > 0);
 		events_check_enabled = !ret;
 	}
+	ret = ret || abort_suspend;
 	spin_unlock_irqrestore(&events_lock, flags);
 
 	if (ret) {
@@ -722,6 +725,23 @@ bool pm_wakeup_pending(void)
 	return ret;
 }
 
+void pm_system_wakeup(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&events_lock, flags);
+	abort_suspend = true;
+	spin_unlock_irqrestore(&events_lock, flags);
+	freeze_wake();
+}
+
+void pm_wakeup_clear(void)
+{
+	spin_lock_irq(&events_lock);
+	abort_suspend = false;
+	spin_unlock_irq(&events_lock);
+}
+
 /**
  * pm_get_wakeup_count - Read the number of registered wakeup events.
  * @count: Address to store the value at.
Index: linux-pm/drivers/base/power/main.c
===================================================================
--- linux-pm.orig/drivers/base/power/main.c
+++ linux-pm/drivers/base/power/main.c
@@ -1648,6 +1648,7 @@ int dpm_suspend_start(pm_message_t state
 {
 	int error;
 
+	pm_wakeup_clear();
 	error = dpm_prepare(state);
 	if (error) {
 		suspend_stats.failed_prepare++;
Index: linux-pm/kernel/irq/pm.c
===================================================================
--- linux-pm.orig/kernel/irq/pm.c
+++ linux-pm/kernel/irq/pm.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/syscore_ops.h>
+#include <linux/suspend.h>
 
 #include "internals.h"
 
@@ -33,10 +34,7 @@ void suspend_device_irqs(void)
 		__disable_irq(desc, irq, true);
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
-
-	for_each_irq_desc(irq, desc)
-		if (desc->istate & IRQS_SUSPENDED)
-			synchronize_irq(irq);
+	set_irqs_suspended(true);
 }
 EXPORT_SYMBOL_GPL(suspend_device_irqs);
 
@@ -90,6 +88,7 @@ device_initcall(irq_pm_init_ops);
  */
 void resume_device_irqs(void)
 {
+	set_irqs_suspended(false);
 	resume_irqs(false);
 }
 EXPORT_SYMBOL_GPL(resume_device_irqs);
@@ -102,6 +101,9 @@ int check_wakeup_irqs(void)
 	struct irq_desc *desc;
 	int irq;
 
+	if (pm_wakeup_pending())
+		return -EBUSY;
+
 	for_each_irq_desc(irq, desc) {
 		/*
 		 * Only interrupts which are marked as wakeup source
Index: linux-pm/kernel/irq/internals.h
===================================================================
--- linux-pm.orig/kernel/irq/internals.h
+++ linux-pm/kernel/irq/internals.h
@@ -194,3 +194,5 @@ static inline void kstat_incr_irqs_this_
 	__this_cpu_inc(*desc->kstat_irqs);
 	__this_cpu_inc(kstat.irqs_sum);
 }
+
+extern void set_irqs_suspended(bool on);


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 1/3] irq / PM: New driver interface for wakeup interruptsn
  2014-08-03 13:42                                                       ` Rafael J. Wysocki
@ 2014-08-04  3:38                                                         ` Rafael J. Wysocki
  0 siblings, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-04  3:38 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Sunday, August 03, 2014 03:42:49 PM Rafael J. Wysocki wrote:
> On Saturday, August 02, 2014 03:31:01 AM Rafael J. Wysocki wrote:
> > On Friday, August 01, 2014 04:29:40 PM Rafael J. Wysocki wrote:
> > > On Friday, August 01, 2014 03:43:21 PM Thomas Gleixner wrote:
> > > > On Fri, 1 Aug 2014, Rafael J. Wysocki wrote:
> > > > > OK, I guess "IRQ_HANDLED from a wakeup interrupt" may be interpreted as
> > > > > IRQ_HANDLED_PMWAKE.  On the other hand, if that's going to be handled in
> > > > > handle_irq_event_percpu(), then using a special return code would save us
> > > > > a brach for IRQ_HANDLED interrupts.  We could convert it to IRQ_HANDLED
> > > > > immediately then.
> > > > 
> > > > We can handle it at the end of the function by calling
> > > > note_interrupt() unconditionally do the following there:
> > > > 
> > > >       if (suspended) {
> > > >       	 if (ret == IRQ_NONE) {
> > > > 	    if (shared)
> > > > 	       yell_and_abort_or_resume();
> > > >          } else {
> > > > 	    abort_or_resume();
> > > >          }
> > > >       }
> > > >       if (noirqdebug)
> > > >       	 return;
> > > 
> > > I see.
> > > 
> > > > > OK, I'll take a stab at the IRQF_SHARED thing if you don't mind.
> > > > 
> > > > Definitely not :)
> > > > 
> > > > > Here's my current understanding of what can be done for IRQF_NO_SUSPEND.
> > > > > 
> > > > > In suspend_device_irqs():
> > > > > 
> > > > > (1) If all actions in the list have the same setting (eg. IRQF_NO_SUSPEND unset),
> > > > >     keep the current behavior.
> > > > > (2) If the actions have different settings:
> > > > >     - Actions with IRQF_NO_SUSPEND set are not modified.
> > > > >     - Actions with IRQF_NO_SUSPEND unset are switched over to a stub handler.
> > > > >     - IRQS_SUSPEND_MODE (new flag) is set for the IRQ.
> > > > 
> > > > Can we please do that in setup_irq() and let the shared ones always
> > > > run through the stub? That keeps suspend/resume_device_irqs() simple.
> > > 
> > > OK
> > 
> > I've tried to do that, but encountered a problem.  The stub handler is called
> > with irq, dev_id as arguments and for the "interrupts are not suspended" case
> > (common) it has to use irq to find the irq_desc and then it has to use dev_id
> > to find the irqaction to call the original handler from there.  That seemed to
> > be a bit too much of a penalty to me.  Especially for people who never suspend
> > their machines. :-)
> > 
> > For this reason, I went for changing handlers in suspend_device_irqs() and
> > back in resume_device_irqs().  That's not terribly complicated (the restoration
> > in particular is pretty simple) and it should be easily reusable for the wakeup
> > interrupts case.  resume_device_irqs() wouldn't need any more changes for that,
> > for example.  It minimally affects systems that don't suspend too.
> > 
> > I also ended up adding a new interrupt handler return code (IRQ_SUSPENDED).
> > I could add a new irq_desc flag instead, but then the new code in suspend_device_irqs()
> > and the new check in note_interrupt() would need to be slightly more complicated.
> 
> Actually, if we have a global irqs_suspended state variable, it will be much
> simpler to handle wakeup interrupts going forward, because in that case we'll
> be able to keep their original interrupt handlers and IRQ_HANDLED from them
> will be interpreted as a wakeup event automatically.

I realized that with this, suspend_device_irqs() will be racy, because it
switches handlers and modifies irqs_suspended at different times then.
I'm not sure how much of a problem that would be, but it's better to
ensure that both the handler and note_interrupt() will be "switched" at
the same time, which means addin a wrapper handler at the __setup_irq()
time, after all.

To avoid the problem mentioned above, I'm using dev_id to store the action's
address and an additional "real dev_id" field to store the original value.
This version seems to work.

Rafael

---
 drivers/base/power/main.c   |    1 
 drivers/base/power/wakeup.c |   20 ++++++++++++++++
 include/linux/interrupt.h   |    4 +++
 include/linux/irqdesc.h     |    1 
 include/linux/suspend.h     |    3 ++
 kernel/irq/handle.c         |    3 --
 kernel/irq/internals.h      |    3 ++
 kernel/irq/manage.c         |   53 ++++++++++++++++++++++++++++++++++++++++++--
 kernel/irq/pm.c             |   10 ++++----
 kernel/irq/spurious.c       |   13 ++++++++++
 10 files changed, 103 insertions(+), 8 deletions(-)

Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -385,7 +385,7 @@ setup_affinity(unsigned int irq, struct
 void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
 {
 	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND))
+		if (!desc->action || desc->skip_suspend)
 			return;
 		desc->istate |= IRQS_SUSPENDED;
 	}
@@ -658,6 +658,44 @@ int irq_set_parent(int irq, int parent_i
 }
 #endif
 
+bool irqs_suspended __read_mostly;
+
+void set_irqs_suspended(bool on)
+{
+	struct irq_desc *desc;
+	int irq;
+
+	irqs_suspended = on;
+	for_each_irq_desc(irq, desc)
+		synchronize_irq(irq);
+}
+
+/*
+ * Wrapper handler for shared interrupts.
+ */
+static irqreturn_t irq_shared_wrapper_handler(int irq, void *dev_id)
+{
+	struct irqaction *action;
+
+	if (unlikely(irqs_suspended))
+		return IRQ_NONE;
+
+	action = dev_id;
+	return action->s_handler(irq, action->s_dev_id);
+}
+
+static void irq_shared_wrap_handler(struct irqaction *action)
+{
+	if (action->handler == irq_shared_wrapper_handler ||
+	    (action->flags & IRQF_NO_SUSPEND))
+		return;
+
+	action->s_handler = action->handler;
+	action->handler = irq_shared_wrapper_handler;
+	action->s_dev_id = action->dev_id;
+	action->dev_id = action;
+}
+
 /*
  * Default primary interrupt handler for threaded interrupts. Is
  * assigned as primary handler when request_threaded_irq is called
@@ -1088,6 +1126,7 @@ __setup_irq(unsigned int irq, struct irq
 
 		/* add new interrupt at end of irq queue */
 		do {
+			irq_shared_wrap_handler(old);
 			/*
 			 * Or all existing action->thread_mask bits,
 			 * so we can find the next zero bit for this
@@ -1097,6 +1136,7 @@ __setup_irq(unsigned int irq, struct irq
 			old_ptr = &old->next;
 			old = *old_ptr;
 		} while (old);
+		irq_shared_wrap_handler(new);
 		shared = 1;
 	}
 
@@ -1222,6 +1262,9 @@ __setup_irq(unsigned int irq, struct irq
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
 
+	if (new->flags & IRQF_NO_SUSPEND)
+		desc->skip_suspend++;
+
 	/*
 	 * Check whether we disabled the irq via the spurious handler
 	 * before. Reenable it and give it another chance.
@@ -1328,13 +1371,19 @@ static struct irqaction *__free_irq(unsi
 			return NULL;
 		}
 
-		if (action->dev_id == dev_id)
+		if (action->dev_id == dev_id || action->s_dev_id == dev_id)
 			break;
 		action_ptr = &action->next;
 	}
 
 	/* Found it - now remove it from the list of entries: */
 	*action_ptr = action->next;
+	if (action->flags & IRQF_NO_SUSPEND) {
+		desc->skip_suspend--;
+	} else if (action->handler == irq_shared_wrapper_handler) {
+		action->handler = action->s_handler;
+		action->dev_id = action->s_dev_id;
+	}
 
 	/* If this was the last handler, shut down the IRQ line: */
 	if (!desc->action) {
Index: linux-pm/include/linux/interrupt.h
===================================================================
--- linux-pm.orig/include/linux/interrupt.h
+++ linux-pm/include/linux/interrupt.h
@@ -90,8 +90,10 @@ typedef irqreturn_t (*irq_handler_t)(int
 /**
  * struct irqaction - per interrupt action descriptor
  * @handler:	interrupt handler function
+ * @s_handler:	interrupt handler function for shared interrupts
  * @name:	name of the device
  * @dev_id:	cookie to identify the device
+ * @s_dev_id:	cookie to identify the device for shared interrupts
  * @percpu_dev_id:	cookie to identify the device
  * @next:	pointer to the next irqaction for shared interrupts
  * @irq:	interrupt number
@@ -105,6 +107,8 @@ typedef irqreturn_t (*irq_handler_t)(int
 struct irqaction {
 	irq_handler_t		handler;
 	void			*dev_id;
+	irq_handler_t		s_handler;
+	void			*s_dev_id;
 	void __percpu		*percpu_dev_id;
 	struct irqaction	*next;
 	irq_handler_t		thread_fn;
Index: linux-pm/kernel/irq/handle.c
===================================================================
--- linux-pm.orig/kernel/irq/handle.c
+++ linux-pm/kernel/irq/handle.c
@@ -175,8 +175,7 @@ handle_irq_event_percpu(struct irq_desc
 
 	add_interrupt_randomness(irq, flags);
 
-	if (!noirqdebug)
-		note_interrupt(irq, desc, retval);
+	note_interrupt(irq, desc, retval);
 	return retval;
 }
 
Index: linux-pm/kernel/irq/spurious.c
===================================================================
--- linux-pm.orig/kernel/irq/spurious.c
+++ linux-pm/kernel/irq/spurious.c
@@ -13,6 +13,7 @@
 #include <linux/interrupt.h>
 #include <linux/moduleparam.h>
 #include <linux/timer.h>
+#include <linux/suspend.h>
 
 #include "internals.h"
 
@@ -275,6 +276,18 @@ try_misrouted_irq(unsigned int irq, stru
 void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		    irqreturn_t action_ret)
 {
+	if (unlikely(irqs_suspended && action_ret == IRQ_NONE)) {
+		pr_err_ratelimited("IRQ %d: Unhandled while suspended\n", irq);
+		desc->istate |= IRQS_SUSPENDED;
+		desc->depth++;
+		irq_disable(desc);
+		pm_system_wakeup();
+		return;
+	}
+
+	if (noirqdebug)
+		return;
+
 	if (desc->istate & IRQS_POLL_INPROGRESS ||
 	    irq_settings_is_polled(desc))
 		return;
Index: linux-pm/include/linux/suspend.h
===================================================================
--- linux-pm.orig/include/linux/suspend.h
+++ linux-pm/include/linux/suspend.h
@@ -371,6 +371,8 @@ extern int unregister_pm_notifier(struct
 extern bool events_check_enabled;
 
 extern bool pm_wakeup_pending(void);
+extern void pm_system_wakeup(void);
+extern void pm_wakeup_clear(void);
 extern bool pm_get_wakeup_count(unsigned int *count, bool block);
 extern bool pm_save_wakeup_count(unsigned int count);
 extern void pm_wakep_autosleep_enabled(bool set);
@@ -418,6 +420,7 @@ static inline int unregister_pm_notifier
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
 
 static inline bool pm_wakeup_pending(void) { return false; }
+static inline void pm_system_wakeup(void) {}
 
 static inline void lock_system_sleep(void) {}
 static inline void unlock_system_sleep(void) {}
Index: linux-pm/drivers/base/power/wakeup.c
===================================================================
--- linux-pm.orig/drivers/base/power/wakeup.c
+++ linux-pm/drivers/base/power/wakeup.c
@@ -691,6 +691,8 @@ void pm_print_active_wakeup_sources(void
 }
 EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources);
 
+static bool abort_suspend;
+
 /**
  * pm_wakeup_pending - Check if power transition in progress should be aborted.
  *
@@ -712,6 +714,7 @@ bool pm_wakeup_pending(void)
 		ret = (cnt != saved_count || inpr > 0);
 		events_check_enabled = !ret;
 	}
+	ret = ret || abort_suspend;
 	spin_unlock_irqrestore(&events_lock, flags);
 
 	if (ret) {
@@ -722,6 +725,23 @@ bool pm_wakeup_pending(void)
 	return ret;
 }
 
+void pm_system_wakeup(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&events_lock, flags);
+	abort_suspend = true;
+	spin_unlock_irqrestore(&events_lock, flags);
+	freeze_wake();
+}
+
+void pm_wakeup_clear(void)
+{
+	spin_lock_irq(&events_lock);
+	abort_suspend = false;
+	spin_unlock_irq(&events_lock);
+}
+
 /**
  * pm_get_wakeup_count - Read the number of registered wakeup events.
  * @count: Address to store the value at.
Index: linux-pm/drivers/base/power/main.c
===================================================================
--- linux-pm.orig/drivers/base/power/main.c
+++ linux-pm/drivers/base/power/main.c
@@ -1648,6 +1648,7 @@ int dpm_suspend_start(pm_message_t state
 {
 	int error;
 
+	pm_wakeup_clear();
 	error = dpm_prepare(state);
 	if (error) {
 		suspend_stats.failed_prepare++;
Index: linux-pm/kernel/irq/pm.c
===================================================================
--- linux-pm.orig/kernel/irq/pm.c
+++ linux-pm/kernel/irq/pm.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/syscore_ops.h>
+#include <linux/suspend.h>
 
 #include "internals.h"
 
@@ -33,10 +34,7 @@ void suspend_device_irqs(void)
 		__disable_irq(desc, irq, true);
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
-
-	for_each_irq_desc(irq, desc)
-		if (desc->istate & IRQS_SUSPENDED)
-			synchronize_irq(irq);
+	set_irqs_suspended(true);
 }
 EXPORT_SYMBOL_GPL(suspend_device_irqs);
 
@@ -90,6 +88,7 @@ device_initcall(irq_pm_init_ops);
  */
 void resume_device_irqs(void)
 {
+	set_irqs_suspended(false);
 	resume_irqs(false);
 }
 EXPORT_SYMBOL_GPL(resume_device_irqs);
@@ -102,6 +101,9 @@ int check_wakeup_irqs(void)
 	struct irq_desc *desc;
 	int irq;
 
+	if (pm_wakeup_pending())
+		return -EBUSY;
+
 	for_each_irq_desc(irq, desc) {
 		/*
 		 * Only interrupts which are marked as wakeup source
Index: linux-pm/kernel/irq/internals.h
===================================================================
--- linux-pm.orig/kernel/irq/internals.h
+++ linux-pm/kernel/irq/internals.h
@@ -17,6 +17,9 @@
 #define istate core_internal_state__do_not_mess_with_it
 
 extern bool noirqdebug;
+extern bool irqs_suspended;
+
+extern void set_irqs_suspended(bool on);
 
 /*
  * Bits used by threaded handlers:
Index: linux-pm/include/linux/irqdesc.h
===================================================================
--- linux-pm.orig/include/linux/irqdesc.h
+++ linux-pm/include/linux/irqdesc.h
@@ -51,6 +51,7 @@ struct irq_desc {
 	unsigned int		core_internal_state__do_not_mess_with_it;
 	unsigned int		depth;		/* nested irq disables */
 	unsigned int		wake_depth;	/* nested wake enables */
+	unsigned int		skip_suspend;
 	unsigned int		irq_count;	/* For detecting broken IRQs */
 	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
 	unsigned int		irqs_unhandled;


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup
  2014-08-01 14:29                                                   ` Rafael J. Wysocki
  2014-08-02  1:31                                                     ` Rafael J. Wysocki
@ 2014-08-05 15:22                                                     ` Rafael J. Wysocki
  2014-08-05 15:24                                                       ` [PATCH 1/5] PM / sleep: Mechanism for aborting system suspends unconditionally Rafael J. Wysocki
                                                                         ` (6 more replies)
  1 sibling, 7 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-05 15:22 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Friday, August 01, 2014 04:29:40 PM Rafael J. Wysocki wrote:
> On Friday, August 01, 2014 03:43:21 PM Thomas Gleixner wrote:
> > On Fri, 1 Aug 2014, Rafael J. Wysocki wrote:
> > > OK, I guess "IRQ_HANDLED from a wakeup interrupt" may be interpreted as
> > > IRQ_HANDLED_PMWAKE.  On the other hand, if that's going to be handled in
> > > handle_irq_event_percpu(), then using a special return code would save us
> > > a brach for IRQ_HANDLED interrupts.  We could convert it to IRQ_HANDLED
> > > immediately then.
> > 
> > We can handle it at the end of the function by calling
> > note_interrupt() unconditionally do the following there:
> > 
> >       if (suspended) {
> >       	 if (ret == IRQ_NONE) {
> > 	    if (shared)
> > 	       yell_and_abort_or_resume();
> >          } else {
> > 	    abort_or_resume();
> >          }
> >       }
> >       if (noirqdebug)
> >       	 return;
> 
> I see.
> 
> > > OK, I'll take a stab at the IRQF_SHARED thing if you don't mind.
> > 
> > Definitely not :)
> > 
> > > Here's my current understanding of what can be done for IRQF_NO_SUSPEND.
> > > 
> > > In suspend_device_irqs():
> > > 
> > > (1) If all actions in the list have the same setting (eg. IRQF_NO_SUSPEND unset),
> > >     keep the current behavior.
> > > (2) If the actions have different settings:
> > >     - Actions with IRQF_NO_SUSPEND set are not modified.
> > >     - Actions with IRQF_NO_SUSPEND unset are switched over to a stub handler.
> > >     - IRQS_SUSPEND_MODE (new flag) is set for the IRQ.
> > 
> > Can we please do that in setup_irq() and let the shared ones always
> > run through the stub? That keeps suspend/resume_device_irqs() simple.
> 
> OK

Here's a patch series based on what we talked about.

[1/5] Mechanism to wake up the system or abort suspend in progress automatically.
[2/5] Fix for shared IRQs vs IRQF_NO_SUSPEND (with wakeup in mind).
[3/5] Wakeup interrupts support for suspend-to-idle.
[4/5] Set IRQCHIP_SKIP_SET_WAKE for x86 IOAPIC IRQ chips.
[5/5] Make PCIe PME wake up from suspend to idle.

All tested on MSI Wind that has a couple of issues being addressed.

Rafael


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH 1/5] PM / sleep: Mechanism for aborting system suspends unconditionally
  2014-08-05 15:22                                                     ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Rafael J. Wysocki
@ 2014-08-05 15:24                                                       ` Rafael J. Wysocki
  2014-08-05 23:29                                                         ` [Update][PATCH " Rafael J. Wysocki
  2014-08-05 15:25                                                       ` [PATCH 2/5] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts Rafael J. Wysocki
                                                                         ` (5 subsequent siblings)
  6 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-05 15:24 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

It sometimes may be necessary to abort a system suspend in
progress or wake up the system from suspend-to-idle even if the
pm_wakeup_event()/pm_stay_awake() mechanism is not enabled.

For this purpose, introduce a new global variable abort_suspend and
make pm_wakeup_pending() check its value.  Also add routines for
manipulating that variable.

This is going to be used in subsequent changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/wakeup.c |   20 ++++++++++++++++++++
 include/linux/suspend.h     |    3 +++
 kernel/power/process.c      |    1 +
 3 files changed, 24 insertions(+)

Index: linux-pm/include/linux/suspend.h
===================================================================
--- linux-pm.orig/include/linux/suspend.h
+++ linux-pm/include/linux/suspend.h
@@ -371,6 +371,8 @@ extern int unregister_pm_notifier(struct
 extern bool events_check_enabled;
 
 extern bool pm_wakeup_pending(void);
+extern void pm_system_wakeup(void);
+extern void pm_wakeup_clear(void);
 extern bool pm_get_wakeup_count(unsigned int *count, bool block);
 extern bool pm_save_wakeup_count(unsigned int count);
 extern void pm_wakep_autosleep_enabled(bool set);
@@ -418,6 +420,7 @@ static inline int unregister_pm_notifier
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
 
 static inline bool pm_wakeup_pending(void) { return false; }
+static inline void pm_system_wakeup(void) {}
 
 static inline void lock_system_sleep(void) {}
 static inline void unlock_system_sleep(void) {}
Index: linux-pm/drivers/base/power/wakeup.c
===================================================================
--- linux-pm.orig/drivers/base/power/wakeup.c
+++ linux-pm/drivers/base/power/wakeup.c
@@ -691,6 +691,8 @@ void pm_print_active_wakeup_sources(void
 }
 EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources);
 
+static bool abort_suspend;
+
 /**
  * pm_wakeup_pending - Check if power transition in progress should be aborted.
  *
@@ -712,6 +714,7 @@ bool pm_wakeup_pending(void)
 		ret = (cnt != saved_count || inpr > 0);
 		events_check_enabled = !ret;
 	}
+	ret = ret || abort_suspend;
 	spin_unlock_irqrestore(&events_lock, flags);
 
 	if (ret) {
@@ -722,6 +725,23 @@ bool pm_wakeup_pending(void)
 	return ret;
 }
 
+void pm_system_wakeup(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&events_lock, flags);
+	abort_suspend = true;
+	spin_unlock_irqrestore(&events_lock, flags);
+	freeze_wake();
+}
+
+void pm_wakeup_clear(void)
+{
+	spin_lock_irq(&events_lock);
+	abort_suspend = false;
+	spin_unlock_irq(&events_lock);
+}
+
 /**
  * pm_get_wakeup_count - Read the number of registered wakeup events.
  * @count: Address to store the value at.
Index: linux-pm/kernel/power/process.c
===================================================================
--- linux-pm.orig/kernel/power/process.c
+++ linux-pm/kernel/power/process.c
@@ -129,6 +129,7 @@ int freeze_processes(void)
 	if (!pm_freezing)
 		atomic_inc(&system_freezing_cnt);
 
+	pm_wakeup_clear();
 	printk("Freezing user space processes ... ");
 	pm_freezing = true;
 	error = try_to_freeze_tasks(true);


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH 2/5] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts
  2014-08-05 15:22                                                     ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Rafael J. Wysocki
  2014-08-05 15:24                                                       ` [PATCH 1/5] PM / sleep: Mechanism for aborting system suspends unconditionally Rafael J. Wysocki
@ 2014-08-05 15:25                                                       ` Rafael J. Wysocki
  2014-08-05 15:26                                                       ` [PATCH 3/5] irq / PM: Make wakeup interrupts wake up from suspend-to-idle Rafael J. Wysocki
                                                                         ` (4 subsequent siblings)
  6 siblings, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-05 15:25 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Since __disable_irq() only checks IRQF_NO_SUSPEND for the first
irqaction in a given irq_desc, that value of that bit for the
first irqaction affects all of the other irqactions sharing the
interrupt with it.  This is problematic in two cases.

First, if IRQF_NO_SUSPEND is set in the first irqaction and unset
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
unset will be invoked after suspend_device_irqs() has returned even
though they are not supposed to.  That shouldn't be a problem if
those interrupt handlers are implemented correctly and the
corresponding devices are properly suspended, but it may lead to
functional issues otherwise.

Second, if IRQF_NO_SUSPEND is unset in the first irqaction and set
in at least one of the other irqactions sharing the same interrupt,
the interrupt handlers in the other irqactions with IRQF_NO_SUSPEND
set will not be invoked after suspend_device_irqs() has returned, but
they are supposed to be invoked then.  That may be a problem if the
corresponding devices are wakeup devices or if their interrupts have
to be properly handled during system suspend/resume too for other
reasons, which is the case for timer interrupts or the ACPI SCI for
example.

To address this, introduce a wrapper interrupt handler for shared
interrupts, irq_shared_wrapper_handler(), and make __setup_irq()
use it for shared interrupts by default.  That wrapper handler
simply invokes the original handler supplied while requesting the
IRQ if the (new) irqs_suspended global variable is unset.  Otherwise
it returns IRQ_NONE.  The wrapper handler is used for all irqactions
sharing IRQs except for irqactions with IRQF_NO_SUSPEND set that
retain their original handlers.

The irqs_suspended variable is set by suspend_device_irqs() and
unset by resume_device_irqs().  It is checked by note_interrupt()
(now called unconditionally) and if set, any unhandled interrupt
will cause the system to wake up (or system suspend in progress
to be aborted).

The additional (new) skip_suspend field in struct irq_desc is
used to track if any irqactions in the given desc have
IRQF_NO_SUSPEND set.  It is used by suspend_device_irqs() to
decide which IRQs to disable during system suspends.  This way,
all IRQs with at least one irqaction having IRQF_NO_SUSPEND set
(skip_suspend greater than 0) will stay enabled after
suspend_device_irqs(), but if any irqactions belonging to them
have IRQF_NO_SUSPEND unset, they will use the wrapper handler
that will return IRQ_NONE for them (causing system suspend to
be aborted or system wakeup from suspend-to-idle to happen).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/interrupt.h |    4 +++
 include/linux/irqdesc.h   |    1 
 kernel/irq/handle.c       |    3 --
 kernel/irq/internals.h    |    3 ++
 kernel/irq/manage.c       |   52 ++++++++++++++++++++++++++++++++++++++++++++--
 kernel/irq/pm.c           |   10 +++++---
 kernel/irq/spurious.c     |   13 +++++++++++
 7 files changed, 78 insertions(+), 8 deletions(-)

Index: linux-pm/kernel/irq/manage.c
===================================================================
--- linux-pm.orig/kernel/irq/manage.c
+++ linux-pm/kernel/irq/manage.c
@@ -385,7 +385,7 @@ setup_affinity(unsigned int irq, struct
 void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
 {
 	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND))
+		if (!desc->action || desc->skip_suspend)
 			return;
 		desc->istate |= IRQS_SUSPENDED;
 	}
@@ -658,6 +658,43 @@ int irq_set_parent(int irq, int parent_i
 }
 #endif
 
+bool irqs_suspended __read_mostly;
+
+void set_irqs_suspended(bool on)
+{
+	struct irq_desc *desc;
+	int irq;
+
+	irqs_suspended = on;
+	for_each_irq_desc(irq, desc)
+		synchronize_irq(irq);
+}
+
+/*
+ * Wrapper handler for shared interrupts.
+ */
+static irqreturn_t irq_shared_wrapper_handler(int irq, void *dev_id)
+{
+	struct irqaction *action = dev_id;
+
+	if (unlikely(irqs_suspended))
+		return IRQ_NONE;
+
+	return action->s_handler(irq, action->s_dev_id);
+}
+
+static void irq_shared_wrap_handler(struct irqaction *action)
+{
+	if (action->handler == irq_shared_wrapper_handler ||
+	    (action->flags & IRQF_NO_SUSPEND))
+		return;
+
+	action->s_handler = action->handler;
+	action->handler = irq_shared_wrapper_handler;
+	action->s_dev_id = action->dev_id;
+	action->dev_id = action;
+}
+
 /*
  * Default primary interrupt handler for threaded interrupts. Is
  * assigned as primary handler when request_threaded_irq is called
@@ -1088,6 +1125,7 @@ __setup_irq(unsigned int irq, struct irq
 
 		/* add new interrupt at end of irq queue */
 		do {
+			irq_shared_wrap_handler(old);
 			/*
 			 * Or all existing action->thread_mask bits,
 			 * so we can find the next zero bit for this
@@ -1097,6 +1135,7 @@ __setup_irq(unsigned int irq, struct irq
 			old_ptr = &old->next;
 			old = *old_ptr;
 		} while (old);
+		irq_shared_wrap_handler(new);
 		shared = 1;
 	}
 
@@ -1222,6 +1261,9 @@ __setup_irq(unsigned int irq, struct irq
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
 
+	if (new->flags & IRQF_NO_SUSPEND)
+		desc->skip_suspend++;
+
 	/*
 	 * Check whether we disabled the irq via the spurious handler
 	 * before. Reenable it and give it another chance.
@@ -1328,13 +1370,19 @@ static struct irqaction *__free_irq(unsi
 			return NULL;
 		}
 
-		if (action->dev_id == dev_id)
+		if (action->dev_id == dev_id || action->s_dev_id == dev_id)
 			break;
 		action_ptr = &action->next;
 	}
 
 	/* Found it - now remove it from the list of entries: */
 	*action_ptr = action->next;
+	if (action->flags & IRQF_NO_SUSPEND) {
+		desc->skip_suspend--;
+	} else if (action->handler == irq_shared_wrapper_handler) {
+		action->handler = action->s_handler;
+		action->dev_id = action->s_dev_id;
+	}
 
 	/* If this was the last handler, shut down the IRQ line: */
 	if (!desc->action) {
Index: linux-pm/include/linux/interrupt.h
===================================================================
--- linux-pm.orig/include/linux/interrupt.h
+++ linux-pm/include/linux/interrupt.h
@@ -90,8 +90,10 @@ typedef irqreturn_t (*irq_handler_t)(int
 /**
  * struct irqaction - per interrupt action descriptor
  * @handler:	interrupt handler function
+ * @s_handler:	interrupt handler function for shared interrupts
  * @name:	name of the device
  * @dev_id:	cookie to identify the device
+ * @s_dev_id:	cookie to identify the device for shared interrupts
  * @percpu_dev_id:	cookie to identify the device
  * @next:	pointer to the next irqaction for shared interrupts
  * @irq:	interrupt number
@@ -105,6 +107,8 @@ typedef irqreturn_t (*irq_handler_t)(int
 struct irqaction {
 	irq_handler_t		handler;
 	void			*dev_id;
+	irq_handler_t		s_handler;
+	void			*s_dev_id;
 	void __percpu		*percpu_dev_id;
 	struct irqaction	*next;
 	irq_handler_t		thread_fn;
Index: linux-pm/kernel/irq/handle.c
===================================================================
--- linux-pm.orig/kernel/irq/handle.c
+++ linux-pm/kernel/irq/handle.c
@@ -175,8 +175,7 @@ handle_irq_event_percpu(struct irq_desc
 
 	add_interrupt_randomness(irq, flags);
 
-	if (!noirqdebug)
-		note_interrupt(irq, desc, retval);
+	note_interrupt(irq, desc, retval);
 	return retval;
 }
 
Index: linux-pm/kernel/irq/spurious.c
===================================================================
--- linux-pm.orig/kernel/irq/spurious.c
+++ linux-pm/kernel/irq/spurious.c
@@ -13,6 +13,7 @@
 #include <linux/interrupt.h>
 #include <linux/moduleparam.h>
 #include <linux/timer.h>
+#include <linux/suspend.h>
 
 #include "internals.h"
 
@@ -275,6 +276,18 @@ try_misrouted_irq(unsigned int irq, stru
 void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		    irqreturn_t action_ret)
 {
+	if (unlikely(irqs_suspended && action_ret == IRQ_NONE)) {
+		pr_err("IRQ %d: Unhandled while suspended\n", irq);
+		desc->istate |= IRQS_SUSPENDED;
+		desc->depth++;
+		irq_disable(desc);
+		pm_system_wakeup();
+		return;
+	}
+
+	if (noirqdebug)
+		return;
+
 	if (desc->istate & IRQS_POLL_INPROGRESS ||
 	    irq_settings_is_polled(desc))
 		return;
Index: linux-pm/kernel/irq/pm.c
===================================================================
--- linux-pm.orig/kernel/irq/pm.c
+++ linux-pm/kernel/irq/pm.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/syscore_ops.h>
+#include <linux/suspend.h>
 
 #include "internals.h"
 
@@ -33,10 +34,7 @@ void suspend_device_irqs(void)
 		__disable_irq(desc, irq, true);
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
-
-	for_each_irq_desc(irq, desc)
-		if (desc->istate & IRQS_SUSPENDED)
-			synchronize_irq(irq);
+	set_irqs_suspended(true);
 }
 EXPORT_SYMBOL_GPL(suspend_device_irqs);
 
@@ -90,6 +88,7 @@ device_initcall(irq_pm_init_ops);
  */
 void resume_device_irqs(void)
 {
+	set_irqs_suspended(false);
 	resume_irqs(false);
 }
 EXPORT_SYMBOL_GPL(resume_device_irqs);
@@ -102,6 +101,9 @@ int check_wakeup_irqs(void)
 	struct irq_desc *desc;
 	int irq;
 
+	if (pm_wakeup_pending())
+		return -EBUSY;
+
 	for_each_irq_desc(irq, desc) {
 		/*
 		 * Only interrupts which are marked as wakeup source
Index: linux-pm/kernel/irq/internals.h
===================================================================
--- linux-pm.orig/kernel/irq/internals.h
+++ linux-pm/kernel/irq/internals.h
@@ -17,6 +17,9 @@
 #define istate core_internal_state__do_not_mess_with_it
 
 extern bool noirqdebug;
+extern bool irqs_suspended;
+
+extern void set_irqs_suspended(bool on);
 
 /*
  * Bits used by threaded handlers:
Index: linux-pm/include/linux/irqdesc.h
===================================================================
--- linux-pm.orig/include/linux/irqdesc.h
+++ linux-pm/include/linux/irqdesc.h
@@ -51,6 +51,7 @@ struct irq_desc {
 	unsigned int		core_internal_state__do_not_mess_with_it;
 	unsigned int		depth;		/* nested irq disables */
 	unsigned int		wake_depth;	/* nested wake enables */
+	unsigned int		skip_suspend;
 	unsigned int		irq_count;	/* For detecting broken IRQs */
 	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
 	unsigned int		irqs_unhandled;


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH 3/5] irq / PM: Make wakeup interrupts wake up from suspend-to-idle
  2014-08-05 15:22                                                     ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Rafael J. Wysocki
  2014-08-05 15:24                                                       ` [PATCH 1/5] PM / sleep: Mechanism for aborting system suspends unconditionally Rafael J. Wysocki
  2014-08-05 15:25                                                       ` [PATCH 2/5] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts Rafael J. Wysocki
@ 2014-08-05 15:26                                                       ` Rafael J. Wysocki
  2014-08-08  1:58                                                         ` [Update][PATCH " Rafael J. Wysocki
  2014-08-05 15:27                                                       ` [PATCH 4/5] x86 / PM: Set IRQCHIP_SKIP_SET_WAKE for IOAPIC IRQ chip objects Rafael J. Wysocki
                                                                         ` (3 subsequent siblings)
  6 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-05 15:26 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Make IRQs enabled for system wakeup via enable_irq_wake() wake up
the system from suspend-to-idle.

For this purpose, introduce a new routine for enabling or disabling
wakeup interrupts, set_wakeup_irqs(), and make freeze_enter() call
it to enable them before starting the suspend-to-idle loop and to
disable them after the loop has been terminated.

Also make note_interrupt() check the wakeup status of the IRQ when
irqs_suspended is set and wake up the system (or abort system
suspend in progress) for wakeup interrupts as well as for unhandled
ones.  This is necessary to handle wakeup IRQs that aren't shared,
because their handlers may still return IRQ_HANDLED when
irqs_suspended is set.

Thus callers of enable_irq_wake() still need to be prepared for
handling interrupts from the given IRQ during the entire system
suspend and resume, but it is not guaranteed that those handlers
will be invoked every time between suspend_device_irqs() and
resume_device_irqs() (the core may simply wake up the system or
abort system suspend in progress without invoking original
interrupt handlers for the IRQ).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/interrupt.h |    1 +
 kernel/irq/pm.c           |   25 +++++++++++++++++++++++++
 kernel/irq/spurious.c     |    7 +++++--
 kernel/power/suspend.c    |    3 +++
 4 files changed, 34 insertions(+), 2 deletions(-)

Index: linux-pm/include/linux/interrupt.h
===================================================================
--- linux-pm.orig/include/linux/interrupt.h
+++ linux-pm/include/linux/interrupt.h
@@ -197,6 +197,7 @@ extern void irq_wake_thread(unsigned int
 /* The following three functions are for the core kernel use only. */
 extern void suspend_device_irqs(void);
 extern void resume_device_irqs(void);
+extern void set_wakeup_irqs(bool enable);
 #ifdef CONFIG_PM_SLEEP
 extern int check_wakeup_irqs(void);
 #else
Index: linux-pm/kernel/irq/pm.c
===================================================================
--- linux-pm.orig/kernel/irq/pm.c
+++ linux-pm/kernel/irq/pm.c
@@ -130,3 +130,28 @@ int check_wakeup_irqs(void)
 
 	return 0;
 }
+
+void set_wakeup_irqs(bool enable)
+{
+	struct irq_desc *desc;
+	int irq;
+
+	for_each_irq_desc(irq, desc) {
+		unsigned long flags;
+
+		raw_spin_lock_irqsave(&desc->lock, flags);
+
+		if (desc->action && irqd_is_wakeup_set(&desc->irq_data) &&
+		    !desc->skip_suspend) {
+			if (enable) {
+				desc->istate &= ~IRQS_SUSPENDED;
+				__enable_irq(desc, irq, false);
+			} else if (!(desc->istate & IRQS_SUSPENDED)) {
+				__disable_irq(desc, irq, false);
+				desc->istate |= IRQS_SUSPENDED;
+			}
+		}
+
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+	}
+}
Index: linux-pm/kernel/power/suspend.c
===================================================================
--- linux-pm.orig/kernel/power/suspend.c
+++ linux-pm/kernel/power/suspend.c
@@ -12,6 +12,7 @@
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/cpuidle.h>
@@ -55,7 +56,9 @@ static void freeze_enter(void)
 {
 	cpuidle_use_deepest_state(true);
 	cpuidle_resume();
+	set_wakeup_irqs(true);
 	wait_event(suspend_freeze_wait_head, suspend_freeze_wake);
+	set_wakeup_irqs(false);
 	cpuidle_pause();
 	cpuidle_use_deepest_state(false);
 }
Index: linux-pm/kernel/irq/spurious.c
===================================================================
--- linux-pm.orig/kernel/irq/spurious.c
+++ linux-pm/kernel/irq/spurious.c
@@ -276,8 +276,11 @@ try_misrouted_irq(unsigned int irq, stru
 void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		    irqreturn_t action_ret)
 {
-	if (unlikely(irqs_suspended && action_ret == IRQ_NONE)) {
-		pr_err("IRQ %d: Unhandled while suspended\n", irq);
+	if (unlikely(irqs_suspended &&
+	    (action_ret == IRQ_NONE || irqd_is_wakeup_set(&desc->irq_data)))) {
+		if (!irqd_is_wakeup_set(&desc->irq_data))
+			pr_err("IRQ %d: Unhandled while suspended\n", irq);
+
 		desc->istate |= IRQS_SUSPENDED;
 		desc->depth++;
 		irq_disable(desc);


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH 4/5] x86 / PM: Set IRQCHIP_SKIP_SET_WAKE for IOAPIC IRQ chip objects
  2014-08-05 15:22                                                     ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Rafael J. Wysocki
                                                                         ` (2 preceding siblings ...)
  2014-08-05 15:26                                                       ` [PATCH 3/5] irq / PM: Make wakeup interrupts wake up from suspend-to-idle Rafael J. Wysocki
@ 2014-08-05 15:27                                                       ` Rafael J. Wysocki
  2014-08-05 15:28                                                       ` [PATCH 5/5] PCI / PM: Make PCIe PME interrupts wake up from suspend-to-idle Rafael J. Wysocki
                                                                         ` (2 subsequent siblings)
  6 siblings, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-05 15:27 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Set the IRQCHIP_SKIP_SET_WAKE for IOAPIC IRQ chip objects so that
interrupts from them can work as wakeup interrupts for suspend-to-idle.

After this change, running enable_irq_wake() on one of the IRQs in
question will succeed and IRQD_WAKEUP_STATE will be set for it, so
all of the suspend-to-idle wakeup mechanics introduced previously
will work for it automatically.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/apic/io_apic.c |    5 +++++
 1 file changed, 5 insertions(+)

Index: linux-pm/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux-pm.orig/arch/x86/kernel/apic/io_apic.c
+++ linux-pm/arch/x86/kernel/apic/io_apic.c
@@ -2507,6 +2507,7 @@ static struct irq_chip ioapic_chip __rea
 	.irq_eoi		= ack_apic_level,
 	.irq_set_affinity	= native_ioapic_set_affinity,
 	.irq_retrigger		= ioapic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
 static inline void init_IO_APIC_traps(void)
@@ -3030,6 +3031,7 @@ static struct irq_chip msi_chip = {
 	.irq_ack		= ack_apic_edge,
 	.irq_set_affinity	= msi_set_affinity,
 	.irq_retrigger		= ioapic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
 int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
@@ -3128,6 +3130,7 @@ static struct irq_chip dmar_msi_type = {
 	.irq_ack		= ack_apic_edge,
 	.irq_set_affinity	= dmar_msi_set_affinity,
 	.irq_retrigger		= ioapic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
 int arch_setup_dmar_msi(unsigned int irq)
@@ -3178,6 +3181,7 @@ static struct irq_chip hpet_msi_type = {
 	.irq_ack = ack_apic_edge,
 	.irq_set_affinity = hpet_msi_set_affinity,
 	.irq_retrigger = ioapic_retrigger_irq,
+	.flags = IRQCHIP_SKIP_SET_WAKE,
 };
 
 int default_setup_hpet_msi(unsigned int irq, unsigned int id)
@@ -3241,6 +3245,7 @@ static struct irq_chip ht_irq_chip = {
 	.irq_ack		= ack_apic_edge,
 	.irq_set_affinity	= ht_set_affinity,
 	.irq_retrigger		= ioapic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH 5/5] PCI / PM: Make PCIe PME interrupts wake up from suspend-to-idle
  2014-08-05 15:22                                                     ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Rafael J. Wysocki
                                                                         ` (3 preceding siblings ...)
  2014-08-05 15:27                                                       ` [PATCH 4/5] x86 / PM: Set IRQCHIP_SKIP_SET_WAKE for IOAPIC IRQ chip objects Rafael J. Wysocki
@ 2014-08-05 15:28                                                       ` Rafael J. Wysocki
  2014-08-05 16:12                                                       ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Peter Zijlstra
  2014-08-08  2:09                                                       ` Rafael J. Wysocki
  6 siblings, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-05 15:28 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

To make PCIe PME interrupts wake up the system from suspend to idle,
make the PME driver use enable_irq_wake() on the IRQ during system
suspend (if there are any wakeup devices below the given PCIe port)
without disabling PME interrupts.  This way, an interrupt will still
trigger if a wakeup event happens and the system will be woken up (or
system suspend in progress will be aborted) by means of the new
mechanics introduced previously.

This change allows Wake-on-LAN to be used for wakeup from
suspend-to-idle on my MSI Wind tesbed netbook.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/pcie/pme.c |   61 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 51 insertions(+), 10 deletions(-)

Index: linux-pm/drivers/pci/pcie/pme.c
===================================================================
--- linux-pm.orig/drivers/pci/pcie/pme.c
+++ linux-pm/drivers/pci/pcie/pme.c
@@ -41,11 +41,17 @@ static int __init pcie_pme_setup(char *s
 }
 __setup("pcie_pme=", pcie_pme_setup);
 
+enum pme_suspend_level {
+	PME_SUSPEND_NONE = 0,
+	PME_SUSPEND_WAKEUP,
+	PME_SUSPEND_NOIRQ,
+};
+
 struct pcie_pme_service_data {
 	spinlock_t lock;
 	struct pcie_device *srv;
 	struct work_struct work;
-	bool noirq; /* Don't enable the PME interrupt used by this service. */
+	enum pme_suspend_level suspend_level;
 };
 
 /**
@@ -223,7 +229,7 @@ static void pcie_pme_work_fn(struct work
 	spin_lock_irq(&data->lock);
 
 	for (;;) {
-		if (data->noirq)
+		if (data->suspend_level != PME_SUSPEND_NONE)
 			break;
 
 		pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta);
@@ -250,7 +256,7 @@ static void pcie_pme_work_fn(struct work
 		spin_lock_irq(&data->lock);
 	}
 
-	if (!data->noirq)
+	if (data->suspend_level == PME_SUSPEND_NONE)
 		pcie_pme_interrupt_enable(port, true);
 
 	spin_unlock_irq(&data->lock);
@@ -367,6 +373,21 @@ static int pcie_pme_probe(struct pcie_de
 	return ret;
 }
 
+static bool pcie_pme_check_wakeup(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	if (!bus)
+		return false;
+
+	list_for_each_entry(dev, &bus->devices, bus_list)
+		if (device_may_wakeup(&dev->dev)
+		    || pcie_pme_check_wakeup(dev->subordinate))
+			return true;
+
+	return false;
+}
+
 /**
  * pcie_pme_suspend - Suspend PCIe PME service device.
  * @srv: PCIe service device to suspend.
@@ -375,11 +396,26 @@ static int pcie_pme_suspend(struct pcie_
 {
 	struct pcie_pme_service_data *data = get_service_data(srv);
 	struct pci_dev *port = srv->port;
+	bool wakeup;
 
+	if (device_may_wakeup(&port->dev)) {
+		wakeup = true;
+	} else {
+		down_read(&pci_bus_sem);
+		wakeup = pcie_pme_check_wakeup(port->subordinate);
+		up_read(&pci_bus_sem);
+	}
 	spin_lock_irq(&data->lock);
-	pcie_pme_interrupt_enable(port, false);
-	pcie_clear_root_pme_status(port);
-	data->noirq = true;
+	if (wakeup) {
+		enable_irq_wake(srv->irq);
+		data->suspend_level = PME_SUSPEND_WAKEUP;
+	} else {
+		struct pci_dev *port = srv->port;
+
+		pcie_pme_interrupt_enable(port, false);
+		pcie_clear_root_pme_status(port);
+		data->suspend_level = PME_SUSPEND_NOIRQ;
+	}
 	spin_unlock_irq(&data->lock);
 
 	synchronize_irq(srv->irq);
@@ -394,12 +430,17 @@ static int pcie_pme_suspend(struct pcie_
 static int pcie_pme_resume(struct pcie_device *srv)
 {
 	struct pcie_pme_service_data *data = get_service_data(srv);
-	struct pci_dev *port = srv->port;
 
 	spin_lock_irq(&data->lock);
-	data->noirq = false;
-	pcie_clear_root_pme_status(port);
-	pcie_pme_interrupt_enable(port, true);
+	if (data->suspend_level == PME_SUSPEND_NOIRQ) {
+		struct pci_dev *port = srv->port;
+
+		pcie_clear_root_pme_status(port);
+		pcie_pme_interrupt_enable(port, true);
+	} else {
+		disable_irq_wake(srv->irq);
+	}
+	data->suspend_level = PME_SUSPEND_NONE;
 	spin_unlock_irq(&data->lock);
 
 	return 0;


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup
  2014-08-05 15:22                                                     ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Rafael J. Wysocki
                                                                         ` (4 preceding siblings ...)
  2014-08-05 15:28                                                       ` [PATCH 5/5] PCI / PM: Make PCIe PME interrupts wake up from suspend-to-idle Rafael J. Wysocki
@ 2014-08-05 16:12                                                       ` Peter Zijlstra
  2014-08-08  2:09                                                       ` Rafael J. Wysocki
  6 siblings, 0 replies; 75+ messages in thread
From: Peter Zijlstra @ 2014-08-05 16:12 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Thomas Gleixner, linux-kernel, Linux PM list, Dmitry Torokhov

[-- Attachment #1: Type: text/plain, Size: 608 bytes --]

On Tue, Aug 05, 2014 at 05:22:57PM +0200, Rafael J. Wysocki wrote:
> Here's a patch series based on what we talked about.
> 
> [1/5] Mechanism to wake up the system or abort suspend in progress automatically.
> [2/5] Fix for shared IRQs vs IRQF_NO_SUSPEND (with wakeup in mind).
> [3/5] Wakeup interrupts support for suspend-to-idle.
> [4/5] Set IRQCHIP_SKIP_SET_WAKE for x86 IOAPIC IRQ chips.
> [5/5] Make PCIe PME wake up from suspend to idle.
> 
> All tested on MSI Wind that has a couple of issues being addressed.

I can confirm it works on my WSM-EP which has that shared interrupt
stuff.

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 75+ messages in thread

* [Update][PATCH 1/5] PM / sleep: Mechanism for aborting system suspends unconditionally
  2014-08-05 15:24                                                       ` [PATCH 1/5] PM / sleep: Mechanism for aborting system suspends unconditionally Rafael J. Wysocki
@ 2014-08-05 23:29                                                         ` Rafael J. Wysocki
  0 siblings, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-05 23:29 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Subject: PM / sleep: Mechanism for aborting system suspends unconditionally

It sometimes may be necessary to abort a system suspend in
progress or wake up the system from suspend-to-idle even if the
pm_wakeup_event()/pm_stay_awake() mechanism is not enabled.

For this purpose, introduce a new global variable abort_suspend and
make pm_wakeup_pending() check its value.  Also add routines for
manipulating that variable.

This is going to be used in subsequent changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

The empty stub pm_wakeup_clear() for CONFIG_PM_SLEEP unset was missing.

Rafael

---
 drivers/base/power/wakeup.c |   20 ++++++++++++++++++++
 include/linux/suspend.h     |    4 ++++
 kernel/power/process.c      |    1 +
 3 files changed, 25 insertions(+)

Index: linux-pm/include/linux/suspend.h
===================================================================
--- linux-pm.orig/include/linux/suspend.h
+++ linux-pm/include/linux/suspend.h
@@ -371,6 +371,8 @@ extern int unregister_pm_notifier(struct
 extern bool events_check_enabled;
 
 extern bool pm_wakeup_pending(void);
+extern void pm_system_wakeup(void);
+extern void pm_wakeup_clear(void);
 extern bool pm_get_wakeup_count(unsigned int *count, bool block);
 extern bool pm_save_wakeup_count(unsigned int count);
 extern void pm_wakep_autosleep_enabled(bool set);
@@ -418,6 +420,8 @@ static inline int unregister_pm_notifier
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
 
 static inline bool pm_wakeup_pending(void) { return false; }
+static inline void pm_system_wakeup(void) {}
+static inline void pm_wakeup_clear(void) {}
 
 static inline void lock_system_sleep(void) {}
 static inline void unlock_system_sleep(void) {}
Index: linux-pm/drivers/base/power/wakeup.c
===================================================================
--- linux-pm.orig/drivers/base/power/wakeup.c
+++ linux-pm/drivers/base/power/wakeup.c
@@ -691,6 +691,8 @@ void pm_print_active_wakeup_sources(void
 }
 EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources);
 
+static bool abort_suspend;
+
 /**
  * pm_wakeup_pending - Check if power transition in progress should be aborted.
  *
@@ -712,6 +714,7 @@ bool pm_wakeup_pending(void)
 		ret = (cnt != saved_count || inpr > 0);
 		events_check_enabled = !ret;
 	}
+	ret = ret || abort_suspend;
 	spin_unlock_irqrestore(&events_lock, flags);
 
 	if (ret) {
@@ -722,6 +725,23 @@ bool pm_wakeup_pending(void)
 	return ret;
 }
 
+void pm_system_wakeup(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&events_lock, flags);
+	abort_suspend = true;
+	spin_unlock_irqrestore(&events_lock, flags);
+	freeze_wake();
+}
+
+void pm_wakeup_clear(void)
+{
+	spin_lock_irq(&events_lock);
+	abort_suspend = false;
+	spin_unlock_irq(&events_lock);
+}
+
 /**
  * pm_get_wakeup_count - Read the number of registered wakeup events.
  * @count: Address to store the value at.
Index: linux-pm/kernel/power/process.c
===================================================================
--- linux-pm.orig/kernel/power/process.c
+++ linux-pm/kernel/power/process.c
@@ -129,6 +129,7 @@ int freeze_processes(void)
 	if (!pm_freezing)
 		atomic_inc(&system_freezing_cnt);
 
+	pm_wakeup_clear();
 	printk("Freezing user space processes ... ");
 	pm_freezing = true;
 	error = try_to_freeze_tasks(true);


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [Update][PATCH 3/5] irq / PM: Make wakeup interrupts wake up from suspend-to-idle
  2014-08-05 15:26                                                       ` [PATCH 3/5] irq / PM: Make wakeup interrupts wake up from suspend-to-idle Rafael J. Wysocki
@ 2014-08-08  1:58                                                         ` Rafael J. Wysocki
  2014-08-09  0:28                                                           ` Rafael J. Wysocki
  0 siblings, 1 reply; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-08  1:58 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Make IRQs enabled for system wakeup via enable_irq_wake() wake up
the system from suspend-to-idle.

For this purpose, introduce a new routine for enabling and disabling
wakeup interrupts, set_wakeup_irqs(), and make freeze_enter() call it
to enable them before starting the suspend-to-idle loop and to
disable them after that loop has been terminated.

When enabling an IRQ which is not a shared one, that routine
replaces its original handler with a stub one always returning
IRQ_NONE.  When disabling it, set_wakeup_irqs() restores the
original handler for it.  This way, IRQ_NONE is returned for all
of the wakeup interrupts during suspend-to-idle and that triggers
the abort-suspend-or-wakeup condition in note_interrupt() causing
the system to wake up.

To avoid losing wakeup events, make note_interrupt() mark wakeup
interrupts as pending before triggering wakeup for irqs_suspended
set.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

When I was working on the doc (that I'm going to send shortly), it
occured to me that it actually would be better to always return
IRQ_NONE from interrupt handlers for wakeup interrupts when
irqs_suspended is set - and mark them as pending to avoid losing
wakeup events.  That way they'll work uniformly, even if someone is
insane enough to use enable_irq_wake() on an IRQF_NO_SUSPEND IRQ.

Rafael

---
 include/linux/interrupt.h |    1 +
 kernel/irq/pm.c           |   45 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/irq/spurious.c     |    6 +++++-
 kernel/power/suspend.c    |    3 +++
 4 files changed, 54 insertions(+), 1 deletion(-)

Index: linux-pm/include/linux/interrupt.h
===================================================================
--- linux-pm.orig/include/linux/interrupt.h
+++ linux-pm/include/linux/interrupt.h
@@ -197,6 +197,7 @@ extern void irq_wake_thread(unsigned int
 /* The following three functions are for the core kernel use only. */
 extern void suspend_device_irqs(void);
 extern void resume_device_irqs(void);
+extern void set_wakeup_irqs(bool enable);
 #ifdef CONFIG_PM_SLEEP
 extern int check_wakeup_irqs(void);
 #else
Index: linux-pm/kernel/irq/pm.c
===================================================================
--- linux-pm.orig/kernel/irq/pm.c
+++ linux-pm/kernel/irq/pm.c
@@ -130,3 +130,48 @@ int check_wakeup_irqs(void)
 
 	return 0;
 }
+
+static irqreturn_t irq_pm_empty_handler(int irq, void *dev_id)
+{
+	return IRQ_NONE;
+}
+
+void set_wakeup_irqs(bool enable)
+{
+	struct irq_desc *desc;
+	int irq;
+
+	for_each_irq_desc(irq, desc) {
+		struct irqaction *action = desc->action;
+		unsigned long flags;
+
+		raw_spin_lock_irqsave(&desc->lock, flags);
+
+		if (action && irqd_is_wakeup_set(&desc->irq_data) &&
+		    !desc->skip_suspend) {
+			if (enable) {
+				/*
+				 * Replace handlers for not shared interrupts.
+				 * Shared ones have wrapper handlers already.
+				 */
+				if (!action->next) {
+					action->s_handler = action->handler;
+					action->handler = irq_pm_empty_handler;
+				}
+				desc->istate &= ~IRQS_SUSPENDED;
+				__enable_irq(desc, irq, false);
+			} else {
+				if (!(desc->istate & IRQS_SUSPENDED)) {
+					__disable_irq(desc, irq, false);
+					desc->istate |= IRQS_SUSPENDED;
+				}
+				if (action->handler == irq_pm_empty_handler) {
+					action->handler = action->s_handler;
+					action->s_handler = NULL;
+				}
+			}
+		}
+
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+	}
+}
Index: linux-pm/kernel/power/suspend.c
===================================================================
--- linux-pm.orig/kernel/power/suspend.c
+++ linux-pm/kernel/power/suspend.c
@@ -12,6 +12,7 @@
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/cpuidle.h>
@@ -55,7 +56,9 @@ static void freeze_enter(void)
 {
 	cpuidle_use_deepest_state(true);
 	cpuidle_resume();
+	set_wakeup_irqs(true);
 	wait_event(suspend_freeze_wait_head, suspend_freeze_wake);
+	set_wakeup_irqs(false);
 	cpuidle_pause();
 	cpuidle_use_deepest_state(false);
 }
Index: linux-pm/kernel/irq/spurious.c
===================================================================
--- linux-pm.orig/kernel/irq/spurious.c
+++ linux-pm/kernel/irq/spurious.c
@@ -277,7 +277,11 @@ void note_interrupt(unsigned int irq, st
 		    irqreturn_t action_ret)
 {
 	if (unlikely(irqs_suspended && action_ret == IRQ_NONE)) {
-		pr_err("IRQ %d: Unhandled while suspended\n", irq);
+		if (irqd_is_wakeup_set(&desc->irq_data))
+			desc->istate |= IRQS_PENDING;
+		else
+			pr_err("IRQ %d: Unhandled while suspended\n", irq);
+
 		desc->istate |= IRQS_SUSPENDED;
 		desc->depth++;
 		irq_disable(desc);


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup
  2014-08-05 15:22                                                     ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Rafael J. Wysocki
                                                                         ` (5 preceding siblings ...)
  2014-08-05 16:12                                                       ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Peter Zijlstra
@ 2014-08-08  2:09                                                       ` Rafael J. Wysocki
  6 siblings, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-08  2:09 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Tuesday, August 05, 2014 05:22:57 PM Rafael J. Wysocki wrote:
> On Friday, August 01, 2014 04:29:40 PM Rafael J. Wysocki wrote:
> > On Friday, August 01, 2014 03:43:21 PM Thomas Gleixner wrote:
> > > On Fri, 1 Aug 2014, Rafael J. Wysocki wrote:
> > > > OK, I guess "IRQ_HANDLED from a wakeup interrupt" may be interpreted as
> > > > IRQ_HANDLED_PMWAKE.  On the other hand, if that's going to be handled in
> > > > handle_irq_event_percpu(), then using a special return code would save us
> > > > a brach for IRQ_HANDLED interrupts.  We could convert it to IRQ_HANDLED
> > > > immediately then.
> > > 
> > > We can handle it at the end of the function by calling
> > > note_interrupt() unconditionally do the following there:
> > > 
> > >       if (suspended) {
> > >       	 if (ret == IRQ_NONE) {
> > > 	    if (shared)
> > > 	       yell_and_abort_or_resume();
> > >          } else {
> > > 	    abort_or_resume();
> > >          }
> > >       }
> > >       if (noirqdebug)
> > >       	 return;
> > 
> > I see.
> > 
> > > > OK, I'll take a stab at the IRQF_SHARED thing if you don't mind.
> > > 
> > > Definitely not :)
> > > 
> > > > Here's my current understanding of what can be done for IRQF_NO_SUSPEND.
> > > > 
> > > > In suspend_device_irqs():
> > > > 
> > > > (1) If all actions in the list have the same setting (eg. IRQF_NO_SUSPEND unset),
> > > >     keep the current behavior.
> > > > (2) If the actions have different settings:
> > > >     - Actions with IRQF_NO_SUSPEND set are not modified.
> > > >     - Actions with IRQF_NO_SUSPEND unset are switched over to a stub handler.
> > > >     - IRQS_SUSPEND_MODE (new flag) is set for the IRQ.
> > > 
> > > Can we please do that in setup_irq() and let the shared ones always
> > > run through the stub? That keeps suspend/resume_device_irqs() simple.
> > 
> > OK
> 
> Here's a patch series based on what we talked about.
> 
> [1/5] Mechanism to wake up the system or abort suspend in progress automatically.
> [2/5] Fix for shared IRQs vs IRQF_NO_SUSPEND (with wakeup in mind).
> [3/5] Wakeup interrupts support for suspend-to-idle.
> [4/5] Set IRQCHIP_SKIP_SET_WAKE for x86 IOAPIC IRQ chips.
> [5/5] Make PCIe PME wake up from suspend to idle.
> 
> All tested on MSI Wind that has a couple of issues being addressed.

Below is the doc patch I promised.

I'm not signing it off yet, as I'm sending it for comments mostly rather than
as something actually done.

Of course, it documents the situation after the series of [1-5/5] (with the
updated [3/5] I've just sent), especially as far as the suspend-to-idle goes.

Rafael


---
 Documentation/power/suspend-and-interrupts.txt |  136 +++++++++++++++++++++++++
 1 file changed, 136 insertions(+)

Index: linux-pm/Documentation/power/suspend-and-interrupts.txt
===================================================================
--- /dev/null
+++ linux-pm/Documentation/power/suspend-and-interrupts.txt
@@ -0,0 +1,136 @@
+System Suspend and Device Interrupts
+
+Copyright (C) 2014 Intel Corp.
+Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+Suspending and Resuming Device IRQs
+-----------------------------------
+
+Device interrupt request lines (IRQs) are generally disabled during system
+suspend after the "late" phase of suspending devices (that is, after all of the
+->prepare, ->suspend and ->suspend_late callbacks have been executed for all
+devices).  That is done by the suspend_device_irqs() function.
+
+The rationale for doing so is that after the "late" phase of device suspend
+there is no legitimate reason why any interrupts from suspended devices should
+trigger and if any devices have not been suspended properly yet, it is better to
+block interrupts from them anyway.  Also in the past we had problems with
+interrupt handlers of devices that shared IRQs with other devices and were not
+prepared for interrupts triggering after their devices had been suspended.
+In those cases they would attempt to access, for example, memory address spaces
+of suspended devices and cause unpredictable behavior to ensue as a result.
+Unfortunately, such problems are very difficult to debug and the introduction
+of suspend_device_irqs(), along with the "noirq" phase of device suspend, was
+the only practical way to prevent them from happening.
+
+Device IRQs are re-enabled during system resume, right before the "early" phase
+of resuming devices (that is, before starting to execute ->resume_early
+callbacks for devices).  The function doing that is resume_device_irqs().
+
+
+The IRQF_NO_SUSPEND Flag
+------------------------
+
+There are interrupts that can legitimately trigger during the entire system
+suspend-resume cycle, including the "noirq" phases of suspending and resuming
+devices as well as during the time when nonboot CPUs are taken offline and
+brought back online.  That applies to timer interrupts in the first place,
+but also to IPIs and to some other special-purpose interrupts, such as the ACPI
+SCI that isn't associated with any specific device and is used for signaling
+many different types of events.
+
+The IRQF_NO_SUSPEND flag is used to indicate that to the IRQ subsystem when
+requesting a special-purpose interrupt.  It causes suspend_device_irqs() to
+leave the corresponding IRQ enabled so as to allow the interrupt to work all
+the time as expected.
+
+If that IRQ is shared, it will still be left enabled by suspend_device_irqs(),
+but the interrupt handlers that were installed for it without IRQF_NO_SUSPEND
+set will not be executed after suspend_device_irqs() has returned.  Then, the
+IRQ subsystem will only execute interrupt handlers for which IRQF_NO_SUSPEND is
+set.  In that case, if the final result of handing an interrupt is IRQ_NONE, the
+IRQ subsystem will assume that the interrupt came from one of the apparently
+suspended devices that share the IRQ with an IRQF_NO_SUSPEND interrupt source
+(or more of them).  As a result, the IRQ will be disabled and marked as
+"suspended" to prevent spurious interrupts from triggering going forward.
+Next
+(1) if a system suspend is in progress, it will be aborted or
+(2) if the system is in the "freeze" sleep state (suspend-to-idle), it will be
+    woken up,
+to allow the system to recover from that error condition gracefully.
+
+The IRQF_NO_SUSPEND flag should not be used when requesting system wakeup
+interrupts (that is, interrupts that are supposed to wake up the system from
+sleep states), unless there are other (that is, unrelated to system wakeup)
+reasons why those interrupts can legitimately trigger after suspend_device_irqs()
+has returned and before resume_device_irqs() is called during the subsequent
+system resume.
+
+
+System Wakeup Interrupts, enable_irq_wake() and disable_irq_wake()
+------------------------------------------------------------------
+
+System wakeup interrupts generally need to be configured to wake up the system
+from sleep states, especially if they are used for different purposes (e.g. as
+I/O interrupts) in the working state.
+
+That may involve turning on a special signal handling logic within the platform
+(such as an SoC) so that signals from a given line are routed in a different way
+during system sleep so as to trigger a system wakeup when needed.  For example,
+the platform may include a dedicated interrupt controller used specifically for
+handling system wakeup events.  Then, if a given interrupt line is supposed to
+wake up the system from sleep sates, the corresponding input of that interrupt
+controller needs to be enabled to handle signals from the line in question.
+After wakeup, it generally is better to disable that input to prevent the
+dedicated controller from triggering interrupts unnecessarily.
+
+The IRQ subsystem provides two helper functions to be used by device drivers for
+those purposes.  Namely, enable_irq_wake() turns on the platform's logic for
+handling the given IRQ as a system wakeup interrupt line and disable_irq_wake()
+turns that logic off.
+
+Calling enable_irq_wake() doesn't prevent the working-state logic for handling
+the given IRQ from being disabled by suspend_device_irqs(), so after the "late"
+phase of suspending devices the IRQ can only be expected to work as a system
+wakeup interrupt line.  The IRQ subsystem checks if there are any pending
+interrupts on those lines by calling check_wakeup_irqs() at the last (syscore)
+stage of full system suspend.  If there are any, it aborts the system suspend
+by returning -EBUSY from that function.
+
+
+System Wakeup Interrupts and Suspend-to-Idle
+--------------------------------------------
+
+Suspend-to-idle (also known as the "freeze" sleep state) is a relatively new
+system sleep state that works by idling all of the processors and waiting for
+interrupts right after the "noirq" phase of suspending devices.
+
+Of course, this means that all of the interrupts with IRQF_NO_SUSPEND set will
+bring CPUs out of idle while in that state, but they will not cause the IRQ
+subsystem to trigger a system wakeup, unless the final result of handling an
+interrupt is IRQ_NONE.
+
+System wakeup interrupts, in turn, are generally expected to trigger wakeup from
+system sleep states, including suspend-to-idle.  For this reason, all of the
+IRQs configured for system wakeup with enable_irq_wake(), previously disabled
+by suspend_device_irqs(), are re-enabled right before starting the final "go to
+an idle state and wait for an interrupt" loop of suspend-to-idle.  However, they
+are also reconfigured so that the original handlers associated with them will
+not be invoked at that time.  Those interrupt handlers are all temporarily
+replaced with stub handlers that always return IRQ_NONE.  That tells the IRQ
+subsystem to trigger wakeup from suspend-to-idle upon receiving one of those
+interrupts, which is analogous to how system wakeup from full suspend works
+(that is, system wakeup is triggered when there's a signal on one of the wakeup
+lines).
+
+The rationale for doing so is that, even if the original handlers for the
+wakeup interrupts were invoked (which is not the case for wakeup from full
+system suspend anyway), they wouldn't be able to access devices (presumably
+suspended at that point) and they would only be able to tell "wakeup!" to the
+IRQ core.  However, the core doesn't really need to ask them about that, as it
+can figure that out by itself well enough.
+
+When wakeup from suspend-to-idle is triggered, the wakeup IRQs are disabled
+again and their original behavior is restored.  They are subsequently re-enabled
+by resume_device_irqs(), as usual.


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [Update][PATCH 3/5] irq / PM: Make wakeup interrupts wake up from suspend-to-idle
  2014-08-08  1:58                                                         ` [Update][PATCH " Rafael J. Wysocki
@ 2014-08-09  0:28                                                           ` Rafael J. Wysocki
  0 siblings, 0 replies; 75+ messages in thread
From: Rafael J. Wysocki @ 2014-08-09  0:28 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, linux-kernel, Linux PM list, Dmitry Torokhov

On Friday, August 08, 2014 03:58:02 AM Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> Make IRQs enabled for system wakeup via enable_irq_wake() wake up
> the system from suspend-to-idle.
> 
> For this purpose, introduce a new routine for enabling and disabling
> wakeup interrupts, set_wakeup_irqs(), and make freeze_enter() call it
> to enable them before starting the suspend-to-idle loop and to
> disable them after that loop has been terminated.
> 
> When enabling an IRQ which is not a shared one, that routine
> replaces its original handler with a stub one always returning
> IRQ_NONE.  When disabling it, set_wakeup_irqs() restores the
> original handler for it.  This way, IRQ_NONE is returned for all
> of the wakeup interrupts during suspend-to-idle and that triggers
> the abort-suspend-or-wakeup condition in note_interrupt() causing
> the system to wake up.

Actually, I should have added this to the changelog here:

The line of reasoning here is as follows.

The previous changeset established the rule that

  (1) Unhandled interrupts after suspend_device_irqs() abort system
      suspends in progress (or wake up the system from suspend-to-idle
      if already there).

The next two rules follow from how suspend_device_irqs() works
and from the existing code in check_wakeup_irqs(), called by
syscore_suspend():

  (2) Interrupt handlers are not invoked for wakeup interrupts
      after suspend_device_irqs().

  (3) System wakeup interrups after suspend_device_irqs() cause
      full system suspends to be aborted.

In addition to the above, there is the requirement that

  (4) System wakeup interrupts should wake up the system from
      suspend-to-idle.

It immediately follows from (2) and (3) that no effort is made to
distinguish "genuine" wakeup interrupts from "spurious" ones.  They
all are treated in the same way.  Since (4) means that "genuine"
wakeup interrupts are supposed to wake up the system from
suspend-to-idle too, consistency with (2) and (3) requires that
"spurious" wakeup interrupts should do that too.  That also is
consistent with rule (1) and implies that there is no reason to
invoke interrupt handlers for wakeup interrups after
suspend_device_irqs() in the suspend-to-idle case.  Moreover, doing
so would go against rule (2).


> To avoid losing wakeup events, make note_interrupt() mark wakeup
> interrupts as pending before triggering wakeup for irqs_suspended
> set.
> 
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
> 
> When I was working on the doc (that I'm going to send shortly), it
> occured to me that it actually would be better to always return
> IRQ_NONE from interrupt handlers for wakeup interrupts when
> irqs_suspended is set - and mark them as pending to avoid losing
> wakeup events.  That way they'll work uniformly, even if someone is
> insane enough to use enable_irq_wake() on an IRQF_NO_SUSPEND IRQ.
> 
> Rafael
> 
> ---
>  include/linux/interrupt.h |    1 +
>  kernel/irq/pm.c           |   45 +++++++++++++++++++++++++++++++++++++++++++++
>  kernel/irq/spurious.c     |    6 +++++-
>  kernel/power/suspend.c    |    3 +++
>  4 files changed, 54 insertions(+), 1 deletion(-)
> 
> Index: linux-pm/include/linux/interrupt.h
> ===================================================================
> --- linux-pm.orig/include/linux/interrupt.h
> +++ linux-pm/include/linux/interrupt.h
> @@ -197,6 +197,7 @@ extern void irq_wake_thread(unsigned int
>  /* The following three functions are for the core kernel use only. */
>  extern void suspend_device_irqs(void);
>  extern void resume_device_irqs(void);
> +extern void set_wakeup_irqs(bool enable);
>  #ifdef CONFIG_PM_SLEEP
>  extern int check_wakeup_irqs(void);
>  #else
> Index: linux-pm/kernel/irq/pm.c
> ===================================================================
> --- linux-pm.orig/kernel/irq/pm.c
> +++ linux-pm/kernel/irq/pm.c
> @@ -130,3 +130,48 @@ int check_wakeup_irqs(void)
>  
>  	return 0;
>  }
> +
> +static irqreturn_t irq_pm_empty_handler(int irq, void *dev_id)
> +{
> +	return IRQ_NONE;
> +}
> +
> +void set_wakeup_irqs(bool enable)
> +{
> +	struct irq_desc *desc;
> +	int irq;
> +
> +	for_each_irq_desc(irq, desc) {
> +		struct irqaction *action = desc->action;
> +		unsigned long flags;
> +
> +		raw_spin_lock_irqsave(&desc->lock, flags);
> +
> +		if (action && irqd_is_wakeup_set(&desc->irq_data) &&
> +		    !desc->skip_suspend) {
> +			if (enable) {
> +				/*
> +				 * Replace handlers for not shared interrupts.
> +				 * Shared ones have wrapper handlers already.
> +				 */
> +				if (!action->next) {

And this check is too weak.  It should check action->s_handler against NULL too
or, alternatively, whether or not action->handler is the shared wrapper handler
already.

There are some changes I want to make to patch [2/5] to avoid compiling too
much code and adding overhead for CONFIG_PM_SLEEP unset, so I'll refresh the
whole series on Monday.  I'll add the doc to it too. :-)


> +					action->s_handler = action->handler;
> +					action->handler = irq_pm_empty_handler;
> +				}
> +				desc->istate &= ~IRQS_SUSPENDED;
> +				__enable_irq(desc, irq, false);
> +			} else {
> +				if (!(desc->istate & IRQS_SUSPENDED)) {
> +					__disable_irq(desc, irq, false);
> +					desc->istate |= IRQS_SUSPENDED;
> +				}
> +				if (action->handler == irq_pm_empty_handler) {
> +					action->handler = action->s_handler;
> +					action->s_handler = NULL;
> +				}
> +			}
> +		}
> +
> +		raw_spin_unlock_irqrestore(&desc->lock, flags);
> +	}
> +}
> Index: linux-pm/kernel/power/suspend.c
> ===================================================================
> --- linux-pm.orig/kernel/power/suspend.c
> +++ linux-pm/kernel/power/suspend.c
> @@ -12,6 +12,7 @@
>  #include <linux/delay.h>
>  #include <linux/errno.h>
>  #include <linux/init.h>
> +#include <linux/interrupt.h>
>  #include <linux/console.h>
>  #include <linux/cpu.h>
>  #include <linux/cpuidle.h>
> @@ -55,7 +56,9 @@ static void freeze_enter(void)
>  {
>  	cpuidle_use_deepest_state(true);
>  	cpuidle_resume();
> +	set_wakeup_irqs(true);
>  	wait_event(suspend_freeze_wait_head, suspend_freeze_wake);
> +	set_wakeup_irqs(false);
>  	cpuidle_pause();
>  	cpuidle_use_deepest_state(false);
>  }
> Index: linux-pm/kernel/irq/spurious.c
> ===================================================================
> --- linux-pm.orig/kernel/irq/spurious.c
> +++ linux-pm/kernel/irq/spurious.c
> @@ -277,7 +277,11 @@ void note_interrupt(unsigned int irq, st
>  		    irqreturn_t action_ret)
>  {
>  	if (unlikely(irqs_suspended && action_ret == IRQ_NONE)) {
> -		pr_err("IRQ %d: Unhandled while suspended\n", irq);
> +		if (irqd_is_wakeup_set(&desc->irq_data))
> +			desc->istate |= IRQS_PENDING;
> +		else
> +			pr_err("IRQ %d: Unhandled while suspended\n", irq);
> +
>  		desc->istate |= IRQS_SUSPENDED;
>  		desc->depth++;
>  		irq_disable(desc);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 75+ messages in thread

end of thread, other threads:[~2014-08-09  0:09 UTC | newest]

Thread overview: 75+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-24 21:26 [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED Peter Zijlstra
2014-07-24 22:02 ` Rafael J. Wysocki
2014-07-24 23:10 ` Rafael J. Wysocki
2014-07-25  5:58   ` Peter Zijlstra
2014-07-29 19:20     ` Brian Norris
2014-07-29 19:28       ` Peter Zijlstra
2014-07-29 20:41         ` Brian Norris
2014-07-25  9:27   ` Thomas Gleixner
2014-07-25 12:49     ` Rafael J. Wysocki
2014-07-25 13:55       ` Thomas Gleixner
2014-07-25  9:40 ` Thomas Gleixner
2014-07-25 12:40   ` Peter Zijlstra
2014-07-25 13:25     ` Peter Zijlstra
2014-07-25 17:03       ` Rafael J. Wysocki
2014-07-25 16:58         ` Peter Zijlstra
2014-07-25 21:00         ` Thomas Gleixner
2014-07-25 22:25           ` Rafael J. Wysocki
2014-07-25 23:07             ` Rafael J. Wysocki
2014-07-26 11:49             ` Rafael J. Wysocki
2014-07-26 11:53               ` Rafael J. Wysocki
2014-07-28  6:49               ` Peter Zijlstra
2014-07-28 12:33                 ` Thomas Gleixner
2014-07-28 13:04                   ` Peter Zijlstra
2014-07-28 21:53                   ` Rafael J. Wysocki
2014-07-28 23:01                     ` Rafael J. Wysocki
2014-07-29 12:46                       ` Thomas Gleixner
2014-07-29 13:33                         ` Rafael J. Wysocki
2014-07-30 21:46                           ` [PATCH 0/3] irq / PM: wakeup interrupt interface for drivers (was: Re: [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED) Rafael J. Wysocki
2014-07-30 21:51                             ` [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts Rafael J. Wysocki
2014-07-30 22:56                               ` Thomas Gleixner
2014-07-31  0:12                                 ` Thomas Gleixner
2014-07-31  2:14                                   ` Rafael J. Wysocki
2014-07-31 10:44                                     ` Thomas Gleixner
2014-07-31 18:36                                       ` Rafael J. Wysocki
2014-07-31 20:12                                         ` Alan Stern
2014-07-31 20:12                                           ` Alan Stern
2014-07-31 21:04                                           ` Rafael J. Wysocki
2014-07-31 23:41                                             ` Thomas Gleixner
2014-08-01  0:51                                               ` Rafael J. Wysocki
2014-08-01 14:41                                               ` Alan Stern
2014-08-01 14:41                                                 ` Alan Stern
2014-07-31 22:16                                         ` Thomas Gleixner
2014-08-01  0:08                                           ` Rafael J. Wysocki
2014-08-01  1:24                                             ` Rafael J. Wysocki
2014-08-01  9:40                                             ` [PATCH 1/3] irq / PM: New driver interface for wakeup interruptsn Thomas Gleixner
2014-08-01 13:45                                               ` Rafael J. Wysocki
2014-08-01 13:43                                                 ` Thomas Gleixner
2014-08-01 14:29                                                   ` Rafael J. Wysocki
2014-08-02  1:31                                                     ` Rafael J. Wysocki
2014-08-03 13:42                                                       ` Rafael J. Wysocki
2014-08-04  3:38                                                         ` Rafael J. Wysocki
2014-08-05 15:22                                                     ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Rafael J. Wysocki
2014-08-05 15:24                                                       ` [PATCH 1/5] PM / sleep: Mechanism for aborting system suspends unconditionally Rafael J. Wysocki
2014-08-05 23:29                                                         ` [Update][PATCH " Rafael J. Wysocki
2014-08-05 15:25                                                       ` [PATCH 2/5] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts Rafael J. Wysocki
2014-08-05 15:26                                                       ` [PATCH 3/5] irq / PM: Make wakeup interrupts wake up from suspend-to-idle Rafael J. Wysocki
2014-08-08  1:58                                                         ` [Update][PATCH " Rafael J. Wysocki
2014-08-09  0:28                                                           ` Rafael J. Wysocki
2014-08-05 15:27                                                       ` [PATCH 4/5] x86 / PM: Set IRQCHIP_SKIP_SET_WAKE for IOAPIC IRQ chip objects Rafael J. Wysocki
2014-08-05 15:28                                                       ` [PATCH 5/5] PCI / PM: Make PCIe PME interrupts wake up from suspend-to-idle Rafael J. Wysocki
2014-08-05 16:12                                                       ` [PATCH 0/5] irq / PM: Shared IRQs vs IRQF_NO_SUSPEND and suspend-to-idle wakeup Peter Zijlstra
2014-08-08  2:09                                                       ` Rafael J. Wysocki
2014-07-31 22:54                                         ` [PATCH 1/3] irq / PM: New driver interface for wakeup interrupts Thomas Gleixner
2014-07-30 21:51                             ` [PATCH 2/3] PCI / PM: Make PCIe PME interrupts wake up from "freeze" sleep state Rafael J. Wysocki
2014-07-30 21:52                             ` [PATCH 3/3] gpio-keys / PM: use enable/disable_device_irq_wake() Rafael J. Wysocki
2014-07-28 21:27                 ` [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED Rafael J. Wysocki
2014-07-27 15:53             ` Rafael J. Wysocki
2014-07-27 22:00               ` [PATCH, v2] Rafael J. Wysocki
2014-07-28 12:11                 ` Thomas Gleixner
2014-07-28 21:17                   ` [PATCH, v3] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts (was: Re: [PATCH, v2]) Rafael J. Wysocki
2014-07-29  7:28                     ` [PATCH, v4] irq / PM: Fix IRQF_NO_SUSPEND problem with shared interrupts Rafael J. Wysocki
2014-07-29 13:46                       ` [PATCH, v5] " Rafael J. Wysocki
2014-07-30  0:54                         ` [PATCH, v6] " Rafael J. Wysocki
2014-07-25 12:47   ` [RFC][PATCH] irq: Rework IRQF_NO_SUSPENDED Rafael J. Wysocki
2014-07-25 13:22     ` Peter Zijlstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.