From: Heiner Kallweit <hkallweit1@gmail.com>
To: Thomas Gleixner <tglx@linutronix.de>, Jakub Kicinski <kuba@kernel.org>
Cc: Eric Dumazet <edumazet@google.com>,
	Eric Dumazet <eric.dumazet@gmail.com>,
	David Miller <davem@davemloft.net>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	linux-kernel@vger.kernel.org
Subject: Re: Remove __napi_schedule_irqoff?
Date: Sun, 18 Oct 2020 13:57:19 +0200	[thread overview]
Message-ID: <3360b93e-4097-de43-0c4d-edda85d2ac72@gmail.com> (raw)
In-Reply-To: <878sc3j1tb.fsf@nanos.tec.linutronix.de>

On 18.10.2020 11:55, Thomas Gleixner wrote:
> Jakub,
> 
> On Sat, Oct 17 2020 at 16:29, Jakub Kicinski wrote:
>> On Sat, 17 Oct 2020 15:45:57 +0200 Heiner Kallweit wrote:
>>> It turned out that, most of the time, this isn't safe in certain
>>> configurations:
>>> - if CONFIG_PREEMPT_RT is set
>>> - if command line parameter threadirqs is set
>>>
>>> Having said that, drivers are being switched back to __napi_schedule(),
>>> see e.g. the patch in [0] and the related discussion. I thought about a
>>> __napi_schedule version that dynamically checks whether interrupts are
>>> disabled. But checking e.g. the force_irqthreads variable also comes at
>>> a cost, so we may not see a benefit compared to calling
>>> local_irq_save/local_irq_restore.
> 
> This does not have to be a variable check. It's trivial enough to make
> it a static key.
> 
Pretty cool. I have to admit that I wasn't aware of the jump label
mechanism.
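
For reference, a minimal sketch of the jump label / static key pattern Thomas
is referring to (a generic, hypothetical example, not taken from his patch
below): the key defaults to false and the branch site is patched to a plain
NOP, so the common case pays no variable load or conditional at all.

#include <linux/init.h>
#include <linux/jump_label.h>

/* Hypothetical example key, disabled by default. */
static DEFINE_STATIC_KEY_FALSE(my_feature_key);

static void hot_path(void)
{
	if (static_branch_unlikely(&my_feature_key)) {
		/* Rarely used slow path, only reachable after the key
		 * has been enabled at runtime. */
	}
	/* Fast path continues here without checking a variable. */
}

static int __init my_feature_setup(char *arg)
{
	/* Enabling the key live-patches all branch sites. */
	static_branch_enable(&my_feature_key);
	return 0;
}
early_param("my_feature", my_feature_setup);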

>>> If more or less all users have to switch back, then the question is
>>> whether we should remove __napi_schedule_irqoff.
>>> Instead of touching all users, we could make __napi_schedule_irqoff
>>> an alias for __napi_schedule for now.
>>>
>>> [0] https://lkml.org/lkml/2020/10/8/706
>>
>> We're effectively calling raise_softirq_irqoff() from IRQ handlers;
>> with force_irqthreads == true that's no longer legal.
> 
> Hrmpf, indeed. When force threading was introduced that did not exist.
> 
> The force-threaded handler is always invoked with bottom halves disabled,
> and bottom half processing happens either when the handler returns and
> the thread wrapper invokes local_bh_enable(), or from ksoftirqd. As
> everything runs in thread context, CPU-local serialization through
> local_bh_disable() is sufficient.
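
For context, a simplified sketch of the forced-thread wrapper in
kernel/irq/manage.c that provides this serialization (from memory, with
oneshot handling and accounting omitted):

static irqreturn_t irq_forced_thread_fn(struct irq_desc *desc,
					struct irqaction *action)
{
	irqreturn_t ret;

	/* The threaded handler runs with bottom halves disabled; any
	 * softirqs it raises are processed from local_bh_enable() below
	 * or handed to ksoftirqd, so local_bh_disable() is enough for
	 * CPU-local serialization. */
	local_bh_disable();
	ret = action->thread_fn(action->irq, action->dev_id);
	local_bh_enable();
	return ret;
}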
> 
>> Thomas - is the expectation that IRQ handlers never assume they have
>> IRQs disabled going forward? We don't have any performance numbers,
>> but if I'm reading Agner's tables right, POPF is 18 cycles on Broadwell.
>> Is PUSHF/POPF too cheap to bother?
> 
> It's not only PUSHF/POPF, it's PUSHF,CLI -> POPF, but yeah, that's pretty
> cheap nowadays. Still, doing the static key change might be a good
> thing. Completely untested patch below.
> 
> Quoting Eric:
> 
>> I have to say I do not understand why we want to defer to a thread the
>> hard IRQ that we use in the NAPI model.
>>
>> Whole point of NAPI was to keep hard irq handler very short.
> 
> Right. If the interrupt handler does little more than schedule NAPI,
> you can request it with IRQF_NO_THREAD, which will prevent it from
> being threaded even on RT.
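
A hypothetical driver-side sketch of that (all names made up; the real
pieces are request_irq(), IRQF_NO_THREAD, napi_schedule() and IRQ_HANDLED):

#include <linux/interrupt.h>
#include <linux/netdevice.h>

struct my_nic_priv {			/* hypothetical driver private data */
	struct napi_struct napi;
};

static irqreturn_t my_nic_isr(int irq, void *dev_id)
{
	struct my_nic_priv *priv = dev_id;

	/* Keep the hard handler minimal: ack/mask the device interrupt
	 * (device specific, omitted here) and hand the rest to NAPI. */
	napi_schedule(&priv->napi);
	return IRQ_HANDLED;
}

static int my_nic_setup_irq(struct my_nic_priv *priv, int irq)
{
	/* IRQF_NO_THREAD keeps this handler in hard interrupt context
	 * even with "threadirqs" on the command line or on PREEMPT_RT. */
	return request_irq(irq, my_nic_isr, IRQF_NO_THREAD, "my_nic", priv);
}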
> 
>> We should focus on transferring the NAPI work (potentially disruptive)
>> to a thread context, instead of the very minor hard irq trigger.
> 
> I read about that. I only looked briefly at the patches and wondered why
> this has its own threading mechanism instead of using the irq thread
> mechanics. I'll have a closer look in the next few days.
> 
> Thanks,
> 
>         tglx
> ---
> --- a/drivers/ide/ide-iops.c
> +++ b/drivers/ide/ide-iops.c
> @@ -109,7 +109,6 @@ int __ide_wait_stat(ide_drive_t *drive,
>  	ide_hwif_t *hwif = drive->hwif;
>  	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
>  	unsigned long flags;
> -	bool irqs_threaded = force_irqthreads;
>  	int i;
>  	u8 stat;
>  
> @@ -117,7 +116,7 @@ int __ide_wait_stat(ide_drive_t *drive,
>  	stat = tp_ops->read_status(hwif);
>  
>  	if (stat & ATA_BUSY) {
> -		if (!irqs_threaded) {
> +		if (!force_irqthreads_active()) {
>  			local_save_flags(flags);
>  			local_irq_enable_in_hardirq();
>  		}
> @@ -133,13 +132,13 @@ int __ide_wait_stat(ide_drive_t *drive,
>  				if ((stat & ATA_BUSY) == 0)
>  					break;
>  
> -				if (!irqs_threaded)
> +				if (!force_irqthreads_active())
>  					local_irq_restore(flags);
>  				*rstat = stat;
>  				return -EBUSY;
>  			}
>  		}
> -		if (!irqs_threaded)
> +		if (!force_irqthreads_active())
>  			local_irq_restore(flags);
>  	}
>  	/*
> --- a/drivers/ide/ide-taskfile.c
> +++ b/drivers/ide/ide-taskfile.c
> @@ -406,7 +406,8 @@ static ide_startstop_t pre_task_out_intr
>  		return startstop;
>  	}
>  
> -	if (!force_irqthreads && (drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
> +	if (!force_irqthreads_active() &&
> +	    (drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
>  		local_irq_disable();
>  
>  	ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -489,12 +489,16 @@ extern int irq_set_irqchip_state(unsigne
>  
>  #ifdef CONFIG_IRQ_FORCED_THREADING
>  # ifdef CONFIG_PREEMPT_RT
> -#  define force_irqthreads	(true)
> +static inline bool force_irqthreads_active(void) { return true; }
>  # else
> -extern bool force_irqthreads;
> +extern struct static_key_false force_irqthreads_key;
> +static inline bool force_irqthreads_active(void)
> +{
> +	return static_branch_unlikely(&force_irqthreads_key);
> +}
>  # endif
>  #else
> -#define force_irqthreads	(0)
> +static inline bool force_irqthreads_active(void) { return false; }
>  #endif
>  
>  #ifndef local_softirq_pending
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -25,12 +25,14 @@
>  #include "internals.h"
>  
>  #if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT)
> -__read_mostly bool force_irqthreads;
> -EXPORT_SYMBOL_GPL(force_irqthreads);
> +DEFINE_STATIC_KEY_FALSE(force_irqthreads_key);
> +#ifdef CONFIG_IDE
> +EXPORT_SYMBOL_GPL(force_irqthreads_key);
> +#endif
>  
>  static int __init setup_forced_irqthreads(char *arg)
>  {
> -	force_irqthreads = true;
> +	static_branch_enable(&force_irqthreads_key);
>  	return 0;
>  }
>  early_param("threadirqs", setup_forced_irqthreads);
> @@ -1155,8 +1157,8 @@ static int irq_thread(void *data)
>  	irqreturn_t (*handler_fn)(struct irq_desc *desc,
>  			struct irqaction *action);
>  
> -	if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD,
> -					&action->thread_flags))
> +	if (force_irqthreads_active() && test_bit(IRQTF_FORCED_THREAD,
> +						  &action->thread_flags))
>  		handler_fn = irq_forced_thread_fn;
>  	else
>  		handler_fn = irq_thread_fn;
> @@ -1217,7 +1219,7 @@ EXPORT_SYMBOL_GPL(irq_wake_thread);
>  
>  static int irq_setup_forced_threading(struct irqaction *new)
>  {
> -	if (!force_irqthreads)
> +	if (!force_irqthreads_active())
>  		return 0;
>  	if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
>  		return 0;
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -376,7 +376,7 @@ static inline void invoke_softirq(void)
>  	if (ksoftirqd_running(local_softirq_pending()))
>  		return;
>  
> -	if (!force_irqthreads) {
> +	if (!force_irqthreads_active()) {
>  #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
>  		/*
>  		 * We can safely execute softirq on the current stack if
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6282,9 +6282,11 @@ void __napi_schedule(struct napi_struct
>  {
>  	unsigned long flags;
>  
> -	local_irq_save(flags);
> +	if (force_irqthreads_active())
> +		local_irq_save(flags);
>  	____napi_schedule(this_cpu_ptr(&softnet_data), n);
> -	local_irq_restore(flags);
> +	if (force_irqthreads_active())
> +		local_irq_restore(flags);

The question is whether we want to modify __napi_schedule() or
__napi_schedule_irqoff(). This may depend on whether there are callers of
__napi_schedule() that require local_irq_save() even when
force_irqthreads_active() is false; I'm not sure about that. At first
glance it seems better to modify __napi_schedule_irqoff() instead, as
sketched below. The only drawback I see is that the function name would
then be a little inconsistent.

>  }
>  EXPORT_SYMBOL(__napi_schedule);
>  
> 
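
A minimal sketch of the __napi_schedule_irqoff() variant described above,
assuming __napi_schedule() keeps its unconditional local_irq_save() and the
force_irqthreads_active() helper from Thomas's patch is available (untested,
illustrative only; ____napi_schedule() and softnet_data are the existing
net/core/dev.c internals):

void __napi_schedule_irqoff(struct napi_struct *n)
{
	/* In hard interrupt context interrupts are already disabled, so
	 * the per-CPU queue can be touched directly.  With force-threaded
	 * interrupt handlers that assumption no longer holds, so fall
	 * back to the IRQ-disabling variant. */
	if (!force_irqthreads_active())
		____napi_schedule(this_cpu_ptr(&softnet_data), n);
	else
		__napi_schedule(n);
}
EXPORT_SYMBOL(__napi_schedule_irqoff);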

