linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Venkatesh Pallipadi <venki@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, "H. Peter Anvin" <hpa@zytor.com>,
	Suresh Siddha <suresh.b.siddha@intel.com>,
	Aaron Durbin <adurbin@google.com>, Paul Turner <pjt@google.com>,
	Yong Zhang <yong.zhang0@gmail.com>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH] Extend mwait idle to optimize away CAL and RES interrupts to an idle CPU -v1
Date: Thu, 23 Feb 2012 10:30:54 +0100	[thread overview]
Message-ID: <1329989454.24994.57.camel@twins> (raw)
In-Reply-To: <1329957415-15239-1-git-send-email-venki@google.com>

On Wed, 2012-02-22 at 16:36 -0800, Venkatesh Pallipadi wrote:

> Changes since previous versions:

>   Moved the changes into arch specific code as per PeterZ suggestion

You failed:

>  include/linux/sched.h               |    2 +
>  kernel/sched/core.c                 |   13 ++++++



> diff --git a/arch/x86/include/asm/ipiless_poke.h b/arch/x86/include/asm/ipiless_poke.h
> new file mode 100644
> index 0000000..58670c7
> --- /dev/null
> +++ b/arch/x86/include/asm/ipiless_poke.h
> @@ -0,0 +1,82 @@
> +#ifndef _ASM_X86_IPILESS_POKE_H
> +#define _ASM_X86_IPILESS_POKE_H
> +
> +#include <linux/sched.h>
> +#include <asm/thread_info.h>
> +
> +#ifdef CONFIG_SMP
> +
> +DECLARE_PER_CPU(atomic_t *, idle_task_ti_flags);
> +
> +/*
> + * Use 2 bits in idle_task's thead info flags:
> + * TIF_IPILESS_IDLE marks enter to and exit from idle states with ipiless
> + * wakeup capability.
> + * TIF_IPI_PENDING set by IPI source CPU if it finds that the IPI target CPU
> + * is in TIF_IPILESS_IDLE state (and TIF_IPI_PENDING is not already set).
> + * Setting of TIF_IPI_PENDING bit brings the target CPU out of idle state.
> + */
> +
> +static inline void ipiless_idle_enter(void)
> +{
> +	set_thread_flag(TIF_IPILESS_IDLE);
> +}
> +
> +static inline void ipiless_idle_exit(void)
> +{
> +	clear_thread_flag(TIF_IPILESS_IDLE);
> +}
> +
> +static inline int is_ipi_pending(void)
> +{
> +	return unlikely(test_thread_flag(TIF_IPI_PENDING));
> +}
> +
> +static inline int need_wakeup(void)
> +{
> +	return need_resched() || is_ipi_pending();
> +}
> +
> +static inline void ipiless_pending_work(void)
> +{
> +	if (is_ipi_pending()) {
> +		clear_thread_flag(TIF_IPI_PENDING);
> +		local_bh_disable();
> +		local_irq_disable();

That local_bh_disable() is completely superfluous, disabling IRQs
already kills bh.

> +		generic_smp_call_function_single_interrupt();
> +		__scheduler_ipi();

Why not scheduler_ipi()?

Also, you could keep a pending vector bitmask in a per-cpu variable to
avoid having to call all handlers. not sure that's worth it for just
two, but something to keep in mind for if/when this starts expanding.

> +		local_irq_enable();
> +		local_bh_enable();
> +	}
> +}

Why doesn't ipiless_idle_exit() call this? That would keep it isolated
to just those idle routines that actually use mwait instead of bothering
the generic idle paths with this.

> +static inline int ipiless_magic_poke(int cpu)
> +{
> +	int val;
> +	atomic_t *idle_flag = per_cpu(idle_task_ti_flags, cpu);

What's this atomic_t nonsense about? thread_info::flags is __u32,
casting it to atomic_t is complete rubbish.

> +
> +	val = atomic_read(idle_flag);

The __u32 version would look like: val = ACCESS_ONCE(*idle_flag);

> +	if (unlikely(val & _TIF_IPI_PENDING))
> +		return 1;
> +
> +	if (!(val & _TIF_IPILESS_IDLE))
> +		return 0;
> +
> +	if (val == atomic_cmpxchg(idle_flag, val, val | _TIF_IPI_PENDING))

The __u32 version would look like:

  if (val == cmpxchg(idle_flag, val, val | _TIF_IPI_PENDING))

Bonus win for being shorter!

> +		return 1;
> +
> +	return 0;
> +}
> +
> +#else
> +static inline void ipiless_pending_work(void) { }
> +static inline void ipiless_idle_enter(void) { }
> +static inline void ipiless_idle_exit(void) { }
> +
> +static inline int need_wakeup(void)
> +{
> +	return need_resched();
> +}
> +#endif
> +
> +#endif /* _ASM_X86_IPILESS_POKE_H */


> diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
> index a8ff227..e66a4c8 100644
> --- a/arch/x86/kernel/smp.c
> +++ b/arch/x86/kernel/smp.c
> @@ -27,6 +27,7 @@
>  #include <asm/mtrr.h>
>  #include <asm/tlbflush.h>
>  #include <asm/mmu_context.h>
> +#include <asm/ipiless_poke.h>
>  #include <asm/proto.h>
>  #include <asm/apic.h>
>  #include <asm/nmi.h>
> @@ -109,6 +110,14 @@
>   *	about nothing of note with C stepping upwards.
>   */
>  
> +DEFINE_PER_CPU(atomic_t *, idle_task_ti_flags);
> +
> +void __cpuinit thread_idle_state_setup(struct task_struct *idle, int cpu)
> +{
> +	per_cpu(idle_task_ti_flags, cpu) =
> +				(atomic_t *)(&(task_thread_info(idle)->flags));
> +}

As Ingo already pointed out, its the architecture that actually sets up
the idle threads, so putting callbacks into the generic code to find
them is kinda backwards.

Again, *yuck* at that atomic_t business.


> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 5255c9d..1558316 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1451,6 +1451,14 @@ static void sched_ttwu_pending(void)
>  	raw_spin_unlock(&rq->lock);
>  }
>  
> +void __scheduler_ipi(void)
> +{
> +	if (llist_empty(&this_rq()->wake_list))
> +		return;
> +
> +	sched_ttwu_pending();
> +}

FAIL!! It should be 100% identical to a normal IPI, that calls
scheduler_ipi() so this should too.

>  void scheduler_ipi(void)
>  {
>  	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
> @@ -4827,6 +4835,10 @@ void __cpuinit init_idle_bootup_task(struct task_struct *idle)
>  	idle->sched_class = &idle_sched_class;
>  }
>  
> +void __cpuinit __weak thread_idle_state_setup(struct task_struct *idle, int cpu)
> +{
> +}
> +
>  /**
>   * init_idle - set up an idle thread for a given CPU
>   * @idle: task in question
> @@ -4869,6 +4881,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
>  
>  	/* Set the preempt count _outside_ the spinlocks! */
>  	task_thread_info(idle)->preempt_count = 0;
> +	thread_idle_state_setup(idle, cpu);

I suggest you put this in smpboot.c someplace ;-)

  parent reply	other threads:[~2012-02-23  9:31 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-23  0:36 [PATCH] Extend mwait idle to optimize away CAL and RES interrupts to an idle CPU -v1 Venkatesh Pallipadi
2012-02-23  7:50 ` Ingo Molnar
2012-02-23  9:08   ` Peter Zijlstra
2012-02-23 20:04     ` Venki Pallipadi
2012-02-23 20:03   ` Venki Pallipadi
2012-03-02  0:33   ` Venki Pallipadi
2012-03-02  1:28     ` Suresh Siddha
2012-03-02  1:35       ` Venki Pallipadi
2012-03-02  1:37         ` Suresh Siddha
2012-03-02  2:00           ` Venki Pallipadi
2012-03-02  7:21             ` Ingo Molnar
2012-03-02 17:41               ` Suresh Siddha
2012-03-06 21:41                 ` fork_idle from wq cleanup Venkatesh Pallipadi
2012-03-06 21:41                   ` [PATCH 1/5] x86: Move fork_idle from wq and idle caching to common code Venkatesh Pallipadi
2012-03-06 21:41                   ` [PATCH 2/5] ia64: Use common fork_idle_from_wq in smpboot Venkatesh Pallipadi
2012-03-06 21:41                   ` [PATCH 3/5] mips: " Venkatesh Pallipadi
2012-03-06 22:51                     ` Ralf Baechle
2012-03-06 21:41                   ` [PATCH 4/5] powerpc: " Venkatesh Pallipadi
2012-03-06 21:41                   ` [PATCH 5/5] s390: " Venkatesh Pallipadi
2012-03-07  7:00                     ` Heiko Carstens
2012-03-07  6:06                   ` fork_idle from wq cleanup Suresh Siddha
2012-02-23  9:30 ` Peter Zijlstra [this message]
2012-02-23 19:34   ` [PATCH] Extend mwait idle to optimize away CAL and RES interrupts to an idle CPU -v1 Venki Pallipadi
2012-02-24  5:41     ` Yong Zhang
2012-02-24  6:13       ` Yong Zhang
2012-02-27  8:38         ` Peter Zijlstra
2012-02-27  9:08           ` Yong Zhang
2012-02-27  9:30             ` Peter Zijlstra
2012-02-27  9:51               ` Yong Zhang
2012-02-26  1:32       ` Paul E. McKenney
2012-02-27  9:06         ` Yong Zhang
2012-02-27 17:05           ` Paul E. McKenney
2012-02-28  7:12             ` Yong Zhang
2012-02-28 13:05               ` Paul E. McKenney
2012-02-29  6:36                 ` Yong Zhang
2012-02-27  8:45     ` Peter Zijlstra
2012-02-27 18:17       ` Venki Pallipadi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1329989454.24994.57.camel@twins \
    --to=peterz@infradead.org \
    --cc=adurbin@google.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pjt@google.com \
    --cc=suresh.b.siddha@intel.com \
    --cc=tglx@linutronix.de \
    --cc=venki@google.com \
    --cc=yong.zhang0@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).