linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-08 12:48 Dmitry Antipov
  2012-02-08 13:18 ` Russell King - ARM Linux
  2012-02-09  2:48 ` Yong Zhang
  0 siblings, 2 replies; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-08 12:48 UTC (permalink / raw)
  To: Rusty Russell, Ingo Molnar
  Cc: Venki Pallipadi, linux-kernel, x86, linux-arm-kernel, linaro-dev,
	patches, Dmitry Antipov

Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, "tsc=noirqtime"
is preserved, but issues a warning.

Suggested-by: Venki Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
 arch/arm/kernel/sched_clock.c |    3 +++
 arch/x86/Kconfig              |   11 -----------
 arch/x86/kernel/tsc.c         |    7 ++++---
 include/linux/sched.h         |    2 ++
 lib/Kconfig.debug             |   12 ++++++++++++
 lib/Makefile                  |    2 ++
 lib/irqtime.c                 |   12 ++++++++++++
 7 files changed, 35 insertions(+), 14 deletions(-)
 create mode 100644 lib/irqtime.c

diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..56d2a9d 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
 	if (read_sched_clock == jiffy_sched_clock_read)
 		setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
 
+	if (!no_sched_irq_time)
+		enable_sched_clock_irqtime();
+
 	sched_clock_poll(sched_clock_timer.data);
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
-config IRQ_TIME_ACCOUNTING
-	bool "Fine granularity task level IRQ time accounting"
-	default n
-	---help---
-	  Select this option to enable fine granularity task irq time
-	  accounting. This is done by reading a timestamp on each
-	  transitions between softirq and hardirq state, so there can be a
-	  small performance impact.
-
-	  If in doubt, say N here.
-
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..70510a3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
 
 __setup("notsc", notsc_setup);
 
-static int no_sched_irq_time;
-
 static int __init tsc_setup(char *str)
 {
 	if (!strcmp(str, "reliable"))
 		tsc_clocksource_reliable = 1;
-	if (!strncmp(str, "noirqtime", 9))
+	if (!strncmp(str, "noirqtime", 9)) {
+		printk(KERN_WARNING "tsc: tsc=noirqtime is "
+		       "obsolete, use noirqtime instead\n");
 		no_sched_irq_time = 1;
+	}
 	return 1;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..b3575b5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
  * The reason for this explicit opt-in is not to have perf penalty with
  * slow sched_clocks.
  */
+extern int no_sched_irq_time;
 extern void enable_sched_clock_irqtime(void);
 extern void disable_sched_clock_irqtime(void);
 #else
+#define no_sched_irq_time 1
 static inline void enable_sched_clock_irqtime(void) {}
 static inline void disable_sched_clock_irqtime(void) {}
 #endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..48be210 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
 	  application, you can say N to avoid the very slight overhead
 	  this adds.
 
+config IRQ_TIME_ACCOUNTING
+	bool "Fine granularity task level IRQ time accounting"
+	depends on (X86 || (ARM && HAVE_SCHED_CLOCK))
+	default n
+	---help---
+	  Select this option to enable fine granularity task irq time
+	  accounting. This is done by reading a timestamp on each
+	  transitions between softirq and hardirq state, so there can be a
+	  small performance impact.
+
+	  If in doubt, say N here.
+
 config TIMER_STATS
 	bool "Collect kernel timers statistics"
 	depends on DEBUG_KERNEL && PROC_FS
diff --git a/lib/Makefile b/lib/Makefile
index 18515f0..44d67d4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
+obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
+
 ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
   lib-y += dec_and_lock.o
 endif
diff --git a/lib/irqtime.c b/lib/irqtime.c
new file mode 100644
index 0000000..10d440d
--- /dev/null
+++ b/lib/irqtime.c
@@ -0,0 +1,12 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+int no_sched_irq_time;
+
+static int __init irqtime_setup(char *str)
+{
+	no_sched_irq_time = 1;
+	return 1;
+}
+
+__setup("noirqtime", irqtime_setup);
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
  2012-02-08 12:48 [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM Dmitry Antipov
@ 2012-02-08 13:18 ` Russell King - ARM Linux
  2012-02-08 15:15   ` Dmitry Antipov
  2012-02-09  2:51   ` Yong Zhang
  2012-02-09  2:48 ` Yong Zhang
  1 sibling, 2 replies; 19+ messages in thread
From: Russell King - ARM Linux @ 2012-02-08 13:18 UTC (permalink / raw)
  To: Dmitry Antipov
  Cc: Rusty Russell, Ingo Molnar, Venki Pallipadi, linaro-dev, patches,
	x86, linux-kernel, linux-arm-kernel

On Wed, Feb 08, 2012 at 04:48:34AM -0800, Dmitry Antipov wrote:
> Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
> ARM, move "noirqtime=" option to common debugging code.
> For a bit of backward compatibility, "tsc=noirqtime"
> is preserved, but issues a warning.
> 
> Suggested-by: Venki Pallipadi <venki@google.com>
> Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
> ---
>  arch/arm/kernel/sched_clock.c |    3 +++
>  arch/x86/Kconfig              |   11 -----------
>  arch/x86/kernel/tsc.c         |    7 ++++---
>  include/linux/sched.h         |    2 ++
>  lib/Kconfig.debug             |   12 ++++++++++++
>  lib/Makefile                  |    2 ++
>  lib/irqtime.c                 |   12 ++++++++++++
>  7 files changed, 35 insertions(+), 14 deletions(-)
>  create mode 100644 lib/irqtime.c
> 
> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> index 5416c7c..56d2a9d 100644
> --- a/arch/arm/kernel/sched_clock.c
> +++ b/arch/arm/kernel/sched_clock.c
> @@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
>  	if (read_sched_clock == jiffy_sched_clock_read)
>  		setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
>  
> +	if (!no_sched_irq_time)
> +		enable_sched_clock_irqtime();

Why are you placing this here?  sched_clock is available from the point
that it's registered, which should be before the first sched_clock()
call.

> +config IRQ_TIME_ACCOUNTING
> +	bool "Fine granularity task level IRQ time accounting"
> +	depends on (X86 || (ARM && HAVE_SCHED_CLOCK))

Even though it's not bad here, please get out of the habbit of throwing
unnecessary parens into the mix.  It can make stuff more difficult to
read and therefore confirm correctness.  (I've spent many a time
rewriting if() statements because of paren overuse.)

This could have been written:

	depends on X86 || (ARM && HAVE_SCHED_CLOCK)

However, ARM will always have HAVE_SCHED_CLOCK after the next merge window,
so this can become a much simpler:

	depends on X86 || ARM

Apart from these two points, the rest of the patch looks fine to me but
the ultimate decision about its acceptability is up to other people.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
  2012-02-08 13:18 ` Russell King - ARM Linux
@ 2012-02-08 15:15   ` Dmitry Antipov
  2012-02-08 15:24     ` Russell King - ARM Linux
  2012-02-09  2:51   ` Yong Zhang
  1 sibling, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-08 15:15 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Rusty Russell, Ingo Molnar, Venki Pallipadi, linaro-dev,
	linux-kernel, linux-arm-kernel

On 02/08/2012 05:18 AM, Russell King - ARM Linux wrote:

>> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
>> index 5416c7c..56d2a9d 100644
>> --- a/arch/arm/kernel/sched_clock.c
>> +++ b/arch/arm/kernel/sched_clock.c
>> @@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
>>   	if (read_sched_clock == jiffy_sched_clock_read)
>>   		setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
>>
>> +	if (!no_sched_irq_time)
>> +		enable_sched_clock_irqtime();
>
> Why are you placing this here?  sched_clock is available from the point
> that it's registered, which should be before the first sched_clock()
> call.

This is just because I'm thinking about:

if (read_sched_clock == jiffy_sched_clock_read)
         setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
else if (!no_sched_irq_time)
	enable_sched_clock_irqtime();

I suppose that "fine granularity task irq time accounting"
makes no sense if sched_clock() granularity is poor.

> This could have been written:
>
> 	depends on X86 || (ARM&&  HAVE_SCHED_CLOCK)
>
> However, ARM will always have HAVE_SCHED_CLOCK after the next merge window,
> so this can become a much simpler:
>
> 	depends on X86 || ARM

OK.

Dmitry


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
  2012-02-08 15:15   ` Dmitry Antipov
@ 2012-02-08 15:24     ` Russell King - ARM Linux
  0 siblings, 0 replies; 19+ messages in thread
From: Russell King - ARM Linux @ 2012-02-08 15:24 UTC (permalink / raw)
  To: Dmitry Antipov
  Cc: Rusty Russell, Ingo Molnar, Venki Pallipadi, linaro-dev,
	linux-kernel, linux-arm-kernel

On Wed, Feb 08, 2012 at 07:15:26AM -0800, Dmitry Antipov wrote:
> On 02/08/2012 05:18 AM, Russell King - ARM Linux wrote:
>> Why are you placing this here?  sched_clock is available from the point
>> that it's registered, which should be before the first sched_clock()
>> call.
>
> This is just because I'm thinking about:
>
> if (read_sched_clock == jiffy_sched_clock_read)
>         setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
> else if (!no_sched_irq_time)
> 	enable_sched_clock_irqtime();
>
> I suppose that "fine granularity task irq time accounting"
> makes no sense if sched_clock() granularity is poor.

Let me put it a different way - is there a reason not to do this in
setup_sched_clock() so that it becomes available as soon as sched_clock()
has been initialized by a platform?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
  2012-02-08 12:48 [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM Dmitry Antipov
  2012-02-08 13:18 ` Russell King - ARM Linux
@ 2012-02-09  2:48 ` Yong Zhang
  1 sibling, 0 replies; 19+ messages in thread
From: Yong Zhang @ 2012-02-09  2:48 UTC (permalink / raw)
  To: Dmitry Antipov
  Cc: Rusty Russell, Ingo Molnar, Venki Pallipadi, linux-kernel, x86,
	linux-arm-kernel, linaro-dev, patches, Peter Zijlstra

Cc'ing PeterZ.

On Wed, Feb 08, 2012 at 04:48:34AM -0800, Dmitry Antipov wrote:
> Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
> ARM, move "noirqtime=" option to common debugging code.
> For a bit of backward compatibility, "tsc=noirqtime"
> is preserved, but issues a warning.
> 
> Suggested-by: Venki Pallipadi <venki@google.com>
> Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
> ---
>  lib/Kconfig.debug             |   12 ++++++++++++
>  lib/Makefile                  |    2 ++
>  lib/irqtime.c                 |   12 ++++++++++++

Do we need a single file for this?
You know this feature is sched related, why not just move it
to kernel/sched/core.c?

Thanks,
Yong

>  7 files changed, 35 insertions(+), 14 deletions(-)
>  create mode 100644 lib/irqtime.c
> 
> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> index 5416c7c..56d2a9d 100644
> --- a/arch/arm/kernel/sched_clock.c
> +++ b/arch/arm/kernel/sched_clock.c
> @@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
>  	if (read_sched_clock == jiffy_sched_clock_read)
>  		setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
>  
> +	if (!no_sched_irq_time)
> +		enable_sched_clock_irqtime();
> +
>  	sched_clock_poll(sched_clock_timer.data);
>  }
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 5bed94e..4759676 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -805,17 +805,6 @@ config SCHED_MC
>  	  making when dealing with multi-core CPU chips at a cost of slightly
>  	  increased overhead in some places. If unsure say N here.
>  
> -config IRQ_TIME_ACCOUNTING
> -	bool "Fine granularity task level IRQ time accounting"
> -	default n
> -	---help---
> -	  Select this option to enable fine granularity task irq time
> -	  accounting. This is done by reading a timestamp on each
> -	  transitions between softirq and hardirq state, so there can be a
> -	  small performance impact.
> -
> -	  If in doubt, say N here.
> -
>  source "kernel/Kconfig.preempt"
>  
>  config X86_UP_APIC
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index a62c201..70510a3 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
>  
>  __setup("notsc", notsc_setup);
>  
> -static int no_sched_irq_time;
> -
>  static int __init tsc_setup(char *str)
>  {
>  	if (!strcmp(str, "reliable"))
>  		tsc_clocksource_reliable = 1;
> -	if (!strncmp(str, "noirqtime", 9))
> +	if (!strncmp(str, "noirqtime", 9)) {
> +		printk(KERN_WARNING "tsc: tsc=noirqtime is "
> +		       "obsolete, use noirqtime instead\n");
>  		no_sched_irq_time = 1;
> +	}
>  	return 1;
>  }
>  
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 7d379a6..b3575b5 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
>   * The reason for this explicit opt-in is not to have perf penalty with
>   * slow sched_clocks.
>   */
> +extern int no_sched_irq_time;
>  extern void enable_sched_clock_irqtime(void);
>  extern void disable_sched_clock_irqtime(void);
>  #else
> +#define no_sched_irq_time 1
>  static inline void enable_sched_clock_irqtime(void) {}
>  static inline void disable_sched_clock_irqtime(void) {}
>  #endif
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 8745ac7..48be210 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -299,6 +299,18 @@ config SCHEDSTATS
>  	  application, you can say N to avoid the very slight overhead
>  	  this adds.
>  
> +config IRQ_TIME_ACCOUNTING
> +	bool "Fine granularity task level IRQ time accounting"
> +	depends on (X86 || (ARM && HAVE_SCHED_CLOCK))
> +	default n
> +	---help---
> +	  Select this option to enable fine granularity task irq time
> +	  accounting. This is done by reading a timestamp on each
> +	  transitions between softirq and hardirq state, so there can be a
> +	  small performance impact.
> +
> +	  If in doubt, say N here.
> +
>  config TIMER_STATS
>  	bool "Collect kernel timers statistics"
>  	depends on DEBUG_KERNEL && PROC_FS
> diff --git a/lib/Makefile b/lib/Makefile
> index 18515f0..44d67d4 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
>  obj-$(CONFIG_DEBUG_LIST) += list_debug.o
>  obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
>  
> +obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
> +
>  ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
>    lib-y += dec_and_lock.o
>  endif
> diff --git a/lib/irqtime.c b/lib/irqtime.c
> new file mode 100644
> index 0000000..10d440d
> --- /dev/null
> +++ b/lib/irqtime.c
> @@ -0,0 +1,12 @@
> +#include <linux/kernel.h>
> +#include <linux/sched.h>
> +
> +int no_sched_irq_time;
> +
> +static int __init irqtime_setup(char *str)
> +{
> +	no_sched_irq_time = 1;
> +	return 1;
> +}
> +
> +__setup("noirqtime", irqtime_setup);
> -- 
> 1.7.7.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
Only stand for myself

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
  2012-02-08 13:18 ` Russell King - ARM Linux
  2012-02-08 15:15   ` Dmitry Antipov
@ 2012-02-09  2:51   ` Yong Zhang
  1 sibling, 0 replies; 19+ messages in thread
From: Yong Zhang @ 2012-02-09  2:51 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Dmitry Antipov, Rusty Russell, Ingo Molnar, Venki Pallipadi,
	linaro-dev, patches, x86, linux-kernel, linux-arm-kernel

On Wed, Feb 08, 2012 at 01:18:33PM +0000, Russell King - ARM Linux wrote:
> On Wed, Feb 08, 2012 at 04:48:34AM -0800, Dmitry Antipov wrote:
> > Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
> > ARM, move "noirqtime=" option to common debugging code.
> > For a bit of backward compatibility, "tsc=noirqtime"
> > is preserved, but issues a warning.
> > 
> > Suggested-by: Venki Pallipadi <venki@google.com>
> > Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
> > ---
> >  arch/arm/kernel/sched_clock.c |    3 +++
> >  arch/x86/Kconfig              |   11 -----------
> >  arch/x86/kernel/tsc.c         |    7 ++++---
> >  include/linux/sched.h         |    2 ++
> >  lib/Kconfig.debug             |   12 ++++++++++++
> >  lib/Makefile                  |    2 ++
> >  lib/irqtime.c                 |   12 ++++++++++++
> >  7 files changed, 35 insertions(+), 14 deletions(-)
> >  create mode 100644 lib/irqtime.c
> > 
> > diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> > index 5416c7c..56d2a9d 100644
> > --- a/arch/arm/kernel/sched_clock.c
> > +++ b/arch/arm/kernel/sched_clock.c
> > @@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
> >  	if (read_sched_clock == jiffy_sched_clock_read)
> >  		setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
> >  
> > +	if (!no_sched_irq_time)
> > +		enable_sched_clock_irqtime();
> 
> Why are you placing this here?  sched_clock is available from the point
> that it's registered, which should be before the first sched_clock()
> call.
> 
> > +config IRQ_TIME_ACCOUNTING
> > +	bool "Fine granularity task level IRQ time accounting"
> > +	depends on (X86 || (ARM && HAVE_SCHED_CLOCK))
> 
> Even though it's not bad here, please get out of the habbit of throwing
> unnecessary parens into the mix.  It can make stuff more difficult to
> read and therefore confirm correctness.  (I've spent many a time
> rewriting if() statements because of paren overuse.)
> 
> This could have been written:
> 
> 	depends on X86 || (ARM && HAVE_SCHED_CLOCK)
> 
> However, ARM will always have HAVE_SCHED_CLOCK after the next merge window,
> so this can become a much simpler:
> 
> 	depends on X86 || ARM

Maybe we can hand the depend-things to every ARCH, say let ARCH provides
HAVE_IRQ_TIME_ACCOUNTING. Thus we can make IRQ_TIME_ACCOUNTING
denpend on HAVE_IRQ_TIME_ACCOUNTING.

Thanks,
Yong

> 
> Apart from these two points, the rest of the patch looks fine to me but
> the ultimate decision about its acceptability is up to other people.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
Only stand for myself

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-28  6:29 Dmitry Antipov
  0 siblings, 0 replies; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-28  6:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Russell King, Rusty Russell, Ingo Molnar,
	Yong Zhang, Venki Pallipadi, x86, linux-arm-kernel, linaro-dev,
	patches, Dmitry Antipov

Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, X86-specific option
"tsc=noirqtime" is preserved, but issues a warning.

Suggested-by: Yong Zhang <yong.zhang0@gmail.com>
Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Suggested-by: Ingo Molnar <mingo@elte.hu>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
 Documentation/kernel-parameters.txt |    9 +++++----
 arch/arm/kernel/sched_clock.c       |    2 ++
 arch/x86/Kconfig                    |   11 -----------
 arch/x86/kernel/tsc.c               |   12 ++++++------
 include/linux/sched.h               |    6 +-----
 kernel/sched/core.c                 |   24 +++++++++++++++++++-----
 lib/Kconfig.debug                   |   12 ++++++++++++
 7 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 033d4e6..a5da255 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1790,6 +1790,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	noirqdebug	[X86-32] Disables the code which attempts to detect and
 			disable unhandled interrupt sources.
 
+	noirqtime	[X86,ARM] Run time disables IRQ_TIME_ACCOUNTING and
+			eliminates the timestamping on irq/softirq entry/exit.
+
 	no_timer_check	[X86,APIC] Disables the code which tests for
 			broken timer IRQ sources.
 
@@ -2636,10 +2639,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			as the stability checks done at bootup.	Used to enable
 			high-resolution timer mode on older hardware, and in
 			virtualized environment.
-			[x86] noirqtime: Do not use TSC to do irq accounting.
-			Used to run time disable IRQ_TIME_ACCOUNTING on any
-			platforms where RDTSC is slow and this accounting
-			can add overhead.
+			[x86] noirqtime: obsoleted by "noirqtime" generic option,
+			see it's documentation for details.
 
 	turbografx.map[2|3]=	[HW,JOY]
 			TurboGraFX parallel port interface
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..30b5f89 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -144,6 +144,8 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	 */
 	cd.epoch_ns = 0;
 
+	enable_sched_clock_irqtime();
+
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
-config IRQ_TIME_ACCOUNTING
-	bool "Fine granularity task level IRQ time accounting"
-	default n
-	---help---
-	  Select this option to enable fine granularity task irq time
-	  accounting. This is done by reading a timestamp on each
-	  transitions between softirq and hardirq state, so there can be a
-	  small performance impact.
-
-	  If in doubt, say N here.
-
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..f1b2b63 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
 
 __setup("notsc", notsc_setup);
 
-static int no_sched_irq_time;
-
 static int __init tsc_setup(char *str)
 {
 	if (!strcmp(str, "reliable"))
 		tsc_clocksource_reliable = 1;
-	if (!strncmp(str, "noirqtime", 9))
-		no_sched_irq_time = 1;
+	if (!strncmp(str, "noirqtime", 9)) {
+		printk(KERN_WARNING "tsc: tsc=noirqtime is "
+		       "obsolete, use noirqtime instead\n");
+		disable_sched_clock_irqtime();
+	}
 	return 1;
 }
 
@@ -978,8 +979,7 @@ void __init tsc_init(void)
 	/* now allow native_sched_clock() to use rdtsc */
 	tsc_disabled = 0;
 
-	if (!no_sched_irq_time)
-		enable_sched_clock_irqtime();
+	enable_sched_clock_irqtime();
 
 	lpj = ((u64)tsc_khz * 1000);
 	do_div(lpj, HZ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..ea4019c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1961,11 +1961,7 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
-/*
- * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
- * The reason for this explicit opt-in is not to have perf penalty with
- * slow sched_clocks.
- */
+extern int sched_clock_irqtime;
 extern void enable_sched_clock_irqtime(void);
 extern void disable_sched_clock_irqtime(void);
 #else
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b342f57..4693509 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -757,11 +757,17 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
 static DEFINE_PER_CPU(u64, cpu_softirq_time);
 
 static DEFINE_PER_CPU(u64, irq_start_time);
-static int sched_clock_irqtime;
+
+/*
+ * -1 if not initialized, 0 if disabled with "noirqtime" kernel option
+ * or after unstable clock was detected, 1 if enabled and active.
+ */
+__read_mostly int sched_clock_irqtime = -1;
 
 void enable_sched_clock_irqtime(void)
 {
-	sched_clock_irqtime = 1;
+	if (sched_clock_irqtime == -1)
+		sched_clock_irqtime = 1;
 }
 
 void disable_sched_clock_irqtime(void)
@@ -769,6 +775,14 @@ void disable_sched_clock_irqtime(void)
 	sched_clock_irqtime = 0;
 }
 
+static int __init irqtime_setup(char *str)
+{
+	sched_clock_irqtime = 0;
+	return 1;
+}
+
+__setup("noirqtime", irqtime_setup);
+
 #ifndef CONFIG_64BIT
 static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
 
@@ -822,7 +836,7 @@ void account_system_vtime(struct task_struct *curr)
 	s64 delta;
 	int cpu;
 
-	if (!sched_clock_irqtime)
+	if (sched_clock_irqtime < 1)
 		return;
 
 	local_irq_save(flags);
@@ -2852,7 +2866,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();
 
-	if (sched_clock_irqtime) {
+	if (sched_clock_irqtime > 0) {
 		irqtime_account_process_tick(p, user_tick, rq);
 		return;
 	}
@@ -2886,7 +2900,7 @@ void account_steal_ticks(unsigned long ticks)
 void account_idle_ticks(unsigned long ticks)
 {
 
-	if (sched_clock_irqtime) {
+	if (sched_clock_irqtime > 0) {
 		irqtime_account_idle_ticks(ticks);
 		return;
 	}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..236e814 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
 	  application, you can say N to avoid the very slight overhead
 	  this adds.
 
+config IRQ_TIME_ACCOUNTING
+	bool "Fine granularity task level IRQ time accounting"
+	depends on X86 || ARM
+	default n
+	---help---
+	  Select this option to enable fine granularity task irq time
+	  accounting. This is done by reading a timestamp on each
+	  transitions between softirq and hardirq state, so there can be a
+	  small performance impact.
+
+	  If in doubt, say N here.
+
 config TIMER_STATS
 	bool "Collect kernel timers statistics"
 	depends on DEBUG_KERNEL && PROC_FS
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
  2012-02-27 10:12 ` Peter Zijlstra
@ 2012-02-28  6:19   ` Dmitry Antipov
  0 siblings, 0 replies; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-28  6:19 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Russell King, Rusty Russell, Ingo Molnar,
	Yong Zhang, Venki Pallipadi, x86, linux-arm-kernel, linaro-dev,
	patches

On 02/27/2012 02:12 PM, Peter Zijlstra wrote:

>> -extern void enable_sched_clock_irqtime(void);
>> -extern void disable_sched_clock_irqtime(void);
>> +extern int sched_clock_irqtime;
>> +static inline void enable_sched_clock_irqtime(void)
>> +{
>> +	if (sched_clock_irqtime == -1)
>> +		sched_clock_irqtime = 1;
>> +}
>> +static inline void disable_sched_clock_irqtime(void)
>> +{
>> +	sched_clock_irqtime = 0;
>> +}
>>   #else
>>   static inline void enable_sched_clock_irqtime(void) {}
>>   static inline void disable_sched_clock_irqtime(void) {}
>
> Please keep them out-of-line, its not a fast path and it avoids having
> to expose the state variable.

OK

>> +/*
>> + * -1 if not initialized, 0 if disabled with "noirqtime" kernel option
>> + * or after unstable clock was detected, 1 if enabled and active.
>> + */
>
> You forgot to explain what you need the tri-state for.
>
>> +__read_mostly int sched_clock_irqtime = -1;

The comment above should be a sufficient explanation, isn't it?

It's a tri-state just because it "merges" two variables: internal state (enabled/disabled)
and the value passed by "noirqtime" option (turn it on, default/turn it off). It can be
enabled only if it was not turned off explicitly, i.e. -1 => 1 transition is possible,
but 0 -> 1 is not. The same rule applies to a situation when an unstable clock is detected.

Dmitry

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
  2012-02-20  6:04 Dmitry Antipov
@ 2012-02-27 10:12 ` Peter Zijlstra
  2012-02-28  6:19   ` Dmitry Antipov
  0 siblings, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2012-02-27 10:12 UTC (permalink / raw)
  To: Dmitry Antipov
  Cc: linux-kernel, Russell King, Rusty Russell, Ingo Molnar,
	Yong Zhang, Venki Pallipadi, x86, linux-arm-kernel, linaro-dev,
	patches, Venkatesh Pallipadi

On Mon, 2012-02-20 at 10:04 +0400, Dmitry Antipov wrote:
> Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
> ARM, move "noirqtime=" option to common debugging code.
> For a bit of backward compatibility, X86-specific option
> "tsc=noirqtime" is preserved, but issues a warning.
> 
> Suggested-by: Yong Zhang <yong.zhang0@gmail.com>
> Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
> Suggested-by: Ingo Molnar <mingo@elte.hu>
> Acked-by: Venkatesh Pallipadi <venki@google.com>
> Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>

Thanks for Cc'ing me (maintainer) and Venki (original author of the
stuff) :-)


> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 7d379a6..9b13f79 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1961,13 +1961,16 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
>  #endif
>  
>  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> -/*
> - * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
> - * The reason for this explicit opt-in is not to have perf penalty with
> - * slow sched_clocks.
> - */
> -extern void enable_sched_clock_irqtime(void);
> -extern void disable_sched_clock_irqtime(void);
> +extern int sched_clock_irqtime;
> +static inline void enable_sched_clock_irqtime(void)
> +{
> +	if (sched_clock_irqtime == -1)
> +		sched_clock_irqtime = 1;
> +}
> +static inline void disable_sched_clock_irqtime(void)
> +{
> +	sched_clock_irqtime = 0;
> +}
>  #else
>  static inline void enable_sched_clock_irqtime(void) {}
>  static inline void disable_sched_clock_irqtime(void) {}

Please keep them out-of-line, its not a fast path and it avoids having
to expose the state variable.

> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 5255c9d..4e7a197 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -757,18 +757,21 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
>  static DEFINE_PER_CPU(u64, cpu_softirq_time);
>  
>  static DEFINE_PER_CPU(u64, irq_start_time);
> -static int sched_clock_irqtime;
>  
> -void enable_sched_clock_irqtime(void)
> -{
> -	sched_clock_irqtime = 1;
> -}
> +/*
> + * -1 if not initialized, 0 if disabled with "noirqtime" kernel option
> + * or after unstable clock was detected, 1 if enabled and active.
> + */

You forgot to explain what you need the tri-state for.

> +__read_mostly int sched_clock_irqtime = -1;
>  
> -void disable_sched_clock_irqtime(void)
> +static int __init irqtime_setup(char *str)
>  {
>  	sched_clock_irqtime = 0;
> +	return 1;
>  }
>  
> +__setup("noirqtime", irqtime_setup);
> +
>  #ifndef CONFIG_64BIT
>  static DEFINE_PER_CPU(seqcount_t, irq_time_seq);

Other than that no real objections.. I guess.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-20  6:04 Dmitry Antipov
  2012-02-27 10:12 ` Peter Zijlstra
  0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-20  6:04 UTC (permalink / raw)
  To: linux-kernel
  Cc: Russell King, Rusty Russell, Ingo Molnar, Yong Zhang,
	Venki Pallipadi, x86, linux-arm-kernel, linaro-dev, patches,
	Dmitry Antipov

Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, X86-specific option
"tsc=noirqtime" is preserved, but issues a warning.

Suggested-by: Yong Zhang <yong.zhang0@gmail.com>
Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Suggested-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
 Documentation/kernel-parameters.txt |    9 +++++----
 arch/arm/kernel/sched_clock.c       |    2 ++
 arch/x86/Kconfig                    |   11 -----------
 arch/x86/kernel/tsc.c               |   12 ++++++------
 include/linux/sched.h               |   17 ++++++++++-------
 kernel/sched/core.c                 |   21 ++++++++++++---------
 lib/Kconfig.debug                   |   12 ++++++++++++
 7 files changed, 47 insertions(+), 37 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 033d4e6..a5da255 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1790,6 +1790,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	noirqdebug	[X86-32] Disables the code which attempts to detect and
 			disable unhandled interrupt sources.
 
+	noirqtime	[X86,ARM] Run time disables IRQ_TIME_ACCOUNTING and
+			eliminates the timestamping on irq/softirq entry/exit.
+
 	no_timer_check	[X86,APIC] Disables the code which tests for
 			broken timer IRQ sources.
 
@@ -2636,10 +2639,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			as the stability checks done at bootup.	Used to enable
 			high-resolution timer mode on older hardware, and in
 			virtualized environment.
-			[x86] noirqtime: Do not use TSC to do irq accounting.
-			Used to run time disable IRQ_TIME_ACCOUNTING on any
-			platforms where RDTSC is slow and this accounting
-			can add overhead.
+			[x86] noirqtime: obsoleted by "noirqtime" generic option,
+			see it's documentation for details.
 
 	turbografx.map[2|3]=	[HW,JOY]
 			TurboGraFX parallel port interface
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..30b5f89 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -144,6 +144,8 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	 */
 	cd.epoch_ns = 0;
 
+	enable_sched_clock_irqtime();
+
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
-config IRQ_TIME_ACCOUNTING
-	bool "Fine granularity task level IRQ time accounting"
-	default n
-	---help---
-	  Select this option to enable fine granularity task irq time
-	  accounting. This is done by reading a timestamp on each
-	  transitions between softirq and hardirq state, so there can be a
-	  small performance impact.
-
-	  If in doubt, say N here.
-
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..f1b2b63 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
 
 __setup("notsc", notsc_setup);
 
-static int no_sched_irq_time;
-
 static int __init tsc_setup(char *str)
 {
 	if (!strcmp(str, "reliable"))
 		tsc_clocksource_reliable = 1;
-	if (!strncmp(str, "noirqtime", 9))
-		no_sched_irq_time = 1;
+	if (!strncmp(str, "noirqtime", 9)) {
+		printk(KERN_WARNING "tsc: tsc=noirqtime is "
+		       "obsolete, use noirqtime instead\n");
+		disable_sched_clock_irqtime();
+	}
 	return 1;
 }
 
@@ -978,8 +979,7 @@ void __init tsc_init(void)
 	/* now allow native_sched_clock() to use rdtsc */
 	tsc_disabled = 0;
 
-	if (!no_sched_irq_time)
-		enable_sched_clock_irqtime();
+	enable_sched_clock_irqtime();
 
 	lpj = ((u64)tsc_khz * 1000);
 	do_div(lpj, HZ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..9b13f79 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1961,13 +1961,16 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
-/*
- * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
- * The reason for this explicit opt-in is not to have perf penalty with
- * slow sched_clocks.
- */
-extern void enable_sched_clock_irqtime(void);
-extern void disable_sched_clock_irqtime(void);
+extern int sched_clock_irqtime;
+static inline void enable_sched_clock_irqtime(void)
+{
+	if (sched_clock_irqtime == -1)
+		sched_clock_irqtime = 1;
+}
+static inline void disable_sched_clock_irqtime(void)
+{
+	sched_clock_irqtime = 0;
+}
 #else
 static inline void enable_sched_clock_irqtime(void) {}
 static inline void disable_sched_clock_irqtime(void) {}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5255c9d..4e7a197 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -757,18 +757,21 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
 static DEFINE_PER_CPU(u64, cpu_softirq_time);
 
 static DEFINE_PER_CPU(u64, irq_start_time);
-static int sched_clock_irqtime;
 
-void enable_sched_clock_irqtime(void)
-{
-	sched_clock_irqtime = 1;
-}
+/*
+ * -1 if not initialized, 0 if disabled with "noirqtime" kernel option
+ * or after unstable clock was detected, 1 if enabled and active.
+ */
+__read_mostly int sched_clock_irqtime = -1;
 
-void disable_sched_clock_irqtime(void)
+static int __init irqtime_setup(char *str)
 {
 	sched_clock_irqtime = 0;
+	return 1;
 }
 
+__setup("noirqtime", irqtime_setup);
+
 #ifndef CONFIG_64BIT
 static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
 
@@ -822,7 +825,7 @@ void account_system_vtime(struct task_struct *curr)
 	s64 delta;
 	int cpu;
 
-	if (!sched_clock_irqtime)
+	if (sched_clock_irqtime < 1)
 		return;
 
 	local_irq_save(flags);
@@ -2853,7 +2856,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();
 
-	if (sched_clock_irqtime) {
+	if (sched_clock_irqtime > 0) {
 		irqtime_account_process_tick(p, user_tick, rq);
 		return;
 	}
@@ -2887,7 +2890,7 @@ void account_steal_ticks(unsigned long ticks)
 void account_idle_ticks(unsigned long ticks)
 {
 
-	if (sched_clock_irqtime) {
+	if (sched_clock_irqtime > 0) {
 		irqtime_account_idle_ticks(ticks);
 		return;
 	}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..236e814 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
 	  application, you can say N to avoid the very slight overhead
 	  this adds.
 
+config IRQ_TIME_ACCOUNTING
+	bool "Fine granularity task level IRQ time accounting"
+	depends on X86 || ARM
+	default n
+	---help---
+	  Select this option to enable fine granularity task irq time
+	  accounting. This is done by reading a timestamp on each
+	  transitions between softirq and hardirq state, so there can be a
+	  small performance impact.
+
+	  If in doubt, say N here.
+
 config TIMER_STATS
 	bool "Collect kernel timers statistics"
 	depends on DEBUG_KERNEL && PROC_FS
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
  2012-02-11  0:02 Dmitry Antipov
@ 2012-02-17 11:22 ` Ingo Molnar
  0 siblings, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2012-02-17 11:22 UTC (permalink / raw)
  To: Dmitry Antipov
  Cc: linux-kernel, Russell King, Rusty Russell, Ingo Molnar,
	Yong Zhang, Venki Pallipadi, x86, linux-arm-kernel, linaro-dev,
	patches


* Dmitry Antipov <dmitry.antipov@linaro.org> wrote:

> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -757,18 +757,20 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
>  static DEFINE_PER_CPU(u64, cpu_softirq_time);
>  
>  static DEFINE_PER_CPU(u64, irq_start_time);
> -static int sched_clock_irqtime;
>  
> -void enable_sched_clock_irqtime(void)
> -{
> -	sched_clock_irqtime = 1;
> -}
> +/* -1 if not initialized, 0 if disabled with "noirqtime" kernel option
> + * or after unstable clock was detected, 1 if enabled and active.
> + */

Please use the customary (multi-line) comment style:

  /*
   * Comment .....
   * ...... goes here.
   */

specified in Documentation/CodingStyle.

> +int sched_clock_irqtime = -1;

should be turned into __read_mostly I guess.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-11  0:02 Dmitry Antipov
  2012-02-17 11:22 ` Ingo Molnar
  0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-11  0:02 UTC (permalink / raw)
  To: linux-kernel
  Cc: Russell King, Rusty Russell, Ingo Molnar, Yong Zhang,
	Venki Pallipadi, x86, linux-arm-kernel, linaro-dev, patches,
	Dmitry Antipov

Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, X86-specific option
"tsc=noirqtime" is preserved, but issues a warning.

Suggested-by: Yong Zhang <yong.zhang0@gmail.com>
Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
 Documentation/kernel-parameters.txt |    9 +++++----
 arch/arm/kernel/sched_clock.c       |    2 ++
 arch/x86/Kconfig                    |   11 -----------
 arch/x86/kernel/tsc.c               |   12 ++++++------
 include/linux/sched.h               |   17 ++++++++++-------
 kernel/sched/core.c                 |   20 +++++++++++---------
 lib/Kconfig.debug                   |   12 ++++++++++++
 7 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 033d4e6..a5da255 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1790,6 +1790,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	noirqdebug	[X86-32] Disables the code which attempts to detect and
 			disable unhandled interrupt sources.
 
+	noirqtime	[X86,ARM] Run time disables IRQ_TIME_ACCOUNTING and
+			eliminates the timestamping on irq/softirq entry/exit.
+
 	no_timer_check	[X86,APIC] Disables the code which tests for
 			broken timer IRQ sources.
 
@@ -2636,10 +2639,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			as the stability checks done at bootup.	Used to enable
 			high-resolution timer mode on older hardware, and in
 			virtualized environment.
-			[x86] noirqtime: Do not use TSC to do irq accounting.
-			Used to run time disable IRQ_TIME_ACCOUNTING on any
-			platforms where RDTSC is slow and this accounting
-			can add overhead.
+			[x86] noirqtime: obsoleted by "noirqtime" generic option,
+			see it's documentation for details.
 
 	turbografx.map[2|3]=	[HW,JOY]
 			TurboGraFX parallel port interface
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..30b5f89 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -144,6 +144,8 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	 */
 	cd.epoch_ns = 0;
 
+	enable_sched_clock_irqtime();
+
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
-config IRQ_TIME_ACCOUNTING
-	bool "Fine granularity task level IRQ time accounting"
-	default n
-	---help---
-	  Select this option to enable fine granularity task irq time
-	  accounting. This is done by reading a timestamp on each
-	  transitions between softirq and hardirq state, so there can be a
-	  small performance impact.
-
-	  If in doubt, say N here.
-
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..f1b2b63 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
 
 __setup("notsc", notsc_setup);
 
-static int no_sched_irq_time;
-
 static int __init tsc_setup(char *str)
 {
 	if (!strcmp(str, "reliable"))
 		tsc_clocksource_reliable = 1;
-	if (!strncmp(str, "noirqtime", 9))
-		no_sched_irq_time = 1;
+	if (!strncmp(str, "noirqtime", 9)) {
+		printk(KERN_WARNING "tsc: tsc=noirqtime is "
+		       "obsolete, use noirqtime instead\n");
+		disable_sched_clock_irqtime();
+	}
 	return 1;
 }
 
@@ -978,8 +979,7 @@ void __init tsc_init(void)
 	/* now allow native_sched_clock() to use rdtsc */
 	tsc_disabled = 0;
 
-	if (!no_sched_irq_time)
-		enable_sched_clock_irqtime();
+	enable_sched_clock_irqtime();
 
 	lpj = ((u64)tsc_khz * 1000);
 	do_div(lpj, HZ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..9b13f79 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1961,13 +1961,16 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
-/*
- * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
- * The reason for this explicit opt-in is not to have perf penalty with
- * slow sched_clocks.
- */
-extern void enable_sched_clock_irqtime(void);
-extern void disable_sched_clock_irqtime(void);
+extern int sched_clock_irqtime;
+static inline void enable_sched_clock_irqtime(void)
+{
+	if (sched_clock_irqtime == -1)
+		sched_clock_irqtime = 1;
+}
+static inline void disable_sched_clock_irqtime(void)
+{
+	sched_clock_irqtime = 0;
+}
 #else
 static inline void enable_sched_clock_irqtime(void) {}
 static inline void disable_sched_clock_irqtime(void) {}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5255c9d..a7ec043 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -757,18 +757,20 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
 static DEFINE_PER_CPU(u64, cpu_softirq_time);
 
 static DEFINE_PER_CPU(u64, irq_start_time);
-static int sched_clock_irqtime;
 
-void enable_sched_clock_irqtime(void)
-{
-	sched_clock_irqtime = 1;
-}
+/* -1 if not initialized, 0 if disabled with "noirqtime" kernel option
+ * or after unstable clock was detected, 1 if enabled and active.
+ */
+int sched_clock_irqtime = -1;
 
-void disable_sched_clock_irqtime(void)
+static int __init irqtime_setup(char *str)
 {
 	sched_clock_irqtime = 0;
+	return 1;
 }
 
+__setup("noirqtime", irqtime_setup);
+
 #ifndef CONFIG_64BIT
 static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
 
@@ -822,7 +824,7 @@ void account_system_vtime(struct task_struct *curr)
 	s64 delta;
 	int cpu;
 
-	if (!sched_clock_irqtime)
+	if (sched_clock_irqtime < 1)
 		return;
 
 	local_irq_save(flags);
@@ -2853,7 +2855,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();
 
-	if (sched_clock_irqtime) {
+	if (sched_clock_irqtime > 0) {
 		irqtime_account_process_tick(p, user_tick, rq);
 		return;
 	}
@@ -2887,7 +2889,7 @@ void account_steal_ticks(unsigned long ticks)
 void account_idle_ticks(unsigned long ticks)
 {
 
-	if (sched_clock_irqtime) {
+	if (sched_clock_irqtime > 0) {
 		irqtime_account_idle_ticks(ticks);
 		return;
 	}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..236e814 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
 	  application, you can say N to avoid the very slight overhead
 	  this adds.
 
+config IRQ_TIME_ACCOUNTING
+	bool "Fine granularity task level IRQ time accounting"
+	depends on X86 || ARM
+	default n
+	---help---
+	  Select this option to enable fine granularity task irq time
+	  accounting. This is done by reading a timestamp on each
+	  transitions between softirq and hardirq state, so there can be a
+	  small performance impact.
+
+	  If in doubt, say N here.
+
 config TIMER_STATS
 	bool "Collect kernel timers statistics"
 	depends on DEBUG_KERNEL && PROC_FS
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
  2012-02-09 17:25 Dmitry Antipov
@ 2012-02-10 22:28 ` Venki Pallipadi
  0 siblings, 0 replies; 19+ messages in thread
From: Venki Pallipadi @ 2012-02-10 22:28 UTC (permalink / raw)
  To: Dmitry Antipov
  Cc: linux-kernel, Russell King, Rusty Russell, Ingo Molnar,
	Yong Zhang, x86, linux-arm-kernel, linaro-dev, patches

> +       noirqtime       [X86,ARM] Used to run time disable IRQ_TIME_ACCOUNTING,
> +                       should give a negligible performance improvement.

Can you reword the above "negligible performance improvement" above to
something

> +       noirqtime       [X86,ARM] Run time disables IRQ_TIME_ACCOUNTING and
> +                       eliminates the timestamping on irq/softirq entry/exit.

Otherwise,
Acked-by: Venkatesh Pallipadi <venki@google.com>


On Thu, Feb 9, 2012 at 9:25 AM, Dmitry Antipov
<dmitry.antipov@linaro.org> wrote:
> Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
> ARM, move "noirqtime=" option to common debugging code.
> For a bit of backward compatibility, X86-specific option
> "tsc=noirqtime" is preserved, but issues a warning.
>
> Suggested-by: Yong Zhang <yong.zhang0@gmail.com>
> Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
> Suggested-by: Venki Pallipadi <venki@google.com>
> Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
> ---
>  Documentation/kernel-parameters.txt |    9 +++++----
>  arch/arm/kernel/sched_clock.c       |    2 ++
>  arch/x86/Kconfig                    |   11 -----------
>  arch/x86/kernel/tsc.c               |   12 ++++++------
>  include/linux/sched.h               |   17 ++++++++++-------
>  kernel/sched/core.c                 |   20 +++++++++++---------
>  lib/Kconfig.debug                   |   12 ++++++++++++
>  7 files changed, 46 insertions(+), 37 deletions(-)
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 033d4e6..666d20e 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -1790,6 +1790,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
>        noirqdebug      [X86-32] Disables the code which attempts to detect and
>                        disable unhandled interrupt sources.
>
> +       noirqtime       [X86,ARM] Used to run time disable IRQ_TIME_ACCOUNTING,
> +                       should give a negligible performance improvement.
> +
>        no_timer_check  [X86,APIC] Disables the code which tests for
>                        broken timer IRQ sources.
>
> @@ -2636,10 +2639,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
>                        as the stability checks done at bootup. Used to enable
>                        high-resolution timer mode on older hardware, and in
>                        virtualized environment.
> -                       [x86] noirqtime: Do not use TSC to do irq accounting.
> -                       Used to run time disable IRQ_TIME_ACCOUNTING on any
> -                       platforms where RDTSC is slow and this accounting
> -                       can add overhead.
> +                       [x86] noirqtime: obsoleted by "noirqtime" generic option,
> +                       see it's documentation for details.
>
>        turbografx.map[2|3]=    [HW,JOY]
>                        TurboGraFX parallel port interface
> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> index 5416c7c..30b5f89 100644
> --- a/arch/arm/kernel/sched_clock.c
> +++ b/arch/arm/kernel/sched_clock.c
> @@ -144,6 +144,8 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
>         */
>        cd.epoch_ns = 0;
>
> +       enable_sched_clock_irqtime();
> +
>        pr_debug("Registered %pF as sched_clock source\n", read);
>  }
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 5bed94e..4759676 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -805,17 +805,6 @@ config SCHED_MC
>          making when dealing with multi-core CPU chips at a cost of slightly
>          increased overhead in some places. If unsure say N here.
>
> -config IRQ_TIME_ACCOUNTING
> -       bool "Fine granularity task level IRQ time accounting"
> -       default n
> -       ---help---
> -         Select this option to enable fine granularity task irq time
> -         accounting. This is done by reading a timestamp on each
> -         transitions between softirq and hardirq state, so there can be a
> -         small performance impact.
> -
> -         If in doubt, say N here.
> -
>  source "kernel/Kconfig.preempt"
>
>  config X86_UP_APIC
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index a62c201..f1b2b63 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
>
>  __setup("notsc", notsc_setup);
>
> -static int no_sched_irq_time;
> -
>  static int __init tsc_setup(char *str)
>  {
>        if (!strcmp(str, "reliable"))
>                tsc_clocksource_reliable = 1;
> -       if (!strncmp(str, "noirqtime", 9))
> -               no_sched_irq_time = 1;
> +       if (!strncmp(str, "noirqtime", 9)) {
> +               printk(KERN_WARNING "tsc: tsc=noirqtime is "
> +                      "obsolete, use noirqtime instead\n");
> +               disable_sched_clock_irqtime();
> +       }
>        return 1;
>  }
>
> @@ -978,8 +979,7 @@ void __init tsc_init(void)
>        /* now allow native_sched_clock() to use rdtsc */
>        tsc_disabled = 0;
>
> -       if (!no_sched_irq_time)
> -               enable_sched_clock_irqtime();
> +       enable_sched_clock_irqtime();
>
>        lpj = ((u64)tsc_khz * 1000);
>        do_div(lpj, HZ);
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 7d379a6..9b13f79 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1961,13 +1961,16 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
>  #endif
>
>  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> -/*
> - * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
> - * The reason for this explicit opt-in is not to have perf penalty with
> - * slow sched_clocks.
> - */
> -extern void enable_sched_clock_irqtime(void);
> -extern void disable_sched_clock_irqtime(void);
> +extern int sched_clock_irqtime;
> +static inline void enable_sched_clock_irqtime(void)
> +{
> +       if (sched_clock_irqtime == -1)
> +               sched_clock_irqtime = 1;
> +}
> +static inline void disable_sched_clock_irqtime(void)
> +{
> +       sched_clock_irqtime = 0;
> +}
>  #else
>  static inline void enable_sched_clock_irqtime(void) {}
>  static inline void disable_sched_clock_irqtime(void) {}
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 5255c9d..a7ec043 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -757,18 +757,20 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
>  static DEFINE_PER_CPU(u64, cpu_softirq_time);
>
>  static DEFINE_PER_CPU(u64, irq_start_time);
> -static int sched_clock_irqtime;
>
> -void enable_sched_clock_irqtime(void)
> -{
> -       sched_clock_irqtime = 1;
> -}
> +/* -1 if not initialized, 0 if disabled with "noirqtime" kernel option
> + * or after unstable clock was detected, 1 if enabled and active.
> + */
> +int sched_clock_irqtime = -1;
>
> -void disable_sched_clock_irqtime(void)
> +static int __init irqtime_setup(char *str)
>  {
>        sched_clock_irqtime = 0;
> +       return 1;
>  }
>
> +__setup("noirqtime", irqtime_setup);
> +
>  #ifndef CONFIG_64BIT
>  static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
>
> @@ -822,7 +824,7 @@ void account_system_vtime(struct task_struct *curr)
>        s64 delta;
>        int cpu;
>
> -       if (!sched_clock_irqtime)
> +       if (sched_clock_irqtime < 1)
>                return;
>
>        local_irq_save(flags);
> @@ -2853,7 +2855,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
>        cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
>        struct rq *rq = this_rq();
>
> -       if (sched_clock_irqtime) {
> +       if (sched_clock_irqtime > 0) {
>                irqtime_account_process_tick(p, user_tick, rq);
>                return;
>        }
> @@ -2887,7 +2889,7 @@ void account_steal_ticks(unsigned long ticks)
>  void account_idle_ticks(unsigned long ticks)
>  {
>
> -       if (sched_clock_irqtime) {
> +       if (sched_clock_irqtime > 0) {
>                irqtime_account_idle_ticks(ticks);
>                return;
>        }
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 8745ac7..236e814 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -299,6 +299,18 @@ config SCHEDSTATS
>          application, you can say N to avoid the very slight overhead
>          this adds.
>
> +config IRQ_TIME_ACCOUNTING
> +       bool "Fine granularity task level IRQ time accounting"
> +       depends on X86 || ARM
> +       default n
> +       ---help---
> +         Select this option to enable fine granularity task irq time
> +         accounting. This is done by reading a timestamp on each
> +         transitions between softirq and hardirq state, so there can be a
> +         small performance impact.
> +
> +         If in doubt, say N here.
> +
>  config TIMER_STATS
>        bool "Collect kernel timers statistics"
>        depends on DEBUG_KERNEL && PROC_FS
> --
> 1.7.7.6
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-09 17:25 Dmitry Antipov
  2012-02-10 22:28 ` Venki Pallipadi
  0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-09 17:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: Russell King, Rusty Russell, Ingo Molnar, Yong Zhang,
	Venki Pallipadi, x86, linux-arm-kernel, linaro-dev, patches,
	Dmitry Antipov

Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, X86-specific option
"tsc=noirqtime" is preserved, but issues a warning.

Suggested-by: Yong Zhang <yong.zhang0@gmail.com>
Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Suggested-by: Venki Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
 Documentation/kernel-parameters.txt |    9 +++++----
 arch/arm/kernel/sched_clock.c       |    2 ++
 arch/x86/Kconfig                    |   11 -----------
 arch/x86/kernel/tsc.c               |   12 ++++++------
 include/linux/sched.h               |   17 ++++++++++-------
 kernel/sched/core.c                 |   20 +++++++++++---------
 lib/Kconfig.debug                   |   12 ++++++++++++
 7 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 033d4e6..666d20e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1790,6 +1790,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	noirqdebug	[X86-32] Disables the code which attempts to detect and
 			disable unhandled interrupt sources.
 
+	noirqtime	[X86,ARM] Used to run time disable IRQ_TIME_ACCOUNTING,
+			should give a negligible performance improvement.			
+
 	no_timer_check	[X86,APIC] Disables the code which tests for
 			broken timer IRQ sources.
 
@@ -2636,10 +2639,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			as the stability checks done at bootup.	Used to enable
 			high-resolution timer mode on older hardware, and in
 			virtualized environment.
-			[x86] noirqtime: Do not use TSC to do irq accounting.
-			Used to run time disable IRQ_TIME_ACCOUNTING on any
-			platforms where RDTSC is slow and this accounting
-			can add overhead.
+			[x86] noirqtime: obsoleted by "noirqtime" generic option,
+			see it's documentation for details.
 
 	turbografx.map[2|3]=	[HW,JOY]
 			TurboGraFX parallel port interface
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..30b5f89 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -144,6 +144,8 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	 */
 	cd.epoch_ns = 0;
 
+	enable_sched_clock_irqtime();
+
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
-config IRQ_TIME_ACCOUNTING
-	bool "Fine granularity task level IRQ time accounting"
-	default n
-	---help---
-	  Select this option to enable fine granularity task irq time
-	  accounting. This is done by reading a timestamp on each
-	  transitions between softirq and hardirq state, so there can be a
-	  small performance impact.
-
-	  If in doubt, say N here.
-
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..f1b2b63 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
 
 __setup("notsc", notsc_setup);
 
-static int no_sched_irq_time;
-
 static int __init tsc_setup(char *str)
 {
 	if (!strcmp(str, "reliable"))
 		tsc_clocksource_reliable = 1;
-	if (!strncmp(str, "noirqtime", 9))
-		no_sched_irq_time = 1;
+	if (!strncmp(str, "noirqtime", 9)) {
+		printk(KERN_WARNING "tsc: tsc=noirqtime is "
+		       "obsolete, use noirqtime instead\n");
+		disable_sched_clock_irqtime();
+	}
 	return 1;
 }
 
@@ -978,8 +979,7 @@ void __init tsc_init(void)
 	/* now allow native_sched_clock() to use rdtsc */
 	tsc_disabled = 0;
 
-	if (!no_sched_irq_time)
-		enable_sched_clock_irqtime();
+	enable_sched_clock_irqtime();
 
 	lpj = ((u64)tsc_khz * 1000);
 	do_div(lpj, HZ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..9b13f79 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1961,13 +1961,16 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
-/*
- * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
- * The reason for this explicit opt-in is not to have perf penalty with
- * slow sched_clocks.
- */
-extern void enable_sched_clock_irqtime(void);
-extern void disable_sched_clock_irqtime(void);
+extern int sched_clock_irqtime;
+static inline void enable_sched_clock_irqtime(void)
+{
+	if (sched_clock_irqtime == -1)
+		sched_clock_irqtime = 1;
+}
+static inline void disable_sched_clock_irqtime(void)
+{
+	sched_clock_irqtime = 0;
+}
 #else
 static inline void enable_sched_clock_irqtime(void) {}
 static inline void disable_sched_clock_irqtime(void) {}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5255c9d..a7ec043 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -757,18 +757,20 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
 static DEFINE_PER_CPU(u64, cpu_softirq_time);
 
 static DEFINE_PER_CPU(u64, irq_start_time);
-static int sched_clock_irqtime;
 
-void enable_sched_clock_irqtime(void)
-{
-	sched_clock_irqtime = 1;
-}
+/* -1 if not initialized, 0 if disabled with "noirqtime" kernel option
+ * or after unstable clock was detected, 1 if enabled and active.
+ */
+int sched_clock_irqtime = -1;
 
-void disable_sched_clock_irqtime(void)
+static int __init irqtime_setup(char *str)
 {
 	sched_clock_irqtime = 0;
+	return 1;
 }
 
+__setup("noirqtime", irqtime_setup);
+
 #ifndef CONFIG_64BIT
 static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
 
@@ -822,7 +824,7 @@ void account_system_vtime(struct task_struct *curr)
 	s64 delta;
 	int cpu;
 
-	if (!sched_clock_irqtime)
+	if (sched_clock_irqtime < 1)
 		return;
 
 	local_irq_save(flags);
@@ -2853,7 +2855,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();
 
-	if (sched_clock_irqtime) {
+	if (sched_clock_irqtime > 0) {
 		irqtime_account_process_tick(p, user_tick, rq);
 		return;
 	}
@@ -2887,7 +2889,7 @@ void account_steal_ticks(unsigned long ticks)
 void account_idle_ticks(unsigned long ticks)
 {
 
-	if (sched_clock_irqtime) {
+	if (sched_clock_irqtime > 0) {
 		irqtime_account_idle_ticks(ticks);
 		return;
 	}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..236e814 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
 	  application, you can say N to avoid the very slight overhead
 	  this adds.
 
+config IRQ_TIME_ACCOUNTING
+	bool "Fine granularity task level IRQ time accounting"
+	depends on X86 || ARM
+	default n
+	---help---
+	  Select this option to enable fine granularity task irq time
+	  accounting. This is done by reading a timestamp on each
+	  transitions between softirq and hardirq state, so there can be a
+	  small performance impact.
+
+	  If in doubt, say N here.
+
 config TIMER_STATS
 	bool "Collect kernel timers statistics"
 	depends on DEBUG_KERNEL && PROC_FS
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-08 23:58 Dmitry Antipov
  0 siblings, 0 replies; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-08 23:58 UTC (permalink / raw)
  To: Venki Pallipadi
  Cc: Russell King, Rusty Russell, Ingo Molnar, linux-kernel, x86,
	linux-arm-kernel, linaro-dev, patches, Dmitry Antipov

Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, X86-specific option
"tsc=noirqtime" is preserved, but issues a warning.

Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Suggested-by: Venki Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
 Documentation/kernel-parameters.txt |    9 +++++----
 arch/arm/kernel/sched_clock.c       |    3 +++
 arch/x86/Kconfig                    |   11 -----------
 arch/x86/kernel/tsc.c               |    7 ++++---
 include/linux/sched.h               |    2 ++
 lib/Kconfig.debug                   |   12 ++++++++++++
 lib/Makefile                        |    2 ++
 lib/irqtime.c                       |   12 ++++++++++++
 8 files changed, 40 insertions(+), 18 deletions(-)
 create mode 100644 lib/irqtime.c

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 033d4e6..b64a13f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1719,6 +1719,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	noautogroup	Disable scheduler automatic task group creation.
 
+	noirqtime	[X86,ARM] Used to run time disable IRQ_TIME_ACCOUNTING,
+			should give a negligible performance improvement.			
+
 	nobats		[PPC] Do not use BATs for mapping kernel lowmem
 			on "Classic" PPC cores.
 
@@ -2636,10 +2639,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			as the stability checks done at bootup.	Used to enable
 			high-resolution timer mode on older hardware, and in
 			virtualized environment.
-			[x86] noirqtime: Do not use TSC to do irq accounting.
-			Used to run time disable IRQ_TIME_ACCOUNTING on any
-			platforms where RDTSC is slow and this accounting
-			can add overhead.
+			[x86] noirqtime: obsoleted by "noirqtime" generic option,
+			see it's documentation for details.
 
 	turbografx.map[2|3]=	[HW,JOY]
 			TurboGraFX parallel port interface
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..961bd2d 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -144,6 +144,9 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	 */
 	cd.epoch_ns = 0;
 
+	if (!no_sched_irq_time)
+		enable_sched_clock_irqtime();
+
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
-config IRQ_TIME_ACCOUNTING
-	bool "Fine granularity task level IRQ time accounting"
-	default n
-	---help---
-	  Select this option to enable fine granularity task irq time
-	  accounting. This is done by reading a timestamp on each
-	  transitions between softirq and hardirq state, so there can be a
-	  small performance impact.
-
-	  If in doubt, say N here.
-
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..70510a3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
 
 __setup("notsc", notsc_setup);
 
-static int no_sched_irq_time;
-
 static int __init tsc_setup(char *str)
 {
 	if (!strcmp(str, "reliable"))
 		tsc_clocksource_reliable = 1;
-	if (!strncmp(str, "noirqtime", 9))
+	if (!strncmp(str, "noirqtime", 9)) {
+		printk(KERN_WARNING "tsc: tsc=noirqtime is "
+		       "obsolete, use noirqtime instead\n");
 		no_sched_irq_time = 1;
+	}
 	return 1;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..b3575b5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
  * The reason for this explicit opt-in is not to have perf penalty with
  * slow sched_clocks.
  */
+extern int no_sched_irq_time;
 extern void enable_sched_clock_irqtime(void);
 extern void disable_sched_clock_irqtime(void);
 #else
+#define no_sched_irq_time 1
 static inline void enable_sched_clock_irqtime(void) {}
 static inline void disable_sched_clock_irqtime(void) {}
 #endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..236e814 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
 	  application, you can say N to avoid the very slight overhead
 	  this adds.
 
+config IRQ_TIME_ACCOUNTING
+	bool "Fine granularity task level IRQ time accounting"
+	depends on X86 || ARM
+	default n
+	---help---
+	  Select this option to enable fine granularity task irq time
+	  accounting. This is done by reading a timestamp on each
+	  transitions between softirq and hardirq state, so there can be a
+	  small performance impact.
+
+	  If in doubt, say N here.
+
 config TIMER_STATS
 	bool "Collect kernel timers statistics"
 	depends on DEBUG_KERNEL && PROC_FS
diff --git a/lib/Makefile b/lib/Makefile
index 18515f0..44d67d4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
+obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
+
 ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
   lib-y += dec_and_lock.o
 endif
diff --git a/lib/irqtime.c b/lib/irqtime.c
new file mode 100644
index 0000000..10d440d
--- /dev/null
+++ b/lib/irqtime.c
@@ -0,0 +1,12 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+int no_sched_irq_time;
+
+static int __init irqtime_setup(char *str)
+{
+	no_sched_irq_time = 1;
+	return 1;
+}
+
+__setup("noirqtime", irqtime_setup);
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
  2012-02-08 16:08 Dmitry Antipov
@ 2012-02-08 22:50 ` Venki Pallipadi
  0 siblings, 0 replies; 19+ messages in thread
From: Venki Pallipadi @ 2012-02-08 22:50 UTC (permalink / raw)
  To: Dmitry Antipov
  Cc: Russell King, Rusty Russell, Ingo Molnar, linux-kernel, x86,
	linux-arm-kernel, linaro-dev, patches

On Wed, Feb 8, 2012 at 8:08 AM, Dmitry Antipov
<dmitry.antipov@linaro.org> wrote:
> Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
> ARM, move "noirqtime=" option to common debugging code.
> For a bit of backward compatibility, X86-specific option
> "tsc=noirqtime" is preserved, but issues a warning.
>
> Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
> Suggested-by: Venki Pallipadi <venki@google.com>
> Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
> ---
>  arch/arm/kernel/sched_clock.c |    3 +++
>  arch/x86/Kconfig              |   11 -----------
>  arch/x86/kernel/tsc.c         |    7 ++++---
>  include/linux/sched.h         |    2 ++
>  lib/Kconfig.debug             |   12 ++++++++++++
>  lib/Makefile                  |    2 ++
>  lib/irqtime.c                 |   12 ++++++++++++
>  7 files changed, 35 insertions(+), 14 deletions(-)
>  create mode 100644 lib/irqtime.c
>
> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> index 5416c7c..961bd2d 100644
> --- a/arch/arm/kernel/sched_clock.c
> +++ b/arch/arm/kernel/sched_clock.c
> @@ -144,6 +144,9 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
>         */
>        cd.epoch_ns = 0;
>
> +       if (!no_sched_irq_time)
> +               enable_sched_clock_irqtime();
> +
>        pr_debug("Registered %pF as sched_clock source\n", read);
>  }
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 5bed94e..4759676 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -805,17 +805,6 @@ config SCHED_MC
>          making when dealing with multi-core CPU chips at a cost of slightly
>          increased overhead in some places. If unsure say N here.
>
> -config IRQ_TIME_ACCOUNTING
> -       bool "Fine granularity task level IRQ time accounting"
> -       default n
> -       ---help---
> -         Select this option to enable fine granularity task irq time
> -         accounting. This is done by reading a timestamp on each
> -         transitions between softirq and hardirq state, so there can be a
> -         small performance impact.
> -
> -         If in doubt, say N here.
> -
>  source "kernel/Kconfig.preempt"
>
>  config X86_UP_APIC
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index a62c201..70510a3 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
>
>  __setup("notsc", notsc_setup);
>
> -static int no_sched_irq_time;
> -
>  static int __init tsc_setup(char *str)
>  {
>        if (!strcmp(str, "reliable"))
>                tsc_clocksource_reliable = 1;
> -       if (!strncmp(str, "noirqtime", 9))
> +       if (!strncmp(str, "noirqtime", 9)) {
> +               printk(KERN_WARNING "tsc: tsc=noirqtime is "
> +                      "obsolete, use noirqtime instead\n");
>                no_sched_irq_time = 1;
> +       }
>        return 1;
>  }
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 7d379a6..b3575b5 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
>  * The reason for this explicit opt-in is not to have perf penalty with
>  * slow sched_clocks.
>  */
> +extern int no_sched_irq_time;
>  extern void enable_sched_clock_irqtime(void);
>  extern void disable_sched_clock_irqtime(void);
>  #else
> +#define no_sched_irq_time 1
>  static inline void enable_sched_clock_irqtime(void) {}
>  static inline void disable_sched_clock_irqtime(void) {}
>  #endif
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 8745ac7..236e814 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -299,6 +299,18 @@ config SCHEDSTATS
>          application, you can say N to avoid the very slight overhead
>          this adds.
>
> +config IRQ_TIME_ACCOUNTING
> +       bool "Fine granularity task level IRQ time accounting"
> +       depends on X86 || ARM
> +       default n
> +       ---help---
> +         Select this option to enable fine granularity task irq time
> +         accounting. This is done by reading a timestamp on each
> +         transitions between softirq and hardirq state, so there can be a
> +         small performance impact.
> +
> +         If in doubt, say N here.
> +
>  config TIMER_STATS
>        bool "Collect kernel timers statistics"
>        depends on DEBUG_KERNEL && PROC_FS
> diff --git a/lib/Makefile b/lib/Makefile
> index 18515f0..44d67d4 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
>  obj-$(CONFIG_DEBUG_LIST) += list_debug.o
>  obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
>
> +obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
> +
>  ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
>   lib-y += dec_and_lock.o
>  endif
> diff --git a/lib/irqtime.c b/lib/irqtime.c
> new file mode 100644
> index 0000000..10d440d
> --- /dev/null
> +++ b/lib/irqtime.c
> @@ -0,0 +1,12 @@
> +#include <linux/kernel.h>
> +#include <linux/sched.h>
> +
> +int no_sched_irq_time;
> +
> +static int __init irqtime_setup(char *str)
> +{
> +       no_sched_irq_time = 1;
> +       return 1;
> +}
> +
> +__setup("noirqtime", irqtime_setup);
> --

One more nit-pick. We need corresponding change in
Documentation/kernel-parameters.txt to  deprecate tsc=irqtime and add
this new param.

> 1.7.7.6
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-08 16:08 Dmitry Antipov
  2012-02-08 22:50 ` Venki Pallipadi
  0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-08 16:08 UTC (permalink / raw)
  To: Russell King
  Cc: Rusty Russell, Ingo Molnar, Venki Pallipadi, linux-kernel, x86,
	linux-arm-kernel, linaro-dev, patches, Dmitry Antipov

Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, X86-specific option
"tsc=noirqtime" is preserved, but issues a warning.

Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Suggested-by: Venki Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
 arch/arm/kernel/sched_clock.c |    3 +++
 arch/x86/Kconfig              |   11 -----------
 arch/x86/kernel/tsc.c         |    7 ++++---
 include/linux/sched.h         |    2 ++
 lib/Kconfig.debug             |   12 ++++++++++++
 lib/Makefile                  |    2 ++
 lib/irqtime.c                 |   12 ++++++++++++
 7 files changed, 35 insertions(+), 14 deletions(-)
 create mode 100644 lib/irqtime.c

diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..961bd2d 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -144,6 +144,9 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	 */
 	cd.epoch_ns = 0;
 
+	if (!no_sched_irq_time)
+		enable_sched_clock_irqtime();
+
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
-config IRQ_TIME_ACCOUNTING
-	bool "Fine granularity task level IRQ time accounting"
-	default n
-	---help---
-	  Select this option to enable fine granularity task irq time
-	  accounting. This is done by reading a timestamp on each
-	  transitions between softirq and hardirq state, so there can be a
-	  small performance impact.
-
-	  If in doubt, say N here.
-
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..70510a3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
 
 __setup("notsc", notsc_setup);
 
-static int no_sched_irq_time;
-
 static int __init tsc_setup(char *str)
 {
 	if (!strcmp(str, "reliable"))
 		tsc_clocksource_reliable = 1;
-	if (!strncmp(str, "noirqtime", 9))
+	if (!strncmp(str, "noirqtime", 9)) {
+		printk(KERN_WARNING "tsc: tsc=noirqtime is "
+		       "obsolete, use noirqtime instead\n");
 		no_sched_irq_time = 1;
+	}
 	return 1;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..b3575b5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
  * The reason for this explicit opt-in is not to have perf penalty with
  * slow sched_clocks.
  */
+extern int no_sched_irq_time;
 extern void enable_sched_clock_irqtime(void);
 extern void disable_sched_clock_irqtime(void);
 #else
+#define no_sched_irq_time 1
 static inline void enable_sched_clock_irqtime(void) {}
 static inline void disable_sched_clock_irqtime(void) {}
 #endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..236e814 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
 	  application, you can say N to avoid the very slight overhead
 	  this adds.
 
+config IRQ_TIME_ACCOUNTING
+	bool "Fine granularity task level IRQ time accounting"
+	depends on X86 || ARM
+	default n
+	---help---
+	  Select this option to enable fine granularity task irq time
+	  accounting. This is done by reading a timestamp on each
+	  transitions between softirq and hardirq state, so there can be a
+	  small performance impact.
+
+	  If in doubt, say N here.
+
 config TIMER_STATS
 	bool "Collect kernel timers statistics"
 	depends on DEBUG_KERNEL && PROC_FS
diff --git a/lib/Makefile b/lib/Makefile
index 18515f0..44d67d4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
+obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
+
 ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
   lib-y += dec_and_lock.o
 endif
diff --git a/lib/irqtime.c b/lib/irqtime.c
new file mode 100644
index 0000000..10d440d
--- /dev/null
+++ b/lib/irqtime.c
@@ -0,0 +1,12 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+int no_sched_irq_time;
+
+static int __init irqtime_setup(char *str)
+{
+	no_sched_irq_time = 1;
+	return 1;
+}
+
+__setup("noirqtime", irqtime_setup);
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
  2012-02-07 18:06 Dmitry Antipov
@ 2012-02-07 22:56 ` Venki Pallipadi
  0 siblings, 0 replies; 19+ messages in thread
From: Venki Pallipadi @ 2012-02-07 22:56 UTC (permalink / raw)
  To: Dmitry Antipov
  Cc: Rusty Russell, Ingo Molnar, linux-kernel, x86, linux-arm-kernel,
	linaro-dev, patches

On Tue, Feb 7, 2012 at 10:06 AM, Dmitry Antipov
<dmitry.antipov@linaro.org> wrote:
> Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
> ARM, move "noirqtime=" option to common debugging code.
>
> Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
> ---
>  arch/arm/kernel/sched_clock.c |    3 +++
>  arch/x86/Kconfig              |   11 -----------
>  arch/x86/kernel/tsc.c         |    4 ----
>  include/linux/sched.h         |    2 ++
>  lib/Kconfig.debug             |   12 ++++++++++++
>  lib/Makefile                  |    2 ++
>  lib/irqtime.c                 |   12 ++++++++++++
>  7 files changed, 31 insertions(+), 15 deletions(-)
>  create mode 100644 lib/irqtime.c
>
> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> index 5416c7c..56d2a9d 100644
> --- a/arch/arm/kernel/sched_clock.c
> +++ b/arch/arm/kernel/sched_clock.c
> @@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
>        if (read_sched_clock == jiffy_sched_clock_read)
>                setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
>
> +       if (!no_sched_irq_time)
> +               enable_sched_clock_irqtime();
> +
>        sched_clock_poll(sched_clock_timer.data);
>  }
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 5bed94e..4759676 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -805,17 +805,6 @@ config SCHED_MC
>          making when dealing with multi-core CPU chips at a cost of slightly
>          increased overhead in some places. If unsure say N here.
>
> -config IRQ_TIME_ACCOUNTING
> -       bool "Fine granularity task level IRQ time accounting"
> -       default n
> -       ---help---
> -         Select this option to enable fine granularity task irq time
> -         accounting. This is done by reading a timestamp on each
> -         transitions between softirq and hardirq state, so there can be a
> -         small performance impact.
> -
> -         If in doubt, say N here.
> -
>  source "kernel/Kconfig.preempt"
>
>  config X86_UP_APIC
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index a62c201..a3a5465 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -103,14 +103,10 @@ int __init notsc_setup(char *str)
>
>  __setup("notsc", notsc_setup);
>
> -static int no_sched_irq_time;
> -
>  static int __init tsc_setup(char *str)
>  {
>        if (!strcmp(str, "reliable"))
>                tsc_clocksource_reliable = 1;
> -       if (!strncmp(str, "noirqtime", 9))
> -               no_sched_irq_time = 1;

I guess we need to keep this option around and gracefully deprecate to
the new one?

>        return 1;
>  }
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 7d379a6..b3575b5 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
>  * The reason for this explicit opt-in is not to have perf penalty with
>  * slow sched_clocks.
>  */
> +extern int no_sched_irq_time;
>  extern void enable_sched_clock_irqtime(void);
>  extern void disable_sched_clock_irqtime(void);
>  #else
> +#define no_sched_irq_time 1
>  static inline void enable_sched_clock_irqtime(void) {}
>  static inline void disable_sched_clock_irqtime(void) {}
>  #endif
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 8745ac7..a55287e 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -299,6 +299,18 @@ config SCHEDSTATS
>          application, you can say N to avoid the very slight overhead
>          this adds.
>
> +config IRQ_TIME_ACCOUNTING
> +       bool "Fine granularity task level IRQ time accounting"
> +       depends on DEBUG_KERNEL && (X86 || (ARM && HAVE_SCHED_CLOCK))

Why the new dependency on DEBUG_KERNEL?

Thanks,
-Venki

> +       default n
> +       ---help---
> +         Select this option to enable fine granularity task irq time
> +         accounting. This is done by reading a timestamp on each
> +         transitions between softirq and hardirq state, so there can be a
> +         small performance impact.
> +
> +         If in doubt, say N here.
> +
>  config TIMER_STATS
>        bool "Collect kernel timers statistics"
>        depends on DEBUG_KERNEL && PROC_FS
> diff --git a/lib/Makefile b/lib/Makefile
> index 18515f0..44d67d4 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
>  obj-$(CONFIG_DEBUG_LIST) += list_debug.o
>  obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
>
> +obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
> +
>  ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
>   lib-y += dec_and_lock.o
>  endif
> diff --git a/lib/irqtime.c b/lib/irqtime.c
> new file mode 100644
> index 0000000..10d440d
> --- /dev/null
> +++ b/lib/irqtime.c
> @@ -0,0 +1,12 @@
> +#include <linux/kernel.h>
> +#include <linux/sched.h>
> +
> +int no_sched_irq_time;
> +
> +static int __init irqtime_setup(char *str)
> +{
> +       no_sched_irq_time = 1;
> +       return 1;
> +}
> +
> +__setup("noirqtime", irqtime_setup);
> --
> 1.7.7.6
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-07 18:06 Dmitry Antipov
  2012-02-07 22:56 ` Venki Pallipadi
  0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-07 18:06 UTC (permalink / raw)
  To: Rusty Russell, Ingo Molnar
  Cc: linux-kernel, x86, linux-arm-kernel, linaro-dev, patches, Dmitry Antipov

Generalize CONFIG_IRQ_TIME_ACCOUNTING  between X86 and
ARM, move "noirqtime=" option to common debugging code.

Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
 arch/arm/kernel/sched_clock.c |    3 +++
 arch/x86/Kconfig              |   11 -----------
 arch/x86/kernel/tsc.c         |    4 ----
 include/linux/sched.h         |    2 ++
 lib/Kconfig.debug             |   12 ++++++++++++
 lib/Makefile                  |    2 ++
 lib/irqtime.c                 |   12 ++++++++++++
 7 files changed, 31 insertions(+), 15 deletions(-)
 create mode 100644 lib/irqtime.c

diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..56d2a9d 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
 	if (read_sched_clock == jiffy_sched_clock_read)
 		setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
 
+	if (!no_sched_irq_time)
+		enable_sched_clock_irqtime();
+
 	sched_clock_poll(sched_clock_timer.data);
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
-config IRQ_TIME_ACCOUNTING
-	bool "Fine granularity task level IRQ time accounting"
-	default n
-	---help---
-	  Select this option to enable fine granularity task irq time
-	  accounting. This is done by reading a timestamp on each
-	  transitions between softirq and hardirq state, so there can be a
-	  small performance impact.
-
-	  If in doubt, say N here.
-
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..a3a5465 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,10 @@ int __init notsc_setup(char *str)
 
 __setup("notsc", notsc_setup);
 
-static int no_sched_irq_time;
-
 static int __init tsc_setup(char *str)
 {
 	if (!strcmp(str, "reliable"))
 		tsc_clocksource_reliable = 1;
-	if (!strncmp(str, "noirqtime", 9))
-		no_sched_irq_time = 1;
 	return 1;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..b3575b5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
  * The reason for this explicit opt-in is not to have perf penalty with
  * slow sched_clocks.
  */
+extern int no_sched_irq_time;
 extern void enable_sched_clock_irqtime(void);
 extern void disable_sched_clock_irqtime(void);
 #else
+#define no_sched_irq_time 1
 static inline void enable_sched_clock_irqtime(void) {}
 static inline void disable_sched_clock_irqtime(void) {}
 #endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..a55287e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
 	  application, you can say N to avoid the very slight overhead
 	  this adds.
 
+config IRQ_TIME_ACCOUNTING
+	bool "Fine granularity task level IRQ time accounting"
+	depends on DEBUG_KERNEL && (X86 || (ARM && HAVE_SCHED_CLOCK))
+	default n
+	---help---
+	  Select this option to enable fine granularity task irq time
+	  accounting. This is done by reading a timestamp on each
+	  transitions between softirq and hardirq state, so there can be a
+	  small performance impact.
+
+	  If in doubt, say N here.
+
 config TIMER_STATS
 	bool "Collect kernel timers statistics"
 	depends on DEBUG_KERNEL && PROC_FS
diff --git a/lib/Makefile b/lib/Makefile
index 18515f0..44d67d4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
+obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
+
 ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
   lib-y += dec_and_lock.o
 endif
diff --git a/lib/irqtime.c b/lib/irqtime.c
new file mode 100644
index 0000000..10d440d
--- /dev/null
+++ b/lib/irqtime.c
@@ -0,0 +1,12 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+int no_sched_irq_time;
+
+static int __init irqtime_setup(char *str)
+{
+	no_sched_irq_time = 1;
+	return 1;
+}
+
+__setup("noirqtime", irqtime_setup);
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2012-02-28  6:28 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-08 12:48 [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM Dmitry Antipov
2012-02-08 13:18 ` Russell King - ARM Linux
2012-02-08 15:15   ` Dmitry Antipov
2012-02-08 15:24     ` Russell King - ARM Linux
2012-02-09  2:51   ` Yong Zhang
2012-02-09  2:48 ` Yong Zhang
  -- strict thread matches above, loose matches on Subject: below --
2012-02-28  6:29 Dmitry Antipov
2012-02-20  6:04 Dmitry Antipov
2012-02-27 10:12 ` Peter Zijlstra
2012-02-28  6:19   ` Dmitry Antipov
2012-02-11  0:02 Dmitry Antipov
2012-02-17 11:22 ` Ingo Molnar
2012-02-09 17:25 Dmitry Antipov
2012-02-10 22:28 ` Venki Pallipadi
2012-02-08 23:58 Dmitry Antipov
2012-02-08 16:08 Dmitry Antipov
2012-02-08 22:50 ` Venki Pallipadi
2012-02-07 18:06 Dmitry Antipov
2012-02-07 22:56 ` Venki Pallipadi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).