* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-07 18:06 Dmitry Antipov
2012-02-07 22:56 ` Venki Pallipadi
0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-07 18:06 UTC (permalink / raw)
To: Rusty Russell, Ingo Molnar
Cc: linux-kernel, x86, linux-arm-kernel, linaro-dev, patches, Dmitry Antipov
Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
ARM, move "noirqtime=" option to common debugging code.
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
arch/arm/kernel/sched_clock.c | 3 +++
arch/x86/Kconfig | 11 -----------
arch/x86/kernel/tsc.c | 4 ----
include/linux/sched.h | 2 ++
lib/Kconfig.debug | 12 ++++++++++++
lib/Makefile | 2 ++
lib/irqtime.c | 12 ++++++++++++
7 files changed, 31 insertions(+), 15 deletions(-)
create mode 100644 lib/irqtime.c
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..56d2a9d 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
if (read_sched_clock == jiffy_sched_clock_read)
setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
+ if (!no_sched_irq_time)
+ enable_sched_clock_irqtime();
+
sched_clock_poll(sched_clock_timer.data);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
-config IRQ_TIME_ACCOUNTING
- bool "Fine granularity task level IRQ time accounting"
- default n
- ---help---
- Select this option to enable fine granularity task irq time
- accounting. This is done by reading a timestamp on each
- transitions between softirq and hardirq state, so there can be a
- small performance impact.
-
- If in doubt, say N here.
-
source "kernel/Kconfig.preempt"
config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..a3a5465 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,10 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
-static int no_sched_irq_time;
-
static int __init tsc_setup(char *str)
{
if (!strcmp(str, "reliable"))
tsc_clocksource_reliable = 1;
- if (!strncmp(str, "noirqtime", 9))
- no_sched_irq_time = 1;
return 1;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..b3575b5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
* The reason for this explicit opt-in is not to have perf penalty with
* slow sched_clocks.
*/
+extern int no_sched_irq_time;
extern void enable_sched_clock_irqtime(void);
extern void disable_sched_clock_irqtime(void);
#else
+#define no_sched_irq_time 1
static inline void enable_sched_clock_irqtime(void) {}
static inline void disable_sched_clock_irqtime(void) {}
#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..a55287e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
application, you can say N to avoid the very slight overhead
this adds.
+config IRQ_TIME_ACCOUNTING
+ bool "Fine granularity task level IRQ time accounting"
+ depends on DEBUG_KERNEL && (X86 || (ARM && HAVE_SCHED_CLOCK))
+ default n
+ ---help---
+ Select this option to enable fine granularity task irq time
+ accounting. This is done by reading a timestamp on each
+ transitions between softirq and hardirq state, so there can be a
+ small performance impact.
+
+ If in doubt, say N here.
+
config TIMER_STATS
bool "Collect kernel timers statistics"
depends on DEBUG_KERNEL && PROC_FS
diff --git a/lib/Makefile b/lib/Makefile
index 18515f0..44d67d4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
obj-$(CONFIG_DEBUG_LIST) += list_debug.o
obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
+obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
+
ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
lib-y += dec_and_lock.o
endif
diff --git a/lib/irqtime.c b/lib/irqtime.c
new file mode 100644
index 0000000..10d440d
--- /dev/null
+++ b/lib/irqtime.c
@@ -0,0 +1,12 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+int no_sched_irq_time;
+
+static int __init irqtime_setup(char *str)
+{
+ no_sched_irq_time = 1;
+ return 1;
+}
+
+__setup("noirqtime", irqtime_setup);
--
1.7.7.6
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
2012-02-07 18:06 [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM Dmitry Antipov
@ 2012-02-07 22:56 ` Venki Pallipadi
0 siblings, 0 replies; 19+ messages in thread
From: Venki Pallipadi @ 2012-02-07 22:56 UTC (permalink / raw)
To: Dmitry Antipov
Cc: Rusty Russell, Ingo Molnar, linux-kernel, x86, linux-arm-kernel,
linaro-dev, patches
On Tue, Feb 7, 2012 at 10:06 AM, Dmitry Antipov
<dmitry.antipov@linaro.org> wrote:
> Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
> ARM, move "noirqtime=" option to common debugging code.
>
> Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
> ---
> arch/arm/kernel/sched_clock.c | 3 +++
> arch/x86/Kconfig | 11 -----------
> arch/x86/kernel/tsc.c | 4 ----
> include/linux/sched.h | 2 ++
> lib/Kconfig.debug | 12 ++++++++++++
> lib/Makefile | 2 ++
> lib/irqtime.c | 12 ++++++++++++
> 7 files changed, 31 insertions(+), 15 deletions(-)
> create mode 100644 lib/irqtime.c
>
> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> index 5416c7c..56d2a9d 100644
> --- a/arch/arm/kernel/sched_clock.c
> +++ b/arch/arm/kernel/sched_clock.c
> @@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
> if (read_sched_clock == jiffy_sched_clock_read)
> setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
>
> + if (!no_sched_irq_time)
> + enable_sched_clock_irqtime();
> +
> sched_clock_poll(sched_clock_timer.data);
> }
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 5bed94e..4759676 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -805,17 +805,6 @@ config SCHED_MC
> making when dealing with multi-core CPU chips at a cost of slightly
> increased overhead in some places. If unsure say N here.
>
> -config IRQ_TIME_ACCOUNTING
> - bool "Fine granularity task level IRQ time accounting"
> - default n
> - ---help---
> - Select this option to enable fine granularity task irq time
> - accounting. This is done by reading a timestamp on each
> - transitions between softirq and hardirq state, so there can be a
> - small performance impact.
> -
> - If in doubt, say N here.
> -
> source "kernel/Kconfig.preempt"
>
> config X86_UP_APIC
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index a62c201..a3a5465 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -103,14 +103,10 @@ int __init notsc_setup(char *str)
>
> __setup("notsc", notsc_setup);
>
> -static int no_sched_irq_time;
> -
> static int __init tsc_setup(char *str)
> {
> if (!strcmp(str, "reliable"))
> tsc_clocksource_reliable = 1;
> - if (!strncmp(str, "noirqtime", 9))
> - no_sched_irq_time = 1;
I guess we need to keep this option around and gracefully deprecate to
the new one?
> return 1;
> }
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 7d379a6..b3575b5 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
> * The reason for this explicit opt-in is not to have perf penalty with
> * slow sched_clocks.
> */
> +extern int no_sched_irq_time;
> extern void enable_sched_clock_irqtime(void);
> extern void disable_sched_clock_irqtime(void);
> #else
> +#define no_sched_irq_time 1
> static inline void enable_sched_clock_irqtime(void) {}
> static inline void disable_sched_clock_irqtime(void) {}
> #endif
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 8745ac7..a55287e 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -299,6 +299,18 @@ config SCHEDSTATS
> application, you can say N to avoid the very slight overhead
> this adds.
>
> +config IRQ_TIME_ACCOUNTING
> + bool "Fine granularity task level IRQ time accounting"
> + depends on DEBUG_KERNEL && (X86 || (ARM && HAVE_SCHED_CLOCK))
Why the new dependency on DEBUG_KERNEL?
Thanks,
-Venki
> + default n
> + ---help---
> + Select this option to enable fine granularity task irq time
> + accounting. This is done by reading a timestamp on each
> + transitions between softirq and hardirq state, so there can be a
> + small performance impact.
> +
> + If in doubt, say N here.
> +
> config TIMER_STATS
> bool "Collect kernel timers statistics"
> depends on DEBUG_KERNEL && PROC_FS
> diff --git a/lib/Makefile b/lib/Makefile
> index 18515f0..44d67d4 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
> obj-$(CONFIG_DEBUG_LIST) += list_debug.o
> obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
>
> +obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
> +
> ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
> lib-y += dec_and_lock.o
> endif
> diff --git a/lib/irqtime.c b/lib/irqtime.c
> new file mode 100644
> index 0000000..10d440d
> --- /dev/null
> +++ b/lib/irqtime.c
> @@ -0,0 +1,12 @@
> +#include <linux/kernel.h>
> +#include <linux/sched.h>
> +
> +int no_sched_irq_time;
> +
> +static int __init irqtime_setup(char *str)
> +{
> + no_sched_irq_time = 1;
> + return 1;
> +}
> +
> +__setup("noirqtime", irqtime_setup);
> --
> 1.7.7.6
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-08 12:48 Dmitry Antipov
2012-02-08 13:18 ` Russell King - ARM Linux
2012-02-09 2:48 ` Yong Zhang
0 siblings, 2 replies; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-08 12:48 UTC (permalink / raw)
To: Rusty Russell, Ingo Molnar
Cc: Venki Pallipadi, linux-kernel, x86, linux-arm-kernel, linaro-dev,
patches, Dmitry Antipov
Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, "tsc=noirqtime"
is preserved, but issues a warning.
Suggested-by: Venki Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
arch/arm/kernel/sched_clock.c | 3 +++
arch/x86/Kconfig | 11 -----------
arch/x86/kernel/tsc.c | 7 ++++---
include/linux/sched.h | 2 ++
lib/Kconfig.debug | 12 ++++++++++++
lib/Makefile | 2 ++
lib/irqtime.c | 12 ++++++++++++
7 files changed, 35 insertions(+), 14 deletions(-)
create mode 100644 lib/irqtime.c
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..56d2a9d 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
if (read_sched_clock == jiffy_sched_clock_read)
setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
+ if (!no_sched_irq_time)
+ enable_sched_clock_irqtime();
+
sched_clock_poll(sched_clock_timer.data);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
-config IRQ_TIME_ACCOUNTING
- bool "Fine granularity task level IRQ time accounting"
- default n
- ---help---
- Select this option to enable fine granularity task irq time
- accounting. This is done by reading a timestamp on each
- transitions between softirq and hardirq state, so there can be a
- small performance impact.
-
- If in doubt, say N here.
-
source "kernel/Kconfig.preempt"
config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..70510a3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
-static int no_sched_irq_time;
-
static int __init tsc_setup(char *str)
{
if (!strcmp(str, "reliable"))
tsc_clocksource_reliable = 1;
- if (!strncmp(str, "noirqtime", 9))
+ if (!strncmp(str, "noirqtime", 9)) {
+ printk(KERN_WARNING "tsc: tsc=noirqtime is "
+ "obsolete, use noirqtime instead\n");
no_sched_irq_time = 1;
+ }
return 1;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..b3575b5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
* The reason for this explicit opt-in is not to have perf penalty with
* slow sched_clocks.
*/
+extern int no_sched_irq_time;
extern void enable_sched_clock_irqtime(void);
extern void disable_sched_clock_irqtime(void);
#else
+#define no_sched_irq_time 1
static inline void enable_sched_clock_irqtime(void) {}
static inline void disable_sched_clock_irqtime(void) {}
#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..48be210 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
application, you can say N to avoid the very slight overhead
this adds.
+config IRQ_TIME_ACCOUNTING
+ bool "Fine granularity task level IRQ time accounting"
+ depends on (X86 || (ARM && HAVE_SCHED_CLOCK))
+ default n
+ ---help---
+ Select this option to enable fine granularity task irq time
+ accounting. This is done by reading a timestamp on each
+ transitions between softirq and hardirq state, so there can be a
+ small performance impact.
+
+ If in doubt, say N here.
+
config TIMER_STATS
bool "Collect kernel timers statistics"
depends on DEBUG_KERNEL && PROC_FS
diff --git a/lib/Makefile b/lib/Makefile
index 18515f0..44d67d4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
obj-$(CONFIG_DEBUG_LIST) += list_debug.o
obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
+obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
+
ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
lib-y += dec_and_lock.o
endif
diff --git a/lib/irqtime.c b/lib/irqtime.c
new file mode 100644
index 0000000..10d440d
--- /dev/null
+++ b/lib/irqtime.c
@@ -0,0 +1,12 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+int no_sched_irq_time;
+
+static int __init irqtime_setup(char *str)
+{
+ no_sched_irq_time = 1;
+ return 1;
+}
+
+__setup("noirqtime", irqtime_setup);
--
1.7.7.6
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
2012-02-08 12:48 Dmitry Antipov
@ 2012-02-08 13:18 ` Russell King - ARM Linux
2012-02-08 15:15 ` Dmitry Antipov
2012-02-09 2:51 ` Yong Zhang
2012-02-09 2:48 ` Yong Zhang
1 sibling, 2 replies; 19+ messages in thread
From: Russell King - ARM Linux @ 2012-02-08 13:18 UTC (permalink / raw)
To: Dmitry Antipov
Cc: Rusty Russell, Ingo Molnar, Venki Pallipadi, linaro-dev, patches,
x86, linux-kernel, linux-arm-kernel
On Wed, Feb 08, 2012 at 04:48:34AM -0800, Dmitry Antipov wrote:
> Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
> ARM, move "noirqtime=" option to common debugging code.
> For a bit of backward compatibility, "tsc=noirqtime"
> is preserved, but issues a warning.
>
> Suggested-by: Venki Pallipadi <venki@google.com>
> Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
> ---
> arch/arm/kernel/sched_clock.c | 3 +++
> arch/x86/Kconfig | 11 -----------
> arch/x86/kernel/tsc.c | 7 ++++---
> include/linux/sched.h | 2 ++
> lib/Kconfig.debug | 12 ++++++++++++
> lib/Makefile | 2 ++
> lib/irqtime.c | 12 ++++++++++++
> 7 files changed, 35 insertions(+), 14 deletions(-)
> create mode 100644 lib/irqtime.c
>
> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> index 5416c7c..56d2a9d 100644
> --- a/arch/arm/kernel/sched_clock.c
> +++ b/arch/arm/kernel/sched_clock.c
> @@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
> if (read_sched_clock == jiffy_sched_clock_read)
> setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
>
> + if (!no_sched_irq_time)
> + enable_sched_clock_irqtime();
Why are you placing this here? sched_clock is available from the point
that it's registered, which should be before the first sched_clock()
call.
> +config IRQ_TIME_ACCOUNTING
> + bool "Fine granularity task level IRQ time accounting"
> + depends on (X86 || (ARM && HAVE_SCHED_CLOCK))
Even though it's not bad here, please get out of the habbit of throwing
unnecessary parens into the mix. It can make stuff more difficult to
read and therefore confirm correctness. (I've spent many a time
rewriting if() statements because of paren overuse.)
This could have been written:
depends on X86 || (ARM && HAVE_SCHED_CLOCK)
However, ARM will always have HAVE_SCHED_CLOCK after the next merge window,
so this can become a much simpler:
depends on X86 || ARM
Apart from these two points, the rest of the patch looks fine to me but
the ultimate decision about its acceptability is up to other people.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
2012-02-08 13:18 ` Russell King - ARM Linux
@ 2012-02-08 15:15 ` Dmitry Antipov
2012-02-08 15:24 ` Russell King - ARM Linux
2012-02-09 2:51 ` Yong Zhang
1 sibling, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-08 15:15 UTC (permalink / raw)
To: Russell King - ARM Linux
Cc: Rusty Russell, Ingo Molnar, Venki Pallipadi, linaro-dev,
linux-kernel, linux-arm-kernel
On 02/08/2012 05:18 AM, Russell King - ARM Linux wrote:
>> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
>> index 5416c7c..56d2a9d 100644
>> --- a/arch/arm/kernel/sched_clock.c
>> +++ b/arch/arm/kernel/sched_clock.c
>> @@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
>> if (read_sched_clock == jiffy_sched_clock_read)
>> setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
>>
>> + if (!no_sched_irq_time)
>> + enable_sched_clock_irqtime();
>
> Why are you placing this here? sched_clock is available from the point
> that it's registered, which should be before the first sched_clock()
> call.
This is just because I'm thinking about:
if (read_sched_clock == jiffy_sched_clock_read)
setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
else if (!no_sched_irq_time)
enable_sched_clock_irqtime();
I suppose that "fine granularity task irq time accounting"
makes no sense if sched_clock() granularity is poor.
> This could have been written:
>
> depends on X86 || (ARM&& HAVE_SCHED_CLOCK)
>
> However, ARM will always have HAVE_SCHED_CLOCK after the next merge window,
> so this can become a much simpler:
>
> depends on X86 || ARM
OK.
Dmitry
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
2012-02-08 15:15 ` Dmitry Antipov
@ 2012-02-08 15:24 ` Russell King - ARM Linux
0 siblings, 0 replies; 19+ messages in thread
From: Russell King - ARM Linux @ 2012-02-08 15:24 UTC (permalink / raw)
To: Dmitry Antipov
Cc: Rusty Russell, Ingo Molnar, Venki Pallipadi, linaro-dev,
linux-kernel, linux-arm-kernel
On Wed, Feb 08, 2012 at 07:15:26AM -0800, Dmitry Antipov wrote:
> On 02/08/2012 05:18 AM, Russell King - ARM Linux wrote:
>> Why are you placing this here? sched_clock is available from the point
>> that it's registered, which should be before the first sched_clock()
>> call.
>
> This is just because I'm thinking about:
>
> if (read_sched_clock == jiffy_sched_clock_read)
> setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
> else if (!no_sched_irq_time)
> enable_sched_clock_irqtime();
>
> I suppose that "fine granularity task irq time accounting"
> makes no sense if sched_clock() granularity is poor.
Let me put it a different way - is there a reason not to do this in
setup_sched_clock() so that it becomes available as soon as sched_clock()
has been initialized by a platform?
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
2012-02-08 13:18 ` Russell King - ARM Linux
2012-02-08 15:15 ` Dmitry Antipov
@ 2012-02-09 2:51 ` Yong Zhang
1 sibling, 0 replies; 19+ messages in thread
From: Yong Zhang @ 2012-02-09 2:51 UTC (permalink / raw)
To: Russell King - ARM Linux
Cc: Dmitry Antipov, Rusty Russell, Ingo Molnar, Venki Pallipadi,
linaro-dev, patches, x86, linux-kernel, linux-arm-kernel
On Wed, Feb 08, 2012 at 01:18:33PM +0000, Russell King - ARM Linux wrote:
> On Wed, Feb 08, 2012 at 04:48:34AM -0800, Dmitry Antipov wrote:
> > Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
> > ARM, move "noirqtime=" option to common debugging code.
> > For a bit of backward compatibility, "tsc=noirqtime"
> > is preserved, but issues a warning.
> >
> > Suggested-by: Venki Pallipadi <venki@google.com>
> > Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
> > ---
> > arch/arm/kernel/sched_clock.c | 3 +++
> > arch/x86/Kconfig | 11 -----------
> > arch/x86/kernel/tsc.c | 7 ++++---
> > include/linux/sched.h | 2 ++
> > lib/Kconfig.debug | 12 ++++++++++++
> > lib/Makefile | 2 ++
> > lib/irqtime.c | 12 ++++++++++++
> > 7 files changed, 35 insertions(+), 14 deletions(-)
> > create mode 100644 lib/irqtime.c
> >
> > diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> > index 5416c7c..56d2a9d 100644
> > --- a/arch/arm/kernel/sched_clock.c
> > +++ b/arch/arm/kernel/sched_clock.c
> > @@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
> > if (read_sched_clock == jiffy_sched_clock_read)
> > setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
> >
> > + if (!no_sched_irq_time)
> > + enable_sched_clock_irqtime();
>
> Why are you placing this here? sched_clock is available from the point
> that it's registered, which should be before the first sched_clock()
> call.
>
> > +config IRQ_TIME_ACCOUNTING
> > + bool "Fine granularity task level IRQ time accounting"
> > + depends on (X86 || (ARM && HAVE_SCHED_CLOCK))
>
> Even though it's not bad here, please get out of the habbit of throwing
> unnecessary parens into the mix. It can make stuff more difficult to
> read and therefore confirm correctness. (I've spent many a time
> rewriting if() statements because of paren overuse.)
>
> This could have been written:
>
> depends on X86 || (ARM && HAVE_SCHED_CLOCK)
>
> However, ARM will always have HAVE_SCHED_CLOCK after the next merge window,
> so this can become a much simpler:
>
> depends on X86 || ARM
Maybe we can hand the depend-things to every ARCH, say let ARCH provides
HAVE_IRQ_TIME_ACCOUNTING. Thus we can make IRQ_TIME_ACCOUNTING
denpend on HAVE_IRQ_TIME_ACCOUNTING.
Thanks,
Yong
>
> Apart from these two points, the rest of the patch looks fine to me but
> the ultimate decision about its acceptability is up to other people.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
Only stand for myself
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
2012-02-08 12:48 Dmitry Antipov
2012-02-08 13:18 ` Russell King - ARM Linux
@ 2012-02-09 2:48 ` Yong Zhang
1 sibling, 0 replies; 19+ messages in thread
From: Yong Zhang @ 2012-02-09 2:48 UTC (permalink / raw)
To: Dmitry Antipov
Cc: Rusty Russell, Ingo Molnar, Venki Pallipadi, linux-kernel, x86,
linux-arm-kernel, linaro-dev, patches, Peter Zijlstra
Cc'ing PeterZ.
On Wed, Feb 08, 2012 at 04:48:34AM -0800, Dmitry Antipov wrote:
> Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
> ARM, move "noirqtime=" option to common debugging code.
> For a bit of backward compatibility, "tsc=noirqtime"
> is preserved, but issues a warning.
>
> Suggested-by: Venki Pallipadi <venki@google.com>
> Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
> ---
> lib/Kconfig.debug | 12 ++++++++++++
> lib/Makefile | 2 ++
> lib/irqtime.c | 12 ++++++++++++
Do we need a single file for this?
You know this feature is sched related, why not just move it
to kernel/sched/core.c?
Thanks,
Yong
> 7 files changed, 35 insertions(+), 14 deletions(-)
> create mode 100644 lib/irqtime.c
>
> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> index 5416c7c..56d2a9d 100644
> --- a/arch/arm/kernel/sched_clock.c
> +++ b/arch/arm/kernel/sched_clock.c
> @@ -162,5 +162,8 @@ void __init sched_clock_postinit(void)
> if (read_sched_clock == jiffy_sched_clock_read)
> setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
>
> + if (!no_sched_irq_time)
> + enable_sched_clock_irqtime();
> +
> sched_clock_poll(sched_clock_timer.data);
> }
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 5bed94e..4759676 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -805,17 +805,6 @@ config SCHED_MC
> making when dealing with multi-core CPU chips at a cost of slightly
> increased overhead in some places. If unsure say N here.
>
> -config IRQ_TIME_ACCOUNTING
> - bool "Fine granularity task level IRQ time accounting"
> - default n
> - ---help---
> - Select this option to enable fine granularity task irq time
> - accounting. This is done by reading a timestamp on each
> - transitions between softirq and hardirq state, so there can be a
> - small performance impact.
> -
> - If in doubt, say N here.
> -
> source "kernel/Kconfig.preempt"
>
> config X86_UP_APIC
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index a62c201..70510a3 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
>
> __setup("notsc", notsc_setup);
>
> -static int no_sched_irq_time;
> -
> static int __init tsc_setup(char *str)
> {
> if (!strcmp(str, "reliable"))
> tsc_clocksource_reliable = 1;
> - if (!strncmp(str, "noirqtime", 9))
> + if (!strncmp(str, "noirqtime", 9)) {
> + printk(KERN_WARNING "tsc: tsc=noirqtime is "
> + "obsolete, use noirqtime instead\n");
> no_sched_irq_time = 1;
> + }
> return 1;
> }
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 7d379a6..b3575b5 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
> * The reason for this explicit opt-in is not to have perf penalty with
> * slow sched_clocks.
> */
> +extern int no_sched_irq_time;
> extern void enable_sched_clock_irqtime(void);
> extern void disable_sched_clock_irqtime(void);
> #else
> +#define no_sched_irq_time 1
> static inline void enable_sched_clock_irqtime(void) {}
> static inline void disable_sched_clock_irqtime(void) {}
> #endif
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 8745ac7..48be210 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -299,6 +299,18 @@ config SCHEDSTATS
> application, you can say N to avoid the very slight overhead
> this adds.
>
> +config IRQ_TIME_ACCOUNTING
> + bool "Fine granularity task level IRQ time accounting"
> + depends on (X86 || (ARM && HAVE_SCHED_CLOCK))
> + default n
> + ---help---
> + Select this option to enable fine granularity task irq time
> + accounting. This is done by reading a timestamp on each
> + transitions between softirq and hardirq state, so there can be a
> + small performance impact.
> +
> + If in doubt, say N here.
> +
> config TIMER_STATS
> bool "Collect kernel timers statistics"
> depends on DEBUG_KERNEL && PROC_FS
> diff --git a/lib/Makefile b/lib/Makefile
> index 18515f0..44d67d4 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
> obj-$(CONFIG_DEBUG_LIST) += list_debug.o
> obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
>
> +obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
> +
> ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
> lib-y += dec_and_lock.o
> endif
> diff --git a/lib/irqtime.c b/lib/irqtime.c
> new file mode 100644
> index 0000000..10d440d
> --- /dev/null
> +++ b/lib/irqtime.c
> @@ -0,0 +1,12 @@
> +#include <linux/kernel.h>
> +#include <linux/sched.h>
> +
> +int no_sched_irq_time;
> +
> +static int __init irqtime_setup(char *str)
> +{
> + no_sched_irq_time = 1;
> + return 1;
> +}
> +
> +__setup("noirqtime", irqtime_setup);
> --
> 1.7.7.6
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
Only stand for myself
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-08 16:08 Dmitry Antipov
2012-02-08 22:50 ` Venki Pallipadi
0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-08 16:08 UTC (permalink / raw)
To: Russell King
Cc: Rusty Russell, Ingo Molnar, Venki Pallipadi, linux-kernel, x86,
linux-arm-kernel, linaro-dev, patches, Dmitry Antipov
Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, X86-specific option
"tsc=noirqtime" is preserved, but issues a warning.
Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Suggested-by: Venki Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
arch/arm/kernel/sched_clock.c | 3 +++
arch/x86/Kconfig | 11 -----------
arch/x86/kernel/tsc.c | 7 ++++---
include/linux/sched.h | 2 ++
lib/Kconfig.debug | 12 ++++++++++++
lib/Makefile | 2 ++
lib/irqtime.c | 12 ++++++++++++
7 files changed, 35 insertions(+), 14 deletions(-)
create mode 100644 lib/irqtime.c
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..961bd2d 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -144,6 +144,9 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
*/
cd.epoch_ns = 0;
+ if (!no_sched_irq_time)
+ enable_sched_clock_irqtime();
+
pr_debug("Registered %pF as sched_clock source\n", read);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
-config IRQ_TIME_ACCOUNTING
- bool "Fine granularity task level IRQ time accounting"
- default n
- ---help---
- Select this option to enable fine granularity task irq time
- accounting. This is done by reading a timestamp on each
- transitions between softirq and hardirq state, so there can be a
- small performance impact.
-
- If in doubt, say N here.
-
source "kernel/Kconfig.preempt"
config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..70510a3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
-static int no_sched_irq_time;
-
static int __init tsc_setup(char *str)
{
if (!strcmp(str, "reliable"))
tsc_clocksource_reliable = 1;
- if (!strncmp(str, "noirqtime", 9))
+ if (!strncmp(str, "noirqtime", 9)) {
+ printk(KERN_WARNING "tsc: tsc=noirqtime is "
+ "obsolete, use noirqtime instead\n");
no_sched_irq_time = 1;
+ }
return 1;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..b3575b5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
* The reason for this explicit opt-in is not to have perf penalty with
* slow sched_clocks.
*/
+extern int no_sched_irq_time;
extern void enable_sched_clock_irqtime(void);
extern void disable_sched_clock_irqtime(void);
#else
+#define no_sched_irq_time 1
static inline void enable_sched_clock_irqtime(void) {}
static inline void disable_sched_clock_irqtime(void) {}
#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..236e814 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
application, you can say N to avoid the very slight overhead
this adds.
+config IRQ_TIME_ACCOUNTING
+ bool "Fine granularity task level IRQ time accounting"
+ depends on X86 || ARM
+ default n
+ ---help---
+ Select this option to enable fine granularity task irq time
+ accounting. This is done by reading a timestamp on each
+ transitions between softirq and hardirq state, so there can be a
+ small performance impact.
+
+ If in doubt, say N here.
+
config TIMER_STATS
bool "Collect kernel timers statistics"
depends on DEBUG_KERNEL && PROC_FS
diff --git a/lib/Makefile b/lib/Makefile
index 18515f0..44d67d4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
obj-$(CONFIG_DEBUG_LIST) += list_debug.o
obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
+obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
+
ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
lib-y += dec_and_lock.o
endif
diff --git a/lib/irqtime.c b/lib/irqtime.c
new file mode 100644
index 0000000..10d440d
--- /dev/null
+++ b/lib/irqtime.c
@@ -0,0 +1,12 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+int no_sched_irq_time;
+
+static int __init irqtime_setup(char *str)
+{
+ no_sched_irq_time = 1;
+ return 1;
+}
+
+__setup("noirqtime", irqtime_setup);
--
1.7.7.6
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
2012-02-08 16:08 Dmitry Antipov
@ 2012-02-08 22:50 ` Venki Pallipadi
0 siblings, 0 replies; 19+ messages in thread
From: Venki Pallipadi @ 2012-02-08 22:50 UTC (permalink / raw)
To: Dmitry Antipov
Cc: Russell King, Rusty Russell, Ingo Molnar, linux-kernel, x86,
linux-arm-kernel, linaro-dev, patches
On Wed, Feb 8, 2012 at 8:08 AM, Dmitry Antipov
<dmitry.antipov@linaro.org> wrote:
> Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
> ARM, move "noirqtime=" option to common debugging code.
> For a bit of backward compatibility, X86-specific option
> "tsc=noirqtime" is preserved, but issues a warning.
>
> Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
> Suggested-by: Venki Pallipadi <venki@google.com>
> Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
> ---
> arch/arm/kernel/sched_clock.c | 3 +++
> arch/x86/Kconfig | 11 -----------
> arch/x86/kernel/tsc.c | 7 ++++---
> include/linux/sched.h | 2 ++
> lib/Kconfig.debug | 12 ++++++++++++
> lib/Makefile | 2 ++
> lib/irqtime.c | 12 ++++++++++++
> 7 files changed, 35 insertions(+), 14 deletions(-)
> create mode 100644 lib/irqtime.c
>
> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> index 5416c7c..961bd2d 100644
> --- a/arch/arm/kernel/sched_clock.c
> +++ b/arch/arm/kernel/sched_clock.c
> @@ -144,6 +144,9 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
> */
> cd.epoch_ns = 0;
>
> + if (!no_sched_irq_time)
> + enable_sched_clock_irqtime();
> +
> pr_debug("Registered %pF as sched_clock source\n", read);
> }
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 5bed94e..4759676 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -805,17 +805,6 @@ config SCHED_MC
> making when dealing with multi-core CPU chips at a cost of slightly
> increased overhead in some places. If unsure say N here.
>
> -config IRQ_TIME_ACCOUNTING
> - bool "Fine granularity task level IRQ time accounting"
> - default n
> - ---help---
> - Select this option to enable fine granularity task irq time
> - accounting. This is done by reading a timestamp on each
> - transitions between softirq and hardirq state, so there can be a
> - small performance impact.
> -
> - If in doubt, say N here.
> -
> source "kernel/Kconfig.preempt"
>
> config X86_UP_APIC
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index a62c201..70510a3 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
>
> __setup("notsc", notsc_setup);
>
> -static int no_sched_irq_time;
> -
> static int __init tsc_setup(char *str)
> {
> if (!strcmp(str, "reliable"))
> tsc_clocksource_reliable = 1;
> - if (!strncmp(str, "noirqtime", 9))
> + if (!strncmp(str, "noirqtime", 9)) {
> + printk(KERN_WARNING "tsc: tsc=noirqtime is "
> + "obsolete, use noirqtime instead\n");
> no_sched_irq_time = 1;
> + }
> return 1;
> }
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 7d379a6..b3575b5 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
> * The reason for this explicit opt-in is not to have perf penalty with
> * slow sched_clocks.
> */
> +extern int no_sched_irq_time;
> extern void enable_sched_clock_irqtime(void);
> extern void disable_sched_clock_irqtime(void);
> #else
> +#define no_sched_irq_time 1
> static inline void enable_sched_clock_irqtime(void) {}
> static inline void disable_sched_clock_irqtime(void) {}
> #endif
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 8745ac7..236e814 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -299,6 +299,18 @@ config SCHEDSTATS
> application, you can say N to avoid the very slight overhead
> this adds.
>
> +config IRQ_TIME_ACCOUNTING
> + bool "Fine granularity task level IRQ time accounting"
> + depends on X86 || ARM
> + default n
> + ---help---
> + Select this option to enable fine granularity task irq time
> + accounting. This is done by reading a timestamp on each
> + transitions between softirq and hardirq state, so there can be a
> + small performance impact.
> +
> + If in doubt, say N here.
> +
> config TIMER_STATS
> bool "Collect kernel timers statistics"
> depends on DEBUG_KERNEL && PROC_FS
> diff --git a/lib/Makefile b/lib/Makefile
> index 18515f0..44d67d4 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
> obj-$(CONFIG_DEBUG_LIST) += list_debug.o
> obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
>
> +obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
> +
> ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
> lib-y += dec_and_lock.o
> endif
> diff --git a/lib/irqtime.c b/lib/irqtime.c
> new file mode 100644
> index 0000000..10d440d
> --- /dev/null
> +++ b/lib/irqtime.c
> @@ -0,0 +1,12 @@
> +#include <linux/kernel.h>
> +#include <linux/sched.h>
> +
> +int no_sched_irq_time;
> +
> +static int __init irqtime_setup(char *str)
> +{
> + no_sched_irq_time = 1;
> + return 1;
> +}
> +
> +__setup("noirqtime", irqtime_setup);
> --
One more nit-pick. We need corresponding change in
Documentation/kernel-parameters.txt to deprecate tsc=irqtime and add
this new param.
> 1.7.7.6
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-08 23:58 Dmitry Antipov
0 siblings, 0 replies; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-08 23:58 UTC (permalink / raw)
To: Venki Pallipadi
Cc: Russell King, Rusty Russell, Ingo Molnar, linux-kernel, x86,
linux-arm-kernel, linaro-dev, patches, Dmitry Antipov
Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, X86-specific option
"tsc=noirqtime" is preserved, but issues a warning.
Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Suggested-by: Venki Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
Documentation/kernel-parameters.txt | 9 +++++----
arch/arm/kernel/sched_clock.c | 3 +++
arch/x86/Kconfig | 11 -----------
arch/x86/kernel/tsc.c | 7 ++++---
include/linux/sched.h | 2 ++
lib/Kconfig.debug | 12 ++++++++++++
lib/Makefile | 2 ++
lib/irqtime.c | 12 ++++++++++++
8 files changed, 40 insertions(+), 18 deletions(-)
create mode 100644 lib/irqtime.c
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 033d4e6..b64a13f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1719,6 +1719,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
noautogroup Disable scheduler automatic task group creation.
+ noirqtime [X86,ARM] Used to run time disable IRQ_TIME_ACCOUNTING,
+ should give a negligible performance improvement.
+
nobats [PPC] Do not use BATs for mapping kernel lowmem
on "Classic" PPC cores.
@@ -2636,10 +2639,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
as the stability checks done at bootup. Used to enable
high-resolution timer mode on older hardware, and in
virtualized environment.
- [x86] noirqtime: Do not use TSC to do irq accounting.
- Used to run time disable IRQ_TIME_ACCOUNTING on any
- platforms where RDTSC is slow and this accounting
- can add overhead.
+ [x86] noirqtime: obsoleted by "noirqtime" generic option,
+ see it's documentation for details.
turbografx.map[2|3]= [HW,JOY]
TurboGraFX parallel port interface
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..961bd2d 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -144,6 +144,9 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
*/
cd.epoch_ns = 0;
+ if (!no_sched_irq_time)
+ enable_sched_clock_irqtime();
+
pr_debug("Registered %pF as sched_clock source\n", read);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
-config IRQ_TIME_ACCOUNTING
- bool "Fine granularity task level IRQ time accounting"
- default n
- ---help---
- Select this option to enable fine granularity task irq time
- accounting. This is done by reading a timestamp on each
- transitions between softirq and hardirq state, so there can be a
- small performance impact.
-
- If in doubt, say N here.
-
source "kernel/Kconfig.preempt"
config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..70510a3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
-static int no_sched_irq_time;
-
static int __init tsc_setup(char *str)
{
if (!strcmp(str, "reliable"))
tsc_clocksource_reliable = 1;
- if (!strncmp(str, "noirqtime", 9))
+ if (!strncmp(str, "noirqtime", 9)) {
+ printk(KERN_WARNING "tsc: tsc=noirqtime is "
+ "obsolete, use noirqtime instead\n");
no_sched_irq_time = 1;
+ }
return 1;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..b3575b5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1966,9 +1966,11 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
* The reason for this explicit opt-in is not to have perf penalty with
* slow sched_clocks.
*/
+extern int no_sched_irq_time;
extern void enable_sched_clock_irqtime(void);
extern void disable_sched_clock_irqtime(void);
#else
+#define no_sched_irq_time 1
static inline void enable_sched_clock_irqtime(void) {}
static inline void disable_sched_clock_irqtime(void) {}
#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..236e814 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
application, you can say N to avoid the very slight overhead
this adds.
+config IRQ_TIME_ACCOUNTING
+ bool "Fine granularity task level IRQ time accounting"
+ depends on X86 || ARM
+ default n
+ ---help---
+ Select this option to enable fine granularity task irq time
+ accounting. This is done by reading a timestamp on each
+ transitions between softirq and hardirq state, so there can be a
+ small performance impact.
+
+ If in doubt, say N here.
+
config TIMER_STATS
bool "Collect kernel timers statistics"
depends on DEBUG_KERNEL && PROC_FS
diff --git a/lib/Makefile b/lib/Makefile
index 18515f0..44d67d4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
obj-$(CONFIG_DEBUG_LIST) += list_debug.o
obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
+obj-$(CONFIG_IRQ_TIME_ACCOUNTING) += irqtime.o
+
ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
lib-y += dec_and_lock.o
endif
diff --git a/lib/irqtime.c b/lib/irqtime.c
new file mode 100644
index 0000000..10d440d
--- /dev/null
+++ b/lib/irqtime.c
@@ -0,0 +1,12 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+int no_sched_irq_time;
+
+static int __init irqtime_setup(char *str)
+{
+ no_sched_irq_time = 1;
+ return 1;
+}
+
+__setup("noirqtime", irqtime_setup);
--
1.7.7.6
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-09 17:25 Dmitry Antipov
2012-02-10 22:28 ` Venki Pallipadi
0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-09 17:25 UTC (permalink / raw)
To: linux-kernel
Cc: Russell King, Rusty Russell, Ingo Molnar, Yong Zhang,
Venki Pallipadi, x86, linux-arm-kernel, linaro-dev, patches,
Dmitry Antipov
Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, X86-specific option
"tsc=noirqtime" is preserved, but issues a warning.
Suggested-by: Yong Zhang <yong.zhang0@gmail.com>
Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Suggested-by: Venki Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
Documentation/kernel-parameters.txt | 9 +++++----
arch/arm/kernel/sched_clock.c | 2 ++
arch/x86/Kconfig | 11 -----------
arch/x86/kernel/tsc.c | 12 ++++++------
include/linux/sched.h | 17 ++++++++++-------
kernel/sched/core.c | 20 +++++++++++---------
lib/Kconfig.debug | 12 ++++++++++++
7 files changed, 46 insertions(+), 37 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 033d4e6..666d20e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1790,6 +1790,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
noirqdebug [X86-32] Disables the code which attempts to detect and
disable unhandled interrupt sources.
+ noirqtime [X86,ARM] Used to run time disable IRQ_TIME_ACCOUNTING,
+ should give a negligible performance improvement.
+
no_timer_check [X86,APIC] Disables the code which tests for
broken timer IRQ sources.
@@ -2636,10 +2639,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
as the stability checks done at bootup. Used to enable
high-resolution timer mode on older hardware, and in
virtualized environment.
- [x86] noirqtime: Do not use TSC to do irq accounting.
- Used to run time disable IRQ_TIME_ACCOUNTING on any
- platforms where RDTSC is slow and this accounting
- can add overhead.
+ [x86] noirqtime: obsoleted by "noirqtime" generic option,
+ see it's documentation for details.
turbografx.map[2|3]= [HW,JOY]
TurboGraFX parallel port interface
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..30b5f89 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -144,6 +144,8 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
*/
cd.epoch_ns = 0;
+ enable_sched_clock_irqtime();
+
pr_debug("Registered %pF as sched_clock source\n", read);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
-config IRQ_TIME_ACCOUNTING
- bool "Fine granularity task level IRQ time accounting"
- default n
- ---help---
- Select this option to enable fine granularity task irq time
- accounting. This is done by reading a timestamp on each
- transitions between softirq and hardirq state, so there can be a
- small performance impact.
-
- If in doubt, say N here.
-
source "kernel/Kconfig.preempt"
config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..f1b2b63 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
-static int no_sched_irq_time;
-
static int __init tsc_setup(char *str)
{
if (!strcmp(str, "reliable"))
tsc_clocksource_reliable = 1;
- if (!strncmp(str, "noirqtime", 9))
- no_sched_irq_time = 1;
+ if (!strncmp(str, "noirqtime", 9)) {
+ printk(KERN_WARNING "tsc: tsc=noirqtime is "
+ "obsolete, use noirqtime instead\n");
+ disable_sched_clock_irqtime();
+ }
return 1;
}
@@ -978,8 +979,7 @@ void __init tsc_init(void)
/* now allow native_sched_clock() to use rdtsc */
tsc_disabled = 0;
- if (!no_sched_irq_time)
- enable_sched_clock_irqtime();
+ enable_sched_clock_irqtime();
lpj = ((u64)tsc_khz * 1000);
do_div(lpj, HZ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..9b13f79 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1961,13 +1961,16 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-/*
- * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
- * The reason for this explicit opt-in is not to have perf penalty with
- * slow sched_clocks.
- */
-extern void enable_sched_clock_irqtime(void);
-extern void disable_sched_clock_irqtime(void);
+extern int sched_clock_irqtime;
+static inline void enable_sched_clock_irqtime(void)
+{
+ if (sched_clock_irqtime == -1)
+ sched_clock_irqtime = 1;
+}
+static inline void disable_sched_clock_irqtime(void)
+{
+ sched_clock_irqtime = 0;
+}
#else
static inline void enable_sched_clock_irqtime(void) {}
static inline void disable_sched_clock_irqtime(void) {}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5255c9d..a7ec043 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -757,18 +757,20 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
static DEFINE_PER_CPU(u64, cpu_softirq_time);
static DEFINE_PER_CPU(u64, irq_start_time);
-static int sched_clock_irqtime;
-void enable_sched_clock_irqtime(void)
-{
- sched_clock_irqtime = 1;
-}
+/* -1 if not initialized, 0 if disabled with "noirqtime" kernel option
+ * or after unstable clock was detected, 1 if enabled and active.
+ */
+int sched_clock_irqtime = -1;
-void disable_sched_clock_irqtime(void)
+static int __init irqtime_setup(char *str)
{
sched_clock_irqtime = 0;
+ return 1;
}
+__setup("noirqtime", irqtime_setup);
+
#ifndef CONFIG_64BIT
static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
@@ -822,7 +824,7 @@ void account_system_vtime(struct task_struct *curr)
s64 delta;
int cpu;
- if (!sched_clock_irqtime)
+ if (sched_clock_irqtime < 1)
return;
local_irq_save(flags);
@@ -2853,7 +2855,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
- if (sched_clock_irqtime) {
+ if (sched_clock_irqtime > 0) {
irqtime_account_process_tick(p, user_tick, rq);
return;
}
@@ -2887,7 +2889,7 @@ void account_steal_ticks(unsigned long ticks)
void account_idle_ticks(unsigned long ticks)
{
- if (sched_clock_irqtime) {
+ if (sched_clock_irqtime > 0) {
irqtime_account_idle_ticks(ticks);
return;
}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..236e814 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
application, you can say N to avoid the very slight overhead
this adds.
+config IRQ_TIME_ACCOUNTING
+ bool "Fine granularity task level IRQ time accounting"
+ depends on X86 || ARM
+ default n
+ ---help---
+ Select this option to enable fine granularity task irq time
+ accounting. This is done by reading a timestamp on each
+ transitions between softirq and hardirq state, so there can be a
+ small performance impact.
+
+ If in doubt, say N here.
+
config TIMER_STATS
bool "Collect kernel timers statistics"
depends on DEBUG_KERNEL && PROC_FS
--
1.7.7.6
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
2012-02-09 17:25 Dmitry Antipov
@ 2012-02-10 22:28 ` Venki Pallipadi
0 siblings, 0 replies; 19+ messages in thread
From: Venki Pallipadi @ 2012-02-10 22:28 UTC (permalink / raw)
To: Dmitry Antipov
Cc: linux-kernel, Russell King, Rusty Russell, Ingo Molnar,
Yong Zhang, x86, linux-arm-kernel, linaro-dev, patches
> + noirqtime [X86,ARM] Used to run time disable IRQ_TIME_ACCOUNTING,
> + should give a negligible performance improvement.
Can you reword the above "negligible performance improvement" above to
something
> + noirqtime [X86,ARM] Run time disables IRQ_TIME_ACCOUNTING and
> + eliminates the timestamping on irq/softirq entry/exit.
Otherwise,
Acked-by: Venkatesh Pallipadi <venki@google.com>
On Thu, Feb 9, 2012 at 9:25 AM, Dmitry Antipov
<dmitry.antipov@linaro.org> wrote:
> Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
> ARM, move "noirqtime=" option to common debugging code.
> For a bit of backward compatibility, X86-specific option
> "tsc=noirqtime" is preserved, but issues a warning.
>
> Suggested-by: Yong Zhang <yong.zhang0@gmail.com>
> Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
> Suggested-by: Venki Pallipadi <venki@google.com>
> Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
> ---
> Documentation/kernel-parameters.txt | 9 +++++----
> arch/arm/kernel/sched_clock.c | 2 ++
> arch/x86/Kconfig | 11 -----------
> arch/x86/kernel/tsc.c | 12 ++++++------
> include/linux/sched.h | 17 ++++++++++-------
> kernel/sched/core.c | 20 +++++++++++---------
> lib/Kconfig.debug | 12 ++++++++++++
> 7 files changed, 46 insertions(+), 37 deletions(-)
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 033d4e6..666d20e 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -1790,6 +1790,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
> noirqdebug [X86-32] Disables the code which attempts to detect and
> disable unhandled interrupt sources.
>
> + noirqtime [X86,ARM] Used to run time disable IRQ_TIME_ACCOUNTING,
> + should give a negligible performance improvement.
> +
> no_timer_check [X86,APIC] Disables the code which tests for
> broken timer IRQ sources.
>
> @@ -2636,10 +2639,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
> as the stability checks done at bootup. Used to enable
> high-resolution timer mode on older hardware, and in
> virtualized environment.
> - [x86] noirqtime: Do not use TSC to do irq accounting.
> - Used to run time disable IRQ_TIME_ACCOUNTING on any
> - platforms where RDTSC is slow and this accounting
> - can add overhead.
> + [x86] noirqtime: obsoleted by "noirqtime" generic option,
> + see it's documentation for details.
>
> turbografx.map[2|3]= [HW,JOY]
> TurboGraFX parallel port interface
> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> index 5416c7c..30b5f89 100644
> --- a/arch/arm/kernel/sched_clock.c
> +++ b/arch/arm/kernel/sched_clock.c
> @@ -144,6 +144,8 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
> */
> cd.epoch_ns = 0;
>
> + enable_sched_clock_irqtime();
> +
> pr_debug("Registered %pF as sched_clock source\n", read);
> }
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 5bed94e..4759676 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -805,17 +805,6 @@ config SCHED_MC
> making when dealing with multi-core CPU chips at a cost of slightly
> increased overhead in some places. If unsure say N here.
>
> -config IRQ_TIME_ACCOUNTING
> - bool "Fine granularity task level IRQ time accounting"
> - default n
> - ---help---
> - Select this option to enable fine granularity task irq time
> - accounting. This is done by reading a timestamp on each
> - transitions between softirq and hardirq state, so there can be a
> - small performance impact.
> -
> - If in doubt, say N here.
> -
> source "kernel/Kconfig.preempt"
>
> config X86_UP_APIC
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index a62c201..f1b2b63 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
>
> __setup("notsc", notsc_setup);
>
> -static int no_sched_irq_time;
> -
> static int __init tsc_setup(char *str)
> {
> if (!strcmp(str, "reliable"))
> tsc_clocksource_reliable = 1;
> - if (!strncmp(str, "noirqtime", 9))
> - no_sched_irq_time = 1;
> + if (!strncmp(str, "noirqtime", 9)) {
> + printk(KERN_WARNING "tsc: tsc=noirqtime is "
> + "obsolete, use noirqtime instead\n");
> + disable_sched_clock_irqtime();
> + }
> return 1;
> }
>
> @@ -978,8 +979,7 @@ void __init tsc_init(void)
> /* now allow native_sched_clock() to use rdtsc */
> tsc_disabled = 0;
>
> - if (!no_sched_irq_time)
> - enable_sched_clock_irqtime();
> + enable_sched_clock_irqtime();
>
> lpj = ((u64)tsc_khz * 1000);
> do_div(lpj, HZ);
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 7d379a6..9b13f79 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1961,13 +1961,16 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
> #endif
>
> #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> -/*
> - * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
> - * The reason for this explicit opt-in is not to have perf penalty with
> - * slow sched_clocks.
> - */
> -extern void enable_sched_clock_irqtime(void);
> -extern void disable_sched_clock_irqtime(void);
> +extern int sched_clock_irqtime;
> +static inline void enable_sched_clock_irqtime(void)
> +{
> + if (sched_clock_irqtime == -1)
> + sched_clock_irqtime = 1;
> +}
> +static inline void disable_sched_clock_irqtime(void)
> +{
> + sched_clock_irqtime = 0;
> +}
> #else
> static inline void enable_sched_clock_irqtime(void) {}
> static inline void disable_sched_clock_irqtime(void) {}
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 5255c9d..a7ec043 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -757,18 +757,20 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
> static DEFINE_PER_CPU(u64, cpu_softirq_time);
>
> static DEFINE_PER_CPU(u64, irq_start_time);
> -static int sched_clock_irqtime;
>
> -void enable_sched_clock_irqtime(void)
> -{
> - sched_clock_irqtime = 1;
> -}
> +/* -1 if not initialized, 0 if disabled with "noirqtime" kernel option
> + * or after unstable clock was detected, 1 if enabled and active.
> + */
> +int sched_clock_irqtime = -1;
>
> -void disable_sched_clock_irqtime(void)
> +static int __init irqtime_setup(char *str)
> {
> sched_clock_irqtime = 0;
> + return 1;
> }
>
> +__setup("noirqtime", irqtime_setup);
> +
> #ifndef CONFIG_64BIT
> static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
>
> @@ -822,7 +824,7 @@ void account_system_vtime(struct task_struct *curr)
> s64 delta;
> int cpu;
>
> - if (!sched_clock_irqtime)
> + if (sched_clock_irqtime < 1)
> return;
>
> local_irq_save(flags);
> @@ -2853,7 +2855,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
> cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
> struct rq *rq = this_rq();
>
> - if (sched_clock_irqtime) {
> + if (sched_clock_irqtime > 0) {
> irqtime_account_process_tick(p, user_tick, rq);
> return;
> }
> @@ -2887,7 +2889,7 @@ void account_steal_ticks(unsigned long ticks)
> void account_idle_ticks(unsigned long ticks)
> {
>
> - if (sched_clock_irqtime) {
> + if (sched_clock_irqtime > 0) {
> irqtime_account_idle_ticks(ticks);
> return;
> }
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 8745ac7..236e814 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -299,6 +299,18 @@ config SCHEDSTATS
> application, you can say N to avoid the very slight overhead
> this adds.
>
> +config IRQ_TIME_ACCOUNTING
> + bool "Fine granularity task level IRQ time accounting"
> + depends on X86 || ARM
> + default n
> + ---help---
> + Select this option to enable fine granularity task irq time
> + accounting. This is done by reading a timestamp on each
> + transitions between softirq and hardirq state, so there can be a
> + small performance impact.
> +
> + If in doubt, say N here.
> +
> config TIMER_STATS
> bool "Collect kernel timers statistics"
> depends on DEBUG_KERNEL && PROC_FS
> --
> 1.7.7.6
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-11 0:02 Dmitry Antipov
2012-02-17 11:22 ` Ingo Molnar
0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-11 0:02 UTC (permalink / raw)
To: linux-kernel
Cc: Russell King, Rusty Russell, Ingo Molnar, Yong Zhang,
Venki Pallipadi, x86, linux-arm-kernel, linaro-dev, patches,
Dmitry Antipov
Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, X86-specific option
"tsc=noirqtime" is preserved, but issues a warning.
Suggested-by: Yong Zhang <yong.zhang0@gmail.com>
Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
Documentation/kernel-parameters.txt | 9 +++++----
arch/arm/kernel/sched_clock.c | 2 ++
arch/x86/Kconfig | 11 -----------
arch/x86/kernel/tsc.c | 12 ++++++------
include/linux/sched.h | 17 ++++++++++-------
kernel/sched/core.c | 20 +++++++++++---------
lib/Kconfig.debug | 12 ++++++++++++
7 files changed, 46 insertions(+), 37 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 033d4e6..a5da255 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1790,6 +1790,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
noirqdebug [X86-32] Disables the code which attempts to detect and
disable unhandled interrupt sources.
+ noirqtime [X86,ARM] Run time disables IRQ_TIME_ACCOUNTING and
+ eliminates the timestamping on irq/softirq entry/exit.
+
no_timer_check [X86,APIC] Disables the code which tests for
broken timer IRQ sources.
@@ -2636,10 +2639,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
as the stability checks done at bootup. Used to enable
high-resolution timer mode on older hardware, and in
virtualized environment.
- [x86] noirqtime: Do not use TSC to do irq accounting.
- Used to run time disable IRQ_TIME_ACCOUNTING on any
- platforms where RDTSC is slow and this accounting
- can add overhead.
+ [x86] noirqtime: obsoleted by "noirqtime" generic option,
+ see it's documentation for details.
turbografx.map[2|3]= [HW,JOY]
TurboGraFX parallel port interface
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..30b5f89 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -144,6 +144,8 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
*/
cd.epoch_ns = 0;
+ enable_sched_clock_irqtime();
+
pr_debug("Registered %pF as sched_clock source\n", read);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
-config IRQ_TIME_ACCOUNTING
- bool "Fine granularity task level IRQ time accounting"
- default n
- ---help---
- Select this option to enable fine granularity task irq time
- accounting. This is done by reading a timestamp on each
- transitions between softirq and hardirq state, so there can be a
- small performance impact.
-
- If in doubt, say N here.
-
source "kernel/Kconfig.preempt"
config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..f1b2b63 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
-static int no_sched_irq_time;
-
static int __init tsc_setup(char *str)
{
if (!strcmp(str, "reliable"))
tsc_clocksource_reliable = 1;
- if (!strncmp(str, "noirqtime", 9))
- no_sched_irq_time = 1;
+ if (!strncmp(str, "noirqtime", 9)) {
+ printk(KERN_WARNING "tsc: tsc=noirqtime is "
+ "obsolete, use noirqtime instead\n");
+ disable_sched_clock_irqtime();
+ }
return 1;
}
@@ -978,8 +979,7 @@ void __init tsc_init(void)
/* now allow native_sched_clock() to use rdtsc */
tsc_disabled = 0;
- if (!no_sched_irq_time)
- enable_sched_clock_irqtime();
+ enable_sched_clock_irqtime();
lpj = ((u64)tsc_khz * 1000);
do_div(lpj, HZ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..9b13f79 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1961,13 +1961,16 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-/*
- * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
- * The reason for this explicit opt-in is not to have perf penalty with
- * slow sched_clocks.
- */
-extern void enable_sched_clock_irqtime(void);
-extern void disable_sched_clock_irqtime(void);
+extern int sched_clock_irqtime;
+static inline void enable_sched_clock_irqtime(void)
+{
+ if (sched_clock_irqtime == -1)
+ sched_clock_irqtime = 1;
+}
+static inline void disable_sched_clock_irqtime(void)
+{
+ sched_clock_irqtime = 0;
+}
#else
static inline void enable_sched_clock_irqtime(void) {}
static inline void disable_sched_clock_irqtime(void) {}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5255c9d..a7ec043 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -757,18 +757,20 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
static DEFINE_PER_CPU(u64, cpu_softirq_time);
static DEFINE_PER_CPU(u64, irq_start_time);
-static int sched_clock_irqtime;
-void enable_sched_clock_irqtime(void)
-{
- sched_clock_irqtime = 1;
-}
+/* -1 if not initialized, 0 if disabled with "noirqtime" kernel option
+ * or after unstable clock was detected, 1 if enabled and active.
+ */
+int sched_clock_irqtime = -1;
-void disable_sched_clock_irqtime(void)
+static int __init irqtime_setup(char *str)
{
sched_clock_irqtime = 0;
+ return 1;
}
+__setup("noirqtime", irqtime_setup);
+
#ifndef CONFIG_64BIT
static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
@@ -822,7 +824,7 @@ void account_system_vtime(struct task_struct *curr)
s64 delta;
int cpu;
- if (!sched_clock_irqtime)
+ if (sched_clock_irqtime < 1)
return;
local_irq_save(flags);
@@ -2853,7 +2855,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
- if (sched_clock_irqtime) {
+ if (sched_clock_irqtime > 0) {
irqtime_account_process_tick(p, user_tick, rq);
return;
}
@@ -2887,7 +2889,7 @@ void account_steal_ticks(unsigned long ticks)
void account_idle_ticks(unsigned long ticks)
{
- if (sched_clock_irqtime) {
+ if (sched_clock_irqtime > 0) {
irqtime_account_idle_ticks(ticks);
return;
}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..236e814 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
application, you can say N to avoid the very slight overhead
this adds.
+config IRQ_TIME_ACCOUNTING
+ bool "Fine granularity task level IRQ time accounting"
+ depends on X86 || ARM
+ default n
+ ---help---
+ Select this option to enable fine granularity task irq time
+ accounting. This is done by reading a timestamp on each
+ transitions between softirq and hardirq state, so there can be a
+ small performance impact.
+
+ If in doubt, say N here.
+
config TIMER_STATS
bool "Collect kernel timers statistics"
depends on DEBUG_KERNEL && PROC_FS
--
1.7.7.6
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
2012-02-11 0:02 Dmitry Antipov
@ 2012-02-17 11:22 ` Ingo Molnar
0 siblings, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2012-02-17 11:22 UTC (permalink / raw)
To: Dmitry Antipov
Cc: linux-kernel, Russell King, Rusty Russell, Ingo Molnar,
Yong Zhang, Venki Pallipadi, x86, linux-arm-kernel, linaro-dev,
patches
* Dmitry Antipov <dmitry.antipov@linaro.org> wrote:
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -757,18 +757,20 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
> static DEFINE_PER_CPU(u64, cpu_softirq_time);
>
> static DEFINE_PER_CPU(u64, irq_start_time);
> -static int sched_clock_irqtime;
>
> -void enable_sched_clock_irqtime(void)
> -{
> - sched_clock_irqtime = 1;
> -}
> +/* -1 if not initialized, 0 if disabled with "noirqtime" kernel option
> + * or after unstable clock was detected, 1 if enabled and active.
> + */
Please use the customary (multi-line) comment style:
/*
* Comment .....
* ...... goes here.
*/
specified in Documentation/CodingStyle.
> +int sched_clock_irqtime = -1;
should be turned into __read_mostly I guess.
Thanks,
Ingo
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-20 6:04 Dmitry Antipov
2012-02-27 10:12 ` Peter Zijlstra
0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-20 6:04 UTC (permalink / raw)
To: linux-kernel
Cc: Russell King, Rusty Russell, Ingo Molnar, Yong Zhang,
Venki Pallipadi, x86, linux-arm-kernel, linaro-dev, patches,
Dmitry Antipov
Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, X86-specific option
"tsc=noirqtime" is preserved, but issues a warning.
Suggested-by: Yong Zhang <yong.zhang0@gmail.com>
Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Suggested-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
Documentation/kernel-parameters.txt | 9 +++++----
arch/arm/kernel/sched_clock.c | 2 ++
arch/x86/Kconfig | 11 -----------
arch/x86/kernel/tsc.c | 12 ++++++------
include/linux/sched.h | 17 ++++++++++-------
kernel/sched/core.c | 21 ++++++++++++---------
lib/Kconfig.debug | 12 ++++++++++++
7 files changed, 47 insertions(+), 37 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 033d4e6..a5da255 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1790,6 +1790,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
noirqdebug [X86-32] Disables the code which attempts to detect and
disable unhandled interrupt sources.
+ noirqtime [X86,ARM] Run time disables IRQ_TIME_ACCOUNTING and
+ eliminates the timestamping on irq/softirq entry/exit.
+
no_timer_check [X86,APIC] Disables the code which tests for
broken timer IRQ sources.
@@ -2636,10 +2639,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
as the stability checks done at bootup. Used to enable
high-resolution timer mode on older hardware, and in
virtualized environment.
- [x86] noirqtime: Do not use TSC to do irq accounting.
- Used to run time disable IRQ_TIME_ACCOUNTING on any
- platforms where RDTSC is slow and this accounting
- can add overhead.
+ [x86] noirqtime: obsoleted by "noirqtime" generic option,
+ see it's documentation for details.
turbografx.map[2|3]= [HW,JOY]
TurboGraFX parallel port interface
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..30b5f89 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -144,6 +144,8 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
*/
cd.epoch_ns = 0;
+ enable_sched_clock_irqtime();
+
pr_debug("Registered %pF as sched_clock source\n", read);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
-config IRQ_TIME_ACCOUNTING
- bool "Fine granularity task level IRQ time accounting"
- default n
- ---help---
- Select this option to enable fine granularity task irq time
- accounting. This is done by reading a timestamp on each
- transitions between softirq and hardirq state, so there can be a
- small performance impact.
-
- If in doubt, say N here.
-
source "kernel/Kconfig.preempt"
config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..f1b2b63 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
-static int no_sched_irq_time;
-
static int __init tsc_setup(char *str)
{
if (!strcmp(str, "reliable"))
tsc_clocksource_reliable = 1;
- if (!strncmp(str, "noirqtime", 9))
- no_sched_irq_time = 1;
+ if (!strncmp(str, "noirqtime", 9)) {
+ printk(KERN_WARNING "tsc: tsc=noirqtime is "
+ "obsolete, use noirqtime instead\n");
+ disable_sched_clock_irqtime();
+ }
return 1;
}
@@ -978,8 +979,7 @@ void __init tsc_init(void)
/* now allow native_sched_clock() to use rdtsc */
tsc_disabled = 0;
- if (!no_sched_irq_time)
- enable_sched_clock_irqtime();
+ enable_sched_clock_irqtime();
lpj = ((u64)tsc_khz * 1000);
do_div(lpj, HZ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..9b13f79 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1961,13 +1961,16 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-/*
- * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
- * The reason for this explicit opt-in is not to have perf penalty with
- * slow sched_clocks.
- */
-extern void enable_sched_clock_irqtime(void);
-extern void disable_sched_clock_irqtime(void);
+extern int sched_clock_irqtime;
+static inline void enable_sched_clock_irqtime(void)
+{
+ if (sched_clock_irqtime == -1)
+ sched_clock_irqtime = 1;
+}
+static inline void disable_sched_clock_irqtime(void)
+{
+ sched_clock_irqtime = 0;
+}
#else
static inline void enable_sched_clock_irqtime(void) {}
static inline void disable_sched_clock_irqtime(void) {}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5255c9d..4e7a197 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -757,18 +757,21 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
static DEFINE_PER_CPU(u64, cpu_softirq_time);
static DEFINE_PER_CPU(u64, irq_start_time);
-static int sched_clock_irqtime;
-void enable_sched_clock_irqtime(void)
-{
- sched_clock_irqtime = 1;
-}
+/*
+ * -1 if not initialized, 0 if disabled with "noirqtime" kernel option
+ * or after unstable clock was detected, 1 if enabled and active.
+ */
+__read_mostly int sched_clock_irqtime = -1;
-void disable_sched_clock_irqtime(void)
+static int __init irqtime_setup(char *str)
{
sched_clock_irqtime = 0;
+ return 1;
}
+__setup("noirqtime", irqtime_setup);
+
#ifndef CONFIG_64BIT
static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
@@ -822,7 +825,7 @@ void account_system_vtime(struct task_struct *curr)
s64 delta;
int cpu;
- if (!sched_clock_irqtime)
+ if (sched_clock_irqtime < 1)
return;
local_irq_save(flags);
@@ -2853,7 +2856,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
- if (sched_clock_irqtime) {
+ if (sched_clock_irqtime > 0) {
irqtime_account_process_tick(p, user_tick, rq);
return;
}
@@ -2887,7 +2890,7 @@ void account_steal_ticks(unsigned long ticks)
void account_idle_ticks(unsigned long ticks)
{
- if (sched_clock_irqtime) {
+ if (sched_clock_irqtime > 0) {
irqtime_account_idle_ticks(ticks);
return;
}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..236e814 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
application, you can say N to avoid the very slight overhead
this adds.
+config IRQ_TIME_ACCOUNTING
+ bool "Fine granularity task level IRQ time accounting"
+ depends on X86 || ARM
+ default n
+ ---help---
+ Select this option to enable fine granularity task irq time
+ accounting. This is done by reading a timestamp on each
+ transitions between softirq and hardirq state, so there can be a
+ small performance impact.
+
+ If in doubt, say N here.
+
config TIMER_STATS
bool "Collect kernel timers statistics"
depends on DEBUG_KERNEL && PROC_FS
--
1.7.7.6
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
2012-02-20 6:04 Dmitry Antipov
@ 2012-02-27 10:12 ` Peter Zijlstra
2012-02-28 6:19 ` Dmitry Antipov
0 siblings, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2012-02-27 10:12 UTC (permalink / raw)
To: Dmitry Antipov
Cc: linux-kernel, Russell King, Rusty Russell, Ingo Molnar,
Yong Zhang, Venki Pallipadi, x86, linux-arm-kernel, linaro-dev,
patches, Venkatesh Pallipadi
On Mon, 2012-02-20 at 10:04 +0400, Dmitry Antipov wrote:
> Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
> ARM, move "noirqtime=" option to common debugging code.
> For a bit of backward compatibility, X86-specific option
> "tsc=noirqtime" is preserved, but issues a warning.
>
> Suggested-by: Yong Zhang <yong.zhang0@gmail.com>
> Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
> Suggested-by: Ingo Molnar <mingo@elte.hu>
> Acked-by: Venkatesh Pallipadi <venki@google.com>
> Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
Thanks for Cc'ing me (maintainer) and Venki (original author of the
stuff) :-)
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 7d379a6..9b13f79 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1961,13 +1961,16 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
> #endif
>
> #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> -/*
> - * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
> - * The reason for this explicit opt-in is not to have perf penalty with
> - * slow sched_clocks.
> - */
> -extern void enable_sched_clock_irqtime(void);
> -extern void disable_sched_clock_irqtime(void);
> +extern int sched_clock_irqtime;
> +static inline void enable_sched_clock_irqtime(void)
> +{
> + if (sched_clock_irqtime == -1)
> + sched_clock_irqtime = 1;
> +}
> +static inline void disable_sched_clock_irqtime(void)
> +{
> + sched_clock_irqtime = 0;
> +}
> #else
> static inline void enable_sched_clock_irqtime(void) {}
> static inline void disable_sched_clock_irqtime(void) {}
Please keep them out-of-line, its not a fast path and it avoids having
to expose the state variable.
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 5255c9d..4e7a197 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -757,18 +757,21 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
> static DEFINE_PER_CPU(u64, cpu_softirq_time);
>
> static DEFINE_PER_CPU(u64, irq_start_time);
> -static int sched_clock_irqtime;
>
> -void enable_sched_clock_irqtime(void)
> -{
> - sched_clock_irqtime = 1;
> -}
> +/*
> + * -1 if not initialized, 0 if disabled with "noirqtime" kernel option
> + * or after unstable clock was detected, 1 if enabled and active.
> + */
You forgot to explain what you need the tri-state for.
> +__read_mostly int sched_clock_irqtime = -1;
>
> -void disable_sched_clock_irqtime(void)
> +static int __init irqtime_setup(char *str)
> {
> sched_clock_irqtime = 0;
> + return 1;
> }
>
> +__setup("noirqtime", irqtime_setup);
> +
> #ifndef CONFIG_64BIT
> static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
Other than that no real objections.. I guess.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
2012-02-27 10:12 ` Peter Zijlstra
@ 2012-02-28 6:19 ` Dmitry Antipov
0 siblings, 0 replies; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-28 6:19 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-kernel, Russell King, Rusty Russell, Ingo Molnar,
Yong Zhang, Venki Pallipadi, x86, linux-arm-kernel, linaro-dev,
patches
On 02/27/2012 02:12 PM, Peter Zijlstra wrote:
>> -extern void enable_sched_clock_irqtime(void);
>> -extern void disable_sched_clock_irqtime(void);
>> +extern int sched_clock_irqtime;
>> +static inline void enable_sched_clock_irqtime(void)
>> +{
>> + if (sched_clock_irqtime == -1)
>> + sched_clock_irqtime = 1;
>> +}
>> +static inline void disable_sched_clock_irqtime(void)
>> +{
>> + sched_clock_irqtime = 0;
>> +}
>> #else
>> static inline void enable_sched_clock_irqtime(void) {}
>> static inline void disable_sched_clock_irqtime(void) {}
>
> Please keep them out-of-line, its not a fast path and it avoids having
> to expose the state variable.
OK
>> +/*
>> + * -1 if not initialized, 0 if disabled with "noirqtime" kernel option
>> + * or after unstable clock was detected, 1 if enabled and active.
>> + */
>
> You forgot to explain what you need the tri-state for.
>
>> +__read_mostly int sched_clock_irqtime = -1;
The comment above should be a sufficient explanation, isn't it?
It's a tri-state just because it "merges" two variables: internal state (enabled/disabled)
and the value passed by "noirqtime" option (turn it on, default/turn it off). It can be
enabled only if it was not turned off explicitly, i.e. -1 => 1 transition is possible,
but 0 -> 1 is not. The same rule applies to a situation when an unstable clock is detected.
Dmitry
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM
@ 2012-02-28 6:29 Dmitry Antipov
0 siblings, 0 replies; 19+ messages in thread
From: Dmitry Antipov @ 2012-02-28 6:29 UTC (permalink / raw)
To: linux-kernel
Cc: Peter Zijlstra, Russell King, Rusty Russell, Ingo Molnar,
Yong Zhang, Venki Pallipadi, x86, linux-arm-kernel, linaro-dev,
patches, Dmitry Antipov
Generalize CONFIG_IRQ_TIME_ACCOUNTING between X86 and
ARM, move "noirqtime=" option to common debugging code.
For a bit of backward compatibility, X86-specific option
"tsc=noirqtime" is preserved, but issues a warning.
Suggested-by: Yong Zhang <yong.zhang0@gmail.com>
Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Suggested-by: Ingo Molnar <mingo@elte.hu>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
Documentation/kernel-parameters.txt | 9 +++++----
arch/arm/kernel/sched_clock.c | 2 ++
arch/x86/Kconfig | 11 -----------
arch/x86/kernel/tsc.c | 12 ++++++------
include/linux/sched.h | 6 +-----
kernel/sched/core.c | 24 +++++++++++++++++++-----
lib/Kconfig.debug | 12 ++++++++++++
7 files changed, 45 insertions(+), 31 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 033d4e6..a5da255 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1790,6 +1790,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
noirqdebug [X86-32] Disables the code which attempts to detect and
disable unhandled interrupt sources.
+ noirqtime [X86,ARM] Run time disables IRQ_TIME_ACCOUNTING and
+ eliminates the timestamping on irq/softirq entry/exit.
+
no_timer_check [X86,APIC] Disables the code which tests for
broken timer IRQ sources.
@@ -2636,10 +2639,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
as the stability checks done at bootup. Used to enable
high-resolution timer mode on older hardware, and in
virtualized environment.
- [x86] noirqtime: Do not use TSC to do irq accounting.
- Used to run time disable IRQ_TIME_ACCOUNTING on any
- platforms where RDTSC is slow and this accounting
- can add overhead.
+ [x86] noirqtime: obsoleted by "noirqtime" generic option,
+ see it's documentation for details.
turbografx.map[2|3]= [HW,JOY]
TurboGraFX parallel port interface
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
index 5416c7c..30b5f89 100644
--- a/arch/arm/kernel/sched_clock.c
+++ b/arch/arm/kernel/sched_clock.c
@@ -144,6 +144,8 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
*/
cd.epoch_ns = 0;
+ enable_sched_clock_irqtime();
+
pr_debug("Registered %pF as sched_clock source\n", read);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e..4759676 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -805,17 +805,6 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
-config IRQ_TIME_ACCOUNTING
- bool "Fine granularity task level IRQ time accounting"
- default n
- ---help---
- Select this option to enable fine granularity task irq time
- accounting. This is done by reading a timestamp on each
- transitions between softirq and hardirq state, so there can be a
- small performance impact.
-
- If in doubt, say N here.
-
source "kernel/Kconfig.preempt"
config X86_UP_APIC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..f1b2b63 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -103,14 +103,15 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
-static int no_sched_irq_time;
-
static int __init tsc_setup(char *str)
{
if (!strcmp(str, "reliable"))
tsc_clocksource_reliable = 1;
- if (!strncmp(str, "noirqtime", 9))
- no_sched_irq_time = 1;
+ if (!strncmp(str, "noirqtime", 9)) {
+ printk(KERN_WARNING "tsc: tsc=noirqtime is "
+ "obsolete, use noirqtime instead\n");
+ disable_sched_clock_irqtime();
+ }
return 1;
}
@@ -978,8 +979,7 @@ void __init tsc_init(void)
/* now allow native_sched_clock() to use rdtsc */
tsc_disabled = 0;
- if (!no_sched_irq_time)
- enable_sched_clock_irqtime();
+ enable_sched_clock_irqtime();
lpj = ((u64)tsc_khz * 1000);
do_div(lpj, HZ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..ea4019c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1961,11 +1961,7 @@ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-/*
- * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
- * The reason for this explicit opt-in is not to have perf penalty with
- * slow sched_clocks.
- */
+extern int sched_clock_irqtime;
extern void enable_sched_clock_irqtime(void);
extern void disable_sched_clock_irqtime(void);
#else
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b342f57..4693509 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -757,11 +757,17 @@ static DEFINE_PER_CPU(u64, cpu_hardirq_time);
static DEFINE_PER_CPU(u64, cpu_softirq_time);
static DEFINE_PER_CPU(u64, irq_start_time);
-static int sched_clock_irqtime;
+
+/*
+ * -1 if not initialized, 0 if disabled with "noirqtime" kernel option
+ * or after unstable clock was detected, 1 if enabled and active.
+ */
+__read_mostly int sched_clock_irqtime = -1;
void enable_sched_clock_irqtime(void)
{
- sched_clock_irqtime = 1;
+ if (sched_clock_irqtime == -1)
+ sched_clock_irqtime = 1;
}
void disable_sched_clock_irqtime(void)
@@ -769,6 +775,14 @@ void disable_sched_clock_irqtime(void)
sched_clock_irqtime = 0;
}
+static int __init irqtime_setup(char *str)
+{
+ sched_clock_irqtime = 0;
+ return 1;
+}
+
+__setup("noirqtime", irqtime_setup);
+
#ifndef CONFIG_64BIT
static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
@@ -822,7 +836,7 @@ void account_system_vtime(struct task_struct *curr)
s64 delta;
int cpu;
- if (!sched_clock_irqtime)
+ if (sched_clock_irqtime < 1)
return;
local_irq_save(flags);
@@ -2852,7 +2866,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
- if (sched_clock_irqtime) {
+ if (sched_clock_irqtime > 0) {
irqtime_account_process_tick(p, user_tick, rq);
return;
}
@@ -2886,7 +2900,7 @@ void account_steal_ticks(unsigned long ticks)
void account_idle_ticks(unsigned long ticks)
{
- if (sched_clock_irqtime) {
+ if (sched_clock_irqtime > 0) {
irqtime_account_idle_ticks(ticks);
return;
}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7..236e814 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -299,6 +299,18 @@ config SCHEDSTATS
application, you can say N to avoid the very slight overhead
this adds.
+config IRQ_TIME_ACCOUNTING
+ bool "Fine granularity task level IRQ time accounting"
+ depends on X86 || ARM
+ default n
+ ---help---
+ Select this option to enable fine granularity task irq time
+ accounting. This is done by reading a timestamp on each
+ transitions between softirq and hardirq state, so there can be a
+ small performance impact.
+
+ If in doubt, say N here.
+
config TIMER_STATS
bool "Collect kernel timers statistics"
depends on DEBUG_KERNEL && PROC_FS
--
1.7.7.6
^ permalink raw reply related [flat|nested] 19+ messages in thread
end of thread, other threads:[~2012-02-28 6:28 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-07 18:06 [PATCH] sched: generalize CONFIG_IRQ_TIME_ACCOUNTING for X86 and ARM Dmitry Antipov
2012-02-07 22:56 ` Venki Pallipadi
2012-02-08 12:48 Dmitry Antipov
2012-02-08 13:18 ` Russell King - ARM Linux
2012-02-08 15:15 ` Dmitry Antipov
2012-02-08 15:24 ` Russell King - ARM Linux
2012-02-09 2:51 ` Yong Zhang
2012-02-09 2:48 ` Yong Zhang
2012-02-08 16:08 Dmitry Antipov
2012-02-08 22:50 ` Venki Pallipadi
2012-02-08 23:58 Dmitry Antipov
2012-02-09 17:25 Dmitry Antipov
2012-02-10 22:28 ` Venki Pallipadi
2012-02-11 0:02 Dmitry Antipov
2012-02-17 11:22 ` Ingo Molnar
2012-02-20 6:04 Dmitry Antipov
2012-02-27 10:12 ` Peter Zijlstra
2012-02-28 6:19 ` Dmitry Antipov
2012-02-28 6:29 Dmitry Antipov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).