All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf_events: Enable idle state tracing for pseries (ppc64)
@ 2011-06-01 12:35 Deepthi Dharwar
  2011-06-17  4:24 ` Benjamin Herrenschmidt
  2011-06-17  4:24 ` Benjamin Herrenschmidt
  0 siblings, 2 replies; 12+ messages in thread
From: Deepthi Dharwar @ 2011-06-01 12:35 UTC (permalink / raw)
  To: linuxppc-dev, linux-pm, linux-kernel

Hi,

Please find below a patch, which has perf_events added for pseries (ppc64)
platform in order to emit the trace required for perf timechart. 
It essentially enables perf timechart for pseries platform to analyse
power savings events like cpuidle states.

Steps to enable and disable the trace;
1) Mount  debugfs;
   mount -t debugfs none /sys/kernel/debug
2) Then, enable the event using;
    echo 1 > /sys/kernel/debug/tracing/events/power/cpu_idle/enable
3) The output of the trace can be seen in /sys/kernel/debug/tracing/trace
4) To disable the trace use;
    echo 0 > /sys/kernel/debug/tracing/events/power/cpu_idle/enable

Trace .svg o/p can be viewed for pseries (ppc64) systems showing various 
cpu-idle states as a part of perf timechart tool. 
References: http://blog.fenrus.org/?p=5

Issue command 'perf timechart record' to enable tracing. 
This generates the trace and records in  perf.data file by default.
One can generate output.svg file by issuing 'perf timechart'. 

Sample o/p from the trace file: 
===============================

State 1 -> Snooze
State 2 -> Cede

# tracer: nop
#
TASK-PID    CPU#    TIMESTAMP  FUNCTION
  | |       |          |         |
<idle>-0     [000]   292.482314: cpu_idle: state=1 cpu_id=0 
						^^ Enter Snooze
<idle>-0     [001]   292.482363: cpu_idle: state=1 cpu_id=1
<idle>-0     [000]   292.492315: cpu_idle: state=4294967295 cpu_id=0
						^^ Exit Snooze 	
<idle>-0     [000]   292.492316: cpu_idle: state=2 cpu_id=0 
						^^ Enter  Cede 
<idle>-0     [001]   292.492364: cpu_idle: state=4294967295 cpu_id=1
<idle>-0     [001]   292.492364: cpu_idle: state=2 cpu_id=1	
<idle>-0     [000]   292.504198: cpu_idle: state=4294967295 cpu_id=0 
						^^Exit Cede 
<idle>-0     [000]   292.504204: cpu_idle: state=1 cpu_id=0
<idle>-0     [001]   292.504921: cpu_idle: state=4294967295 cpu_id=1
<idle>-0     [001]   292.504936: cpu_idle: state=1 cpu_id=1
<idle>-0     [000]   292.514205: cpu_idle: state=4294967295 cpu_id=0    

This patch applies on 2.6.39 and tested on a IBM POWER7 machine.  

-Deepthi

Adding perf events to trace various cpu idle states on ppc64 (pseries) platform.
Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>

pseries.h |    4 ++++
setup.c   |   14 ++++++++++++++
2 files changed, 18 insertions(+)

Index: linux-2.6.39/arch/powerpc/platforms/pseries/setup.c
===================================================================
--- linux-2.6.39.orig/arch/powerpc/platforms/pseries/setup.c	2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/arch/powerpc/platforms/pseries/setup.c	2011-06-01 07:46:00.000000000 -0400
@@ -39,6 +39,7 @@
 #include <linux/irq.h>
 #include <linux/seq_file.h>
 #include <linux/root_dev.h>
+#include <trace/events/power.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
@@ -582,6 +583,10 @@
 	 * while, do so.
 	 */
 	if (snooze) {
+
+		trace_power_start(POWER_CSTATE, CPU_IDLE_SNOOZE, cpu);
+		trace_cpu_idle(CPU_IDLE_SNOOZE, cpu);
+
 		start_snooze = get_tb() + snooze * tb_ticks_per_usec;
 		local_irq_enable();
 		set_thread_flag(TIF_POLLING_NRFLAG);
@@ -602,9 +607,19 @@
 			goto out;
 	}
 
+	trace_power_end(cpu);
+	trace_cpu_idle(PWR_EVENT_EXIT, cpu);
+
+	trace_power_start(POWER_CSTATE, CPU_IDLE_CEDE, cpu);
+	trace_cpu_idle(CPU_IDLE_CEDE, cpu);
+
 	cede_processor();
 
 out:
+
+	trace_power_end(cpu);
+	trace_cpu_idle(PWR_EVENT_EXIT, cpu);
+
 	HMT_medium();
 	out_purr = mfspr(SPRN_PURR);
 	get_lppaca()->wait_state_cycles += out_purr - in_purr;
Index: linux-2.6.39/arch/powerpc/platforms/pseries/pseries.h
===================================================================
--- linux-2.6.39.orig/arch/powerpc/platforms/pseries/pseries.h	2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/arch/powerpc/platforms/pseries/pseries.h	2011-06-01 07:53:24.000000000 -0400
@@ -12,6 +12,9 @@
 
 #include <linux/interrupt.h>
 
+#define CPU_IDLE_SNOOZE 1
+#define CPU_IDLE_CEDE	 2
+
 struct device_node;
 
 extern void request_event_sources_irqs(struct device_node *np,
@@ -56,4 +59,5 @@
 extern int dlpar_attach_node(struct device_node *);
 extern int dlpar_detach_node(struct device_node *);
 
+
 #endif /* _PSERIES_PSERIES_H */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf_events: Enable idle state tracing for pseries (ppc64)
  2011-06-01 12:35 [PATCH] perf_events: Enable idle state tracing for pseries (ppc64) Deepthi Dharwar
@ 2011-06-17  4:24 ` Benjamin Herrenschmidt
  2011-06-20 17:18   ` deepthi
  2011-06-20 17:18   ` deepthi
  2011-06-17  4:24 ` Benjamin Herrenschmidt
  1 sibling, 2 replies; 12+ messages in thread
From: Benjamin Herrenschmidt @ 2011-06-17  4:24 UTC (permalink / raw)
  To: deepthi; +Cc: linuxppc-dev, linux-pm, linux-kernel

On Wed, 2011-06-01 at 18:05 +0530, Deepthi Dharwar wrote:
> Hi,
> 
> Please find below a patch, which has perf_events added for pseries (ppc64)
> platform in order to emit the trace required for perf timechart. 
> It essentially enables perf timechart for pseries platfrom to analyse
> power savings events like cpuidle states.

Unless I'm mistaken, you added traces to dedicated CPU idle sleep but
not shared processor. Any reason ?

Also I don't really know that tracing stuff but what's the point of
having start/end _and trace_cpu_idle if you're going to always start &
end around a single occurrence of trace_cpu_idle ?

Wouldn't there be a way to start/end and then trace the snooze and
subsequent cede within the same start/end section or that makes no
sense ?

Also would there be any interest in doing the tracing more generically
in idle.c ?

Cheers,
Ben.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf_events: Enable idle state tracing for pseries (ppc64)
  2011-06-01 12:35 [PATCH] perf_events: Enable idle state tracing for pseries (ppc64) Deepthi Dharwar
  2011-06-17  4:24 ` Benjamin Herrenschmidt
@ 2011-06-17  4:24 ` Benjamin Herrenschmidt
  1 sibling, 0 replies; 12+ messages in thread
From: Benjamin Herrenschmidt @ 2011-06-17  4:24 UTC (permalink / raw)
  To: deepthi; +Cc: linuxppc-dev, linux-pm, linux-kernel

On Wed, 2011-06-01 at 18:05 +0530, Deepthi Dharwar wrote:
> Hi,
> 
> Please find below a patch, which has perf_events added for pseries (ppc64)
> platform in order to emit the trace required for perf timechart. 
> It essentially enables perf timechart for pseries platfrom to analyse
> power savings events like cpuidle states.

Unless I'm mistaken, you added traces to dedicated CPU idle sleep but
not shared processor. Any reason ?

Also I don't really know that tracing stuff but what's the point of
having start/end _and trace_cpu_idle if you're going to always start &
end around a single occurrence of trace_cpu_idle ?

Wouldn't there be a way to start/end and then trace the snooze and
subsequent cede within the same start/end section or that makes no
sense ?

Also would there be any interest in doing the tracing more generically
in idle.c ?

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf_events: Enable idle state tracing for pseries (ppc64)
  2011-06-17  4:24 ` Benjamin Herrenschmidt
@ 2011-06-20 17:18   ` deepthi
  2011-06-20 21:42     ` Benjamin Herrenschmidt
  2011-06-20 21:42     ` Benjamin Herrenschmidt
  2011-06-20 17:18   ` deepthi
  1 sibling, 2 replies; 12+ messages in thread
From: deepthi @ 2011-06-20 17:18 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-pm, linux-kernel

On Friday 17 June 2011 09:54 AM, Benjamin Herrenschmidt wrote:
> On Wed, 2011-06-01 at 18:05 +0530, Deepthi Dharwar wrote:
>> Hi,
>>
>> Please find below a patch, which has perf_events added for pseries (ppc64)
>> platform in order to emit the trace required for perf timechart. 
>> It essentially enables perf timechart for pseries platfrom to analyse
>> power savings events like cpuidle states.
> 
> Unless I'm mistaken, you added traces to dedicated CPU idle sleep but
> not shared processor. Any reason ?
> 
Yes, the traces were added only to dedicated CPU idle sleep and not for 
shared processor. This was added only for RFC purpose, and looking for 
comments from trace implementation point of view. This can be
easily extended to the latter too.

> Also I don't really know that tracing stuff but what's the point of
> having start/end _and trace_cpu_idle if you're going to always start &
> end around a single occurence of trace_cpu_idle ?
> 
power_start/end are the APIs that were used initially
and they are going to be deprecated in the upcoming kernel releases.
trace_cpu_idle call is going to replace power start/end routines. 
To maintain backward compatibility and uniformity, both the routines 
have been used.
(ref:https://lkml.org/lkml/2010/11/14/60)

> Wouldn't there be a way to start/end and then trace the snooze and
> subsequent cede within the same start/end section or that makes no
> sense ?
> 
We wanted to find the residency time of both Snooze as well as cede 
separately. Knowing this will help us tweak our cpuidle code. So, both 
have been captured separately.

> Also would there be any interest in doing the tracing more generically
> in idle.c ?
> 
Yes, this tracing is already implemented for Intel platform. This would
be a part of cpuidle framework. Going further, once the power cpuidle 
framework is ported and ready, we will extend this trace there as well.
(ref:https://lkml.org/lkml/2011/6/7/375)

> Cheers,
> Ben.
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

Regards,
Deepthi


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf_events: Enable idle state tracing for pseries (ppc64)
  2011-06-17  4:24 ` Benjamin Herrenschmidt
  2011-06-20 17:18   ` deepthi
@ 2011-06-20 17:18   ` deepthi
  1 sibling, 0 replies; 12+ messages in thread
From: deepthi @ 2011-06-20 17:18 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-pm, linux-kernel

On Friday 17 June 2011 09:54 AM, Benjamin Herrenschmidt wrote:
> On Wed, 2011-06-01 at 18:05 +0530, Deepthi Dharwar wrote:
>> Hi,
>>
>> Please find below a patch, which has perf_events added for pseries (ppc64)
>> platform in order to emit the trace required for perf timechart. 
>> It essentially enables perf timechart for pseries platfrom to analyse
>> power savings events like cpuidle states.
> 
> Unless I'm mistaken, you added traces to dedicated CPU idle sleep but
> not shared processor. Any reason ?
> 
Yes, the traces were added only to dedicated CPU idle sleep and not for 
shared processor. This was added only for RFC purpose, and looking for 
comments from trace implementation point of view. This can be
easily extended to the latter too.

> Also I don't really know that tracing stuff but what's the point of
> having start/end _and trace_cpu_idle if you're going to always start &
> end around a single occurence of trace_cpu_idle ?
> 
power_start/end are the APIs that were used initially
and they are going to be deprecated in the upcoming kernel releases.
trace_cpu_idle call is going to replace power start/end routines. 
To maintain backward compatibility and uniformity, both the routines 
have been used.
(ref:https://lkml.org/lkml/2010/11/14/60)

> Wouldn't there be a way to start/end and then trace the snooze and
> subsequent cede within the same start/end section or that makes no
> sense ?
> 
We wanted to find the residency time of both Snooze as well as cede 
separately. Knowing this will help us tweak our cpuidle code. So, both 
have been captured separately.

> Also would there be any interest in doing the tracing more generically
> in idle.c ?
> 
Yes, this tracing is already implemented for Intel platform. This would
be a part of cpuidle framework. Going further, once the power cpuidle 
framework is ported and ready, we will extend this trace there as well.
(ref:https://lkml.org/lkml/2011/6/7/375)

> Cheers,
> Ben.
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

Regards,
Deepthi

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf_events: Enable idle state tracing for pseries (ppc64)
  2011-06-20 17:18   ` deepthi
  2011-06-20 21:42     ` Benjamin Herrenschmidt
@ 2011-06-20 21:42     ` Benjamin Herrenschmidt
  2011-06-21 16:29       ` deepthi
  2011-06-21 16:29       ` deepthi
  1 sibling, 2 replies; 12+ messages in thread
From: Benjamin Herrenschmidt @ 2011-06-20 21:42 UTC (permalink / raw)
  To: deepthi; +Cc: linuxppc-dev, linux-pm, linux-kernel

On Mon, 2011-06-20 at 22:48 +0530, deepthi wrote:
> On Friday 17 June 2011 09:54 AM, Benjamin Herrenschmidt wrote:
> > On Wed, 2011-06-01 at 18:05 +0530, Deepthi Dharwar wrote:
> >> Hi,
> >>
> >> Please find below a patch, which has perf_events added for pseries (ppc64)
> >> platform in order to emit the trace required for perf timechart. 
> >> It essentially enables perf timechart for pseries platfrom to analyse
> >> power savings events like cpuidle states.
> > 
> > Unless I'm mistaken, you added traces to dedicated CPU idle sleep but
> > not shared processor. Any reason ?
> > 
> Yes, the traces were added only to dedicated CPU idle sleep and not for 
> shared processor. This was added only for RFC purpose, and looking for 
> comments from trace implementation point of view. This can be
> easily extended to the latter too.

Please do both.

> > Also I don't really know that tracing stuff but what's the point of
> > having start/end _and trace_cpu_idle if you're going to always start &
> > end around a single occurence of trace_cpu_idle ?
> > 
> power_start/end are the APIs that were used initially
> and they are going to be deprecated in the upcoming kernel releases.
> trace_cpu_idle call is going to replace power start/end routines. 
> To maintain backward compatibility and uniformity, both the routines 
> have been used.
> (ref:https://lkml.org/lkml/2010/11/14/60)

Backward compatible with what ? Userspace ? Do we care in that specific
case since it's a new feature ?

> > Wouldn't there be a way to start/end and then trace the snooze and
> > subsequent cede within the same start/end section or that makes no
> > sense ?
> > 
> We wanted to find the residency time of both Snooze as well as cede 
> separately. Knowing this will help us tweak our cpuidle code. So, both 
> have been captured separately.
> 
> > Also would there be any interest in doing the tracing more generically
> > in idle.c ?
> > 
> Yes, this tracing is already implemented for Intel platform. This would
> be a part of cpuidle framework. Going further, once the power cpuidle 
> framework is ported and ready, we will extend this trace there as well.
> (ref:https://lkml.org/lkml/2011/6/7/375)

So do we need to apply this patch at all since the cpuidle stuff is
happening too ?

Cheers,
Ben.



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf_events: Enable idle state tracing for pseries (ppc64)
  2011-06-20 17:18   ` deepthi
@ 2011-06-20 21:42     ` Benjamin Herrenschmidt
  2011-06-20 21:42     ` Benjamin Herrenschmidt
  1 sibling, 0 replies; 12+ messages in thread
From: Benjamin Herrenschmidt @ 2011-06-20 21:42 UTC (permalink / raw)
  To: deepthi; +Cc: linuxppc-dev, linux-pm, linux-kernel

On Mon, 2011-06-20 at 22:48 +0530, deepthi wrote:
> On Friday 17 June 2011 09:54 AM, Benjamin Herrenschmidt wrote:
> > On Wed, 2011-06-01 at 18:05 +0530, Deepthi Dharwar wrote:
> >> Hi,
> >>
> >> Please find below a patch, which has perf_events added for pseries (ppc64)
> >> platform in order to emit the trace required for perf timechart. 
> >> It essentially enables perf timechart for pseries platfrom to analyse
> >> power savings events like cpuidle states.
> > 
> > Unless I'm mistaken, you added traces to dedicated CPU idle sleep but
> > not shared processor. Any reason ?
> > 
> Yes, the traces were added only to dedicated CPU idle sleep and not for 
> shared processor. This was added only for RFC purpose, and looking for 
> comments from trace implementation point of view. This can be
> easily extended to the latter too.

Please do both.

> > Also I don't really know that tracing stuff but what's the point of
> > having start/end _and trace_cpu_idle if you're going to always start &
> > end around a single occurence of trace_cpu_idle ?
> > 
> power_start/end are the APIs that were used initially
> and they are going to be deprecated in the upcoming kernel releases.
> trace_cpu_idle call is going to replace power start/end routines. 
> To maintain backward compatibility and uniformity, both the routines 
> have been used.
> (ref:https://lkml.org/lkml/2010/11/14/60)

Backward compatible with what ? Userspace ? Do we care in that specific
case since it's a new feature ?

> > Wouldn't there be a way to start/end and then trace the snooze and
> > subsequent cede within the same start/end section or that makes no
> > sense ?
> > 
> We wanted to find the residency time of both Snooze as well as cede 
> separately. Knowing this will help us tweak our cpuidle code. So, both 
> have been captured separately.
> 
> > Also would there be any interest in doing the tracing more generically
> > in idle.c ?
> > 
> Yes, this tracing is already implemented for Intel platform. This would
> be a part of cpuidle framework. Going further, once the power cpuidle 
> framework is ported and ready, we will extend this trace there as well.
> (ref:https://lkml.org/lkml/2011/6/7/375)

So do we need to apply this patch at all since the cpuidle stuff is
happening too ?

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf_events: Enable idle state tracing for pseries (ppc64)
  2011-06-20 21:42     ` Benjamin Herrenschmidt
@ 2011-06-21 16:29       ` deepthi
  2011-06-21 16:40         ` Deepthi Dharwar
  2011-06-21 16:40         ` Deepthi Dharwar
  2011-06-21 16:29       ` deepthi
  1 sibling, 2 replies; 12+ messages in thread
From: deepthi @ 2011-06-21 16:29 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-pm, linux-kernel

On Tuesday 21 June 2011 03:12 AM, Benjamin Herrenschmidt wrote:
> On Mon, 2011-06-20 at 22:48 +0530, deepthi wrote:
>> On Friday 17 June 2011 09:54 AM, Benjamin Herrenschmidt wrote:
>>> On Wed, 2011-06-01 at 18:05 +0530, Deepthi Dharwar wrote:
>>>> Hi,
>>>>
>>>> Please find below a patch, which has perf_events added for pseries (ppc64)
>>>> platform in order to emit the trace required for perf timechart. 
>>>> It essentially enables perf timechart for pseries platfrom to analyse
>>>> power savings events like cpuidle states.
>>>
>>> Unless I'm mistaken, you added traces to dedicated CPU idle sleep but
>>> not shared processor. Any reason ?
>>>
>> Yes, the traces were added only to dedicated CPU idle sleep and not for 
>> shared processor. This was added only for RFC purpose, and looking for 
>> comments from trace implementation point of view. This can be
>> easily extended to the latter too.
> 
> Please do both.
> 
Yes, I'll do so.

>>> Also I don't really know that tracing stuff but what's the point of
>>> having start/end _and trace_cpu_idle if you're going to always start &
>>> end around a single occurence of trace_cpu_idle ?
>>>
>> power_start/end are the APIs that were used initially
>> and they are going to be deprecated in the upcoming kernel releases.
>> trace_cpu_idle call is going to replace power start/end routines. 
>> To maintain backward compatibility and uniformity, both the routines 
>> have been used.
>> (ref:https://lkml.org/lkml/2010/11/14/60)
> 
> Backward compatible with what ? Userspace ? Do we care in that specific
> case since it's a new feature ?
> 
Going forward, we can just have trace_cpu_idle call and 
remove the power_start/end calls.

>>> Wouldn't there be a way to start/end and then trace the snooze and
>>> subsequent cede within the same start/end section or that makes no
>>> sense ?
>>>
>> We wanted to find the residency time of both Snooze as well as cede 
>> separately. Knowing this will help us tweak our cpuidle code. So, both 
>> have been captured separately.
>>
>>> Also would there be any interest in doing the tracing more generically
>>> in idle.c ?
>>>
>> Yes, this tracing is already implemented for Intel platform. This would
>> be a part of cpuidle framework. Going further, once the power cpuidle 
>> framework is ported and ready, we will extend this trace there as well.
>> (ref:https://lkml.org/lkml/2011/6/7/375)
> 
> So do we need to apply this patch at all since the cpuidle stuff is
> happening too ?
> 

Well, not really. This is more for RFC purpose. 
I just wanted to share this patch, as we are using it to evaluate
cpu idle on ppc64.

> Cheers,
> Ben.
> 
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf_events: Enable idle state tracing for pseries (ppc64)
  2011-06-20 21:42     ` Benjamin Herrenschmidt
  2011-06-21 16:29       ` deepthi
@ 2011-06-21 16:29       ` deepthi
  1 sibling, 0 replies; 12+ messages in thread
From: deepthi @ 2011-06-21 16:29 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-pm, linux-kernel

On Tuesday 21 June 2011 03:12 AM, Benjamin Herrenschmidt wrote:
> On Mon, 2011-06-20 at 22:48 +0530, deepthi wrote:
>> On Friday 17 June 2011 09:54 AM, Benjamin Herrenschmidt wrote:
>>> On Wed, 2011-06-01 at 18:05 +0530, Deepthi Dharwar wrote:
>>>> Hi,
>>>>
>>>> Please find below a patch, which has perf_events added for pseries (ppc64)
>>>> platform in order to emit the trace required for perf timechart. 
>>>> It essentially enables perf timechart for pseries platfrom to analyse
>>>> power savings events like cpuidle states.
>>>
>>> Unless I'm mistaken, you added traces to dedicated CPU idle sleep but
>>> not shared processor. Any reason ?
>>>
>> Yes, the traces were added only to dedicated CPU idle sleep and not for 
>> shared processor. This was added only for RFC purpose, and looking for 
>> comments from trace implementation point of view. This can be
>> easily extended to the latter too.
> 
> Please do both.
> 
Yes, I'll do so.

>>> Also I don't really know that tracing stuff but what's the point of
>>> having start/end _and trace_cpu_idle if you're going to always start &
>>> end around a single occurence of trace_cpu_idle ?
>>>
>> power_start/end are the APIs that were used initially
>> and they are going to be deprecated in the upcoming kernel releases.
>> trace_cpu_idle call is going to replace power start/end routines. 
>> To maintain backward compatibility and uniformity, both the routines 
>> have been used.
>> (ref:https://lkml.org/lkml/2010/11/14/60)
> 
> Backward compatible with what ? Userspace ? Do we care in that specific
> case since it's a new feature ?
> 
Going forward, we can just have trace_cpu_idle call and 
remove the power_start/end calls.

>>> Wouldn't there be a way to start/end and then trace the snooze and
>>> subsequent cede within the same start/end section or that makes no
>>> sense ?
>>>
>> We wanted to find the residency time of both Snooze as well as cede 
>> separately. Knowing this will help us tweak our cpuidle code. So, both 
>> have been captured separately.
>>
>>> Also would there be any interest in doing the tracing more generically
>>> in idle.c ?
>>>
>> Yes, this tracing is already implemented for Intel platform. This would
>> be a part of cpuidle framework. Going further, once the power cpuidle 
>> framework is ported and ready, we will extend this trace there as well.
>> (ref:https://lkml.org/lkml/2011/6/7/375)
> 
> So do we need to apply this patch at all since the cpuidle stuff is
> happening too ?
> 

Well, not really. This is more for RFC purpose. 
I just wanted to share this patch, as we are using it to evaluate
cpu idle on ppc64.

> Cheers,
> Ben.
> 
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf_events: Enable idle state tracing for pseries (ppc64)
  2011-06-21 16:29       ` deepthi
  2011-06-21 16:40         ` Deepthi Dharwar
@ 2011-06-21 16:40         ` Deepthi Dharwar
  1 sibling, 0 replies; 12+ messages in thread
From: Deepthi Dharwar @ 2011-06-21 16:40 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-pm, linux-kernel

On Tuesday 21 June 2011 09:59 PM, deepthi wrote:
> On Tuesday 21 June 2011 03:12 AM, Benjamin Herrenschmidt wrote:
>> On Mon, 2011-06-20 at 22:48 +0530, deepthi wrote:
>>> On Friday 17 June 2011 09:54 AM, Benjamin Herrenschmidt wrote:
>>>> On Wed, 2011-06-01 at 18:05 +0530, Deepthi Dharwar wrote:
>>>>> Hi,
>>>>>
>>>>> Please find below a patch, which has perf_events added for pseries (ppc64)
>>>>> platform in order to emit the trace required for perf timechart. 
>>>>> It essentially enables perf timechart for pseries platfrom to analyse
>>>>> power savings events like cpuidle states.
>>>>
>>>> Unless I'm mistaken, you added traces to dedicated CPU idle sleep but
>>>> not shared processor. Any reason ?
>>>>
>>> Yes, the traces were added only to dedicated CPU idle sleep and not for 
>>> shared processor. This was added only for RFC purpose, and looking for 
>>> comments from trace implementation point of view. This can be
>>> easily extended to the latter too.
>>
>> Please do both.
>>
> Yes, I ll do so.
> 
>>>> Also I don't really know that tracing stuff but what's the point of
>>>> having start/end _and trace_cpu_idle if you're going to always start &
>>>> end around a single occurence of trace_cpu_idle ?
>>>>
>>> power_start/end are the APIs that were used initially
>>> and they are going to be deprecated in the upcoming kernel releases.
>>> trace_cpu_idle call is going to replace power start/end routines. 
>>> To maintain backward compatibility and uniformity, both the routines 
>>> have been used.
>>> (ref:https://lkml.org/lkml/2010/11/14/60)
>>
>> Backward compatible with what ? Userspace ? Do we care in that specific
>> case since it's a new feature ?
>>
> Going forward, we can just have trace_cpu_idle call and 
> remove the power_start/end calls.
> 
>>>> Wouldn't there be a way to start/end and then trace the snooze and
>>>> subsequent cede within the same start/end section or that makes no
>>>> sense ?
>>>>
>>> We wanted to find the residency time of both Snooze as well as cede 
>>> separately. Knowing this will help us tweak our cpuidle code. So, both 
>>> have been captured separately.
>>>
>>>> Also would there be any interest in doing the tracing more generically
>>>> in idle.c ?
>>>>
>>> Yes, this tracing is already implemented for Intel platform. This would
>>> be a part of cpuidle framework. Going further, once the power cpuidle 
>>> framework is ported and ready, we will extend this trace there as well.
>>> (ref:https://lkml.org/lkml/2011/6/7/375)
>>
>> So do we need to apply this patch at all since the cpuidle stuff is
>> happening too ?
>>
> 
> Well, not really. This is more for RFC purpose. 
> I just wanted to share this patch, as we are using it to evaluate
> cpu idle on ppc64.
> 
  I will re-base the patch and move it to the cpu idle for power 
  framework. So the tracing too gets in along with the 
  cpu idle support. 

  Thanks Ben. 

>> Cheers,
>> Ben.
>>
>>
>> _______________________________________________
>> Linuxppc-dev mailing list
>> Linuxppc-dev@lists.ozlabs.org
>> https://lists.ozlabs.org/listinfo/linuxppc-dev
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

Regards,
Deepthi

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf_events: Enable idle state tracing for pseries (ppc64)
  2011-06-21 16:29       ` deepthi
@ 2011-06-21 16:40         ` Deepthi Dharwar
  2011-06-21 16:40         ` Deepthi Dharwar
  1 sibling, 0 replies; 12+ messages in thread
From: Deepthi Dharwar @ 2011-06-21 16:40 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-pm, linux-kernel

On Tuesday 21 June 2011 09:59 PM, deepthi wrote:
> On Tuesday 21 June 2011 03:12 AM, Benjamin Herrenschmidt wrote:
>> On Mon, 2011-06-20 at 22:48 +0530, deepthi wrote:
>>> On Friday 17 June 2011 09:54 AM, Benjamin Herrenschmidt wrote:
>>>> On Wed, 2011-06-01 at 18:05 +0530, Deepthi Dharwar wrote:
>>>>> Hi,
>>>>>
>>>>> Please find below a patch, which has perf_events added for pseries (ppc64)
>>>>> platform in order to emit the trace required for perf timechart. 
>>>>> It essentially enables perf timechart for pseries platfrom to analyse
>>>>> power savings events like cpuidle states.
>>>>
>>>> Unless I'm mistaken, you added traces to dedicated CPU idle sleep but
>>>> not shared processor. Any reason ?
>>>>
>>> Yes, the traces were added only to dedicated CPU idle sleep and not for 
>>> shared processor. This was added only for RFC purpose, and looking for 
>>> comments from trace implementation point of view. This can be
>>> easily extended to the latter too.
>>
>> Please do both.
>>
> Yes, I ll do so.
> 
>>>> Also I don't really know that tracing stuff but what's the point of
>>>> having start/end _and trace_cpu_idle if you're going to always start &
>>>> end around a single occurence of trace_cpu_idle ?
>>>>
>>> power_start/end are the APIs that were used initially
>>> and they are going to be deprecated in the upcoming kernel releases.
>>> trace_cpu_idle call is going to replace power start/end routines. 
>>> To maintain backward compatibility and uniformity, both the routines 
>>> have been used.
>>> (ref:https://lkml.org/lkml/2010/11/14/60)
>>
>> Backward compatible with what ? Userspace ? Do we care in that specific
>> case since it's a new feature ?
>>
> Going forward, we can just have trace_cpu_idle call and 
> remove the power_start/end calls.
> 
>>>> Wouldn't there be a way to start/end and then trace the snooze and
>>>> subsequent cede within the same start/end section or that makes no
>>>> sense ?
>>>>
>>> We wanted to find the residency time of both Snooze as well as cede 
>>> separately. Knowing this will help us tweak our cpuidle code. So, both 
>>> have been captured separately.
>>>
>>>> Also would there be any interest in doing the tracing more generically
>>>> in idle.c ?
>>>>
>>> Yes, this tracing is already implemented for Intel platform. This would
>>> be a part of cpuidle framework. Going further, once the power cpuidle 
>>> framework is ported and ready, we will extend this trace there as well.
>>> (ref:https://lkml.org/lkml/2011/6/7/375)
>>
>> So do we need to apply this patch at all since the cpuidle stuff is
>> happening too ?
>>
> 
> Well, not really. This is more for RFC purpose. 
> I just wanted to share this patch, as we are using it to evaluate
> cpu idle on ppc64.
> 
  I will re-base the patch and move it to the cpu idle for power 
  framework. So the tracing too gets in along with the 
  cpu idle support. 

  Thanks Ben. 

>> Cheers,
>> Ben.
>>
>>
>> _______________________________________________
>> Linuxppc-dev mailing list
>> Linuxppc-dev@lists.ozlabs.org
>> https://lists.ozlabs.org/listinfo/linuxppc-dev
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

Regards,
Deepthi

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH] perf_events: Enable idle state tracing for pseries (ppc64)
@ 2011-06-01 12:35 Deepthi Dharwar
  0 siblings, 0 replies; 12+ messages in thread
From: Deepthi Dharwar @ 2011-06-01 12:35 UTC (permalink / raw)
  To: linuxppc-dev, linux-pm, linux-kernel

Hi,

Please find below a patch, which has perf_events added for pseries (ppc64)
platform in order to emit the trace required for perf timechart. 
It essentially enables perf timechart for pseries platform to analyse
power savings events like cpuidle states.

Steps to enable and disable the trace;
1) Mount  debugfs;
   mount -t debugfs none /sys/kernel/debug
2) Then, enable the event using;
    echo 1 > /sys/kernel/debug/tracing/events/power/cpu_idle/enable
3) The output of the trace can be seen in /sys/kernel/debug/tracing/trace
4) To disable the trace use;
    echo 0 > /sys/kernel/debug/tracing/events/power/cpu_idle/enable

Trace .svg o/p can be viewed for pseries (ppc64) systems showing various 
cpu-idle states as a part of perf timechart tool. 
References: http://blog.fenrus.org/?p=5

Issue command 'perf timechart record' to enable tracing. 
This generates the trace and records in  perf.data file by default.
One can generate output.svg file by issuing 'perf timechart'. 

Sample o/p from the trace file: 
===============================

State 1 -> Snooze
State 2 -> Cede

# tracer: nop
#
TASK-PID    CPU#    TIMESTAMP  FUNCTION
  | |       |          |         |
<idle>-0     [000]   292.482314: cpu_idle: state=1 cpu_id=0 
						^^ Enter Snooze
<idle>-0     [001]   292.482363: cpu_idle: state=1 cpu_id=1
<idle>-0     [000]   292.492315: cpu_idle: state=4294967295 cpu_id=0
						^^ Exit Snooze 	
<idle>-0     [000]   292.492316: cpu_idle: state=2 cpu_id=0 
						^^ Enter  Cede 
<idle>-0     [001]   292.492364: cpu_idle: state=4294967295 cpu_id=1
<idle>-0     [001]   292.492364: cpu_idle: state=2 cpu_id=1	
<idle>-0     [000]   292.504198: cpu_idle: state=4294967295 cpu_id=0 
						^^Exit Cede 
<idle>-0     [000]   292.504204: cpu_idle: state=1 cpu_id=0
<idle>-0     [001]   292.504921: cpu_idle: state=4294967295 cpu_id=1
<idle>-0     [001]   292.504936: cpu_idle: state=1 cpu_id=1
<idle>-0     [000]   292.514205: cpu_idle: state=4294967295 cpu_id=0    

This patch applies on 2.6.39 and was tested on an IBM POWER7 machine.

-Deepthi

Adding perf events to trace various cpu idle states on ppc64 (pseries) platform.
Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>

pseries.h |    4 ++++
setup.c   |   14 ++++++++++++++
2 files changed, 18 insertions(+)

Index: linux-2.6.39/arch/powerpc/platforms/pseries/setup.c
===================================================================
--- linux-2.6.39.orig/arch/powerpc/platforms/pseries/setup.c	2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/arch/powerpc/platforms/pseries/setup.c	2011-06-01 07:46:00.000000000 -0400
@@ -39,6 +39,7 @@
 #include <linux/irq.h>
 #include <linux/seq_file.h>
 #include <linux/root_dev.h>
+#include <trace/events/power.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
@@ -582,6 +583,10 @@
 	 * while, do so.
 	 */
 	if (snooze) {
+
+		trace_power_start(POWER_CSTATE, CPU_IDLE_SNOOZE, cpu);
+		trace_cpu_idle(CPU_IDLE_SNOOZE, cpu);
+
 		start_snooze = get_tb() + snooze * tb_ticks_per_usec;
 		local_irq_enable();
 		set_thread_flag(TIF_POLLING_NRFLAG);
@@ -602,9 +607,19 @@
 			goto out;
 	}
 
+	trace_power_end(cpu);
+	trace_cpu_idle(PWR_EVENT_EXIT, cpu);
+
+	trace_power_start(POWER_CSTATE, CPU_IDLE_CEDE, cpu);
+	trace_cpu_idle(CPU_IDLE_CEDE, cpu);
+
 	cede_processor();
 
 out:
+
+	trace_power_end(cpu);
+	trace_cpu_idle(PWR_EVENT_EXIT, cpu);
+
 	HMT_medium();
 	out_purr = mfspr(SPRN_PURR);
 	get_lppaca()->wait_state_cycles += out_purr - in_purr;
Index: linux-2.6.39/arch/powerpc/platforms/pseries/pseries.h
===================================================================
--- linux-2.6.39.orig/arch/powerpc/platforms/pseries/pseries.h	2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/arch/powerpc/platforms/pseries/pseries.h	2011-06-01 07:53:24.000000000 -0400
@@ -12,6 +12,9 @@
 
 #include <linux/interrupt.h>
 
+#define CPU_IDLE_SNOOZE 1
+#define CPU_IDLE_CEDE	 2
+
 struct device_node;
 
 extern void request_event_sources_irqs(struct device_node *np,
@@ -56,4 +59,5 @@
 extern int dlpar_attach_node(struct device_node *);
 extern int dlpar_detach_node(struct device_node *);
 
+
 #endif /* _PSERIES_PSERIES_H */

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2011-06-21 16:41 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-01 12:35 [PATCH] perf_events: Enable idle state tracing for pseries (ppc64) Deepthi Dharwar
2011-06-17  4:24 ` Benjamin Herrenschmidt
2011-06-20 17:18   ` deepthi
2011-06-20 21:42     ` Benjamin Herrenschmidt
2011-06-20 21:42     ` Benjamin Herrenschmidt
2011-06-21 16:29       ` deepthi
2011-06-21 16:40         ` Deepthi Dharwar
2011-06-21 16:40         ` Deepthi Dharwar
2011-06-21 16:29       ` deepthi
2011-06-20 17:18   ` deepthi
2011-06-17  4:24 ` Benjamin Herrenschmidt
2011-06-01 12:35 Deepthi Dharwar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.