linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC/PATCH v2] perf data: Add stat subcommand to show sample event stat
@ 2015-05-11 15:25 Namhyung Kim
  2015-05-11 15:44 ` Andi Kleen
  0 siblings, 1 reply; 4+ messages in thread
From: Namhyung Kim @ 2015-05-11 15:25 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Ingo Molnar, Peter Zijlstra, Jiri Olsa, LKML, David Ahern,
	Stephane Eranian, Andi Kleen, Minchan Kim

Add stat subbcommand to 'perf data' so that it can show various event
stats in the file.

The example output looks like below:

  $ perf record -a sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.635 MB perf.data (1946 samples) ]

  $ perf data stat

    Total event stats for 'perf.data' file:

              TOTAL events:       6837
               MMAP events:        116
               COMM events:        375
               EXIT events:          2
           THROTTLE events:         12
         UNTHROTTLE events:         11
               FORK events:        374
             SAMPLE events:       1946
              MMAP2 events:       3999
     FINISHED_ROUND events:          2

   Sample event stats:

         361,255,234      cycles
               1,946      samples                   #   sampling ratio  12.162% (486/4000)

         0.998581085 second time sampled

The sampling ratio was useful for me to determine how often the event
was sampled - in this case the cpu cycles event was only sampled at 12%
of the expected sampling frequency, so the system was mostly idle in
terms of cpu cycles (or the event was multiplexed in case of recording
large number of events at once).

The sampling ratio was calulated like below:

  expected_freq  = evsel->attr.sample_freq
  actual_freq    = (nr_samples / nr_cpus) / sample_time
  sampling_ratio = 100 * actual_freq / expected_freq

Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
* move it to 'perf data' rather than 'perf stat'
* show event rate for non-freq events

 tools/perf/builtin-data.c | 130 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 129 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index d6525bc54d13..13031743f22c 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -4,6 +4,12 @@
 #include "debug.h"
 #include "parse-options.h"
 #include "data-convert-bt.h"
+#include "hist.h"
+#include "data.h"
+#include "session.h"
+#include "evlist.h"
+
+#include <locale.h>
 
 typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix);
 
@@ -22,7 +28,7 @@ static const struct option data_options[] = {
 	OPT_END()
 };
 
-static const char * const data_subcommands[] = { "convert", NULL };
+static const char * const data_subcommands[] = { "convert", "stat", NULL };
 
 static const char *data_usage[] = {
 	"perf data [<common options>] <command> [<options>]",
@@ -88,8 +94,128 @@ static int cmd_data_convert(int argc, const char **argv,
 	return 0;
 }
 
+static u64 first_sample_nsec;
+static u64 last_sample_nsec;
+
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
+				union perf_event *event __maybe_unused,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine __maybe_unused)
+{
+	struct hists *hists = evsel__hists(evsel);
+
+	if (!first_sample_nsec)
+		first_sample_nsec = sample->time;
+	last_sample_nsec = sample->time;
+
+	hists->stats.total_period += sample->period;
+	hists->stats.nr_events[PERF_RECORD_SAMPLE]++;
+	return 0;
+}
+
+static int show_sample_stat(bool force)
+{
+	struct perf_data_file file = {
+		.mode  = PERF_DATA_MODE_READ,
+		.path  = input_name,
+		.force = force,
+	};
+	struct perf_tool tool = {
+		.sample = process_sample_event,
+	};
+	struct perf_session *session;
+	struct perf_evsel *evsel;
+	u64 sample_nsec;
+	int nr_cpus;
+	int ret;
+
+	session = perf_session__new(&file, false, &tool);
+	if (session == NULL)
+		return -1;
+
+	ret = perf_session__process_events(session);
+	if (ret < 0)
+		goto out;
+
+	nr_cpus = session->header.env.nr_cpus_online;
+	sample_nsec = last_sample_nsec - first_sample_nsec;
+
+	printf("\n Total event stats for '%s' file:\n\n", file.path);
+	events_stats__fprintf(&session->evlist->stats, stdout);
+
+	printf("\n Sample event stats:\n\n");
+	evlist__for_each(session->evlist, evsel) {
+		struct hists *hists = evsel__hists(evsel);
+		struct events_stats *stats = &hists->stats;
+		u64 total_period = stats->total_period;
+		u32 nr_samples = stats->nr_events[PERF_RECORD_SAMPLE];
+
+		printf("%'18"PRIu64"      ", total_period);
+		printf("%-25s", perf_evsel__name(evsel));
+		printf("\n");
+		printf("%'18u      ", nr_samples);
+		printf("%-25s", "samples");
+
+		if (sample_nsec == 0 || evsel->attr.sample_freq == 0)
+			goto next;
+
+		if (evsel->attr.freq) {
+			int expected_rate = evsel->attr.sample_freq;
+			double sample_rate = 1.0 * nr_samples / nr_cpus;
+
+			sample_rate *= (double)NSEC_PER_SEC / sample_nsec;
+			printf(" #   sampling ratio  %.3f%% (%d/%d)",
+				100 * sample_rate / expected_rate,
+				(int)sample_rate, expected_rate);
+		} else {
+			double event_rate = NSEC_PER_SEC * nr_samples;
+
+			event_rate /= 1000 * sample_nsec;
+			printf(" #   event rate  %.3f (K/sec)", event_rate);
+		}
+
+next:
+		printf("\n\n");
+	}
+
+	printf("%18.9f second time sampled\n",
+	       (double)sample_nsec / NSEC_PER_SEC);
+
+out:
+	perf_session__delete(session);
+	return ret;
+}
+
+
+static const char * const data_stat_usage[] = {
+	"perf data stat [<options>]",
+	NULL
+};
+
+static int cmd_data_stat(int argc, const char **argv,
+			 const char *prefix __maybe_unused)
+{
+	bool force = false;
+	const struct option options[] = {
+		OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+		OPT_STRING('i', "input", &input_name, "file", "input file name"),
+		OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+		OPT_END()
+	};
+
+	argc = parse_options(argc, argv, options, data_stat_usage, 0);
+	if (argc) {
+		usage_with_options(data_stat_usage, options);
+		return -1;
+	}
+
+	return show_sample_stat(force);
+}
+
 static struct data_cmd data_cmds[] = {
 	{ "convert", "converts data file between formats", cmd_data_convert },
+	{ "stat", "show event stats in data file", cmd_data_stat },
 	{ .name = NULL, },
 };
 
@@ -107,6 +233,8 @@ int cmd_data(int argc, const char **argv, const char *prefix)
 	if (argc < 1)
 		goto usage;
 
+	setlocale(LC_ALL, "");
+
 	cmdstr = argv[0];
 
 	for_each_cmd(cmd) {
-- 
2.4.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [RFC/PATCH v2] perf data: Add stat subcommand to show sample event stat
  2015-05-11 15:25 [RFC/PATCH v2] perf data: Add stat subcommand to show sample event stat Namhyung Kim
@ 2015-05-11 15:44 ` Andi Kleen
  2015-05-13 12:05   ` Namhyung Kim
  0 siblings, 1 reply; 4+ messages in thread
From: Andi Kleen @ 2015-05-11 15:44 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Ingo Molnar, Peter Zijlstra, Jiri Olsa,
	LKML, David Ahern, Stephane Eranian, Andi Kleen, Minchan Kim

> The sampling ratio was useful for me to determine how often the event
> was sampled - in this case the cpu cycles event was only sampled at 12%

That's dangerous to determine without a plot. It could be that it was bimodal: 
100% busy and then idle. You may want to add something like the spark
plots I submitted for stat some time ago.

-Andi

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC/PATCH v2] perf data: Add stat subcommand to show sample event stat
  2015-05-11 15:44 ` Andi Kleen
@ 2015-05-13 12:05   ` Namhyung Kim
  2015-05-14 19:32     ` Andi Kleen
  0 siblings, 1 reply; 4+ messages in thread
From: Namhyung Kim @ 2015-05-13 12:05 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Arnaldo Carvalho de Melo, Ingo Molnar, Peter Zijlstra, Jiri Olsa,
	LKML, David Ahern, Stephane Eranian, Minchan Kim

Hi Andi,

On Mon, May 11, 2015 at 05:44:05PM +0200, Andi Kleen wrote:
> > The sampling ratio was useful for me to determine how often the event
> > was sampled - in this case the cpu cycles event was only sampled at 12%
> 
> That's dangerous to determine without a plot. It could be that it was bimodal: 
> 100% busy and then idle. You may want to add something like the spark
> plots I submitted for stat some time ago.

Right, we cannot know the exact situation from a single number.  But
it was okay for me just to see overall status from the number.  This
is what we cannot know from the output of 'perf report' easily, so I'd
like to add this info in some way.

Anyway, I wrote a script to plot the number of samples and periods
using python's matplotlib package.  Maybe we can add it to the script
database.

Thanks,
Namhyung


/* sample-chart.py */
import os
import sys
import numpy as np
import matplotlib.pyplot as plt

sys.path.append(os.environ['PERF_EXEC_PATH'] + \
        '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')

from perf_trace_context import *
from EventClass import *

events = {}
mode = None  # 'cpu' or 'task'
nr_events = 0
first_time = 0
last_time = 0

def trace_begin():
    pass

def trace_end():
    xcnt = last_time - first_time + 1
    xpos = np.arange(xcnt)
    times = np.arange(first_time, last_time + 1)

    fig, plt_array = plt.subplots(nrows = nr_events, ncols = 2)
    fig.suptitle("Event stat", fontsize=20)

    n = 0
    for e in events:
        p1 = plt_array[n][0]
        p2 = plt_array[n][1]
        for k in events[e]:  # key = cpu or tid
            ev_stats = events[e][k]

            samples = np.zeros(xcnt)
            periods = np.zeros(xcnt)
            for t in ev_stats:
                samples[t - first_time] = ev_stats[t][0]
                periods[t - first_time] = ev_stats[t][1]

            key = "%s %d" % (mode, k)

            p1.plot(times, samples, 'o', linewidth=2, label=key)
            p2.plot(times, periods, '-', linewidth=2, label=key)

        expect = 400 * np.ones(xcnt)
        p1.plot(times, expect, '--')

        p1.set_title("Number of samples in '%s'" % e)
        p1.legend()
        p2.set_title("Event values in '%s'" % e)
        p2.legend()
        n += 1

    plt.show()

def process_event(param_dict):
    evt  = param_dict["ev_name"]
    cpu  = param_dict["sample"]["cpu"]
    tid  = param_dict["sample"]["tid"]
    time = param_dict["sample"]["time"] / 100000000  # 100 ms
    val  = param_dict["sample"]["period"]

    if evt not in events:
        global nr_events
        nr_events += 1
        events[evt] = {}

    global mode
    if mode is None:
        if cpu >= 10000000:
            mode = 'task'
        else:
            mode = 'cpu'

    key = cpu if mode == 'cpu' else tid
    if key not in events[evt]:
        events[evt][key] = {}

    global first_time, last_time
    if first_time == 0 or first_time > time:
        first_time = time
    if last_time < time:
        last_time = time

    ev_stat = events[evt][key]
    if time not in ev_stat:
        ev_stat[time] = [0, 0]  # (nr_sample, period)
    ev_stat[time][0] += 1
    ev_stat[time][1] += val

def trace_unhandled(event_name, context, event_fields_dict):
    pass

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC/PATCH v2] perf data: Add stat subcommand to show sample event stat
  2015-05-13 12:05   ` Namhyung Kim
@ 2015-05-14 19:32     ` Andi Kleen
  0 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2015-05-14 19:32 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Andi Kleen, Arnaldo Carvalho de Melo, Ingo Molnar,
	Peter Zijlstra, Jiri Olsa, LKML, David Ahern, Stephane Eranian,
	Minchan Kim

On Wed, May 13, 2015 at 09:05:22PM +0900, Namhyung Kim wrote:
> Hi Andi,
> 
> On Mon, May 11, 2015 at 05:44:05PM +0200, Andi Kleen wrote:
> > > The sampling ratio was useful for me to determine how often the event
> > > was sampled - in this case the cpu cycles event was only sampled at 12%
> > 
> > That's dangerous to determine without a plot. It could be that it was bimodal: 
> > 100% busy and then idle. You may want to add something like the spark
> > plots I submitted for stat some time ago.
> 
> Right, we cannot know the exact situation from a single number.  But
> it was okay for me just to see overall status from the number.  This
> is what we cannot know from the output of 'perf report' easily, so I'd
> like to add this info in some way.
> 
> Anyway, I wrote a script to plot the number of samples and periods
> using python's matplotlib package.  Maybe we can add it to the script
> database.

Looks good. Yes it would be useful to have in the database.

-Andi

> 
> Thanks,
> Namhyung
> 
> 
> /* sample-chart.py */
> import os
> import sys
> import numpy as np
> import matplotlib.pyplot as plt
> 
> sys.path.append(os.environ['PERF_EXEC_PATH'] + \
>         '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
> 
> from perf_trace_context import *
> from EventClass import *
> 
> events = {}
> mode = None  # 'cpu' or 'task'
> nr_events = 0
> first_time = 0
> last_time = 0
> 
> def trace_begin():
>     pass
> 
> def trace_end():
>     xcnt = last_time - first_time + 1
>     xpos = np.arange(xcnt)
>     times = np.arange(first_time, last_time + 1)
> 
>     fig, plt_array = plt.subplots(nrows = nr_events, ncols = 2)
>     fig.suptitle("Event stat", fontsize=20)
> 
>     n = 0
>     for e in events:
>         p1 = plt_array[n][0]
>         p2 = plt_array[n][1]
>         for k in events[e]:  # key = cpu or tid
>             ev_stats = events[e][k]
> 
>             samples = np.zeros(xcnt)
>             periods = np.zeros(xcnt)
>             for t in ev_stats:
>                 samples[t - first_time] = ev_stats[t][0]
>                 periods[t - first_time] = ev_stats[t][1]
> 
>             key = "%s %d" % (mode, k)
> 
>             p1.plot(times, samples, 'o', linewidth=2, label=key)
>             p2.plot(times, periods, '-', linewidth=2, label=key)
> 
>         expect = 400 * np.ones(xcnt)
>         p1.plot(times, expect, '--')
> 
>         p1.set_title("Number of samples in '%s'" % e)
>         p1.legend()
>         p2.set_title("Event values in '%s'" % e)
>         p2.legend()
>         n += 1
> 
>     plt.show()
> 
> def process_event(param_dict):
>     evt  = param_dict["ev_name"]
>     cpu  = param_dict["sample"]["cpu"]
>     tid  = param_dict["sample"]["tid"]
>     time = param_dict["sample"]["time"] / 100000000  # 100 ms
>     val  = param_dict["sample"]["period"]
> 
>     if evt not in events:
>         global nr_events
>         nr_events += 1
>         events[evt] = {}
> 
>     global mode
>     if mode is None:
>         if cpu >= 10000000:
>             mode = 'task'
>         else:
>             mode = 'cpu'
> 
>     key = cpu if mode == 'cpu' else tid
>     if key not in events[evt]:
>         events[evt][key] = {}
> 
>     global first_time, last_time
>     if first_time == 0 or first_time > time:
>         first_time = time
>     if last_time < time:
>         last_time = time
> 
>     ev_stat = events[evt][key]
>     if time not in ev_stat:
>         ev_stat[time] = [0, 0]  # (nr_sample, period)
>     ev_stat[time][0] += 1
>     ev_stat[time][1] += val
> 
> def trace_unhandled(event_name, context, event_fields_dict):
>     pass
> 

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-05-14 19:32 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-11 15:25 [RFC/PATCH v2] perf data: Add stat subcommand to show sample event stat Namhyung Kim
2015-05-11 15:44 ` Andi Kleen
2015-05-13 12:05   ` Namhyung Kim
2015-05-14 19:32     ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).