All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC/PATCH] perf stat: Show sample events stat for a data file
@ 2015-04-29  7:27 Namhyung Kim
  2015-04-29  7:42 ` Jiri Olsa
  2015-04-29  9:27 ` Peter Zijlstra
  0 siblings, 2 replies; 7+ messages in thread
From: Namhyung Kim @ 2015-04-29  7:27 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Ingo Molnar, Peter Zijlstra, Jiri Olsa, LKML, David Ahern,
	Stephane Eranian, Andi Kleen, Minchan Kim

Add --input option to 'perf stat' so that it can show event stats of the
file.  I would like to use a short '-i' option to be compatible with
other commands but it was already taken by '--no-inherit' option, so it
only supports the long option.

The example output looks like below:

  $ perf record -a sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.635 MB perf.data (1946 samples) ]

  $ perf stat --input perf.data

    Total event stats for 'perf.data' file:

              TOTAL events:       6837
               MMAP events:        116
               COMM events:        375
               EXIT events:          2
           THROTTLE events:         12
         UNTHROTTLE events:         11
               FORK events:        374
             SAMPLE events:       1946
              MMAP2 events:       3999
     FINISHED_ROUND events:          2

   Sample event stats:

         361,255,234      cycles
               1,946      samples                   #   sampling ratio  12.162% (486/4000)

         0.998581085 second time sampled

The sampling ratio was useful for me to determine how often the event
was sampled - in this case the cpu cycles event was only sampled at 12%
of the expected sampling frequency, so the system was mostly idle in
terms of cpu cycles (or the event was multiplexed, as happens when
recording a large number of events at once).

The sampling ratio was calculated as follows:

  expected_freq  = evsel->attr.sample_freq
  actual_freq    = (nr_samples / nr_cpus) / sample_time
  sampling_ratio = 100 * actual_freq / expected_freq

Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-stat.c | 91 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 89 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index fd577f725d23..78ded82df4a9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -58,6 +58,7 @@
 #include "util/cpumap.h"
 #include "util/thread.h"
 #include "util/thread_map.h"
+#include "util/hist.h"
 
 #include <stdlib.h>
 #include <sys/prctl.h>
@@ -1576,6 +1577,87 @@ static int perf_stat_init_aggr_mode(void)
 	return 0;
 }
 
+static u64 first_sample_nsec;
+static u64 last_sample_nsec;
+
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
+				union perf_event *event __maybe_unused,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine __maybe_unused)
+{
+	struct hists *hists = evsel__hists(evsel);
+
+	if (!first_sample_nsec)
+		first_sample_nsec = sample->time;
+	last_sample_nsec = sample->time;
+
+	hists->stats.total_period += sample->period;
+	hists->stats.nr_events[PERF_RECORD_SAMPLE]++;
+	return 0;
+}
+
+static int show_sample_stat(void)
+{
+	struct perf_data_file file = {
+		.mode  = PERF_DATA_MODE_READ,
+		.path  = input_name,
+	};
+	struct perf_tool tool = {
+		.sample = process_sample_event,
+	};
+	struct perf_session *session;
+	struct perf_evsel *evsel;
+	u64 sample_nsec;
+	int nr_cpus;
+	int ret;
+
+	session = perf_session__new(&file, false, &tool);
+	if (session == NULL)
+		return -1;
+
+	ret = perf_session__process_events(session);
+	if (ret < 0)
+		return ret;
+
+	nr_cpus = session->header.env.nr_cpus_online;
+	sample_nsec = last_sample_nsec - first_sample_nsec;
+
+	fprintf(output, "\n Total event stats for '%s' file:\n\n", input_name);
+	events_stats__fprintf(&session->evlist->stats, output);
+
+	fprintf(output, "\n Sample event stats:\n\n");
+	evlist__for_each(session->evlist, evsel) {
+		struct hists *hists = evsel__hists(evsel);
+		struct events_stats *stats = &hists->stats;
+		u64 total_period = stats->total_period;
+		u32 nr_samples = stats->nr_events[PERF_RECORD_SAMPLE];
+
+		fprintf(output, "%'18"PRIu64"      ", total_period);
+		fprintf(output, "%-25s", perf_evsel__name(evsel));
+		fprintf(output, "\n");
+		fprintf(output, "%'18u      ", nr_samples);
+		fprintf(output, "%-25s", "samples");
+
+		if (sample_nsec && evsel->attr.freq && evsel->attr.sample_freq) {
+			int expected_rate = evsel->attr.sample_freq;
+			double sample_rate = 1.0 * nr_samples / nr_cpus;
+
+			sample_rate *= (double)NSEC_PER_SEC / sample_nsec;
+			fprintf(output, " #   sampling ratio  %.3f%% (%d/%d)",
+				100 * sample_rate / expected_rate,
+				(int)sample_rate, expected_rate);
+		}
+		fprintf(output, "\n\n");
+	}
+
+	fprintf(output, "%18.9f second time sampled\n",
+		(double)sample_nsec / NSEC_PER_SEC);
+
+	perf_session__delete(session);
+	return 0;
+}
+
 static int setup_events(const char * const *attrs, unsigned len)
 {
 	unsigned i;
@@ -1793,6 +1875,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "print counts with custom separator"),
 	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
 		     "monitor event in cgroup name only", parse_cgroups),
+	OPT_STRING(0, "input", &input_name, "file", "input file name"),
 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
 	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
 	OPT_INTEGER(0, "log-fd", &output_fd,
@@ -1886,8 +1969,12 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	} else if (big_num_opt == 0) /* User passed --no-big-num */
 		big_num = false;
 
-	if (!argc && target__none(&target))
-		usage_with_options(stat_usage, options);
+	if (!argc && target__none(&target)) {
+		if (!input_name)
+			usage_with_options(stat_usage, options);
+		status = show_sample_stat();
+		goto out;
+	}
 
 	if (run_count < 0) {
 		pr_err("Run count must be a positive number\n");
-- 
2.3.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC/PATCH] perf stat: Show sample events stat for a data file
  2015-04-29  7:27 [RFC/PATCH] perf stat: Show sample events stat for a data file Namhyung Kim
@ 2015-04-29  7:42 ` Jiri Olsa
  2015-04-29  7:58   ` Namhyung Kim
  2015-04-29  9:27 ` Peter Zijlstra
  1 sibling, 1 reply; 7+ messages in thread
From: Jiri Olsa @ 2015-04-29  7:42 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Ingo Molnar, Peter Zijlstra, LKML,
	David Ahern, Stephane Eranian, Andi Kleen, Minchan Kim

On Wed, Apr 29, 2015 at 04:27:45PM +0900, Namhyung Kim wrote:
> Add --input option to 'perf stat' so that it can show event stats of the
> file.  I would like to use a short '-i' option to be compatible with
> other commands but it was already taken by '--no-inherit' option, so it
> only supports the long option.
> 
> The example output looks like below:
> 
>   $ perf record -a sleep 1
>   [ perf record: Woken up 1 times to write data ]
>   [ perf record: Captured and wrote 0.635 MB perf.data (1946 samples) ]
> 
>   $ perf stat --input perf.data

nice, but maybe I'd see this under 'perf data' command, like:

    $ perf data --stat

'perf stat' does not deal with sampling..  I think that 'perf data'
is for perf.data related stuff like:

  perf data convert ...
  perf data ls
  perf data --features
  perf data --stat
  ...

thoughts? ;-)

jirka

> 
>     Total event stats for 'perf.data' file:
> 
>               TOTAL events:       6837
>                MMAP events:        116
>                COMM events:        375
>                EXIT events:          2
>            THROTTLE events:         12
>          UNTHROTTLE events:         11
>                FORK events:        374
>              SAMPLE events:       1946
>               MMAP2 events:       3999
>      FINISHED_ROUND events:          2
> 
>    Sample event stats:
> 
>          361,255,234      cycles
>                1,946      samples                   #   sampling ratio  12.162% (486/4000)
> 
>          0.998581085 second time sampled
> 
> The sampling ratio was useful for me to determine how often the event
> was sampled - in this case the cpu cycles event was only sampled at 12%
> of the expected sampling frequency, so the system was mostly idle in
> terms of cpu cycles (or the event was multiplexed in case of recording
> large number of events at once).
> 
> The sampling ratio was calulated like below:
> 
>   expected_freq  = evsel->attr.sample_freq
>   actual_freq    = (nr_samples / nr_cpus) / sample_time
>   sampling_ratio = 100 * actual_freq / expected_freq
> 
> Cc: Minchan Kim <minchan@kernel.org>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/builtin-stat.c | 91 +++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 89 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index fd577f725d23..78ded82df4a9 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -58,6 +58,7 @@
>  #include "util/cpumap.h"
>  #include "util/thread.h"
>  #include "util/thread_map.h"
> +#include "util/hist.h"
>  
>  #include <stdlib.h>
>  #include <sys/prctl.h>
> @@ -1576,6 +1577,87 @@ static int perf_stat_init_aggr_mode(void)
>  	return 0;
>  }
>  
> +static u64 first_sample_nsec;
> +static u64 last_sample_nsec;
> +
> +static int process_sample_event(struct perf_tool *tool __maybe_unused,
> +				union perf_event *event __maybe_unused,
> +				struct perf_sample *sample,
> +				struct perf_evsel *evsel,
> +				struct machine *machine __maybe_unused)
> +{
> +	struct hists *hists = evsel__hists(evsel);
> +
> +	if (!first_sample_nsec)
> +		first_sample_nsec = sample->time;
> +	last_sample_nsec = sample->time;
> +
> +	hists->stats.total_period += sample->period;
> +	hists->stats.nr_events[PERF_RECORD_SAMPLE]++;
> +	return 0;
> +}
> +
> +static int show_sample_stat(void)
> +{
> +	struct perf_data_file file = {
> +		.mode  = PERF_DATA_MODE_READ,
> +		.path  = input_name,
> +	};
> +	struct perf_tool tool = {
> +		.sample = process_sample_event,
> +	};
> +	struct perf_session *session;
> +	struct perf_evsel *evsel;
> +	u64 sample_nsec;
> +	int nr_cpus;
> +	int ret;
> +
> +	session = perf_session__new(&file, false, &tool);
> +	if (session == NULL)
> +		return -1;
> +
> +	ret = perf_session__process_events(session);
> +	if (ret < 0)
> +		return ret;
> +
> +	nr_cpus = session->header.env.nr_cpus_online;
> +	sample_nsec = last_sample_nsec - first_sample_nsec;
> +
> +	fprintf(output, "\n Total event stats for '%s' file:\n\n", input_name);
> +	events_stats__fprintf(&session->evlist->stats, output);
> +
> +	fprintf(output, "\n Sample event stats:\n\n");
> +	evlist__for_each(session->evlist, evsel) {
> +		struct hists *hists = evsel__hists(evsel);
> +		struct events_stats *stats = &hists->stats;
> +		u64 total_period = stats->total_period;
> +		u32 nr_samples = stats->nr_events[PERF_RECORD_SAMPLE];
> +
> +		fprintf(output, "%'18"PRIu64"      ", total_period);
> +		fprintf(output, "%-25s", perf_evsel__name(evsel));
> +		fprintf(output, "\n");
> +		fprintf(output, "%'18u      ", nr_samples);
> +		fprintf(output, "%-25s", "samples");
> +
> +		if (sample_nsec && evsel->attr.freq && evsel->attr.sample_freq) {
> +			int expected_rate = evsel->attr.sample_freq;
> +			double sample_rate = 1.0 * nr_samples / nr_cpus;
> +
> +			sample_rate *= (double)NSEC_PER_SEC / sample_nsec;
> +			fprintf(output, " #   sampling ratio  %.3f%% (%d/%d)",
> +				100 * sample_rate / expected_rate,
> +				(int)sample_rate, expected_rate);
> +		}
> +		fprintf(output, "\n\n");
> +	}
> +
> +	fprintf(output, "%18.9f second time sampled\n",
> +		(double)sample_nsec / NSEC_PER_SEC);
> +
> +	perf_session__delete(session);
> +	return 0;
> +}
> +
>  static int setup_events(const char * const *attrs, unsigned len)
>  {
>  	unsigned i;
> @@ -1793,6 +1875,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
>  		   "print counts with custom separator"),
>  	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
>  		     "monitor event in cgroup name only", parse_cgroups),
> +	OPT_STRING(0, "input", &input_name, "file", "input file name"),
>  	OPT_STRING('o', "output", &output_name, "file", "output file name"),
>  	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
>  	OPT_INTEGER(0, "log-fd", &output_fd,
> @@ -1886,8 +1969,12 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
>  	} else if (big_num_opt == 0) /* User passed --no-big-num */
>  		big_num = false;
>  
> -	if (!argc && target__none(&target))
> -		usage_with_options(stat_usage, options);
> +	if (!argc && target__none(&target)) {
> +		if (!input_name)
> +			usage_with_options(stat_usage, options);
> +		status = show_sample_stat();
> +		goto out;
> +	}
>  
>  	if (run_count < 0) {
>  		pr_err("Run count must be a positive number\n");
> -- 
> 2.3.5
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC/PATCH] perf stat: Show sample events stat for a data file
  2015-04-29  7:42 ` Jiri Olsa
@ 2015-04-29  7:58   ` Namhyung Kim
  0 siblings, 0 replies; 7+ messages in thread
From: Namhyung Kim @ 2015-04-29  7:58 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Arnaldo Carvalho de Melo, Ingo Molnar, Peter Zijlstra, LKML,
	David Ahern, Stephane Eranian, Andi Kleen, Minchan Kim

Hi Jiri,

On Wed, Apr 29, 2015 at 4:42 PM, Jiri Olsa <jolsa@redhat.com> wrote:
> On Wed, Apr 29, 2015 at 04:27:45PM +0900, Namhyung Kim wrote:
>> Add --input option to 'perf stat' so that it can show event stats of the
>> file.  I would like to use a short '-i' option to be compatible with
>> other commands but it was already taken by '--no-inherit' option, so it
>> only supports the long option.
>>
>> The example output looks like below:
>>
>>   $ perf record -a sleep 1
>>   [ perf record: Woken up 1 times to write data ]
>>   [ perf record: Captured and wrote 0.635 MB perf.data (1946 samples) ]
>>
>>   $ perf stat --input perf.data
>
> nice, but maybe I'd see this under 'perf data' command, like:
>
>     $ perf data --stat
>
> 'perf stat' does not deal with sampling..  I think that 'perf data'
> is for perf.data related stuff like:
>
>   perf data convert ...
>   perf data ls
>   perf data --features
>   perf data --stat
>   ...
>
> thoughts? ;-)

Well, I don't mind where this feature goes.  :)

Actually I first thought to implement this in 'perf report --stat' as
it deals with samples.  But then changed my mind to go with 'perf
stat' because it's the command for showing stat result and the sample
processing required for this feature is trivial.  Also I tried to
match the output format to normal 'perf stat'.

Thanks,
Namhyung


>
> jirka
>
>>
>>     Total event stats for 'perf.data' file:
>>
>>               TOTAL events:       6837
>>                MMAP events:        116
>>                COMM events:        375
>>                EXIT events:          2
>>            THROTTLE events:         12
>>          UNTHROTTLE events:         11
>>                FORK events:        374
>>              SAMPLE events:       1946
>>               MMAP2 events:       3999
>>      FINISHED_ROUND events:          2
>>
>>    Sample event stats:
>>
>>          361,255,234      cycles
>>                1,946      samples                   #   sampling ratio  12.162% (486/4000)
>>
>>          0.998581085 second time sampled
>>
>> The sampling ratio was useful for me to determine how often the event
>> was sampled - in this case the cpu cycles event was only sampled at 12%
>> of the expected sampling frequency, so the system was mostly idle in
>> terms of cpu cycles (or the event was multiplexed in case of recording
>> large number of events at once).
>>
>> The sampling ratio was calulated like below:
>>
>>   expected_freq  = evsel->attr.sample_freq
>>   actual_freq    = (nr_samples / nr_cpus) / sample_time
>>   sampling_ratio = 100 * actual_freq / expected_freq
>>
>> Cc: Minchan Kim <minchan@kernel.org>
>> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
>> ---
>>  tools/perf/builtin-stat.c | 91 +++++++++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 89 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
>> index fd577f725d23..78ded82df4a9 100644
>> --- a/tools/perf/builtin-stat.c
>> +++ b/tools/perf/builtin-stat.c
>> @@ -58,6 +58,7 @@
>>  #include "util/cpumap.h"
>>  #include "util/thread.h"
>>  #include "util/thread_map.h"
>> +#include "util/hist.h"
>>
>>  #include <stdlib.h>
>>  #include <sys/prctl.h>
>> @@ -1576,6 +1577,87 @@ static int perf_stat_init_aggr_mode(void)
>>       return 0;
>>  }
>>
>> +static u64 first_sample_nsec;
>> +static u64 last_sample_nsec;
>> +
>> +static int process_sample_event(struct perf_tool *tool __maybe_unused,
>> +                             union perf_event *event __maybe_unused,
>> +                             struct perf_sample *sample,
>> +                             struct perf_evsel *evsel,
>> +                             struct machine *machine __maybe_unused)
>> +{
>> +     struct hists *hists = evsel__hists(evsel);
>> +
>> +     if (!first_sample_nsec)
>> +             first_sample_nsec = sample->time;
>> +     last_sample_nsec = sample->time;
>> +
>> +     hists->stats.total_period += sample->period;
>> +     hists->stats.nr_events[PERF_RECORD_SAMPLE]++;
>> +     return 0;
>> +}
>> +
>> +static int show_sample_stat(void)
>> +{
>> +     struct perf_data_file file = {
>> +             .mode  = PERF_DATA_MODE_READ,
>> +             .path  = input_name,
>> +     };
>> +     struct perf_tool tool = {
>> +             .sample = process_sample_event,
>> +     };
>> +     struct perf_session *session;
>> +     struct perf_evsel *evsel;
>> +     u64 sample_nsec;
>> +     int nr_cpus;
>> +     int ret;
>> +
>> +     session = perf_session__new(&file, false, &tool);
>> +     if (session == NULL)
>> +             return -1;
>> +
>> +     ret = perf_session__process_events(session);
>> +     if (ret < 0)
>> +             return ret;
>> +
>> +     nr_cpus = session->header.env.nr_cpus_online;
>> +     sample_nsec = last_sample_nsec - first_sample_nsec;
>> +
>> +     fprintf(output, "\n Total event stats for '%s' file:\n\n", input_name);
>> +     events_stats__fprintf(&session->evlist->stats, output);
>> +
>> +     fprintf(output, "\n Sample event stats:\n\n");
>> +     evlist__for_each(session->evlist, evsel) {
>> +             struct hists *hists = evsel__hists(evsel);
>> +             struct events_stats *stats = &hists->stats;
>> +             u64 total_period = stats->total_period;
>> +             u32 nr_samples = stats->nr_events[PERF_RECORD_SAMPLE];
>> +
>> +             fprintf(output, "%'18"PRIu64"      ", total_period);
>> +             fprintf(output, "%-25s", perf_evsel__name(evsel));
>> +             fprintf(output, "\n");
>> +             fprintf(output, "%'18u      ", nr_samples);
>> +             fprintf(output, "%-25s", "samples");
>> +
>> +             if (sample_nsec && evsel->attr.freq && evsel->attr.sample_freq) {
>> +                     int expected_rate = evsel->attr.sample_freq;
>> +                     double sample_rate = 1.0 * nr_samples / nr_cpus;
>> +
>> +                     sample_rate *= (double)NSEC_PER_SEC / sample_nsec;
>> +                     fprintf(output, " #   sampling ratio  %.3f%% (%d/%d)",
>> +                             100 * sample_rate / expected_rate,
>> +                             (int)sample_rate, expected_rate);
>> +             }
>> +             fprintf(output, "\n\n");
>> +     }
>> +
>> +     fprintf(output, "%18.9f second time sampled\n",
>> +             (double)sample_nsec / NSEC_PER_SEC);
>> +
>> +     perf_session__delete(session);
>> +     return 0;
>> +}
>> +
>>  static int setup_events(const char * const *attrs, unsigned len)
>>  {
>>       unsigned i;
>> @@ -1793,6 +1875,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
>>                  "print counts with custom separator"),
>>       OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
>>                    "monitor event in cgroup name only", parse_cgroups),
>> +     OPT_STRING(0, "input", &input_name, "file", "input file name"),
>>       OPT_STRING('o', "output", &output_name, "file", "output file name"),
>>       OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
>>       OPT_INTEGER(0, "log-fd", &output_fd,
>> @@ -1886,8 +1969,12 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
>>       } else if (big_num_opt == 0) /* User passed --no-big-num */
>>               big_num = false;
>>
>> -     if (!argc && target__none(&target))
>> -             usage_with_options(stat_usage, options);
>> +     if (!argc && target__none(&target)) {
>> +             if (!input_name)
>> +                     usage_with_options(stat_usage, options);
>> +             status = show_sample_stat();
>> +             goto out;
>> +     }
>>
>>       if (run_count < 0) {
>>               pr_err("Run count must be a positive number\n");
>> --
>> 2.3.5
>>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC/PATCH] perf stat: Show sample events stat for a data file
  2015-04-29  7:27 [RFC/PATCH] perf stat: Show sample events stat for a data file Namhyung Kim
  2015-04-29  7:42 ` Jiri Olsa
@ 2015-04-29  9:27 ` Peter Zijlstra
  2015-04-29 11:58   ` Namhyung Kim
  1 sibling, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2015-04-29  9:27 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Ingo Molnar, Jiri Olsa, LKML,
	David Ahern, Stephane Eranian, Andi Kleen, Minchan Kim

On Wed, Apr 29, 2015 at 04:27:45PM +0900, Namhyung Kim wrote:
> Add --input option to 'perf stat' so that it can show event stats of the
> file.  I would like to use a short '-i' option to be compatible with
> other commands but it was already taken by '--no-inherit' option, so it
> only supports the long option.
> 
> The example output looks like below:
> 
>   $ perf record -a sleep 1
>   [ perf record: Woken up 1 times to write data ]
>   [ perf record: Captured and wrote 0.635 MB perf.data (1946 samples) ]
> 
>   $ perf stat --input perf.data
> 
>     Total event stats for 'perf.data' file:
> 
>               TOTAL events:       6837
>                MMAP events:        116
>                COMM events:        375
>                EXIT events:          2
>            THROTTLE events:         12
>          UNTHROTTLE events:         11
>                FORK events:        374
>              SAMPLE events:       1946
>               MMAP2 events:       3999
>      FINISHED_ROUND events:          2
> 
>    Sample event stats:
> 
>          361,255,234      cycles
>                1,946      samples                   #   sampling ratio  12.162% (486/4000)
> 
>          0.998581085 second time sampled

It would be nice if this thing could support 'perf record -s' which
enables perf_event_attr::inherit_stat, which in turn results in
PERF_RECORD_READ entries after every PERF_RECORD_EXIT which give the
total number of events for each recorded task.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC/PATCH] perf stat: Show sample events stat for a data file
  2015-04-29  9:27 ` Peter Zijlstra
@ 2015-04-29 11:58   ` Namhyung Kim
  2015-05-08 14:46     ` Peter Zijlstra
  0 siblings, 1 reply; 7+ messages in thread
From: Namhyung Kim @ 2015-04-29 11:58 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Arnaldo Carvalho de Melo, Ingo Molnar, Jiri Olsa, LKML,
	David Ahern, Stephane Eranian, Andi Kleen, Minchan Kim

Hi Peter,

On Wed, Apr 29, 2015 at 11:27:51AM +0200, Peter Zijlstra wrote:
> On Wed, Apr 29, 2015 at 04:27:45PM +0900, Namhyung Kim wrote:
> > Add --input option to 'perf stat' so that it can show event stats of the
> > file.  I would like to use a short '-i' option to be compatible with
> > other commands but it was already taken by '--no-inherit' option, so it
> > only supports the long option.
> > 
> > The example output looks like below:
> > 
> >   $ perf record -a sleep 1
> >   [ perf record: Woken up 1 times to write data ]
> >   [ perf record: Captured and wrote 0.635 MB perf.data (1946 samples) ]
> > 
> >   $ perf stat --input perf.data
> > 
> >     Total event stats for 'perf.data' file:
> > 
> >               TOTAL events:       6837
> >                MMAP events:        116
> >                COMM events:        375
> >                EXIT events:          2
> >            THROTTLE events:         12
> >          UNTHROTTLE events:         11
> >                FORK events:        374
> >              SAMPLE events:       1946
> >               MMAP2 events:       3999
> >      FINISHED_ROUND events:          2
> > 
> >    Sample event stats:
> > 
> >          361,255,234      cycles
> >                1,946      samples                   #   sampling ratio  12.162% (486/4000)
> > 
> >          0.998581085 second time sampled
> 
> It would be nice if this thing could support 'perf record -s' which
> enables perf_event_attr::inherit_stat, which in turn results in
> PERF_RECORD_READ entries after every PERF_RECORD_EXIT which give the
> total number of events for each recorded task.

Hmm.. this is one of the areas where I don't understand what's going
on.  It'd be nice if you could help me understand it and possibly improve
the doc too.

So the combination of 'perf record -s' and 'perf report -T' should
show some per-thread counter values, right?  I've found a bug in it.

Anyway, I don't understand the numbers it shows.

  $ perf record -s -- perf bench sched messaging
  # Running 'sched/messaging' benchmark:
  # 20 sender and receiver processes per group
  # 10 groups == 400 processes run

       Total time: 0.192 [sec]
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.376 MB perf.data (6806 samples) ]


  $ perf report -T --stdio
  ...
  #  PID   TID           cycles           cycles                cycles                cycles                cycles
    9728  9728          5865527                0                     0                     0                     0
    9686  9686                0                0               5692534                     0                     0
    9650  9650                0                0                     0                     0                     0
    9649  9649          5578209                0                     0                     0                     0
    9685  9685          6227490                0                     0                     0                     0
    9596  9596                0  140483569056856                     0                     0                     0
    9837  9837           120079  140483569056856                     0                     0                     0
    9561  9561                0  140483569056856                     0                     0                     0
    9475  9475                0         46264256                     0                 14012                 14172
  ...

  $ perf stat --input perf.data

   Total event stats for 'perf.data.inherit' file:

             TOTAL events:       9370
              MMAP events:        140
              COMM events:          2
              EXIT events:        401
              FORK events:        400
              READ events:       1600
            SAMPLE events:       6806
             MMAP2 events:         20
    FINISHED_ROUND events:          1

   Sample event stats:

     2,984,107,504      cycles
             6,806      samples                   #   sampling ratio  161.107% (6444/4000)

       0.264032172 second time sampled

So the cycles values on 'perf report -T' are a lot more than the value
of perf stat.  And I have no idea why it shows 5 different cycles for
each thread.  Could you shed some light on this?

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC/PATCH] perf stat: Show sample events stat for a data file
  2015-04-29 11:58   ` Namhyung Kim
@ 2015-05-08 14:46     ` Peter Zijlstra
  2015-05-09 14:39       ` Namhyung Kim
  0 siblings, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2015-05-08 14:46 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Ingo Molnar, Jiri Olsa, LKML,
	David Ahern, Stephane Eranian, Andi Kleen, Minchan Kim

On Wed, Apr 29, 2015 at 08:58:22PM +0900, Namhyung Kim wrote:
> > It would be nice if this thing could support 'perf record -s' which
> > enables perf_event_attr::inherit_stat, which in turn results in
> > PERF_RECORD_READ entries after every PERF_RECORD_EXIT which give the
> > total number of events for each recorded task.
> 
> Hmm.. this is one of the areas that I don't understand what's going
> on.  It'd be nice if you can help me understand and possibly improve
> the doc too.
> 
> So the combination of 'perf record -s' and 'perf report -T' should
> show some per-thread counter values, right?  I've found a bug on it.

Right, they should. perf-record -s should give the event count per
monitored thread, which when summed, should match what perf-stat gives
for that workload.

And agreed on the bug, I can see it too.

> Anyway, I don't understand the numbers it shows.

Me neither. I don't quite know why it has 5 cycles columns, we only
measure the one cycles event.

> 
> So the cycles values on 'perf report -T' are a lot more than the value
> of perf stat.  And I have no idea why it shows 5 different cycles for
> each thread.  Could you shed some light on this?

Sadly no, not making sense. But I can confirm this is what it does.

Lemme go poke at this, it's weird.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC/PATCH] perf stat: Show sample events stat for a data file
  2015-05-08 14:46     ` Peter Zijlstra
@ 2015-05-09 14:39       ` Namhyung Kim
  0 siblings, 0 replies; 7+ messages in thread
From: Namhyung Kim @ 2015-05-09 14:39 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Arnaldo Carvalho de Melo, Ingo Molnar, Jiri Olsa, LKML,
	David Ahern, Stephane Eranian, Andi Kleen, Minchan Kim

Hi Peter,

On Fri, May 08, 2015 at 04:46:20PM +0200, Peter Zijlstra wrote:
> On Wed, Apr 29, 2015 at 08:58:22PM +0900, Namhyung Kim wrote:
> > > It would be nice if this thing could support 'perf record -s' which
> > > enables perf_event_attr::inherit_stat, which in turn results in
> > > PERF_RECORD_READ entries after every PERF_RECORD_EXIT which give the
> > > total number of events for each recorded task.
> > 
> > Hmm.. this is one of the areas that I don't understand what's going
> > on.  It'd be nice if you can help me understand and possibly improve
> > the doc too.
> > 
> > So the combination of 'perf record -s' and 'perf report -T' should
> > show some per-thread counter values, right?  I've found a bug on it.
> 
> Right, they should. perf-record -s should give the event count per
> monitored thread, which when summed, should match what perf-stat gives
> for that workload.

Hmm.. I still don't understand what the difference is between those
values and the output of 'perf report -s pid --show-total-period'.


> 
> And agreed on the bug, I can see it too.

I even think that 'perf report -T' should enforce the use of --stdio (as
it's the only supported mode) and show a warning if used with the --sort
or --parent options.  I'll send a patch soon.

> 
> > Anyway, I don't understand the numbers it shows.
> 
> Me neither. I don't quite know why it has 5 cycles columns, we only
> measure the one cycles event.

Right.

> 
> > 
> > So the cycles values on 'perf report -T' are a lot more than the value
> > of perf stat.  And I have no idea why it shows 5 different cycles for
> > each thread.  Could you shed some light on this?
> 
> Sadly no, not making sense. But I can confirm this is what it does.
> 
> Lemme go poke at this, its weird.

OK, so I'll focus on the original event stat feature first, and then
look into the thread stat after you figure out something.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-05-09 14:40 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-29  7:27 [RFC/PATCH] perf stat: Show sample events stat for a data file Namhyung Kim
2015-04-29  7:42 ` Jiri Olsa
2015-04-29  7:58   ` Namhyung Kim
2015-04-29  9:27 ` Peter Zijlstra
2015-04-29 11:58   ` Namhyung Kim
2015-05-08 14:46     ` Peter Zijlstra
2015-05-09 14:39       ` Namhyung Kim

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.