All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Ingo Molnar <mingo@elte.hu>, linux-kernel@vger.kernel.org
Cc: Paul Mackerras <paulus@samba.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 09/15] perf_counter tools: optionally scale counter values in perfstat mode
Date: Mon, 30 Mar 2009 19:07:10 +0200	[thread overview]
Message-ID: <20090330171023.871484899@chello.nl> (raw)
In-Reply-To: 20090330170701.856843742@chello.nl

[-- Attachment #1: paulus-perf_counter_tools-optionally_scale_counter_values_in_perfstat_mode.patch --]
[-- Type: text/plain, Size: 5885 bytes --]

From: Paul Mackerras <paulus@samba.org>

Impact: new functionality

This adds an option to the perfstat mode of kerneltop to scale the
reported counter values according to the fraction of time that each
counter gets to count.  This is invoked with the -l option (I used 'l'
because s, c, a and e were all taken already.)  This uses the new
PERF_FORMAT_TOTAL_TIME_{ENABLED,RUNNING} read format options.

With this, we get output like this:

$ ./perfstat -l -e 0:0,0:1,0:2,0:3,0:4,0:5 ./spin

 Performance counter stats for './spin':

     4016072055  CPU cycles           (events)  (scaled from 66.53%)
     2005887318  instructions         (events)  (scaled from 66.53%)
        1762849  cache references     (events)  (scaled from 66.69%)
         165229  cache misses         (events)  (scaled from 66.85%)
     1001298009  branches             (events)  (scaled from 66.78%)
          41566  branch misses        (events)  (scaled from 66.61%)

 Wall-clock time elapsed:  2438.227446 msecs

This also lets us detect when a counter is zero because the counter
never got to go on the CPU at all.  In that case we print <not counted>
rather than 0.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 Documentation/perf_counter/kerneltop.c |   56 ++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 11 deletions(-)

Index: linux-2.6/Documentation/perf_counter/kerneltop.c
===================================================================
--- linux-2.6.orig/Documentation/perf_counter/kerneltop.c
+++ linux-2.6/Documentation/perf_counter/kerneltop.c
@@ -197,6 +197,8 @@ static int			delay_secs			=  2;
 static int			zero;
 static int			dump_symtab;
 
+static int			scale;
+
 struct source_line {
 	uint64_t		EIP;
 	unsigned long		count;
@@ -305,6 +307,7 @@ static void display_perfstat_help(void)
 	display_events_help();
 
 	printf(
+	" -l                           # scale counter values\n"
 	" -a                           # system-wide collection\n");
 	exit(0);
 }
@@ -328,6 +331,7 @@ static void display_help(void)
 	" -c CNT    --count=CNT        # event period to sample\n\n"
 	" -C CPU    --cpu=CPU          # CPU (-1 for all)                 [default: -1]\n"
 	" -p PID    --pid=PID          # PID of sampled task (-1 for all) [default: -1]\n\n"
+	" -l                           # show scale factor for RR events\n"
 	" -d delay  --delay=<seconds>  # sampling/display delay           [default:  2]\n"
 	" -f CNT    --filter=CNT       # min-event-count filter          [default: 100]\n\n"
 	" -s symbol --symbol=<symbol>  # function to be showed annotated one-shot\n"
@@ -436,6 +440,9 @@ static void create_perfstat_counter(int 
 	hw_event.config		= event_id[counter];
 	hw_event.record_type	= PERF_RECORD_SIMPLE;
 	hw_event.nmi		= 0;
+	if (scale)
+		hw_event.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
+					  PERF_FORMAT_TOTAL_TIME_RUNNING;
 
 	if (system_wide) {
 		int cpu;
@@ -507,28 +514,53 @@ int do_perfstat(int argc, char *argv[])
 	fprintf(stderr, "\n");
 
 	for (counter = 0; counter < nr_counters; counter++) {
-		int cpu;
-		__u64 count, single_count;
+		int cpu, nv;
+		__u64 count[3], single_count[3];
+		int scaled;
 
-		count = 0;
+		count[0] = count[1] = count[2] = 0;
+		nv = scale ? 3 : 1;
 		for (cpu = 0; cpu < nr_cpus; cpu ++) {
 			res = read(fd[cpu][counter],
-					(char *) &single_count, sizeof(single_count));
-			assert(res == sizeof(single_count));
-			count += single_count;
+				   single_count, nv * sizeof(__u64));
+			assert(res == nv * sizeof(__u64));
+
+			count[0] += single_count[0];
+			if (scale) {
+				count[1] += single_count[1];
+				count[2] += single_count[2];
+			}
+		}
+
+		scaled = 0;
+		if (scale) {
+			if (count[2] == 0) {
+				fprintf(stderr, " %14s  %-20s\n",
+					"<not counted>", event_name(counter));
+				continue;
+			}
+			if (count[2] < count[1]) {
+				scaled = 1;
+				count[0] = (unsigned long long)
+					((double)count[0] * count[1] / count[2] + 0.5);
+			}
 		}
 
 		if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
 		    event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
 
-			double msecs = (double)count / 1000000;
+			double msecs = (double)count[0] / 1000000;
 
-			fprintf(stderr, " %14.6f  %-20s (msecs)\n",
+			fprintf(stderr, " %14.6f  %-20s (msecs)",
 				msecs, event_name(counter));
 		} else {
-			fprintf(stderr, " %14Ld  %-20s (events)\n",
-				count, event_name(counter));
+			fprintf(stderr, " %14Ld  %-20s (events)",
+				count[0], event_name(counter));
 		}
+		if (scaled)
+			fprintf(stderr, "  (scaled from %.2f%%)",
+				(double) count[2] / count[1] * 100);
+		fprintf(stderr, "\n");
 	}
 	fprintf(stderr, "\n");
 	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
@@ -1049,6 +1081,7 @@ static void process_options(int argc, ch
 			{"filter",	required_argument,	NULL, 'f'},
 			{"group",	required_argument,	NULL, 'g'},
 			{"help",	no_argument,		NULL, 'h'},
+			{"scale",	no_argument,		NULL, 'l'},
 			{"nmi",		required_argument,	NULL, 'n'},
 			{"pid",		required_argument,	NULL, 'p'},
 			{"vmlinux",	required_argument,	NULL, 'x'},
@@ -1060,7 +1093,7 @@ static void process_options(int argc, ch
 			{"munmap_info",	no_argument,		NULL, 'U'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:zMU",
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:s:Sx:zMU",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -1084,6 +1117,7 @@ static void process_options(int argc, ch
 		case 'f': count_filter			=   atoi(optarg); break;
 		case 'g': group				=   atoi(optarg); break;
 		case 'h':      				  display_help(); break;
+		case 'l': scale				=	       1; break;
 		case 'n': nmi				=   atoi(optarg); break;
 		case 'p':
 			/* CPU and PID are mutually exclusive */

-- 


  parent reply	other threads:[~2009-03-30 17:13 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-30 17:07 [PATCH 00/15] pending perf_counter bits Peter Zijlstra
2009-03-30 17:07 ` [PATCH 01/15] perf_counter: unify and fix delayed counter wakeup Peter Zijlstra
2009-03-31  5:45   ` Paul Mackerras
2009-03-31  6:37     ` Peter Zijlstra
2009-04-01  9:04       ` Ingo Molnar
2009-04-01 10:12   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 02/15] perf_counter: fix update_userpage() Peter Zijlstra
2009-04-01 10:12   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 03/15] perf_counter: kerneltop: simplify data_head read Peter Zijlstra
2009-04-01 10:13   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 04/15] perf_counter: executable mmap() information Peter Zijlstra
2009-04-01 10:13   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 05/15] perf_counter: kerneltop: parse the mmap data stream Peter Zijlstra
2009-04-01 10:13   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 06/15] perf_counter: powerpc: only reserve PMU hardware when we need it Peter Zijlstra
2009-04-01 10:13   ` [tip:perfcounters/core] " Paul Mackerras
2009-03-30 17:07 ` [PATCH 07/15] perf_counter: make it possible for hw_perf_counter_init to return error codes Peter Zijlstra
2009-04-01 10:13   ` [tip:perfcounters/core] " Paul Mackerras
2009-03-30 17:07 ` [PATCH 08/15] perf_counter: x86: proper error propagation for the x86 hw_perf_counter_init() Peter Zijlstra
2009-04-01 10:13   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` Peter Zijlstra [this message]
2009-04-01  9:39   ` [PATCH 09/15] perf_counter tools: optionally scale counter values in perfstat mode Ingo Molnar
2009-04-01 10:50     ` Paul Mackerras
2009-04-01 10:14   ` [tip:perfcounters/core] " Paul Mackerras
2009-03-30 17:07 ` [PATCH 10/15] perf_counter: small cleanup of the output routines Peter Zijlstra
2009-04-01 10:14   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 11/15] perf_counter: re-arrange the perf_event_type Peter Zijlstra
2009-04-01 10:14   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 12/15] pref_counter: kerneltop: update event_types Peter Zijlstra
2009-04-01 10:14   ` [tip:perfcounters/core] perf_counter tools: " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 13/15] perf_counter: provide generic callchain bits Peter Zijlstra
2009-03-31  6:12   ` Paul Mackerras
2009-03-31  6:39     ` Peter Zijlstra
2009-03-31  7:24       ` Corey Ashford
2009-03-31  8:43         ` Peter Zijlstra
2009-03-31  9:09           ` Paul Mackerras
2009-03-31  9:12         ` Paul Mackerras
2009-03-31 14:00           ` Peter Zijlstra
2009-03-31 17:11             ` Corey Ashford
2009-04-01  3:48             ` Paul Mackerras
2009-04-01  7:59               ` Peter Zijlstra
2009-04-01  8:45                 ` Paul Mackerras
2009-04-01 10:00                   ` Ingo Molnar
2009-04-01 11:53                     ` Paul Mackerras
2009-04-01 23:25                 ` Corey Ashford
2009-04-02  6:43                   ` Peter Zijlstra
2009-04-02  7:41                     ` Peter Zijlstra
2009-04-02  9:10                       ` Paul Mackerras
2009-04-02  9:14                         ` Peter Zijlstra
2009-04-01 10:14   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 14/15] perf_counter: x86: callchain support Peter Zijlstra
2009-04-01 10:14   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 15/15] perf_counter: pmc arbitration Peter Zijlstra
2009-04-01 10:15   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-31  5:43 ` [PATCH 00/15] pending perf_counter bits Paul Mackerras

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090330171023.871484899@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.