All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf tool: Fix ppid for synthesized fork events
@ 2015-03-19 17:41 David Ahern
  2015-03-19 20:56 ` Don Zickus
  0 siblings, 1 reply; 11+ messages in thread
From: David Ahern @ 2015-03-19 17:41 UTC (permalink / raw)
  To: acme; +Cc: linux-kernel, David Ahern, Don Zickus, Jiri Olsa

363b785f38 added synthesized fork events and set a thread's parent id
to itself. Since we are already processing /proc/<pid>/status the ppid
can be determined properly. Make it so.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
---
 tools/perf/util/event.c | 94 ++++++++++++++++++++++++++++---------------------
 1 file changed, 53 insertions(+), 41 deletions(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index d5efa5092ce6..62bf7b71ac7b 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -49,23 +49,26 @@ static struct perf_sample synth_sample = {
 	.period	   = 1,
 };
 
-static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len)
+static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
+				    pid_t *tgid, pid_t *ppid)
 {
 	char filename[PATH_MAX];
 	char bf[BUFSIZ];
 	FILE *fp;
 	size_t size = 0;
-	pid_t tgid = -1;
+
+	*tgid = -1;
+	*ppid = -1;
 
 	snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
 
 	fp = fopen(filename, "r");
 	if (fp == NULL) {
 		pr_debug("couldn't open %s\n", filename);
-		return 0;
+		return -1;
 	}
 
-	while (!comm[0] || (tgid < 0)) {
+	while (!comm[0] || (*tgid < 0) || (*ppid < 0)) {
 		if (fgets(bf, sizeof(bf), fp) == NULL) {
 			pr_warning("couldn't get COMM and pgid, malformed %s\n",
 				   filename);
@@ -86,33 +89,45 @@ static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len)
 			char *tgids = bf + 5;
 			while (*tgids && isspace(*tgids))
 				++tgids;
-			tgid = atoi(tgids);
+			*tgid = atoi(tgids);
+
+		} else if (memcmp(bf, "PPid:", 5) == 0) {
+			char *ppids = bf + 5;
+
+			while (*ppids && isspace(*ppids))
+				++ppids;
+			*ppid = atoi(ppids);
 		}
 	}
 
 	fclose(fp);
 
-	return tgid;
+	return 0;
 }
 
-static pid_t perf_event__prepare_comm(union perf_event *event, pid_t pid,
-					 struct machine *machine)
+static int perf_event__prepare_comm(union perf_event *event, pid_t pid,
+				    struct machine *machine,
+				    pid_t *tgid, pid_t *ppid)
 {
 	size_t size;
-	pid_t tgid;
+
+	*ppid = -1;
 
 	memset(&event->comm, 0, sizeof(event->comm));
 
-	if (machine__is_host(machine))
-		tgid = perf_event__get_comm_tgid(pid, event->comm.comm,
-						 sizeof(event->comm.comm));
-	else
-		tgid = machine->pid;
+	if (machine__is_host(machine)) {
+		if (perf_event__get_comm_ids(pid, event->comm.comm,
+					     sizeof(event->comm.comm),
+					     tgid, ppid) != 0) {
+			return -1;
+		}
+	} else
+		*tgid = machine->pid;
 
-	if (tgid < 0)
-		goto out;
+	if (*tgid < 0)
+		return -1;
 
-	event->comm.pid = tgid;
+	event->comm.pid = *tgid;
 	event->comm.header.type = PERF_RECORD_COMM;
 
 	size = strlen(event->comm.comm) + 1;
@@ -122,36 +137,35 @@ static pid_t perf_event__prepare_comm(union perf_event *event, pid_t pid,
 				(sizeof(event->comm.comm) - size) +
 				machine->id_hdr_size);
 	event->comm.tid = pid;
-out:
-	return tgid;
+
+	return 0;
 }
 
-static pid_t perf_event__synthesize_comm(struct perf_tool *tool,
-					 union perf_event *event, pid_t pid,
-					 perf_event__handler_t process,
-					 struct machine *machine)
+static int perf_event__synthesize_comm(struct perf_tool *tool,
+				       union perf_event *event, pid_t pid,
+				       perf_event__handler_t process,
+				       struct machine *machine,
+				       pid_t *tgid, pid_t *ppid)
 {
-	pid_t tgid = perf_event__prepare_comm(event, pid, machine);
-
-	if (tgid == -1)
-		goto out;
+	if (perf_event__prepare_comm(event, pid, machine, tgid, ppid) != 0)
+		return -1;
 
 	if (process(tool, event, &synth_sample, machine) != 0)
 		return -1;
 
-out:
-	return tgid;
+	return 0;
 }
 
 static int perf_event__synthesize_fork(struct perf_tool *tool,
-				       union perf_event *event, pid_t pid,
-				       pid_t tgid, perf_event__handler_t process,
+				       union perf_event *event,
+				       pid_t pid, pid_t tgid, pid_t ppid,
+				       perf_event__handler_t process,
 				       struct machine *machine)
 {
 	memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
 
-	event->fork.ppid = tgid;
-	event->fork.ptid = tgid;
+	event->fork.ppid = ppid;
+	event->fork.ptid = ppid;
 	event->fork.pid  = tgid;
 	event->fork.tid  = pid;
 	event->fork.header.type = PERF_RECORD_FORK;
@@ -343,14 +357,12 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 	char filename[PATH_MAX];
 	DIR *tasks;
 	struct dirent dirent, *next;
-	pid_t tgid;
+	pid_t tgid, ppid;
 
 	/* special case: only send one comm event using passed in pid */
 	if (!full) {
-		tgid = perf_event__synthesize_comm(tool, comm_event, pid,
-						   process, machine);
-
-		if (tgid == -1)
+		if (perf_event__synthesize_comm(tool, comm_event, pid,
+						process, machine, &tgid, &ppid) != 0)
 			return -1;
 
 		return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
@@ -378,12 +390,12 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 		if (*end)
 			continue;
 
-		tgid = perf_event__prepare_comm(comm_event, _pid, machine);
-		if (tgid == -1)
+		if (perf_event__prepare_comm(comm_event, _pid, machine,
+					     &tgid, &ppid) != 0)
 			return -1;
 
 		if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
-						process, machine) < 0)
+						ppid, process, machine) < 0)
 			return -1;
 		/*
 		 * Send the prepared comm event
-- 
2.2.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tool: Fix ppid for synthesized fork events
  2015-03-19 17:41 [PATCH] perf tool: Fix ppid for synthesized fork events David Ahern
@ 2015-03-19 20:56 ` Don Zickus
  2015-03-19 21:06   ` David Ahern
  0 siblings, 1 reply; 11+ messages in thread
From: Don Zickus @ 2015-03-19 20:56 UTC (permalink / raw)
  To: David Ahern; +Cc: acme, linux-kernel, Jiri Olsa

On Thu, Mar 19, 2015 at 11:41:15AM -0600, David Ahern wrote:
> 363b785f38 added synthesized fork events and set a thread's parent id
> to itself. Since we are already processing /proc/<pid>/status the ppid
> can be determined properly. Make it so.

Thanks David.  My tester, Joe, is currently running other tests and then is
out until Tuesday.  I will try to provide test feedback by Tuesday or
Wednesday if it can wait.

Cheers,
Don

> 
> Signed-off-by: David Ahern <dsahern@gmail.com>
> Cc: Don Zickus <dzickus@redhat.com>
> Cc: Jiri Olsa <jolsa@redhat.com>
> ---
>  tools/perf/util/event.c | 94 ++++++++++++++++++++++++++++---------------------
>  1 file changed, 53 insertions(+), 41 deletions(-)
> 
> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> index d5efa5092ce6..62bf7b71ac7b 100644
> --- a/tools/perf/util/event.c
> +++ b/tools/perf/util/event.c
> @@ -49,23 +49,26 @@ static struct perf_sample synth_sample = {
>  	.period	   = 1,
>  };
>  
> -static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len)
> +static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
> +				    pid_t *tgid, pid_t *ppid)
>  {
>  	char filename[PATH_MAX];
>  	char bf[BUFSIZ];
>  	FILE *fp;
>  	size_t size = 0;
> -	pid_t tgid = -1;
> +
> +	*tgid = -1;
> +	*ppid = -1;
>  
>  	snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
>  
>  	fp = fopen(filename, "r");
>  	if (fp == NULL) {
>  		pr_debug("couldn't open %s\n", filename);
> -		return 0;
> +		return -1;
>  	}
>  
> -	while (!comm[0] || (tgid < 0)) {
> +	while (!comm[0] || (*tgid < 0) || (*ppid < 0)) {
>  		if (fgets(bf, sizeof(bf), fp) == NULL) {
>  			pr_warning("couldn't get COMM and pgid, malformed %s\n",
>  				   filename);
> @@ -86,33 +89,45 @@ static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len)
>  			char *tgids = bf + 5;
>  			while (*tgids && isspace(*tgids))
>  				++tgids;
> -			tgid = atoi(tgids);
> +			*tgid = atoi(tgids);
> +
> +		} else if (memcmp(bf, "PPid:", 5) == 0) {
> +			char *ppids = bf + 5;
> +
> +			while (*ppids && isspace(*ppids))
> +				++ppids;
> +			*ppid = atoi(ppids);
>  		}
>  	}
>  
>  	fclose(fp);
>  
> -	return tgid;
> +	return 0;
>  }
>  
> -static pid_t perf_event__prepare_comm(union perf_event *event, pid_t pid,
> -					 struct machine *machine)
> +static int perf_event__prepare_comm(union perf_event *event, pid_t pid,
> +				    struct machine *machine,
> +				    pid_t *tgid, pid_t *ppid)
>  {
>  	size_t size;
> -	pid_t tgid;
> +
> +	*ppid = -1;
>  
>  	memset(&event->comm, 0, sizeof(event->comm));
>  
> -	if (machine__is_host(machine))
> -		tgid = perf_event__get_comm_tgid(pid, event->comm.comm,
> -						 sizeof(event->comm.comm));
> -	else
> -		tgid = machine->pid;
> +	if (machine__is_host(machine)) {
> +		if (perf_event__get_comm_ids(pid, event->comm.comm,
> +					     sizeof(event->comm.comm),
> +					     tgid, ppid) != 0) {
> +			return -1;
> +		}
> +	} else
> +		*tgid = machine->pid;
>  
> -	if (tgid < 0)
> -		goto out;
> +	if (*tgid < 0)
> +		return -1;
>  
> -	event->comm.pid = tgid;
> +	event->comm.pid = *tgid;
>  	event->comm.header.type = PERF_RECORD_COMM;
>  
>  	size = strlen(event->comm.comm) + 1;
> @@ -122,36 +137,35 @@ static pid_t perf_event__prepare_comm(union perf_event *event, pid_t pid,
>  				(sizeof(event->comm.comm) - size) +
>  				machine->id_hdr_size);
>  	event->comm.tid = pid;
> -out:
> -	return tgid;
> +
> +	return 0;
>  }
>  
> -static pid_t perf_event__synthesize_comm(struct perf_tool *tool,
> -					 union perf_event *event, pid_t pid,
> -					 perf_event__handler_t process,
> -					 struct machine *machine)
> +static int perf_event__synthesize_comm(struct perf_tool *tool,
> +				       union perf_event *event, pid_t pid,
> +				       perf_event__handler_t process,
> +				       struct machine *machine,
> +				       pid_t *tgid, pid_t *ppid)
>  {
> -	pid_t tgid = perf_event__prepare_comm(event, pid, machine);
> -
> -	if (tgid == -1)
> -		goto out;
> +	if (perf_event__prepare_comm(event, pid, machine, tgid, ppid) != 0)
> +		return -1;
>  
>  	if (process(tool, event, &synth_sample, machine) != 0)
>  		return -1;
>  
> -out:
> -	return tgid;
> +	return 0;
>  }
>  
>  static int perf_event__synthesize_fork(struct perf_tool *tool,
> -				       union perf_event *event, pid_t pid,
> -				       pid_t tgid, perf_event__handler_t process,
> +				       union perf_event *event,
> +				       pid_t pid, pid_t tgid, pid_t ppid,
> +				       perf_event__handler_t process,
>  				       struct machine *machine)
>  {
>  	memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
>  
> -	event->fork.ppid = tgid;
> -	event->fork.ptid = tgid;
> +	event->fork.ppid = ppid;
> +	event->fork.ptid = ppid;
>  	event->fork.pid  = tgid;
>  	event->fork.tid  = pid;
>  	event->fork.header.type = PERF_RECORD_FORK;
> @@ -343,14 +357,12 @@ static int __event__synthesize_thread(union perf_event *comm_event,
>  	char filename[PATH_MAX];
>  	DIR *tasks;
>  	struct dirent dirent, *next;
> -	pid_t tgid;
> +	pid_t tgid, ppid;
>  
>  	/* special case: only send one comm event using passed in pid */
>  	if (!full) {
> -		tgid = perf_event__synthesize_comm(tool, comm_event, pid,
> -						   process, machine);
> -
> -		if (tgid == -1)
> +		if (perf_event__synthesize_comm(tool, comm_event, pid,
> +						process, machine, &tgid, &ppid) != 0)
>  			return -1;
>  
>  		return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
> @@ -378,12 +390,12 @@ static int __event__synthesize_thread(union perf_event *comm_event,
>  		if (*end)
>  			continue;
>  
> -		tgid = perf_event__prepare_comm(comm_event, _pid, machine);
> -		if (tgid == -1)
> +		if (perf_event__prepare_comm(comm_event, _pid, machine,
> +					     &tgid, &ppid) != 0)
>  			return -1;
>  
>  		if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
> -						process, machine) < 0)
> +						ppid, process, machine) < 0)
>  			return -1;
>  		/*
>  		 * Send the prepared comm event
> -- 
> 2.2.1
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tool: Fix ppid for synthesized fork events
  2015-03-19 20:56 ` Don Zickus
@ 2015-03-19 21:06   ` David Ahern
  2015-03-20 14:01     ` Arnaldo Carvalho de Melo
  2015-03-24 20:10     ` Don Zickus
  0 siblings, 2 replies; 11+ messages in thread
From: David Ahern @ 2015-03-19 21:06 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, linux-kernel, Jiri Olsa

On 3/19/15 2:56 PM, Don Zickus wrote:
> On Thu, Mar 19, 2015 at 11:41:15AM -0600, David Ahern wrote:
>> >363b785f38 added synthesized fork events and set a thread's parent id
>> >to itself. Since we are already processing/proc/<pid>/status the ppid
>> >can be determined properly. Make it so.
> Thanks David.  My tester, Joe, is currently running other tests and then is
> out until Tuesday.  I will try to provide test feedback by Tuesday or
> Wednesday if it can wait.

ok. thanks for the heads up.

David

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tool: Fix ppid for synthesized fork events
  2015-03-19 21:06   ` David Ahern
@ 2015-03-20 14:01     ` Arnaldo Carvalho de Melo
  2015-03-24 20:10     ` Don Zickus
  1 sibling, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2015-03-20 14:01 UTC (permalink / raw)
  To: David Ahern; +Cc: Don Zickus, linux-kernel, Jiri Olsa

Em Thu, Mar 19, 2015 at 03:06:46PM -0600, David Ahern escreveu:
> On 3/19/15 2:56 PM, Don Zickus wrote:
> >On Thu, Mar 19, 2015 at 11:41:15AM -0600, David Ahern wrote:
> >>>363b785f38 added synthesized fork events and set a thread's parent id
> >>>to itself. Since we are already processing/proc/<pid>/status the ppid
> >>>can be determined properly. Make it so.
> >Thanks David.  My tester, Joe, is currently running other tests and then is
> >out until Tuesday.  I will try to provide test feedback by Tuesday or
> >Wednesday if it can wait.
> 
> ok. thanks for the heads up.

Was almost applying it, will wait.

- Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tool: Fix ppid for synthesized fork events
  2015-03-19 21:06   ` David Ahern
  2015-03-20 14:01     ` Arnaldo Carvalho de Melo
@ 2015-03-24 20:10     ` Don Zickus
  2015-03-24 21:12       ` David Ahern
  1 sibling, 1 reply; 11+ messages in thread
From: Don Zickus @ 2015-03-24 20:10 UTC (permalink / raw)
  To: David Ahern; +Cc: acme, linux-kernel, Jiri Olsa, jmario

On Thu, Mar 19, 2015 at 03:06:46PM -0600, David Ahern wrote:
> On 3/19/15 2:56 PM, Don Zickus wrote:
> >On Thu, Mar 19, 2015 at 11:41:15AM -0600, David Ahern wrote:
> >>>363b785f38 added synthesized fork events and set a thread's parent id
> >>>to itself. Since we are already processing/proc/<pid>/status the ppid
> >>>can be determined properly. Make it so.
> >Thanks David.  My tester, Joe, is currently running other tests and then is
> >out until Tuesday.  I will try to provide test feedback by Tuesday or
> >Wednesday if it can wait.
> 
> ok. thanks for the heads up.

Hmm, preliminary tests, show a significant slow down in perf record and perf
report.  I would be against this patch for now.  We will dig into what the
problem is.

Joe is running a specjbb bench with lots of threads in the background and
running:

perf mem record -a -e 'cpu/mem-loads,ldlat=50/pp' -e 'cpu/mem-stores/pp' sleep 10


multiple times to get an average.  And also

perf mem report --stdio

to get that timing average too..

He does this with and without the patch.  The difference is usually over 50%
extra time with the patch for both the record timings and report timings. :-(

Cheers,
Don

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tool: Fix ppid for synthesized fork events
  2015-03-24 20:10     ` Don Zickus
@ 2015-03-24 21:12       ` David Ahern
  2015-03-25 12:22         ` Joe Mario
  0 siblings, 1 reply; 11+ messages in thread
From: David Ahern @ 2015-03-24 21:12 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, linux-kernel, Jiri Olsa, jmario

On 3/24/15 2:10 PM, Don Zickus wrote:
> He does this with and without the patch.  The difference is usually over 50%
> extra time with the patch for both the record timings and report timings.:-(

I find that shocking. The patch only populates ppid and ptid with a 
value read from the file that is already opened and processed. Most of 
the patch is just plumbing the value from the low level function that 
processes the status file back to synthesize_fork.

What benchmark is this? Is it something I can download and run? if so, 
details? site, command, args?

Thanks,
David

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tool: Fix ppid for synthesized fork events
  2015-03-24 21:12       ` David Ahern
@ 2015-03-25 12:22         ` Joe Mario
  2015-03-25 13:24           ` Arnaldo Carvalho de Melo
  2015-03-25 16:57           ` David Ahern
  0 siblings, 2 replies; 11+ messages in thread
From: Joe Mario @ 2015-03-25 12:22 UTC (permalink / raw)
  To: David Ahern, Don Zickus; +Cc: acme, linux-kernel, Jiri Olsa

On 03/24/2015 05:12 PM, David Ahern wrote:
> On 3/24/15 2:10 PM, Don Zickus wrote:
>> He does this with and without the patch.  The difference is usually over 50%
>> extra time with the patch for both the record timings and report timings.:-(
>
> I find that shocking. The patch only populates ppid and ptid with a value read from the file that is already opened and processed. Most of the patch is just plumbing the value from the low level function that processes the status file back to synthesize_fork.
>
> What benchmark is this? Is it something I can download and run? if so, details? site, command, args?
>
> Thanks,
> David

Hi David:

We ran "time perf mem record -a -e cpu/mem-loads,ldlat=50/pp -e cpu/mem-stores/pp sleep 10" on a system that was running SPECjbb2013 in the background.  There were about 10,000 java threads with about 500 to 800 in a runnable state at any given time.   We ran it on a 4 socket x86 IVB server.

We had two perf binaries.  One with your patch and one without it.  Because the benchmark doesn't always have a constant load, we ran the above perf command in a loop alternating between the patched and unpatched version.   The elapsed wall clock times ("real" field from time) for the perf with your patch was typically >= 50% longer than the equivalent unpatched perf.

We can't give out a copy of the SPEC benchmark (part of the SPEC agreement).  Try to find some application (or create your own) to load the system with lots of busy threads.

Joe

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tool: Fix ppid for synthesized fork events
  2015-03-25 12:22         ` Joe Mario
@ 2015-03-25 13:24           ` Arnaldo Carvalho de Melo
  2015-03-25 15:03             ` David Ahern
  2015-03-25 16:57           ` David Ahern
  1 sibling, 1 reply; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2015-03-25 13:24 UTC (permalink / raw)
  To: Joe Mario; +Cc: David Ahern, Don Zickus, linux-kernel, Jiri Olsa

Em Wed, Mar 25, 2015 at 08:22:34AM -0400, Joe Mario escreveu:
> On 03/24/2015 05:12 PM, David Ahern wrote:
> >On 3/24/15 2:10 PM, Don Zickus wrote:
> >>He does this with and without the patch.  The difference is usually
> >>over 50% extra time with the patch for both the record timings and
> >>report timings.:-(

> >I find that shocking. The patch only populates ppid and ptid with a
> >value read from the file that is already opened and processed. Most
> >of the patch is just plumbing the value from the low level function
> >that processes the status file back to synthesize_fork.

> >What benchmark is this? Is it something I can download and run? if so, details? site, command, args?
 
> Hi David:
 
> We ran "time perf mem record -a -e cpu/mem-loads,ldlat=50/pp -e
> cpu/mem-stores/pp sleep 10" on a system that was running SPECjbb2013
> in the background.  There were about 10,000 java threads with about
> 500 to 800 in a runnable state at any given time.   We ran it on a 4
> socket x86 IVB server.

So it starts when there are tons of threads in the system, for which
synthezing from /proc will have to take place, without looking again at
that patch, I can't think about what would be a problem :-\
 
> We had two perf binaries.  One with your patch and one without it.
> Because the benchmark doesn't always have a constant load, we ran the
> above perf command in a loop alternating between the patched and
> unpatched version.   The elapsed wall clock times ("real" field from
> time) for the perf with your patch was typically >= 50% longer than
> the equivalent unpatched perf.
 
> We can't give out a copy of the SPEC benchmark (part of the SPEC
> agreement).  Try to find some application (or create your own) to load
> the system with lots of busy threads.

Haven't tried, but head this is something that could perhaps be useful
here:

http://kernel.ubuntu.com/~cking/stress-ng/

- Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tool: Fix ppid for synthesized fork events
  2015-03-25 13:24           ` Arnaldo Carvalho de Melo
@ 2015-03-25 15:03             ` David Ahern
  2015-03-25 19:20               ` Don Zickus
  0 siblings, 1 reply; 11+ messages in thread
From: David Ahern @ 2015-03-25 15:03 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Joe Mario; +Cc: Don Zickus, linux-kernel, Jiri Olsa

On 3/25/15 7:24 AM, Arnaldo Carvalho de Melo wrote:
> So it starts when there are tons of threads in the system, for which
> synthezing from /proc will have to take place, without looking again at
> that patch, I can't think about what would be a problem :-\

3 extra lines are read from /proc/pid/status:

Name:	bash
State:	S (sleeping)
Tgid:	6046

< current patch breaks here>

Ngid:	0
Pid:	6046
PPid:	6045

< my patch reads these 3 lines, repeats the memcmp and does an atoi on 
the PPid: value >

Let me remove that loop by reading in 4k at a time and making a single 
pass. That should bring down the overhead, but filling in the ppid will 
add some.

David


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tool: Fix ppid for synthesized fork events
  2015-03-25 12:22         ` Joe Mario
  2015-03-25 13:24           ` Arnaldo Carvalho de Melo
@ 2015-03-25 16:57           ` David Ahern
  1 sibling, 0 replies; 11+ messages in thread
From: David Ahern @ 2015-03-25 16:57 UTC (permalink / raw)
  To: Joe Mario, Don Zickus; +Cc: acme, linux-kernel, Jiri Olsa

On 3/25/15 6:22 AM, Joe Mario wrote:
> We ran "time perf mem record -a -e cpu/mem-loads,ldlat=50/pp -e
> cpu/mem-stores/pp sleep 10" on a system that was running SPECjbb2013 in
> the background.  There were about 10,000 java threads with about 500 to
> 800 in a runnable state at any given time.   We ran it on a 4 socket x86
> IVB server.
>
> We had two perf binaries.  One with your patch and one without it.
> Because the benchmark doesn't always have a constant load, we ran the
> above perf command in a loop alternating between the patched and
> unpatched version.   The elapsed wall clock times ("real" field from
> time) for the perf with your patch was typically >= 50% longer than the
> equivalent unpatched perf.

Sent a v2 with performance numbers on my end.

Adding -BN to the record removes processing of the events for build-ids. 
I also chose to use -e cpu-clock -F 1000 with -- usleep 1 to trim what 
perf-record is doing to *only* reading /proc files and generating COMM 
and FORK events.

David


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tool: Fix ppid for synthesized fork events
  2015-03-25 15:03             ` David Ahern
@ 2015-03-25 19:20               ` Don Zickus
  0 siblings, 0 replies; 11+ messages in thread
From: Don Zickus @ 2015-03-25 19:20 UTC (permalink / raw)
  To: David Ahern; +Cc: Arnaldo Carvalho de Melo, Joe Mario, linux-kernel, Jiri Olsa

On Wed, Mar 25, 2015 at 09:03:38AM -0600, David Ahern wrote:
> On 3/25/15 7:24 AM, Arnaldo Carvalho de Melo wrote:
> >So it starts when there are tons of threads in the system, for which
> >synthezing from /proc will have to take place, without looking again at
> >that patch, I can't think about what would be a problem :-\
> 
> 3 extra lines are read from /proc/pid/status:
> 
> Name:	bash
> State:	S (sleeping)
> Tgid:	6046
> 
> < current patch breaks here>
> 
> Ngid:	0
> Pid:	6046
> PPid:	6045
> 
> < my patch reads these 3 lines, repeats the memcmp and does an atoi
> on the PPid: value >

Ah, I read my email out of order.  So you figured out the extra latency.
Nice.  In that case I think I am ok with your V2, though I still think split
it out would make sense.

Sorry about that.  Thanks for quickly debugging that!!

Cheers,
Don

> 
> Let me remove that loop by reading in 4k at a time and making a
> single pass. That should bring down the overhead, but filling in the
> ppid will add some.
> 
> David
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-03-25 19:20 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-19 17:41 [PATCH] perf tool: Fix ppid for synthesized fork events David Ahern
2015-03-19 20:56 ` Don Zickus
2015-03-19 21:06   ` David Ahern
2015-03-20 14:01     ` Arnaldo Carvalho de Melo
2015-03-24 20:10     ` Don Zickus
2015-03-24 21:12       ` David Ahern
2015-03-25 12:22         ` Joe Mario
2015-03-25 13:24           ` Arnaldo Carvalho de Melo
2015-03-25 15:03             ` David Ahern
2015-03-25 19:20               ` Don Zickus
2015-03-25 16:57           ` David Ahern

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.