linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] trace-cmd profile enhancements
@ 2015-05-08 19:28 Josef Bacik
  2015-05-08 19:29 ` [PATCH 1/5] trace-cmd: add option to group like comms for profile Josef Bacik
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: Josef Bacik @ 2015-05-08 19:28 UTC (permalink / raw)
  To: rostedt, linux-kernel, kernel-team

I had to use trace-cmd profile to debug a regression with hhvm recently.  These
are the patches I needed to make trace-cmd profile useful to me.  There are a
few changes in here

1) Adding --merge to trace-cmd profile.  This merges events for like comms
together.  HHVM is a huge app that spawns worker threads to handle requests, I
needed all of this data in one place to see how the app as a whole was behaving,
--merge provides this.

2) Fixup the output formatting.  I also wanted to see which syscalls were taking
the longest but this was hard because we were just spitting out the syscall
number and it wasn't in any order, so now all the events are ordered based on
time taken and the syscall names are printed instead of the number.

3) Random trace-cmd hist fix, because it didn't work right with instances.

I've been using these in production for a couple of weeks now and they seem to
work well.  Thanks,

Josef

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/5] trace-cmd: add option to group like comms for profile
  2015-05-08 19:28 [PATCH 0/5] trace-cmd profile enhancements Josef Bacik
@ 2015-05-08 19:29 ` Josef Bacik
  2015-05-20 17:31   ` Steven Rostedt
  2015-05-08 19:29 ` [PATCH 2/5] trace-cmd: group schedule stacktraces together with --merge Josef Bacik
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Josef Bacik @ 2015-05-08 19:29 UTC (permalink / raw)
  To: rostedt, linux-kernel, kernel-team

When profiling large applications that have multiple children we really want to
see the profile of the app all together.  Add a --merge option to accomplish
this for profile.  This matches events based on comm instead of PID, this way we
can easilly see what a giant app is doing, such as HHVM.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 trace-hash-local.h |   7 ++
 trace-local.h      |   1 +
 trace-profile.c    | 207 +++++++++++++++++++++++++++++++++++++++++++++++++----
 trace-record.c     |   5 ++
 trace-usage.c      |   1 +
 5 files changed, 208 insertions(+), 13 deletions(-)

diff --git a/trace-hash-local.h b/trace-hash-local.h
index 997b11c..5bae3f3 100644
--- a/trace-hash-local.h
+++ b/trace-hash-local.h
@@ -48,4 +48,11 @@ static inline unsigned int trace_hash(int val)
 	return hash;
 }
 
+static inline unsigned int trace_hash_str(char *str)
+{
+	int val = 0;
+
+	while (*str) val += (int)*(str++);
+	return trace_hash(val);
+}
 #endif /* _TRACE_HASH_LOCAL_H */
diff --git a/trace-local.h b/trace-local.h
index ab2bd34..50f36ba 100644
--- a/trace-local.h
+++ b/trace-local.h
@@ -81,6 +81,7 @@ int trace_profile_record(struct tracecmd_input *handle,
 void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hooks,
 			int global);
 int trace_profile(void);
+void trace_profile_merge(void);
 
 struct tracecmd_input *
 trace_stream_init(struct buffer_instance *instance, int cpu, int fd, int cpus,
diff --git a/trace-profile.c b/trace-profile.c
index f71a7c6..bd775e8 100644
--- a/trace-profile.c
+++ b/trace-profile.c
@@ -34,6 +34,7 @@
 #define start_from_item(item)	container_of(item, struct start_data, hash)
 #define event_from_item(item)	container_of(item, struct event_hash, hash)
 #define stack_from_item(item)	container_of(item, struct stack_data, hash)
+#define group_from_item(item)	container_of(item, struct group_data, hash)
 #define event_data_from_item(item)	container_of(item, struct event_data, hash)
 
 static unsigned long long nsecs_per_sec(unsigned long long ts)
@@ -141,6 +142,12 @@ struct event_hash {
 	struct trace_hash	stacks;
 };
 
+struct group_data {
+	struct trace_hash_item	hash;
+	char			*comm;
+	struct trace_hash	event_hash;
+};
+
 struct task_data {
 	struct trace_hash_item	hash;
 	int			pid;
@@ -156,6 +163,7 @@ struct task_data {
 	struct event_hash	*last_event;
 	struct pevent_record	*last_stack;
 	struct handle_data	*handle;
+	struct group_data	*group;
 };
 
 struct cpu_info {
@@ -173,6 +181,7 @@ struct handle_data {
 	struct pevent		*pevent;
 
 	struct trace_hash	events;
+	struct trace_hash	group_hash;
 
 	struct cpu_info		**cpu_data;
 
@@ -196,6 +205,12 @@ struct handle_data {
 
 static struct handle_data *handles;
 static struct event_data *stacktrace_event;
+static bool merge_like_comms = false;
+
+void trace_profile_merge(void)
+{
+	merge_like_comms = true;
+}
 
 static struct start_data *
 add_start(struct task_data *task,
@@ -250,6 +265,7 @@ static struct event_hash *
 find_event_hash(struct task_data *task, struct event_data_match *edata)
 {
 	struct event_hash *event_hash;
+	struct trace_hash *hash;
 	struct trace_hash_item *item;
 	unsigned long long key;
 
@@ -257,7 +273,11 @@ find_event_hash(struct task_data *task, struct event_data_match *edata)
 		(unsigned long)edata->search_val +
 		(unsigned long)edata->val;
 	key = trace_hash(key);
-	item = trace_hash_find(&task->event_hash, key, match_event, edata);
+	if (task->group)
+		hash = &task->group->event_hash;
+	else
+		hash = &task->event_hash;
+	item = trace_hash_find(hash, key, match_event, edata);
 	if (item)
 		return event_from_item(item);
 
@@ -270,7 +290,7 @@ find_event_hash(struct task_data *task, struct event_data_match *edata)
 	event_hash->hash.key = key;
 	trace_hash_init(&event_hash->stacks, 32);
 
-	trace_hash_add(&task->event_hash, &event_hash->hash);
+	trace_hash_add(hash, &event_hash->hash);
 
 	return event_hash;
 }
@@ -496,9 +516,59 @@ find_task(struct handle_data *h, int pid)
 	return last_task;
 }
 
+static int match_group(struct trace_hash_item *item, void *data)
+{
+	struct group_data *group = group_from_item(item);
+
+	return strcmp(group->comm, (char *)data) == 0;
+}
+
+static void add_group(struct handle_data *h, struct task_data *task)
+{
+	unsigned long long key;
+	struct trace_hash_item *item;
+	void *data = task->comm;
+
+	if (!merge_like_comms)
+		return;
+	if (task->group || !task->comm)
+		return;
+
+	key = trace_hash_str(task->comm);
+
+	item = trace_hash_find(&h->group_hash, key, match_group, data);
+	if (item) {
+		task->group = group_from_item(item);
+	} else {
+		struct group_data *grp;
+		struct trace_hash_item **bucket;
+
+		grp = malloc_or_die(sizeof(*grp));
+		memset(grp, 0, sizeof(*grp));
+
+		grp->comm = strdup(task->comm);
+		if (!grp->comm)
+			die("strdup");
+		grp->hash.key = key;
+		trace_hash_add(&h->group_hash, &grp->hash);
+		task->group = grp;
+		trace_hash_init(&grp->event_hash, 32);
+
+		trace_hash_for_each_bucket(bucket, &task->event_hash) {
+			trace_hash_while_item(item, bucket) {
+				struct event_hash *event_hash;
+
+				event_hash = event_from_item(item);
+				trace_hash_del(&event_hash->hash);
+				trace_hash_add(&grp->event_hash, &event_hash->hash);
+			}
+		}
+	}
+}
+
 static void
-add_task_comm(struct task_data *task, struct format_field *field,
-	      struct pevent_record *record)
+add_task_comm(struct handle_data *h, struct task_data *task,
+	      struct format_field *field, struct pevent_record *record)
 {
 	const char *comm;
 
@@ -506,6 +576,7 @@ add_task_comm(struct task_data *task, struct format_field *field,
 	comm = record->data + field->offset;
 	memcpy(task->comm, comm, field->size);
 	task->comm[field->size] = 0;
+	add_group(h, task);
 }
 
 /* Account for tasks that don't have starts */
@@ -932,7 +1003,7 @@ static int handle_sched_switch_event(struct handle_data *h,
 
 	task = find_task(h, prev_pid);
 	if (!task->comm)
-		add_task_comm(task, h->switch_prev_comm, record);
+		add_task_comm(h, task, h->switch_prev_comm, record);
 
 	if (prev_state)
 		task->sleeping = 1;
@@ -940,13 +1011,13 @@ static int handle_sched_switch_event(struct handle_data *h,
 		task->sleeping = 0;
 
 	/* task is being scheduled out. prev_state tells why */
-	start = add_start(task, event_data, record, prev_pid, prev_state);
+	start = add_start(task, event_data, record, prev_state, prev_state);
 	task->last_start = start;
 	task->last_event = NULL;
 
 	task = find_task(h, next_pid);
 	if (!task->comm)
-		add_task_comm(task, h->switch_next_comm, record);
+		add_task_comm(h, task, h->switch_next_comm, record);
 
 	/*
 	 * If the next task was blocked, it required a wakeup to
@@ -1133,7 +1204,7 @@ static int handle_sched_wakeup_event(struct handle_data *h,
 
 	task = find_task(h, pid);
 	if (!task->comm)
-		add_task_comm(task, h->wakeup_comm, record);
+		add_task_comm(h, task, h->wakeup_comm, record);
 
 	/* if the task isn't sleeping, then ignore the wake up */
 	if (!task->sleeping) {
@@ -1196,6 +1267,7 @@ void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hook,
 
 	trace_hash_init(&h->task_hash, 1024);
 	trace_hash_init(&h->events, 1024);
+	trace_hash_init(&h->group_hash, 512);
 
 	h->handle = handle;
 	h->pevent = pevent;
@@ -1874,6 +1946,9 @@ static void output_task(struct handle_data *h, struct task_data *task)
 	int nr_events = 0;
 	int i;
 
+	if (trace_hash_empty(&task->event_hash))
+		return;
+
 	if (task->comm)
 		comm = task->comm;
 	else
@@ -1907,6 +1982,39 @@ static void output_task(struct handle_data *h, struct task_data *task)
 	free(events);
 }
 
+static void output_group(struct handle_data *h, struct group_data *group)
+{
+	struct trace_hash_item **bucket;
+	struct trace_hash_item *item;
+	struct event_hash **events;
+	int nr_events = 0;
+	int i;
+
+	printf("\ngroup: %s\n", group->comm);
+
+	trace_hash_for_each_bucket(bucket, &group->event_hash) {
+		trace_hash_for_each_item(item, bucket) {
+			nr_events++;
+		}
+	}
+
+	events = malloc_or_die(sizeof(*events) * nr_events);
+
+	i = 0;
+	trace_hash_for_each_bucket(bucket, &group->event_hash) {
+		trace_hash_for_each_item(item, bucket) {
+			events[i++] = event_from_item(item);
+		}
+	}
+
+	qsort(events, nr_events, sizeof(*events), compare_events);
+
+	for (i = 0; i < nr_events; i++)
+		output_event(events[i]);
+
+	free(events);
+}
+
 static int compare_tasks(const void *a, const void *b)
 {
 	struct task_data * const *A = a;
@@ -1919,6 +2027,14 @@ static int compare_tasks(const void *a, const void *b)
 	return 0;
 }
 
+static int compare_groups(const void *a, const void *b)
+{
+	const char *A = a;
+	const char *B = b;
+
+	return strcmp(A, B);
+}
+
 static void free_event_hash(struct event_hash *event_hash)
 {
 	struct trace_hash_item **bucket;
@@ -1976,6 +2092,25 @@ static void free_task(struct task_data *task)
 	free(task);
 }
 
+static void free_group(struct group_data *group)
+{
+	struct trace_hash_item **bucket;
+	struct trace_hash_item *item;
+	struct event_hash *event_hash;
+
+	free(group->comm);
+
+	trace_hash_for_each_bucket(bucket, &group->event_hash) {
+		trace_hash_while_item(item, bucket) {
+			event_hash = event_from_item(item);
+			trace_hash_del(item);
+			free_event_hash(event_hash);
+		}
+	}
+	trace_hash_free(&group->event_hash);
+	free(group);
+}
+
 static void show_global_task(struct handle_data *h,
 			     struct task_data *task)
 {
@@ -1985,7 +2120,7 @@ static void show_global_task(struct handle_data *h,
 	output_task(h, task);
 }
 
-static void output_handle(struct handle_data *h)
+static void output_tasks(struct handle_data *h)
 {
 	struct trace_hash_item **bucket;
 	struct trace_hash_item *item;
@@ -1993,10 +2128,6 @@ static void output_handle(struct handle_data *h)
 	int nr_tasks = 0;
 	int i;
 
-	show_global_task(h, h->global_task);
-	for (i = 0; i < h->cpus; i++)
-		show_global_task(h, &h->global_percpu_tasks[i]);
-
 	trace_hash_for_each_bucket(bucket, &h->task_hash) {
 		trace_hash_for_each_item(item, bucket) {
 			nr_tasks++;
@@ -2024,6 +2155,56 @@ static void output_handle(struct handle_data *h)
 	free(tasks);
 }
 
+static void output_groups(struct handle_data *h)
+{
+	struct trace_hash_item **bucket;
+	struct trace_hash_item *item;
+	struct group_data **groups;
+	int nr_groups = 0;
+	int i;
+
+	trace_hash_for_each_bucket(bucket, &h->group_hash) {
+		trace_hash_for_each_item(item, bucket) {
+			nr_groups++;
+		}
+	}
+
+	if (nr_groups == 0)
+		return;
+
+	groups = malloc_or_die(sizeof(*groups) * nr_groups);
+
+	nr_groups = 0;
+
+	trace_hash_for_each_bucket(bucket, &h->group_hash) {
+		trace_hash_while_item(item, bucket) {
+			groups[nr_groups++] = group_from_item(item);
+			trace_hash_del(item);
+		}
+	}
+
+	qsort(groups, nr_groups, sizeof(*groups), compare_groups);
+
+	for (i = 0; i < nr_groups; i++) {
+		output_group(h, groups[i]);
+		free_group(groups[i]);
+	}
+
+	free(groups);
+}
+
+static void output_handle(struct handle_data *h)
+{
+	int i;
+
+	show_global_task(h, h->global_task);
+	for (i = 0; i < h->cpus; i++)
+		show_global_task(h, &h->global_percpu_tasks[i]);
+
+	output_groups(h);
+	output_tasks(h);
+}
+
 int trace_profile(void)
 {
 	struct handle_data *h;
diff --git a/trace-record.c b/trace-record.c
index 9874a12..7b61cde 100644
--- a/trace-record.c
+++ b/trace-record.c
@@ -3703,6 +3703,7 @@ static void add_hook(struct buffer_instance *instance, const char *arg)
 }
 
 enum {
+	OPT_merge	= 250,
 	OPT_stderr	= 251,
 	OPT_profile	= 252,
 	OPT_nosplice	= 253,
@@ -3871,6 +3872,7 @@ void trace_record (int argc, char **argv)
 			{"nosplice", no_argument, NULL, OPT_nosplice},
 			{"profile", no_argument, NULL, OPT_profile},
 			{"stderr", no_argument, NULL, OPT_stderr},
+			{"merge", no_argument, NULL, OPT_merge},
 			{"help", no_argument, NULL, '?'},
 			{NULL, 0, NULL, 0}
 		};
@@ -4102,6 +4104,9 @@ void trace_record (int argc, char **argv)
 			close(1);
 			dup2(2, 1);
 			break;
+		case OPT_merge:
+			trace_profile_merge();
+			break;
 		default:
 			usage(argv);
 		}
diff --git a/trace-usage.c b/trace-usage.c
index 520b14b..190990b 100644
--- a/trace-usage.c
+++ b/trace-usage.c
@@ -48,6 +48,7 @@ static struct usage_help usage_help[] = {
 		"          -B create sub buffer and folling events will be enabled here\n"
 		"          -k do not reset the buffers after tracing.\n"
 		"          -i do not fail if an event is not found\n"
+		"          --merge used with --profile, merge events for related comms\n"
 		"          --profile enable tracing options needed for report --profile\n"
 		"          --func-stack perform a stack trace for function tracer\n"
 		"             (use with caution)\n"
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/5] trace-cmd: group schedule stacktraces together with --merge
  2015-05-08 19:28 [PATCH 0/5] trace-cmd profile enhancements Josef Bacik
  2015-05-08 19:29 ` [PATCH 1/5] trace-cmd: add option to group like comms for profile Josef Bacik
@ 2015-05-08 19:29 ` Josef Bacik
  2015-05-20 17:36   ` Steven Rostedt
  2015-05-08 19:29 ` [PATCH 3/5] trace-cmd: lookup syscall names in profile Josef Bacik
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Josef Bacik @ 2015-05-08 19:29 UTC (permalink / raw)
  To: rostedt, linux-kernel, kernel-team

With --merge we want to be able to merge the scheduling information as well so
we can see how the whole app spent its time.  This is a little tricky because we
have to sort based on comm instead of pid.  It works out well enough because we
store the intermediate information in the per task hash, but we want to group
the final information together.

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 trace-profile.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/trace-profile.c b/trace-profile.c
index bd775e8..eee026e 100644
--- a/trace-profile.c
+++ b/trace-profile.c
@@ -985,6 +985,7 @@ static int handle_sched_switch_event(struct handle_data *h,
 	unsigned long long prev_pid;
 	unsigned long long prev_state;
 	unsigned long long next_pid;
+	unsigned long long val = 0;
 	struct start_data *start;
 
 	/* pid_field holds prev_pid, data_field holds prev_state */
@@ -1019,6 +1020,12 @@ static int handle_sched_switch_event(struct handle_data *h,
 	if (!task->comm)
 		add_task_comm(h, task, h->switch_next_comm, record);
 
+	if (task->comm && task->group) {
+		char *str = task->comm;
+		while (*str) val += (int)*(str++);
+	} else
+		val = next_pid;
+
 	/*
 	 * If the next task was blocked, it required a wakeup to
 	 * restart, and there should be one.
@@ -1028,10 +1035,10 @@ static int handle_sched_switch_event(struct handle_data *h,
 	 *
 	 * event_data->start holds the sched_wakeup event data.
 	 */
-	find_and_update_start(task, event_data->start, record->ts, next_pid);
+	find_and_update_start(task, event_data->start, record->ts, val);
 
 	/* Look for this task if it was preempted (no wakeup found). */
-	find_and_update_start(task, event_data, record->ts, next_pid);
+	find_and_update_start(task, event_data, record->ts, val);
 
 	return 0;
 }
@@ -1186,6 +1193,7 @@ static int handle_sched_wakeup_event(struct handle_data *h,
 	struct task_data *task = NULL;
 	struct start_data *start;
 	unsigned long long success;
+	unsigned long long val = 0;
 
 	proxy = find_task(h, pid);
 
@@ -1224,11 +1232,17 @@ static int handle_sched_wakeup_event(struct handle_data *h,
 	 */
 	proxy->proxy = task;
 
+	if (task->comm && task->group) {
+		char *str = task->comm;
+		while (*str) val += (int)*(str++);
+	} else
+		val = pid;
+
 	/* There should be a blocked schedule out of this task */
-	find_and_update_start(task, event_data->start, record->ts, pid);
+	find_and_update_start(task, event_data->start, record->ts, val);
 
 	/* Set this up for timing how long the wakeup takes */
-	start = add_start(task, event_data, record, pid, pid);
+	start = add_start(task, event_data, record, val, val);
 	task->last_event = NULL;
 	task->last_start = start;
 
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/5] trace-cmd: lookup syscall names in profile
  2015-05-08 19:28 [PATCH 0/5] trace-cmd profile enhancements Josef Bacik
  2015-05-08 19:29 ` [PATCH 1/5] trace-cmd: add option to group like comms for profile Josef Bacik
  2015-05-08 19:29 ` [PATCH 2/5] trace-cmd: group schedule stacktraces together with --merge Josef Bacik
@ 2015-05-08 19:29 ` Josef Bacik
  2015-05-20 18:00   ` Steven Rostedt
  2015-05-08 19:29 ` [PATCH 4/5] trace-cmd: sort the events in the profile output Josef Bacik
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Josef Bacik @ 2015-05-08 19:29 UTC (permalink / raw)
  To: rostedt, linux-kernel, kernel-team

When we profile syscall times we're just given the syscall number, not the name.
This isn't helpful for human beings, so lookup the syscall names when we are
printing out the information.  This uses the audit library to get this
information, I'm not married to it, but it seems that any app that does this
mapping has to scrape /usr/include/asm/unistd*.h to pull out the names, so might
as well just use somebody elses work.

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 Makefile        |  2 +-
 trace-profile.c | 19 ++++++++++++++++---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 63f7e79..402f711 100644
--- a/Makefile
+++ b/Makefile
@@ -160,7 +160,7 @@ bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
 plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
 python_dir_SQ = $(subst ','\'',$(python_dir))
 
-LIBS = -L. -ltracecmd -ldl
+LIBS = -L. -ltracecmd -ldl -laudit
 LIB_FILE = libtracecmd.a
 
 PACKAGES= gtk+-2.0 libxml-2.0 gthread-2.0
diff --git a/trace-profile.c b/trace-profile.c
index eee026e..0b84c11 100644
--- a/trace-profile.c
+++ b/trace-profile.c
@@ -23,6 +23,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <libaudit.h>
 
 #include "trace-local.h"
 #include "trace-hash.h"
@@ -916,8 +917,20 @@ static void func_print(struct trace_seq *s, struct event_hash *event_hash)
 		trace_seq_printf(s, "func: 0x%llx", event_hash->val);
 }
 
-static void print_int(struct trace_seq *s, struct event_hash *event_hash)
+static void syscall_print(struct trace_seq *s, struct event_hash *event_hash)
 {
+	const char *name = NULL;
+	int machine;
+
+	machine = audit_detect_machine();
+	if (machine < 0)
+		goto fail;
+	name = audit_syscall_to_name(event_hash->val, machine);
+	if (!name)
+		goto fail;
+	trace_seq_printf(s, "syscall:%s", name);
+	return;
+fail:
 	trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name,
 			 (int)event_hash->val);
 }
@@ -1425,8 +1438,8 @@ void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hook,
 
 	if (syscall_enter && syscall_exit) {
 		mate_events(h, syscall_enter, NULL, "id", syscall_exit, "id", 1, 0);
-		syscall_enter->print_func = print_int;
-		syscall_exit->print_func = print_int;
+		syscall_enter->print_func = syscall_print;
+		syscall_exit->print_func = syscall_print;
 	}
 
 	events = pevent_list_events(pevent, EVENT_SORT_ID);
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 4/5] trace-cmd: sort the events in the profile output
  2015-05-08 19:28 [PATCH 0/5] trace-cmd profile enhancements Josef Bacik
                   ` (2 preceding siblings ...)
  2015-05-08 19:29 ` [PATCH 3/5] trace-cmd: lookup syscall names in profile Josef Bacik
@ 2015-05-08 19:29 ` Josef Bacik
  2015-05-08 19:29 ` [PATCH 5/5] trace-cmd: make trace-hist work with instances Josef Bacik
  2015-05-08 20:13 ` [PATCH 0/5] trace-cmd profile enhancements Steven Rostedt
  5 siblings, 0 replies; 16+ messages in thread
From: Josef Bacik @ 2015-05-08 19:29 UTC (permalink / raw)
  To: rostedt, linux-kernel, kernel-team

Currently we just spit out the events as they appear in the hash, which makes it
hard to tell where we are spending our time.  Instead order the output by type
and total time spent.  So you'll get something like this

syscall 1: 100000ms
syscall 2:  80000ms
syscall 3:      4ms
func 1:        10ms
func 2:         1ms

instead of them all jumbled up.

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 trace-profile.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/trace-profile.c b/trace-profile.c
index 0b84c11..3a9a9f3 100644
--- a/trace-profile.c
+++ b/trace-profile.c
@@ -1961,6 +1961,10 @@ static int compare_events(const void *a, const void *b)
 		return 1;
 	if (event_data_a->id < event_data_b->id)
 		return -1;
+	if ((*A)->time_total > (*B)->time_total)
+		return -1;
+	if ((*A)->time_total < (*B)->time_total)
+		return 1;
 	return 0;
 }
 
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 5/5] trace-cmd: make trace-hist work with instances
  2015-05-08 19:28 [PATCH 0/5] trace-cmd profile enhancements Josef Bacik
                   ` (3 preceding siblings ...)
  2015-05-08 19:29 ` [PATCH 4/5] trace-cmd: sort the events in the profile output Josef Bacik
@ 2015-05-08 19:29 ` Josef Bacik
  2015-05-08 20:13 ` [PATCH 0/5] trace-cmd profile enhancements Steven Rostedt
  5 siblings, 0 replies; 16+ messages in thread
From: Josef Bacik @ 2015-05-08 19:29 UTC (permalink / raw)
  To: rostedt, linux-kernel, kernel-team

If you recorded information completely in an instance then trace-hist wouldn't
think your trace.dat file had any events because it didn't know about instances.
Fix this.

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 trace-hist.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/trace-hist.c b/trace-hist.c
index 574549e..2a2e780 100644
--- a/trace-hist.c
+++ b/trace-hist.c
@@ -954,13 +954,6 @@ static void do_trace_hist(struct tracecmd_input *handle)
 	int cpu;
 	int ret;
 
-	ret = tracecmd_init_data(handle);
-	if (ret < 0)
-		die("failed to init data");
-
-	if (ret > 0)
-		die("trace-cmd hist does not work with latency traces\n");
-
 	cpus = tracecmd_cpus(handle);
 
 	/* Need to get any event */
@@ -1025,6 +1018,7 @@ void trace_hist(int argc, char **argv)
 {
 	struct tracecmd_input *handle;
 	const char *input_file = NULL;
+	int instances;
 	int ret;
 
 	for (;;) {
@@ -1067,7 +1061,30 @@ void trace_hist(int argc, char **argv)
 	if (ret)
 		return;
 
-	do_trace_hist(handle);
+	ret = tracecmd_init_data(handle);
+	if (ret < 0)
+		die("failed to init data");
+
+	if (ret > 0)
+		die("trace-cmd hist does not work with latency traces\n");
+
+	instances = tracecmd_buffer_instances(handle);
+	if (instances) {
+		struct tracecmd_input *new_handle;
+		int i;
+
+		for (i = 0; i < instances; i++) {
+			new_handle = tracecmd_buffer_instance_handle(handle, i);
+			if (!new_handle) {
+				warning("could not retrieve handle %d", i);
+				continue;
+			}
+			do_trace_hist(new_handle);
+			tracecmd_close(new_handle);
+		}
+	} else {
+		do_trace_hist(handle);
+	}
 
 	tracecmd_close(handle);
 }
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 0/5] trace-cmd profile enhancements
  2015-05-08 19:28 [PATCH 0/5] trace-cmd profile enhancements Josef Bacik
                   ` (4 preceding siblings ...)
  2015-05-08 19:29 ` [PATCH 5/5] trace-cmd: make trace-hist work with instances Josef Bacik
@ 2015-05-08 20:13 ` Steven Rostedt
  5 siblings, 0 replies; 16+ messages in thread
From: Steven Rostedt @ 2015-05-08 20:13 UTC (permalink / raw)
  To: Josef Bacik; +Cc: linux-kernel, kernel-team

On Fri, 8 May 2015 15:28:59 -0400
Josef Bacik <jbacik@fb.com> wrote:

> I had to use trace-cmd profile to debug a regression with hhvm recently.  These
> are the patches I needed to make trace-cmd profile useful to me.  There are a
> few changes in here
> 
> 1) Adding --merge to trace-cmd profile.  This merges events for like comms
> together.  HHVM is a huge app that spawns worker threads to handle requests, I
> needed all of this data in one place to see how the app as a whole was behaving,
> --merge provides this.
> 
> 2) Fixup the output formatting.  I also wanted to see which syscalls were taking
> the longest but this was hard because we were just spitting out the syscall
> number and it wasn't in any order, so now all the events are ordered based on
> time taken and the syscall names are printed instead of the number.
> 
> 3) Random trace-cmd hist fix, because it didn't work right with instances.
> 
> I've been using these in production for a couple of weeks now and they seem to
> work well.  Thanks,
>

Josef,

Thanks a lot! I really appreciate this.

I'll take a look at it next week as I have other things on my plate for
today. Ping me if I somehow forget to do so.

Thanks again,

-- Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/5] trace-cmd: add option to group like comms for profile
  2015-05-08 19:29 ` [PATCH 1/5] trace-cmd: add option to group like comms for profile Josef Bacik
@ 2015-05-20 17:31   ` Steven Rostedt
  2015-05-20 17:51     ` Josef Bacik
  0 siblings, 1 reply; 16+ messages in thread
From: Steven Rostedt @ 2015-05-20 17:31 UTC (permalink / raw)
  To: Josef Bacik; +Cc: linux-kernel, kernel-team

Finally have some time to look at this.


On Fri, 8 May 2015 15:29:00 -0400
Josef Bacik <jbacik@fb.com> wrote:

> When profiling large applications that have multiple children we really want to
> see the profile of the app all together.  Add a --merge option to accomplish
> this for profile.  This matches events based on comm instead of PID, this way we
> can easilly see what a giant app is doing, such as HHVM.  Thanks,
> 
> Signed-off-by: Josef Bacik <jbacik@fb.com>
> ---
>  trace-hash-local.h |   7 ++
>  trace-local.h      |   1 +
>  trace-profile.c    | 207 +++++++++++++++++++++++++++++++++++++++++++++++++----
>  trace-record.c     |   5 ++

Shouldn't we add this option to trace-cmd report --profile ?

Also, I think we need a man page.

Also, I'm not so sure I like the "--merge" name. It's not very
descriptive to what it does. What about "--by-comm"? Or maybe "--comm"?

>  trace-usage.c      |   1 +
>  5 files changed, 208 insertions(+), 13 deletions(-)
> 
> diff --git a/trace-hash-local.h b/trace-hash-local.h
> index 997b11c..5bae3f3 100644
> --- a/trace-hash-local.h
> +++ b/trace-hash-local.h
> @@ -48,4 +48,11 @@ static inline unsigned int trace_hash(int val)
>  	return hash;
>  }
>  
> +static inline unsigned int trace_hash_str(char *str)
> +{
> +	int val = 0;
> +
> +	while (*str) val += (int)*(str++);
> +	return trace_hash(val);
> +}

Hmm, the above doesn't account for order of characters. That is "bash"
is the same as "shab". What about this:

	int val = 0;
	int i;

	for (i = 0; str[i]; i++)
		val += ((int)str[i]) << (i & 0xffffff);
	return trace_hash(val);



>  #endif /* _TRACE_HASH_LOCAL_H */
> diff --git a/trace-local.h b/trace-local.h
> index ab2bd34..50f36ba 100644
> --- a/trace-local.h
> +++ b/trace-local.h
> @@ -81,6 +81,7 @@ int trace_profile_record(struct tracecmd_input *handle,
>  void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hooks,
>  			int global);
>  int trace_profile(void);
> +void trace_profile_merge(void);
>  
>  struct tracecmd_input *
>  trace_stream_init(struct buffer_instance *instance, int cpu, int fd, int cpus,
> diff --git a/trace-profile.c b/trace-profile.c
> index f71a7c6..bd775e8 100644
> --- a/trace-profile.c
> +++ b/trace-profile.c
> @@ -34,6 +34,7 @@
>  #define start_from_item(item)	container_of(item, struct start_data, hash)
>  #define event_from_item(item)	container_of(item, struct event_hash, hash)
>  #define stack_from_item(item)	container_of(item, struct stack_data, hash)
> +#define group_from_item(item)	container_of(item, struct group_data, hash)
>  #define event_data_from_item(item)	container_of(item, struct event_data, hash)
>  
>  static unsigned long long nsecs_per_sec(unsigned long long ts)
> @@ -141,6 +142,12 @@ struct event_hash {
>  	struct trace_hash	stacks;
>  };
>  
> +struct group_data {
> +	struct trace_hash_item	hash;
> +	char			*comm;
> +	struct trace_hash	event_hash;
> +};
> +
>  struct task_data {
>  	struct trace_hash_item	hash;
>  	int			pid;
> @@ -156,6 +163,7 @@ struct task_data {
>  	struct event_hash	*last_event;
>  	struct pevent_record	*last_stack;
>  	struct handle_data	*handle;
> +	struct group_data	*group;
>  };
>  
>  struct cpu_info {
> @@ -173,6 +181,7 @@ struct handle_data {
>  	struct pevent		*pevent;
>  
>  	struct trace_hash	events;
> +	struct trace_hash	group_hash;
>  
>  	struct cpu_info		**cpu_data;
>  
> @@ -196,6 +205,12 @@ struct handle_data {
>  
>  static struct handle_data *handles;
>  static struct event_data *stacktrace_event;
> +static bool merge_like_comms = false;
> +
> +void trace_profile_merge(void)
> +{
> +	merge_like_comms = true;
> +}
>  
>  static struct start_data *
>  add_start(struct task_data *task,
> @@ -250,6 +265,7 @@ static struct event_hash *
>  find_event_hash(struct task_data *task, struct event_data_match *edata)
>  {
>  	struct event_hash *event_hash;
> +	struct trace_hash *hash;
>  	struct trace_hash_item *item;
>  	unsigned long long key;
>  
> @@ -257,7 +273,11 @@ find_event_hash(struct task_data *task, struct event_data_match *edata)
>  		(unsigned long)edata->search_val +
>  		(unsigned long)edata->val;
>  	key = trace_hash(key);
> -	item = trace_hash_find(&task->event_hash, key, match_event, edata);
> +	if (task->group)
> +		hash = &task->group->event_hash;
> +	else
> +		hash = &task->event_hash;
> +	item = trace_hash_find(hash, key, match_event, edata);
>  	if (item)
>  		return event_from_item(item);
>  
> @@ -270,7 +290,7 @@ find_event_hash(struct task_data *task, struct event_data_match *edata)
>  	event_hash->hash.key = key;
>  	trace_hash_init(&event_hash->stacks, 32);
>  
> -	trace_hash_add(&task->event_hash, &event_hash->hash);
> +	trace_hash_add(hash, &event_hash->hash);
>  
>  	return event_hash;
>  }
> @@ -496,9 +516,59 @@ find_task(struct handle_data *h, int pid)
>  	return last_task;
>  }
>  
> +static int match_group(struct trace_hash_item *item, void *data)
> +{
> +	struct group_data *group = group_from_item(item);
> +
> +	return strcmp(group->comm, (char *)data) == 0;
> +}
> +
> +static void add_group(struct handle_data *h, struct task_data *task)
> +{
> +	unsigned long long key;
> +	struct trace_hash_item *item;
> +	void *data = task->comm;
> +
> +	if (!merge_like_comms)
> +		return;
> +	if (task->group || !task->comm)
> +		return;
> +
> +	key = trace_hash_str(task->comm);
> +
> +	item = trace_hash_find(&h->group_hash, key, match_group, data);
> +	if (item) {
> +		task->group = group_from_item(item);
> +	} else {
> +		struct group_data *grp;
> +		struct trace_hash_item **bucket;
> +
> +		grp = malloc_or_die(sizeof(*grp));
> +		memset(grp, 0, sizeof(*grp));
> +
> +		grp->comm = strdup(task->comm);
> +		if (!grp->comm)
> +			die("strdup");
> +		grp->hash.key = key;
> +		trace_hash_add(&h->group_hash, &grp->hash);
> +		task->group = grp;
> +		trace_hash_init(&grp->event_hash, 32);
> +
> +		trace_hash_for_each_bucket(bucket, &task->event_hash) {
> +			trace_hash_while_item(item, bucket) {
> +				struct event_hash *event_hash;
> +
> +				event_hash = event_from_item(item);
> +				trace_hash_del(&event_hash->hash);
> +				trace_hash_add(&grp->event_hash, &event_hash->hash);
> +			}
> +		}
> +	}
> +}
> +
>  static void
> -add_task_comm(struct task_data *task, struct format_field *field,
> -	      struct pevent_record *record)
> +add_task_comm(struct handle_data *h, struct task_data *task,
> +	      struct format_field *field, struct pevent_record *record)
>  {
>  	const char *comm;
>  
> @@ -506,6 +576,7 @@ add_task_comm(struct task_data *task, struct format_field *field,
>  	comm = record->data + field->offset;
>  	memcpy(task->comm, comm, field->size);
>  	task->comm[field->size] = 0;
> +	add_group(h, task);
>  }
>  
>  /* Account for tasks that don't have starts */
> @@ -932,7 +1003,7 @@ static int handle_sched_switch_event(struct handle_data *h,
>  
>  	task = find_task(h, prev_pid);
>  	if (!task->comm)
> -		add_task_comm(task, h->switch_prev_comm, record);
> +		add_task_comm(h, task, h->switch_prev_comm, record);
>  
>  	if (prev_state)
>  		task->sleeping = 1;
> @@ -940,13 +1011,13 @@ static int handle_sched_switch_event(struct handle_data *h,
>  		task->sleeping = 0;
>  
>  	/* task is being scheduled out. prev_state tells why */
> -	start = add_start(task, event_data, record, prev_pid, prev_state);
> +	start = add_start(task, event_data, record, prev_state, prev_state);
>  	task->last_start = start;
>  	task->last_event = NULL;
>  
>  	task = find_task(h, next_pid);
>  	if (!task->comm)
> -		add_task_comm(task, h->switch_next_comm, record);
> +		add_task_comm(h, task, h->switch_next_comm, record);
>  
>  	/*
>  	 * If the next task was blocked, it required a wakeup to
> @@ -1133,7 +1204,7 @@ static int handle_sched_wakeup_event(struct handle_data *h,
>  
>  	task = find_task(h, pid);
>  	if (!task->comm)
> -		add_task_comm(task, h->wakeup_comm, record);
> +		add_task_comm(h, task, h->wakeup_comm, record);
>  
>  	/* if the task isn't sleeping, then ignore the wake up */
>  	if (!task->sleeping) {
> @@ -1196,6 +1267,7 @@ void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hook,
>  
>  	trace_hash_init(&h->task_hash, 1024);
>  	trace_hash_init(&h->events, 1024);
> +	trace_hash_init(&h->group_hash, 512);
>  
>  	h->handle = handle;
>  	h->pevent = pevent;
> @@ -1874,6 +1946,9 @@ static void output_task(struct handle_data *h, struct task_data *task)
>  	int nr_events = 0;
>  	int i;
>  
> +	if (trace_hash_empty(&task->event_hash))
> +		return;
> +

Was this needed for this patch, or is it for groups? Even if the task
doesn't have any events, I do sometimes want to see if it was recorder.

If you added this to not show tasks if they belong to a group, why not
add:

	if (task->group)
		return;
?

>  	if (task->comm)
>  		comm = task->comm;
>  	else
> @@ -1907,6 +1982,39 @@ static void output_task(struct handle_data *h, struct task_data *task)
>  	free(events);
>  }
>  
> +static void output_group(struct handle_data *h, struct group_data *group)
> +{
> +	struct trace_hash_item **bucket;
> +	struct trace_hash_item *item;
> +	struct event_hash **events;
> +	int nr_events = 0;
> +	int i;
> +
> +	printf("\ngroup: %s\n", group->comm);
> +
> +	trace_hash_for_each_bucket(bucket, &group->event_hash) {
> +		trace_hash_for_each_item(item, bucket) {
> +			nr_events++;
> +		}
> +	}
> +
> +	events = malloc_or_die(sizeof(*events) * nr_events);
> +
> +	i = 0;
> +	trace_hash_for_each_bucket(bucket, &group->event_hash) {
> +		trace_hash_for_each_item(item, bucket) {
> +			events[i++] = event_from_item(item);
> +		}
> +	}
> +
> +	qsort(events, nr_events, sizeof(*events), compare_events);
> +
> +	for (i = 0; i < nr_events; i++)
> +		output_event(events[i]);
> +
> +	free(events);
> +}
> +
>  static int compare_tasks(const void *a, const void *b)
>  {
>  	struct task_data * const *A = a;
> @@ -1919,6 +2027,14 @@ static int compare_tasks(const void *a, const void *b)
>  	return 0;
>  }
>  
> +static int compare_groups(const void *a, const void *b)
> +{
> +	const char *A = a;
> +	const char *B = b;
> +
> +	return strcmp(A, B);
> +}
> +
>  static void free_event_hash(struct event_hash *event_hash)
>  {
>  	struct trace_hash_item **bucket;
> @@ -1976,6 +2092,25 @@ static void free_task(struct task_data *task)
>  	free(task);
>  }
>  
> +static void free_group(struct group_data *group)
> +{
> +	struct trace_hash_item **bucket;
> +	struct trace_hash_item *item;
> +	struct event_hash *event_hash;
> +
> +	free(group->comm);
> +
> +	trace_hash_for_each_bucket(bucket, &group->event_hash) {
> +		trace_hash_while_item(item, bucket) {
> +			event_hash = event_from_item(item);
> +			trace_hash_del(item);
> +			free_event_hash(event_hash);
> +		}
> +	}
> +	trace_hash_free(&group->event_hash);
> +	free(group);
> +}
> +
>  static void show_global_task(struct handle_data *h,
>  			     struct task_data *task)
>  {
> @@ -1985,7 +2120,7 @@ static void show_global_task(struct handle_data *h,
>  	output_task(h, task);
>  }
>  
> -static void output_handle(struct handle_data *h)
> +static void output_tasks(struct handle_data *h)
>  {
>  	struct trace_hash_item **bucket;
>  	struct trace_hash_item *item;
> @@ -1993,10 +2128,6 @@ static void output_handle(struct handle_data *h)
>  	int nr_tasks = 0;
>  	int i;
>  
> -	show_global_task(h, h->global_task);
> -	for (i = 0; i < h->cpus; i++)
> -		show_global_task(h, &h->global_percpu_tasks[i]);
> -
>  	trace_hash_for_each_bucket(bucket, &h->task_hash) {
>  		trace_hash_for_each_item(item, bucket) {
>  			nr_tasks++;
> @@ -2024,6 +2155,56 @@ static void output_handle(struct handle_data *h)
>  	free(tasks);
>  }
>  
> +static void output_groups(struct handle_data *h)
> +{
> +	struct trace_hash_item **bucket;
> +	struct trace_hash_item *item;
> +	struct group_data **groups;
> +	int nr_groups = 0;
> +	int i;
> +
> +	trace_hash_for_each_bucket(bucket, &h->group_hash) {
> +		trace_hash_for_each_item(item, bucket) {
> +			nr_groups++;
> +		}
> +	}
> +
> +	if (nr_groups == 0)
> +		return;
> +
> +	groups = malloc_or_die(sizeof(*groups) * nr_groups);
> +
> +	nr_groups = 0;
> +
> +	trace_hash_for_each_bucket(bucket, &h->group_hash) {
> +		trace_hash_while_item(item, bucket) {
> +			groups[nr_groups++] = group_from_item(item);
> +			trace_hash_del(item);
> +		}
> +	}
> +
> +	qsort(groups, nr_groups, sizeof(*groups), compare_groups);
> +
> +	for (i = 0; i < nr_groups; i++) {
> +		output_group(h, groups[i]);
> +		free_group(groups[i]);
> +	}
> +
> +	free(groups);
> +}
> +
> +static void output_handle(struct handle_data *h)
> +{
> +	int i;
> +
> +	show_global_task(h, h->global_task);
> +	for (i = 0; i < h->cpus; i++)
> +		show_global_task(h, &h->global_percpu_tasks[i]);
> +
> +	output_groups(h);
> +	output_tasks(h);
> +}
> +


Hmm, I wonder if the combination should be done on output. Because how
do you handle threads where an event is per task? Like scheduling one
out and the other one in? What about wake ups? If a bunch of threads
get woken up, how do we match the wakeups to the tasks?

Would it be easier to just match all the tasks by comm after the
recording was done? I would think matching pids would be quicker than
matching strings.

-- Steve

>  int trace_profile(void)
>  {
>  	struct handle_data *h;
> diff --git a/trace-record.c b/trace-record.c
> index 9874a12..7b61cde 100644
> --- a/trace-record.c
> +++ b/trace-record.c
> @@ -3703,6 +3703,7 @@ static void add_hook(struct buffer_instance *instance, const char *arg)
>  }
>  
>  enum {
> +	OPT_merge	= 250,
>  	OPT_stderr	= 251,
>  	OPT_profile	= 252,
>  	OPT_nosplice	= 253,
> @@ -3871,6 +3872,7 @@ void trace_record (int argc, char **argv)
>  			{"nosplice", no_argument, NULL, OPT_nosplice},
>  			{"profile", no_argument, NULL, OPT_profile},
>  			{"stderr", no_argument, NULL, OPT_stderr},
> +			{"merge", no_argument, NULL, OPT_merge},
>  			{"help", no_argument, NULL, '?'},
>  			{NULL, 0, NULL, 0}
>  		};
> @@ -4102,6 +4104,9 @@ void trace_record (int argc, char **argv)
>  			close(1);
>  			dup2(2, 1);
>  			break;
> +		case OPT_merge:
> +			trace_profile_merge();
> +			break;
>  		default:
>  			usage(argv);
>  		}
> diff --git a/trace-usage.c b/trace-usage.c
> index 520b14b..190990b 100644
> --- a/trace-usage.c
> +++ b/trace-usage.c
> @@ -48,6 +48,7 @@ static struct usage_help usage_help[] = {
>  		"          -B create sub buffer and folling events will be enabled here\n"
>  		"          -k do not reset the buffers after tracing.\n"
>  		"          -i do not fail if an event is not found\n"
> +		"          --merge used with --profile, merge events for related comms\n"
>  		"          --profile enable tracing options needed for report --profile\n"
>  		"          --func-stack perform a stack trace for function tracer\n"
>  		"             (use with caution)\n"


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/5] trace-cmd: group schedule stacktraces together with --merge
  2015-05-08 19:29 ` [PATCH 2/5] trace-cmd: group schedule stacktraces together with --merge Josef Bacik
@ 2015-05-20 17:36   ` Steven Rostedt
  2015-05-20 17:54     ` Josef Bacik
  0 siblings, 1 reply; 16+ messages in thread
From: Steven Rostedt @ 2015-05-20 17:36 UTC (permalink / raw)
  To: Josef Bacik; +Cc: linux-kernel, kernel-team

On Fri, 8 May 2015 15:29:01 -0400
Josef Bacik <jbacik@fb.com> wrote:

> With --merge we want to be able to merge the scheduling information as well so
> we can see how the whole app spent its time.  This is a little tricky because we
> have to sort based on comm instead of pid.  It works out well enough because we
> store the intermediate information in the per task hash, but we want to group
> the final information together.

If we do the merge at the end, we wont need to worry about this.

-- Steve

> 
> Signed-off-by: Josef Bacik <jbacik@fb.com>
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/5] trace-cmd: add option to group like comms for profile
  2015-05-20 17:31   ` Steven Rostedt
@ 2015-05-20 17:51     ` Josef Bacik
  0 siblings, 0 replies; 16+ messages in thread
From: Josef Bacik @ 2015-05-20 17:51 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, kernel-team

On 05/20/2015 01:31 PM, Steven Rostedt wrote:
> Finally have some time to look at this.
>
>
> On Fri, 8 May 2015 15:29:00 -0400
> Josef Bacik <jbacik@fb.com> wrote:
>
>> When profiling large applications that have multiple children we really want to
>> see the profile of the app all together.  Add a --merge option to accomplish
>> this for profile.  This matches events based on comm instead of PID, this way we
>> can easilly see what a giant app is doing, such as HHVM.  Thanks,
>>
>> Signed-off-by: Josef Bacik <jbacik@fb.com>
>> ---
>>   trace-hash-local.h |   7 ++
>>   trace-local.h      |   1 +
>>   trace-profile.c    | 207 +++++++++++++++++++++++++++++++++++++++++++++++++----
>>   trace-record.c     |   5 ++
>
> Shouldn't we add this option to trace-cmd report --profile ?

Ah yeah I missed that part of it, I'll add that bit.

>
> Also, I think we need a man page.
>

No complaints here, but I'm not volunteering either ;).

> Also, I'm not so sure I like the "--merge" name. It's not very
> descriptive to what it does. What about "--by-comm"? Or maybe "--comm"?
>

I like --by-comm, I'll change it.

>>   trace-usage.c      |   1 +
>>   5 files changed, 208 insertions(+), 13 deletions(-)
>>
>> diff --git a/trace-hash-local.h b/trace-hash-local.h
>> index 997b11c..5bae3f3 100644
>> --- a/trace-hash-local.h
>> +++ b/trace-hash-local.h
>> @@ -48,4 +48,11 @@ static inline unsigned int trace_hash(int val)
>>   	return hash;
>>   }
>>
>> +static inline unsigned int trace_hash_str(char *str)
>> +{
>> +	int val = 0;
>> +
>> +	while (*str) val += (int)*(str++);
>> +	return trace_hash(val);
>> +}
>
> Hmm, the above doesn't account for order of characters. That is "bash"
> is the same as "shab". What about this:
>
> 	int val = 0;
> 	int i;
>
> 	for (i = 0; str[i]; i++)
> 		val += ((int)str[i]) << (i & 0xffffff);
> 	return trace_hash(val);
>

Good point, I meant to look up a real hashing thing later but obviously 
forgot, I'll switch to yours.

>
>
>>   #endif /* _TRACE_HASH_LOCAL_H */
>> diff --git a/trace-local.h b/trace-local.h
>> index ab2bd34..50f36ba 100644
>> --- a/trace-local.h
>> +++ b/trace-local.h
>> @@ -81,6 +81,7 @@ int trace_profile_record(struct tracecmd_input *handle,
>>   void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hooks,
>>   			int global);
>>   int trace_profile(void);
>> +void trace_profile_merge(void);
>>
>>   struct tracecmd_input *
>>   trace_stream_init(struct buffer_instance *instance, int cpu, int fd, int cpus,
>> diff --git a/trace-profile.c b/trace-profile.c
>> index f71a7c6..bd775e8 100644
>> --- a/trace-profile.c
>> +++ b/trace-profile.c
>> @@ -34,6 +34,7 @@
>>   #define start_from_item(item)	container_of(item, struct start_data, hash)
>>   #define event_from_item(item)	container_of(item, struct event_hash, hash)
>>   #define stack_from_item(item)	container_of(item, struct stack_data, hash)
>> +#define group_from_item(item)	container_of(item, struct group_data, hash)
>>   #define event_data_from_item(item)	container_of(item, struct event_data, hash)
>>
>>   static unsigned long long nsecs_per_sec(unsigned long long ts)
>> @@ -141,6 +142,12 @@ struct event_hash {
>>   	struct trace_hash	stacks;
>>   };
>>
>> +struct group_data {
>> +	struct trace_hash_item	hash;
>> +	char			*comm;
>> +	struct trace_hash	event_hash;
>> +};
>> +
>>   struct task_data {
>>   	struct trace_hash_item	hash;
>>   	int			pid;
>> @@ -156,6 +163,7 @@ struct task_data {
>>   	struct event_hash	*last_event;
>>   	struct pevent_record	*last_stack;
>>   	struct handle_data	*handle;
>> +	struct group_data	*group;
>>   };
>>
>>   struct cpu_info {
>> @@ -173,6 +181,7 @@ struct handle_data {
>>   	struct pevent		*pevent;
>>
>>   	struct trace_hash	events;
>> +	struct trace_hash	group_hash;
>>
>>   	struct cpu_info		**cpu_data;
>>
>> @@ -196,6 +205,12 @@ struct handle_data {
>>
>>   static struct handle_data *handles;
>>   static struct event_data *stacktrace_event;
>> +static bool merge_like_comms = false;
>> +
>> +void trace_profile_merge(void)
>> +{
>> +	merge_like_comms = true;
>> +}
>>
>>   static struct start_data *
>>   add_start(struct task_data *task,
>> @@ -250,6 +265,7 @@ static struct event_hash *
>>   find_event_hash(struct task_data *task, struct event_data_match *edata)
>>   {
>>   	struct event_hash *event_hash;
>> +	struct trace_hash *hash;
>>   	struct trace_hash_item *item;
>>   	unsigned long long key;
>>
>> @@ -257,7 +273,11 @@ find_event_hash(struct task_data *task, struct event_data_match *edata)
>>   		(unsigned long)edata->search_val +
>>   		(unsigned long)edata->val;
>>   	key = trace_hash(key);
>> -	item = trace_hash_find(&task->event_hash, key, match_event, edata);
>> +	if (task->group)
>> +		hash = &task->group->event_hash;
>> +	else
>> +		hash = &task->event_hash;
>> +	item = trace_hash_find(hash, key, match_event, edata);
>>   	if (item)
>>   		return event_from_item(item);
>>
>> @@ -270,7 +290,7 @@ find_event_hash(struct task_data *task, struct event_data_match *edata)
>>   	event_hash->hash.key = key;
>>   	trace_hash_init(&event_hash->stacks, 32);
>>
>> -	trace_hash_add(&task->event_hash, &event_hash->hash);
>> +	trace_hash_add(hash, &event_hash->hash);
>>
>>   	return event_hash;
>>   }
>> @@ -496,9 +516,59 @@ find_task(struct handle_data *h, int pid)
>>   	return last_task;
>>   }
>>
>> +static int match_group(struct trace_hash_item *item, void *data)
>> +{
>> +	struct group_data *group = group_from_item(item);
>> +
>> +	return strcmp(group->comm, (char *)data) == 0;
>> +}
>> +
>> +static void add_group(struct handle_data *h, struct task_data *task)
>> +{
>> +	unsigned long long key;
>> +	struct trace_hash_item *item;
>> +	void *data = task->comm;
>> +
>> +	if (!merge_like_comms)
>> +		return;
>> +	if (task->group || !task->comm)
>> +		return;
>> +
>> +	key = trace_hash_str(task->comm);
>> +
>> +	item = trace_hash_find(&h->group_hash, key, match_group, data);
>> +	if (item) {
>> +		task->group = group_from_item(item);
>> +	} else {
>> +		struct group_data *grp;
>> +		struct trace_hash_item **bucket;
>> +
>> +		grp = malloc_or_die(sizeof(*grp));
>> +		memset(grp, 0, sizeof(*grp));
>> +
>> +		grp->comm = strdup(task->comm);
>> +		if (!grp->comm)
>> +			die("strdup");
>> +		grp->hash.key = key;
>> +		trace_hash_add(&h->group_hash, &grp->hash);
>> +		task->group = grp;
>> +		trace_hash_init(&grp->event_hash, 32);
>> +
>> +		trace_hash_for_each_bucket(bucket, &task->event_hash) {
>> +			trace_hash_while_item(item, bucket) {
>> +				struct event_hash *event_hash;
>> +
>> +				event_hash = event_from_item(item);
>> +				trace_hash_del(&event_hash->hash);
>> +				trace_hash_add(&grp->event_hash, &event_hash->hash);
>> +			}
>> +		}
>> +	}
>> +}
>> +
>>   static void
>> -add_task_comm(struct task_data *task, struct format_field *field,
>> -	      struct pevent_record *record)
>> +add_task_comm(struct handle_data *h, struct task_data *task,
>> +	      struct format_field *field, struct pevent_record *record)
>>   {
>>   	const char *comm;
>>
>> @@ -506,6 +576,7 @@ add_task_comm(struct task_data *task, struct format_field *field,
>>   	comm = record->data + field->offset;
>>   	memcpy(task->comm, comm, field->size);
>>   	task->comm[field->size] = 0;
>> +	add_group(h, task);
>>   }
>>
>>   /* Account for tasks that don't have starts */
>> @@ -932,7 +1003,7 @@ static int handle_sched_switch_event(struct handle_data *h,
>>
>>   	task = find_task(h, prev_pid);
>>   	if (!task->comm)
>> -		add_task_comm(task, h->switch_prev_comm, record);
>> +		add_task_comm(h, task, h->switch_prev_comm, record);
>>
>>   	if (prev_state)
>>   		task->sleeping = 1;
>> @@ -940,13 +1011,13 @@ static int handle_sched_switch_event(struct handle_data *h,
>>   		task->sleeping = 0;
>>
>>   	/* task is being scheduled out. prev_state tells why */
>> -	start = add_start(task, event_data, record, prev_pid, prev_state);
>> +	start = add_start(task, event_data, record, prev_state, prev_state);
>>   	task->last_start = start;
>>   	task->last_event = NULL;
>>
>>   	task = find_task(h, next_pid);
>>   	if (!task->comm)
>> -		add_task_comm(task, h->switch_next_comm, record);
>> +		add_task_comm(h, task, h->switch_next_comm, record);
>>
>>   	/*
>>   	 * If the next task was blocked, it required a wakeup to
>> @@ -1133,7 +1204,7 @@ static int handle_sched_wakeup_event(struct handle_data *h,
>>
>>   	task = find_task(h, pid);
>>   	if (!task->comm)
>> -		add_task_comm(task, h->wakeup_comm, record);
>> +		add_task_comm(h, task, h->wakeup_comm, record);
>>
>>   	/* if the task isn't sleeping, then ignore the wake up */
>>   	if (!task->sleeping) {
>> @@ -1196,6 +1267,7 @@ void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hook,
>>
>>   	trace_hash_init(&h->task_hash, 1024);
>>   	trace_hash_init(&h->events, 1024);
>> +	trace_hash_init(&h->group_hash, 512);
>>
>>   	h->handle = handle;
>>   	h->pevent = pevent;
>> @@ -1874,6 +1946,9 @@ static void output_task(struct handle_data *h, struct task_data *task)
>>   	int nr_events = 0;
>>   	int i;
>>
>> +	if (trace_hash_empty(&task->event_hash))
>> +		return;
>> +
>
> Was this needed for this patch, or is it for groups? Even if the task
> doesn't have any events, I do sometimes want to see if it was recorder.
>
> If you added this to not show tasks if they belong to a group, why not
> add:
>
> 	if (task->group)
> 		return;
> ?

Yup I can do that too, I added it because we got a bunch of "task: blah" 
output at the end using groups and it seemed ugly, so this check made it 
only output tasks that weren't grouped.  I'll make your change instead.

>
>>   	if (task->comm)
>>   		comm = task->comm;
>>   	else
>> @@ -1907,6 +1982,39 @@ static void output_task(struct handle_data *h, struct task_data *task)
>>   	free(events);
>>   }
>>
>> +static void output_group(struct handle_data *h, struct group_data *group)
>> +{
>> +	struct trace_hash_item **bucket;
>> +	struct trace_hash_item *item;
>> +	struct event_hash **events;
>> +	int nr_events = 0;
>> +	int i;
>> +
>> +	printf("\ngroup: %s\n", group->comm);
>> +
>> +	trace_hash_for_each_bucket(bucket, &group->event_hash) {
>> +		trace_hash_for_each_item(item, bucket) {
>> +			nr_events++;
>> +		}
>> +	}
>> +
>> +	events = malloc_or_die(sizeof(*events) * nr_events);
>> +
>> +	i = 0;
>> +	trace_hash_for_each_bucket(bucket, &group->event_hash) {
>> +		trace_hash_for_each_item(item, bucket) {
>> +			events[i++] = event_from_item(item);
>> +		}
>> +	}
>> +
>> +	qsort(events, nr_events, sizeof(*events), compare_events);
>> +
>> +	for (i = 0; i < nr_events; i++)
>> +		output_event(events[i]);
>> +
>> +	free(events);
>> +}
>> +
>>   static int compare_tasks(const void *a, const void *b)
>>   {
>>   	struct task_data * const *A = a;
>> @@ -1919,6 +2027,14 @@ static int compare_tasks(const void *a, const void *b)
>>   	return 0;
>>   }
>>
>> +static int compare_groups(const void *a, const void *b)
>> +{
>> +	const char *A = a;
>> +	const char *B = b;
>> +
>> +	return strcmp(A, B);
>> +}
>> +
>>   static void free_event_hash(struct event_hash *event_hash)
>>   {
>>   	struct trace_hash_item **bucket;
>> @@ -1976,6 +2092,25 @@ static void free_task(struct task_data *task)
>>   	free(task);
>>   }
>>
>> +static void free_group(struct group_data *group)
>> +{
>> +	struct trace_hash_item **bucket;
>> +	struct trace_hash_item *item;
>> +	struct event_hash *event_hash;
>> +
>> +	free(group->comm);
>> +
>> +	trace_hash_for_each_bucket(bucket, &group->event_hash) {
>> +		trace_hash_while_item(item, bucket) {
>> +			event_hash = event_from_item(item);
>> +			trace_hash_del(item);
>> +			free_event_hash(event_hash);
>> +		}
>> +	}
>> +	trace_hash_free(&group->event_hash);
>> +	free(group);
>> +}
>> +
>>   static void show_global_task(struct handle_data *h,
>>   			     struct task_data *task)
>>   {
>> @@ -1985,7 +2120,7 @@ static void show_global_task(struct handle_data *h,
>>   	output_task(h, task);
>>   }
>>
>> -static void output_handle(struct handle_data *h)
>> +static void output_tasks(struct handle_data *h)
>>   {
>>   	struct trace_hash_item **bucket;
>>   	struct trace_hash_item *item;
>> @@ -1993,10 +2128,6 @@ static void output_handle(struct handle_data *h)
>>   	int nr_tasks = 0;
>>   	int i;
>>
>> -	show_global_task(h, h->global_task);
>> -	for (i = 0; i < h->cpus; i++)
>> -		show_global_task(h, &h->global_percpu_tasks[i]);
>> -
>>   	trace_hash_for_each_bucket(bucket, &h->task_hash) {
>>   		trace_hash_for_each_item(item, bucket) {
>>   			nr_tasks++;
>> @@ -2024,6 +2155,56 @@ static void output_handle(struct handle_data *h)
>>   	free(tasks);
>>   }
>>
>> +static void output_groups(struct handle_data *h)
>> +{
>> +	struct trace_hash_item **bucket;
>> +	struct trace_hash_item *item;
>> +	struct group_data **groups;
>> +	int nr_groups = 0;
>> +	int i;
>> +
>> +	trace_hash_for_each_bucket(bucket, &h->group_hash) {
>> +		trace_hash_for_each_item(item, bucket) {
>> +			nr_groups++;
>> +		}
>> +	}
>> +
>> +	if (nr_groups == 0)
>> +		return;
>> +
>> +	groups = malloc_or_die(sizeof(*groups) * nr_groups);
>> +
>> +	nr_groups = 0;
>> +
>> +	trace_hash_for_each_bucket(bucket, &h->group_hash) {
>> +		trace_hash_while_item(item, bucket) {
>> +			groups[nr_groups++] = group_from_item(item);
>> +			trace_hash_del(item);
>> +		}
>> +	}
>> +
>> +	qsort(groups, nr_groups, sizeof(*groups), compare_groups);
>> +
>> +	for (i = 0; i < nr_groups; i++) {
>> +		output_group(h, groups[i]);
>> +		free_group(groups[i]);
>> +	}
>> +
>> +	free(groups);
>> +}
>> +
>> +static void output_handle(struct handle_data *h)
>> +{
>> +	int i;
>> +
>> +	show_global_task(h, h->global_task);
>> +	for (i = 0; i < h->cpus; i++)
>> +		show_global_task(h, &h->global_percpu_tasks[i]);
>> +
>> +	output_groups(h);
>> +	output_tasks(h);
>> +}
>> +
>
>
> Hmm, I wonder if the combination should be done on output. Because how
> do you handle threads where an event is per task? Like scheduling one
> out and the other one in? What about wake ups? If a bunch of threads
> get woken up, how do we match the wakeups to the tasks?

So the scheduling things are all tracked per task.  I'm not actually 
killing the tasks, I'm adding a grouping with all of the per task 
information collated.

So for example, the schedule wakeup stuff gets handled the same way in 
the per task hash bucket, and then when it completes and we add it to 
the event list we go ahead and move it to the group stuff, after we've 
done all of our accounting.

>
> Would it be easier to just match all the tasks by comm after the
> recording was done? I would think matching pids would be quicker than
> matching strings.
>

We could move it to the end I guess, I did it this way so there wasn't a 
bunch of lag between stopping the profile and then outputting stuff 
because we have to walk all of the tasks and collate its info at that 
point.  Do you feel strongly either way?  I'll change it if you do, but 
otherwise this seems to work fine and doesn't mess up the accounting 
since the per-pid stuff is tracked in the task itself.  Thanks,

Josef


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/5] trace-cmd: group schedule stacktraces together with --merge
  2015-05-20 17:36   ` Steven Rostedt
@ 2015-05-20 17:54     ` Josef Bacik
  0 siblings, 0 replies; 16+ messages in thread
From: Josef Bacik @ 2015-05-20 17:54 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, kernel-team

On 05/20/2015 01:36 PM, Steven Rostedt wrote:
> On Fri, 8 May 2015 15:29:01 -0400
> Josef Bacik <jbacik@fb.com> wrote:
>
>> With --merge we want to be able to merge the scheduling information as well so
>> we can see how the whole app spent its time.  This is a little tricky because we
>> have to sort based on comm instead of pid.  It works out well enough because we
>> store the intermediate information in the per task hash, but we want to group
>> the final information together.
>
> If we do the merge at the end, we wont need to worry about this.

Yeah that's true, I guess this is probably the best argument for doing 
it this way.  Ignore my last email, I'll go ahead and rework stuff to do 
the grouping at the end.  Thanks,

Josef


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/5] trace-cmd: lookup syscall names in profile
  2015-05-08 19:29 ` [PATCH 3/5] trace-cmd: lookup syscall names in profile Josef Bacik
@ 2015-05-20 18:00   ` Steven Rostedt
  2015-05-20 18:22     ` Josef Bacik
  0 siblings, 1 reply; 16+ messages in thread
From: Steven Rostedt @ 2015-05-20 18:00 UTC (permalink / raw)
  To: Josef Bacik; +Cc: linux-kernel, kernel-team

On Fri, 8 May 2015 15:29:02 -0400
Josef Bacik <jbacik@fb.com> wrote:

> When we profile syscall times we're just given the syscall number, not the name.
> This isn't helpful for human beings, so lookup the syscall names when we are
> printing out the information.  This uses the audit library to get this
> information, I'm not married to it, but it seems that any app that does this
> mapping has to scrape /usr/include/asm/unistd*.h to pull out the names, so might
> as well just use somebody elses work.
> 
> Signed-off-by: Josef Bacik <jbacik@fb.com>
> ---
>  Makefile        |  2 +-
>  trace-profile.c | 19 ++++++++++++++++---
>  2 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/Makefile b/Makefile
> index 63f7e79..402f711 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -160,7 +160,7 @@ bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
>  plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
>  python_dir_SQ = $(subst ','\'',$(python_dir))
>  
> -LIBS = -L. -ltracecmd -ldl
> +LIBS = -L. -ltracecmd -ldl -laudit

This fails to compile for me because I don't have a audit-libs-devel
installed.

I'll keep this as is, but I'm going to add code to check for this
library, and go back to the default raw numbers if it doesn't exist.

-- Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/5] trace-cmd: lookup syscall names in profile
  2015-05-20 18:00   ` Steven Rostedt
@ 2015-05-20 18:22     ` Josef Bacik
  2015-05-20 18:56       ` Steven Rostedt
  0 siblings, 1 reply; 16+ messages in thread
From: Josef Bacik @ 2015-05-20 18:22 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, kernel-team

On 05/20/2015 02:00 PM, Steven Rostedt wrote:
> On Fri, 8 May 2015 15:29:02 -0400
> Josef Bacik <jbacik@fb.com> wrote:
>
>> When we profile syscall times we're just given the syscall number, not the name.
>> This isn't helpful for human beings, so lookup the syscall names when we are
>> printing out the information.  This uses the audit library to get this
>> information, I'm not married to it, but it seems that any app that does this
>> mapping has to scrape /usr/include/asm/unistd*.h to pull out the names, so might
>> as well just use somebody elses work.
>>
>> Signed-off-by: Josef Bacik <jbacik@fb.com>
>> ---
>>   Makefile        |  2 +-
>>   trace-profile.c | 19 ++++++++++++++++---
>>   2 files changed, 17 insertions(+), 4 deletions(-)
>>
>> diff --git a/Makefile b/Makefile
>> index 63f7e79..402f711 100644
>> --- a/Makefile
>> +++ b/Makefile
>> @@ -160,7 +160,7 @@ bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
>>   plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
>>   python_dir_SQ = $(subst ','\'',$(python_dir))
>>
>> -LIBS = -L. -ltracecmd -ldl
>> +LIBS = -L. -ltracecmd -ldl -laudit
>
> This fails to compile for me because I don't have a audit-libs-devel
> installed.
>
> I'll keep this as is, but I'm going to add code to check for this
> library, and go back to the default raw numbers if it doesn't exist.
>

Maybe we should think about autoconf at some point?  Thanks,

Josef


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/5] trace-cmd: lookup syscall names in profile
  2015-05-20 18:22     ` Josef Bacik
@ 2015-05-20 18:56       ` Steven Rostedt
  2015-05-20 19:01         ` Steven Rostedt
  0 siblings, 1 reply; 16+ messages in thread
From: Steven Rostedt @ 2015-05-20 18:56 UTC (permalink / raw)
  To: Josef Bacik; +Cc: linux-kernel, kernel-team

On Wed, 20 May 2015 14:22:31 -0400
Josef Bacik <jbacik@fb.com> wrote:

> > I'll keep this as is, but I'm going to add code to check for this
> > library, and go back to the default raw numbers if it doesn't exist.
> >
> 
> Maybe we should think about autoconf at some point?  Thanks,

Egad no! I hate autoconf. And so do many others!

A simple test works fine. I don't want to require libaudit. Just warn
users that compile without it.

I like to install trace-cmd on minimal setups (like embedded devices).
autoconf will kill that.

-- Steve


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/5] trace-cmd: lookup syscall names in profile
  2015-05-20 18:56       ` Steven Rostedt
@ 2015-05-20 19:01         ` Steven Rostedt
  2015-05-20 19:06           ` Josef Bacik
  0 siblings, 1 reply; 16+ messages in thread
From: Steven Rostedt @ 2015-05-20 19:01 UTC (permalink / raw)
  To: Josef Bacik; +Cc: linux-kernel, kernel-team

On Wed, 20 May 2015 14:56:17 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Wed, 20 May 2015 14:22:31 -0400
> Josef Bacik <jbacik@fb.com> wrote:
> 
> > > I'll keep this as is, but I'm going to add code to check for this
> > > library, and go back to the default raw numbers if it doesn't exist.
> > >
> > 
> > Maybe we should think about autoconf at some point?  Thanks,
> 
> Egad no! I hate autoconf. And so do many others!
> 
> A simple test works fine. I don't want to require libaudit. Just warn
> users that compile without it.
> 
> I like to install trace-cmd on minimal setups (like embedded devices).
> autoconf will kill that.
> 

Here's what I did to fix it:

-- Steve

diff --git a/Makefile b/Makefile
index 402f7114ab42..a8e63ad0c223 100644
--- a/Makefile
+++ b/Makefile
@@ -160,7 +160,7 @@ bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
 plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
 python_dir_SQ = $(subst ','\'',$(python_dir))
 
-LIBS = -L. -ltracecmd -ldl -laudit
+LIBS = -L. -ltracecmd -ldl
 LIB_FILE = libtracecmd.a
 
 PACKAGES= gtk+-2.0 libxml-2.0 gthread-2.0
@@ -231,12 +231,25 @@ override CFLAGS += -D_GNU_SOURCE
 ifndef NO_PTRACE
 ifneq ($(call try-cc,$(SOURCE_PTRACE),),y)
 	NO_PTRACE = 1
-	CFLAGS += -DWARN_NO_PTRACE
+	override CFLAGS += -DWARN_NO_PTRACE
 endif
 endif
 
 ifdef NO_PTRACE
-CFLAGS += -DNO_PTRACE
+override CFLAGS += -DNO_PTRACE
+endif
+
+ifndef NO_AUDIT
+ifneq ($(call try-cc,$(SOURCE_AUDIT),-laudit),y)
+	NO_AUDIT = 1
+	override CFLAGS += -DWARN_NO_AUDIT
+endif
+endif
+
+ifdef NO_AUDIT
+override CFLAGS += -DNO_AUDIT
+else
+LIBS += -laudit
 endif
 
 # Append required CFLAGS
diff --git a/features.mk b/features.mk
index cb2e8bd897be..9c9d63c13a23 100644
--- a/features.mk
+++ b/features.mk
@@ -34,3 +34,21 @@ int main (void)
 	return ret;
 }
 endef
+
+define SOURCE_AUDIT
+#include <stdio.h>
+#include <libaudit.h>
+
+int main (void)
+{
+	char *name;
+	int ret;
+	ret = audit_detect_machine();
+	if (ret < 0)
+		return ret;
+	name = audit_syscall_to_name(1, ret);
+	if (!name)
+		return -1;
+	return ret;
+}
+endef
diff --git a/trace-profile.c b/trace-profile.c
index 24f989715f05..640dca822d46 100644
--- a/trace-profile.c
+++ b/trace-profile.c
@@ -23,11 +23,17 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#ifndef NO_AUDIT
 #include <libaudit.h>
-
+#endif
 #include "trace-local.h"
 #include "trace-hash.h"
 
+#ifdef WARN_NO_AUDIT
+# warning "lib audit not found, using raw syscalls "	\
+	"(install libaudit-devel and try again)"
+#endif
+
 #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP"
 #define TASK_STATE_MAX		1024
 
@@ -848,6 +854,7 @@ static void func_print(struct trace_seq *s, struct event_hash *event_hash)
 
 static void syscall_print(struct trace_seq *s, struct event_hash *event_hash)
 {
+#ifndef NO_AUDIT
 	const char *name = NULL;
 	int machine;
 
@@ -860,6 +867,7 @@ static void syscall_print(struct trace_seq *s, struct event_hash *event_hash)
 	trace_seq_printf(s, "syscall:%s", name);
 	return;
 fail:
+#endif
 	trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name,
 			 (int)event_hash->val);
 }

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/5] trace-cmd: lookup syscall names in profile
  2015-05-20 19:01         ` Steven Rostedt
@ 2015-05-20 19:06           ` Josef Bacik
  0 siblings, 0 replies; 16+ messages in thread
From: Josef Bacik @ 2015-05-20 19:06 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, kernel-team

On 05/20/2015 03:01 PM, Steven Rostedt wrote:
> On Wed, 20 May 2015 14:56:17 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
>
>> On Wed, 20 May 2015 14:22:31 -0400
>> Josef Bacik <jbacik@fb.com> wrote:
>>
>>>> I'll keep this as is, but I'm going to add code to check for this
>>>> library, and go back to the default raw numbers if it doesn't exist.
>>>>
>>>
>>> Maybe we should think about autoconf at some point?  Thanks,
>>
>> Egad no! I hate autoconf. And so do many others!
>>
>> A simple test works fine. I don't want to require libaudit. Just warn
>> users that compile without it.
>>
>> I like to install trace-cmd on minimal setups (like embedded devices).
>> autoconf will kill that.
>>
>
> Here's what I did to fix it:
>
> -- Steve
>
> diff --git a/Makefile b/Makefile
> index 402f7114ab42..a8e63ad0c223 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -160,7 +160,7 @@ bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
>   plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
>   python_dir_SQ = $(subst ','\'',$(python_dir))
>
> -LIBS = -L. -ltracecmd -ldl -laudit
> +LIBS = -L. -ltracecmd -ldl
>   LIB_FILE = libtracecmd.a
>
>   PACKAGES= gtk+-2.0 libxml-2.0 gthread-2.0
> @@ -231,12 +231,25 @@ override CFLAGS += -D_GNU_SOURCE
>   ifndef NO_PTRACE
>   ifneq ($(call try-cc,$(SOURCE_PTRACE),),y)
>   	NO_PTRACE = 1
> -	CFLAGS += -DWARN_NO_PTRACE
> +	override CFLAGS += -DWARN_NO_PTRACE
>   endif
>   endif
>
>   ifdef NO_PTRACE
> -CFLAGS += -DNO_PTRACE
> +override CFLAGS += -DNO_PTRACE
> +endif
> +
> +ifndef NO_AUDIT
> +ifneq ($(call try-cc,$(SOURCE_AUDIT),-laudit),y)
> +	NO_AUDIT = 1
> +	override CFLAGS += -DWARN_NO_AUDIT
> +endif
> +endif
> +
> +ifdef NO_AUDIT
> +override CFLAGS += -DNO_AUDIT
> +else
> +LIBS += -laudit
>   endif
>
>   # Append required CFLAGS
> diff --git a/features.mk b/features.mk
> index cb2e8bd897be..9c9d63c13a23 100644
> --- a/features.mk
> +++ b/features.mk
> @@ -34,3 +34,21 @@ int main (void)
>   	return ret;
>   }
>   endef
> +
> +define SOURCE_AUDIT
> +#include <stdio.h>
> +#include <libaudit.h>
> +
> +int main (void)
> +{
> +	char *name;
> +	int ret;
> +	ret = audit_detect_machine();
> +	if (ret < 0)
> +		return ret;
> +	name = audit_syscall_to_name(1, ret);
> +	if (!name)
> +		return -1;
> +	return ret;
> +}
> +endef
> diff --git a/trace-profile.c b/trace-profile.c
> index 24f989715f05..640dca822d46 100644
> --- a/trace-profile.c
> +++ b/trace-profile.c
> @@ -23,11 +23,17 @@
>   #include <stdio.h>
>   #include <stdlib.h>
>   #include <string.h>
> +#ifndef NO_AUDIT
>   #include <libaudit.h>
> -
> +#endif
>   #include "trace-local.h"
>   #include "trace-hash.h"
>
> +#ifdef WARN_NO_AUDIT
> +# warning "lib audit not found, using raw syscalls "	\
> +	"(install libaudit-devel and try again)"
> +#endif
> +
>   #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP"
>   #define TASK_STATE_MAX		1024
>
> @@ -848,6 +854,7 @@ static void func_print(struct trace_seq *s, struct event_hash *event_hash)
>
>   static void syscall_print(struct trace_seq *s, struct event_hash *event_hash)
>   {
> +#ifndef NO_AUDIT
>   	const char *name = NULL;
>   	int machine;
>
> @@ -860,6 +867,7 @@ static void syscall_print(struct trace_seq *s, struct event_hash *event_hash)
>   	trace_seq_printf(s, "syscall:%s", name);
>   	return;
>   fail:
> +#endif
>   	trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name,
>   			 (int)event_hash->val);
>   }
>

Looks good to me, thanks,

Josef

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2015-05-20 19:07 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-08 19:28 [PATCH 0/5] trace-cmd profile enhancements Josef Bacik
2015-05-08 19:29 ` [PATCH 1/5] trace-cmd: add option to group like comms for profile Josef Bacik
2015-05-20 17:31   ` Steven Rostedt
2015-05-20 17:51     ` Josef Bacik
2015-05-08 19:29 ` [PATCH 2/5] trace-cmd: group schedule stacktraces together with --merge Josef Bacik
2015-05-20 17:36   ` Steven Rostedt
2015-05-20 17:54     ` Josef Bacik
2015-05-08 19:29 ` [PATCH 3/5] trace-cmd: lookup syscall names in profile Josef Bacik
2015-05-20 18:00   ` Steven Rostedt
2015-05-20 18:22     ` Josef Bacik
2015-05-20 18:56       ` Steven Rostedt
2015-05-20 19:01         ` Steven Rostedt
2015-05-20 19:06           ` Josef Bacik
2015-05-08 19:29 ` [PATCH 4/5] trace-cmd: sort the events in the profile output Josef Bacik
2015-05-08 19:29 ` [PATCH 5/5] trace-cmd: make trace-hist work with instances Josef Bacik
2015-05-08 20:13 ` [PATCH 0/5] trace-cmd profile enhancements Steven Rostedt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).