linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Hari Bathini <hbathini@linux.vnet.ibm.com>
To: daniel@iogearbox.net, peterz@infradead.org,
	linux-kernel@vger.kernel.org, acme@kernel.org,
	alexander.shishkin@linux.intel.com, mingo@redhat.com,
	paulus@samba.org, ebiederm@xmission.com, kernel@kyup.com,
	rostedt@goodmis.org, viro@zeniv.linux.org.uk
Cc: aravinda@linux.vnet.ibm.com, ananth@in.ibm.com
Subject: [RFC PATCH v2 2/3] tracefs: add instances support for uprobe events
Date: Thu, 28 Jul 2016 02:57:38 +0530	[thread overview]
Message-ID: <146965485827.23765.14920656474693831799.stgit@hbathini.in.ibm.com> (raw)
In-Reply-To: <146965470618.23765.7329786743211962695.stgit@hbathini.in.ibm.com>

If a uprobe event is set on a library function, and if a similar uprobe
event trace is needed for a container, a duplicate is created, leaving
the uprobe list with multiple entries for the same function:

  $ perf probe --list
    probe_libc:malloc    (on 0x80490 in /lib64/libc.so.6)
    probe_libc:malloc_1  (on __libc_malloc in /lib64/libc.so.6)
  $

This can soon get out of hand if multiple containers want to probe the
same function/address in their libraries. This patch tries to resolve this
by adding uprobe event trace files to every new instance. Currently, the
perf tool can leverage this by using the --debugfs-dir option - something
like (assuming the instance directory is named 'tracing'):

  $ perf --debugfs-dir=$MOUNT_PNT/instances probe /lib64/libc.so.6 malloc
  $
  $
  $ perf --debugfs-dir=$MOUNT_PNT/instances probe --list
    probe_libc:malloc    (on __libc_malloc in /lib64/libc.so.6)
  $

New uprobe events can be added to the uprobe_events file under the instance
directory, and the profile information for these events will be available
in the uprobe_profile file in the same instance directory.

Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
---
 include/linux/trace_events.h |    3 +
 kernel/trace/trace.c         |    2 +
 kernel/trace/trace.h         |   12 +++
 kernel/trace/trace_events.c  |   15 +++-
 kernel/trace/trace_kprobe.c  |    2 -
 kernel/trace/trace_uprobe.c  |  158 +++++++++++++++++++++++++++++++-----------
 6 files changed, 144 insertions(+), 48 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index be00761..f893223 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -451,7 +451,8 @@ extern int trace_event_raw_init(struct trace_event_call *call);
 extern int trace_define_field(struct trace_event_call *call, const char *type,
 			      const char *name, int offset, int size,
 			      int is_signed, int filter_type);
-extern int trace_add_event_call(struct trace_event_call *call);
+extern int trace_add_event_call(struct trace_event_call *call,
+				struct trace_array *tr);
 extern int trace_remove_event_call(struct trace_event_call *call);
 extern int trace_event_get_offsets(struct trace_event_call *call);
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a4bd6b..23a8111 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6966,6 +6966,8 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 			&tr->max_latency, &tracing_max_lat_fops);
 #endif
 
+	uprobe_create_trace_files(tr, d_tracer);
+
 	if (ftrace_create_function_files(tr, d_tracer))
 		WARN(1, "Could not allocate function filter files");
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5167c36..a8360e9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -245,6 +245,10 @@ struct trace_array {
 	struct list_head	events;
 	cpumask_var_t		tracing_cpumask; /* only trace on set CPUs */
 	int			ref;
+#ifdef CONFIG_UPROBE_EVENT
+	struct mutex		uprobe_lock;
+	struct list_head	uprobe_list;
+#endif
 #ifdef CONFIG_FUNCTION_TRACER
 	struct ftrace_ops	*ops;
 	/* function tracing enabled */
@@ -819,6 +823,14 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
 
 extern struct list_head ftrace_pids;
 
+#ifdef CONFIG_UPROBE_EVENT
+void uprobe_create_trace_files(struct trace_array *tr,
+			       struct dentry *parent);
+#else
+static inline void
+uprobe_create_trace_files(struct trace_array *tr, struct dentry *parent) { }
+#endif /* CONFIG_UPROBE_EVENT */
+
 #ifdef CONFIG_FUNCTION_TRACER
 extern bool ftrace_filter_param __initdata;
 static inline int ftrace_trace_task(struct task_struct *task)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3d41558..2e0f986 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2441,15 +2441,20 @@ struct ftrace_module_file_ops;
 static void __add_event_to_tracers(struct trace_event_call *call);
 
 /* Add an additional event_call dynamically */
-int trace_add_event_call(struct trace_event_call *call)
+int trace_add_event_call(struct trace_event_call *call, struct trace_array *tr)
 {
 	int ret;
 	mutex_lock(&trace_types_lock);
 	mutex_lock(&event_mutex);
 
 	ret = __register_event(call, NULL);
-	if (ret >= 0)
-		__add_event_to_tracers(call);
+	if (ret >= 0) {
+		if (tr)
+			/* If a tracer is specified, add event only to it */
+			__trace_add_new_event(call, tr);
+		else
+			__add_event_to_tracers(call);
+	}
 
 	mutex_unlock(&event_mutex);
 	mutex_unlock(&trace_types_lock);
@@ -2609,6 +2614,10 @@ __trace_add_event_dirs(struct trace_array *tr)
 	int ret;
 
 	list_for_each_entry(call, &ftrace_events, list) {
+		/* Don't add dynamic uprobe events to new tracers */
+		if (call->flags & TRACE_EVENT_FL_UPROBE)
+			continue;
+
 		ret = __trace_add_new_event(call, tr);
 		if (ret < 0)
 			pr_warn("Could not create directory for event %s\n",
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5546eec..b82a328 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1296,7 +1296,7 @@ static int register_kprobe_event(struct trace_kprobe *tk)
 	call->flags = TRACE_EVENT_FL_KPROBE;
 	call->class->reg = kprobe_register;
 	call->data = tk;
-	ret = trace_add_event_call(call);
+	ret = trace_add_event_call(call, NULL);
 	if (ret) {
 		pr_info("Failed to register kprobe event: %s\n",
 			trace_event_name(call));
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c534854..ea8c4e4 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -64,12 +64,10 @@ struct trace_uprobe {
 	(offsetof(struct trace_uprobe, tp.args) +	\
 	(sizeof(struct probe_arg) * (n)))
 
-static int register_uprobe_event(struct trace_uprobe *tu);
+static int register_uprobe_event(struct trace_array *tr,
+				 struct trace_uprobe *tu);
 static int unregister_uprobe_event(struct trace_uprobe *tu);
 
-static DEFINE_MUTEX(uprobe_lock);
-static LIST_HEAD(uprobe_list);
-
 struct uprobe_dispatch_data {
 	struct trace_uprobe	*tu;
 	unsigned long		bp_addr;
@@ -288,11 +286,12 @@ static void free_trace_uprobe(struct trace_uprobe *tu)
 	kfree(tu);
 }
 
-static struct trace_uprobe *find_probe_event(const char *event, const char *group)
+static struct trace_uprobe *
+find_probe_event(struct trace_array *tr, const char *event, const char *group)
 {
 	struct trace_uprobe *tu;
 
-	list_for_each_entry(tu, &uprobe_list, list)
+	list_for_each_entry(tu, &tr->uprobe_list, list)
 		if (strcmp(trace_event_name(&tu->tp.call), event) == 0 &&
 		    strcmp(tu->tp.call.class->system, group) == 0)
 			return tu;
@@ -315,15 +314,16 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu)
 }
 
 /* Register a trace_uprobe and probe_event */
-static int register_trace_uprobe(struct trace_uprobe *tu)
+static int register_trace_uprobe(struct trace_array *tr,
+				 struct trace_uprobe *tu)
 {
 	struct trace_uprobe *old_tu;
 	int ret;
 
-	mutex_lock(&uprobe_lock);
+	mutex_lock(&tr->uprobe_lock);
 
 	/* register as an event */
-	old_tu = find_probe_event(trace_event_name(&tu->tp.call),
+	old_tu = find_probe_event(tr, trace_event_name(&tu->tp.call),
 			tu->tp.call.class->system);
 	if (old_tu) {
 		/* delete old event */
@@ -332,16 +332,16 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
 			goto end;
 	}
 
-	ret = register_uprobe_event(tu);
+	ret = register_uprobe_event(tr, tu);
 	if (ret) {
 		pr_warn("Failed to register probe event(%d)\n", ret);
 		goto end;
 	}
 
-	list_add_tail(&tu->list, &uprobe_list);
+	list_add_tail(&tu->list, &tr->uprobe_list);
 
 end:
-	mutex_unlock(&uprobe_lock);
+	mutex_unlock(&tr->uprobe_lock);
 
 	return ret;
 }
@@ -352,7 +352,7 @@ end:
  *
  *  - Remove uprobe: -:[GRP/]EVENT
  */
-static int create_trace_uprobe(int argc, char **argv)
+static int create_trace_uprobe(struct trace_array *tr, int argc, char **argv)
 {
 	struct trace_uprobe *tu;
 	struct inode *inode;
@@ -409,17 +409,17 @@ static int create_trace_uprobe(int argc, char **argv)
 			pr_info("Delete command needs an event name.\n");
 			return -EINVAL;
 		}
-		mutex_lock(&uprobe_lock);
-		tu = find_probe_event(event, group);
+		mutex_lock(&tr->uprobe_lock);
+		tu = find_probe_event(tr, event, group);
 
 		if (!tu) {
-			mutex_unlock(&uprobe_lock);
+			mutex_unlock(&tr->uprobe_lock);
 			pr_info("Event %s/%s doesn't exist.\n", group, event);
 			return -ENOENT;
 		}
 		/* delete an event */
 		ret = unregister_trace_uprobe(tu);
-		mutex_unlock(&uprobe_lock);
+		mutex_unlock(&tr->uprobe_lock);
 		return ret;
 	}
 
@@ -543,7 +543,7 @@ static int create_trace_uprobe(int argc, char **argv)
 		}
 	}
 
-	ret = register_trace_uprobe(tu);
+	ret = register_trace_uprobe(tr, tu);
 	if (ret)
 		goto error;
 	return 0;
@@ -560,37 +560,45 @@ fail_address_parse:
 	return ret;
 }
 
-static int cleanup_all_probes(void)
+static int cleanup_all_probes(struct trace_array *tr)
 {
 	struct trace_uprobe *tu;
 	int ret = 0;
 
-	mutex_lock(&uprobe_lock);
-	while (!list_empty(&uprobe_list)) {
-		tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
+	mutex_lock(&tr->uprobe_lock);
+	while (!list_empty(&tr->uprobe_list)) {
+		tu = list_entry(tr->uprobe_list.next,
+				struct trace_uprobe,
+				list);
 		ret = unregister_trace_uprobe(tu);
 		if (ret)
 			break;
 	}
-	mutex_unlock(&uprobe_lock);
+	mutex_unlock(&tr->uprobe_lock);
 	return ret;
 }
 
 /* Probes listing interfaces */
 static void *probes_seq_start(struct seq_file *m, loff_t *pos)
 {
-	mutex_lock(&uprobe_lock);
-	return seq_list_start(&uprobe_list, *pos);
+	struct trace_array *tr = m->file->f_inode->i_private;
+
+	mutex_lock(&tr->uprobe_lock);
+	return seq_list_start(&tr->uprobe_list, *pos);
 }
 
 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	return seq_list_next(v, &uprobe_list, pos);
+	struct trace_array *tr = m->file->f_inode->i_private;
+
+	return seq_list_next(v, &tr->uprobe_list, pos);
 }
 
 static void probes_seq_stop(struct seq_file *m, void *v)
 {
-	mutex_unlock(&uprobe_lock);
+	struct trace_array *tr = m->file->f_inode->i_private;
+
+	mutex_unlock(&tr->uprobe_lock);
 }
 
 static int probes_seq_show(struct seq_file *m, void *v)
@@ -635,9 +643,10 @@ static const struct seq_operations probes_seq_op = {
 static int probes_open(struct inode *inode, struct file *file)
 {
 	int ret;
+	struct trace_array *tr = inode->i_private;
 
 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
-		ret = cleanup_all_probes();
+		ret = cleanup_all_probes(tr);
 		if (ret)
 			return ret;
 	}
@@ -645,10 +654,72 @@ static int probes_open(struct inode *inode, struct file *file)
 	return seq_open(file, &probes_seq_op);
 }
 
+#define WRITE_BUFSIZE  4096
+
 static ssize_t probes_write(struct file *file, const char __user *buffer,
 			    size_t count, loff_t *ppos)
 {
-	return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe);
+	char *kbuf, *tmp;
+	char **argv;
+	int argc;
+	int ret = 0;
+	size_t done = 0;
+	size_t size;
+	struct trace_array *tr = file->f_inode->i_private;
+
+	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+
+	while (done < count) {
+		size = count - done;
+
+		if (size >= WRITE_BUFSIZE)
+			size = WRITE_BUFSIZE - 1;
+
+		if (copy_from_user(kbuf, buffer + done, size)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		kbuf[size] = '\0';
+		tmp = strchr(kbuf, '\n');
+
+		if (tmp) {
+			*tmp = '\0';
+			size = tmp - kbuf + 1;
+		} else if (done + size < count) {
+			pr_warn("Line length is too long: Should be less than %d\n",
+				WRITE_BUFSIZE);
+			ret = -EINVAL;
+			goto out;
+		}
+		done += size;
+		/* Remove comments */
+		tmp = strchr(kbuf, '#');
+
+		if (tmp)
+			*tmp = '\0';
+
+		argc = 0;
+		argv = argv_split(GFP_KERNEL, kbuf, &argc);
+		if (!argv) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		if (argc)
+			ret = create_trace_uprobe(tr, argc, argv);
+
+		argv_free(argv);
+		if (ret)
+			goto out;
+	}
+	ret = done;
+
+out:
+	kfree(kbuf);
+
+	return ret;
 }
 
 static const struct file_operations uprobe_events_ops = {
@@ -1290,7 +1361,8 @@ static struct trace_event_functions uprobe_funcs = {
 	.trace		= print_uprobe_event
 };
 
-static int register_uprobe_event(struct trace_uprobe *tu)
+static int register_uprobe_event(struct trace_array *tr,
+				 struct trace_uprobe *tu)
 {
 	struct trace_event_call *call = &tu->tp.call;
 	int ret;
@@ -1312,7 +1384,7 @@ static int register_uprobe_event(struct trace_uprobe *tu)
 	call->flags = TRACE_EVENT_FL_UPROBE;
 	call->class->reg = trace_uprobe_register;
 	call->data = tu;
-	ret = trace_add_event_call(call);
+	ret = trace_add_event_call(call, tr);
 
 	if (ret) {
 		pr_info("Failed to register uprobe event: %s\n",
@@ -1338,20 +1410,20 @@ static int unregister_uprobe_event(struct trace_uprobe *tu)
 }
 
 /* Make a trace interface for controling probe points */
-static __init int init_uprobe_trace(void)
+void uprobe_create_trace_files(struct trace_array *tr,
+			       struct dentry *parent)
 {
-	struct dentry *d_tracer;
+	if (!tr) {
+		WARN(1, "Need a trace array for uprobe events");
+		return;
+	}
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
-		return 0;
+	mutex_init(&tr->uprobe_lock);
+	INIT_LIST_HEAD(&tr->uprobe_list);
 
-	trace_create_file("uprobe_events", 0644, d_tracer,
-				    NULL, &uprobe_events_ops);
+	trace_create_file("uprobe_events", 0644, parent,
+				tr, &uprobe_events_ops);
 	/* Profile interface */
-	trace_create_file("uprobe_profile", 0444, d_tracer,
-				    NULL, &uprobe_profile_ops);
-	return 0;
+	trace_create_file("uprobe_profile", 0444, parent,
+				tr, &uprobe_profile_ops);
 }
-
-fs_initcall(init_uprobe_trace);

  parent reply	other threads:[~2016-07-27 21:27 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-27 21:27 [RFC PATCH v2 0/3] perf/tracefs: Container-aware tracing support Hari Bathini
2016-07-27 21:27 ` [RFC PATCH v2 1/3] perf: filter container events based on cgroup namespace Hari Bathini
2016-07-27 21:27 ` Hari Bathini [this message]
2016-08-01 21:45   ` [RFC PATCH v2 2/3] tracefs: add instances support for uprobe events Steven Rostedt
2016-08-02 17:27     ` Hari Bathini
2016-08-02 17:32       ` Hari Bathini
2016-08-02 17:49       ` Steven Rostedt
2016-08-03 19:30         ` Aravinda Prasad
2016-08-03 20:10           ` Steven Rostedt
2016-08-03 20:16             ` Aravinda Prasad
2016-08-04  1:04               ` Steven Rostedt
2016-08-04 13:46                 ` Aravinda Prasad
2016-08-04 14:08                   ` Steven Rostedt
2016-08-04 14:34                     ` Aravinda Prasad
2016-07-27 21:27 ` [RFC PATCH v2 3/3] tracefs: add 'newinstance' mount option Hari Bathini
2016-08-04  2:54   ` Eric W. Biederman
2016-08-04 12:26     ` Hari Bathini
2016-08-04 14:12       ` Eric W. Biederman
2016-08-04  2:59 ` [RFC PATCH v2 0/3] perf/tracefs: Container-aware tracing support Eric W. Biederman
2016-08-04 14:48   ` Aravinda Prasad
2016-08-04 18:27     ` Eric W. Biederman
2016-08-04 19:11       ` Aravinda Prasad

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=146965485827.23765.14920656474693831799.stgit@hbathini.in.ibm.com \
    --to=hbathini@linux.vnet.ibm.com \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=ananth@in.ibm.com \
    --cc=aravinda@linux.vnet.ibm.com \
    --cc=daniel@iogearbox.net \
    --cc=ebiederm@xmission.com \
    --cc=kernel@kyup.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=paulus@samba.org \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).