linux-kernel.vger.kernel.org archive mirror
* [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
@ 2013-06-14  1:44 zhangwei(Jovi)
  2013-06-14 13:21 ` Masami Hiramatsu
  2013-06-14 14:44 ` Oleg Nesterov
  0 siblings, 2 replies; 19+ messages in thread
From: zhangwei(Jovi) @ 2013-06-14  1:44 UTC (permalink / raw)
  To: Steven Rostedt, Frederic Weisbecker, Ingo Molnar
  Cc: Masami Hiramatsu, Oleg Nesterov, Srikar Dronamraju, linux-kernel

Support multi-buffer on uprobe-based dynamic events by
using ftrace_event_file.

The code change is based on the kprobe-based dynamic events
multibuffer support work committed by Masami (commit 41a7dd420c).

Signed-off-by: zhangwei(Jovi) <jovi.zhangwei@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 kernel/trace/trace_uprobe.c |  183 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 161 insertions(+), 22 deletions(-)

diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index d5d0cd3..31f08d6 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -53,6 +53,7 @@ struct trace_uprobe {
 	struct list_head		list;
 	struct ftrace_event_class	class;
 	struct ftrace_event_call	call;
+	struct ftrace_event_file * __rcu *files;
 	struct trace_uprobe_filter	filter;
 	struct uprobe_consumer		consumer;
 	struct inode			*inode;
@@ -513,7 +514,8 @@ static const struct file_operations uprobe_profile_ops = {
 };

 static void uprobe_trace_print(struct trace_uprobe *tu,
-				unsigned long func, struct pt_regs *regs)
+				unsigned long func, struct pt_regs *regs,
+				struct ftrace_event_file *ftrace_file)
 {
 	struct uprobe_trace_entry_head *entry;
 	struct ring_buffer_event *event;
@@ -522,9 +524,15 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
 	int size, i;
 	struct ftrace_event_call *call = &tu->call;

+	WARN_ON(call != ftrace_file->event_call);
+
+	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
+		return;
+
 	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
-	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
-						  size + tu->size, 0, 0);
+	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
+						call->event.type,
+						size + tu->size, 0, 0);
 	if (!event)
 		return;

@@ -548,15 +556,35 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
 /* uprobe handler */
 static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
 {
-	if (!is_ret_probe(tu))
-		uprobe_trace_print(tu, 0, regs);
+	struct ftrace_event_file **file;
+
+	if (is_ret_probe(tu))
+		return 0;
+
+	file = rcu_dereference_raw(tu->files);
+	if (unlikely(!file))
+		return 0;
+
+	while (*file) {
+		uprobe_trace_print(tu, 0, regs, *file);
+		file++;
+	}
+
 	return 0;
 }

 static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
 				struct pt_regs *regs)
 {
-	uprobe_trace_print(tu, func, regs);
+	struct ftrace_event_file **file = rcu_dereference_raw(tu->files);
+
+	if (unlikely(!file))
+		return;
+
+	while (*file) {
+		uprobe_trace_print(tu, func, regs, *file);
+		file++;
+	}
 }

 /* Event entry printers */
@@ -597,6 +625,26 @@ partial:
 	return TRACE_TYPE_PARTIAL_LINE;
 }

+
+static int trace_uprobe_nr_files(struct trace_uprobe *tu)
+{
+	struct ftrace_event_file **file;
+	int ret = 0;
+
+	/*
+	 * Since all tu->files updater is protected by uprobe_enable_lock,
+	 * we don't need to lock an rcu_read_lock.
+	 */
+	file = rcu_dereference_raw(tu->files);
+	if (file)
+		while (*(file++))
+			ret++;
+
+	return ret;
+}
+
+static DEFINE_MUTEX(uprobe_enable_lock);
+
 static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
 {
 	return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
@@ -607,33 +655,123 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self,
 				struct mm_struct *mm);

 static int
-probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
+probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
+		   filter_func_t filter)
 {
+	int enabled = 0;
 	int ret = 0;

+	mutex_lock(&uprobe_enable_lock);
+
 	if (is_trace_uprobe_enabled(tu))
-		return -EINTR;
+		enabled = 1;
+
+	if (file) {
+		struct ftrace_event_file **new, **old;
+		int n = trace_uprobe_nr_files(tu);
+
+		old = rcu_dereference_raw(tu->files);
+		/* 1 is for new one and 1 is for stopper */
+		new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *),
+			      GFP_KERNEL);
+		if (!new) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
+		memcpy(new, old, n * sizeof(struct ftrace_event_file *));
+		new[n] = file;
+		/* The last one keeps a NULL */
+
+		rcu_assign_pointer(tu->files, new);
+		tu->flags |= TP_FLAG_TRACE;
+
+		if (old) {
+			/* Make sure the probe is done with old files */
+			synchronize_sched();
+			kfree(old);
+		}
+	} else
+		tu->flags |= TP_FLAG_PROFILE;

 	WARN_ON(!uprobe_filter_is_empty(&tu->filter));

-	tu->flags |= flag;
-	tu->consumer.filter = filter;
-	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
-	if (ret)
-		tu->flags &= ~flag;
+	if (!enabled) {
+		tu->consumer.filter = filter;
+		ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
+		if (ret)
+			tu->flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
+	}

+ out_unlock:
+	mutex_unlock(&uprobe_enable_lock);
 	return ret;
 }

-static void probe_event_disable(struct trace_uprobe *tu, int flag)
+static int
+trace_uprobe_file_index(struct trace_uprobe *tu, struct ftrace_event_file *file)
 {
-	if (!is_trace_uprobe_enabled(tu))
-		return;
+	struct ftrace_event_file **files;
+	int i;
+
+	/*
+	 * Since all tu->files updater is protected by uprobe_enable_lock,
+	 * we don't need to lock an rcu_read_lock.
+	 */
+	files = rcu_dereference_raw(tu->files);
+	if (files) {
+		for (i = 0; files[i]; i++)
+			if (files[i] == file)
+				return i;
+	}
+
+	return -1;
+}
+
+static void
+probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file)
+{
+	mutex_lock(&uprobe_enable_lock);
+
+	if (file) {
+		struct ftrace_event_file **new, **old;
+		int n = trace_uprobe_nr_files(tu);
+		int i, j;
+
+		old = rcu_dereference_raw(tu->files);
+		if (n == 0 || trace_uprobe_file_index(tu, file) < 0)
+			goto out_unlock;
+
+		if (n == 1) {	/* Remove the last file */
+			tu->flags &= ~TP_FLAG_TRACE;
+			new = NULL;
+		} else {
+			new = kzalloc(n * sizeof(struct ftrace_event_file *),
+				      GFP_KERNEL);
+			if (!new)
+				goto out_unlock;
+
+			/* This copy & check loop copies the NULL stopper too */
+			for (i = 0, j = 0; j < n && i < n + 1; i++)
+				if (old[i] != file)
+					new[j++] = old[i];
+		}
+
+		rcu_assign_pointer(tu->files, new);
+
+		/* Make sure the probe is done with old files */
+		synchronize_sched();
+		kfree(old);
+	} else
+		tu->flags &= ~TP_FLAG_PROFILE;
+

 	WARN_ON(!uprobe_filter_is_empty(&tu->filter));

-	uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
-	tu->flags &= ~flag;
+	if (!is_trace_uprobe_enabled(tu))
+		uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
+
+ out_unlock:
+	mutex_unlock(&uprobe_enable_lock);
 }

 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
@@ -869,21 +1007,22 @@ static
 int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
 {
 	struct trace_uprobe *tu = event->data;
+	struct ftrace_event_file *file = data;

 	switch (type) {
 	case TRACE_REG_REGISTER:
-		return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
+		return probe_event_enable(tu, file, NULL);

 	case TRACE_REG_UNREGISTER:
-		probe_event_disable(tu, TP_FLAG_TRACE);
+		probe_event_disable(tu, file);
 		return 0;

 #ifdef CONFIG_PERF_EVENTS
 	case TRACE_REG_PERF_REGISTER:
-		return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
+		return probe_event_enable(tu, NULL, uprobe_perf_filter);

 	case TRACE_REG_PERF_UNREGISTER:
-		probe_event_disable(tu, TP_FLAG_PROFILE);
+		probe_event_disable(tu, NULL);
 		return 0;

 	case TRACE_REG_PERF_OPEN:
-- 
1.7.9.7




* Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-14  1:44 [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer zhangwei(Jovi)
@ 2013-06-14 13:21 ` Masami Hiramatsu
  2013-06-14 13:51   ` Oleg Nesterov
  2013-06-14 15:31   ` Steven Rostedt
  2013-06-14 14:44 ` Oleg Nesterov
  1 sibling, 2 replies; 19+ messages in thread
From: Masami Hiramatsu @ 2013-06-14 13:21 UTC (permalink / raw)
  To: zhangwei(Jovi)
  Cc: Steven Rostedt, Frederic Weisbecker, Ingo Molnar, Oleg Nesterov,
	Srikar Dronamraju, linux-kernel, yrl.pp-manager.tt

(2013/06/14 10:44), zhangwei(Jovi) wrote:
> Support multi-buffer on uprobe-based dynamic events by
> using ftrace_event_file.
> 
> The code change is based on kprobe-based dynamic events
> multibuffer support work commited by Masami(commit 41a7dd420c)

I'm not so sure that rcu_dereference_raw() can ensure the safety
of accessing the pointer in the uprobe handler. (Since kprobes disables
irqs and preemption, its handlers can rely on RCU.)
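
To illustrate the concern, here is a minimal sketch (not the code in this
patch) of a uprobe handler that takes an explicit RCU read-side critical
section around the walk, since uprobe handlers run in preemptible task
context:

	static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
	{
		struct ftrace_event_file **file;

		if (is_ret_probe(tu))
			return 0;

		/*
		 * Explicit read-side critical section; uprobe handlers are
		 * preemptible, unlike kprobe handlers.
		 */
		rcu_read_lock();
		file = rcu_dereference(tu->files);
		for (; file && *file; file++)
			uprobe_trace_print(tu, 0, regs, *file);
		rcu_read_unlock();

		return 0;
	}

Note that rcu_read_lock()-style readers would have to pair with
synchronize_rcu() (or call_rcu()) on the update side, not with the
synchronize_sched() used in the patch.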

Srikar, Oleg, could you check that?

Thank you,

> 
> Signed-off-by: zhangwei(Jovi) <jovi.zhangwei@huawei.com>
> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Oleg Nesterov <oleg@redhat.com>
> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
> ---
>  kernel/trace/trace_uprobe.c |  183 +++++++++++++++++++++++++++++++++++++------
>  1 file changed, 161 insertions(+), 22 deletions(-)
> 
> diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
> index d5d0cd3..31f08d6 100644
> --- a/kernel/trace/trace_uprobe.c
> +++ b/kernel/trace/trace_uprobe.c
> @@ -53,6 +53,7 @@ struct trace_uprobe {
>  	struct list_head		list;
>  	struct ftrace_event_class	class;
>  	struct ftrace_event_call	call;
> +	struct ftrace_event_file * __rcu *files;
>  	struct trace_uprobe_filter	filter;
>  	struct uprobe_consumer		consumer;
>  	struct inode			*inode;
> @@ -513,7 +514,8 @@ static const struct file_operations uprobe_profile_ops = {
>  };
> 
>  static void uprobe_trace_print(struct trace_uprobe *tu,
> -				unsigned long func, struct pt_regs *regs)
> +				unsigned long func, struct pt_regs *regs,
> +				struct ftrace_event_file *ftrace_file)
>  {
>  	struct uprobe_trace_entry_head *entry;
>  	struct ring_buffer_event *event;
> @@ -522,9 +524,15 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
>  	int size, i;
>  	struct ftrace_event_call *call = &tu->call;
> 
> +	WARN_ON(call != ftrace_file->event_call);
> +
> +	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
> +		return;
> +
>  	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
> -	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
> -						  size + tu->size, 0, 0);
> +	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
> +						call->event.type,
> +						size + tu->size, 0, 0);
>  	if (!event)
>  		return;
> 
> @@ -548,15 +556,35 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
>  /* uprobe handler */
>  static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
>  {
> -	if (!is_ret_probe(tu))
> -		uprobe_trace_print(tu, 0, regs);
> +	struct ftrace_event_file **file;
> +
> +	if (is_ret_probe(tu))
> +		return 0;
> +
> +	file = rcu_dereference_raw(tu->files);
> +	if (unlikely(!file))
> +		return 0;
> +
> +	while (*file) {
> +		uprobe_trace_print(tu, 0, regs, *file);
> +		file++;
> +	}
> +
>  	return 0;
>  }
> 
>  static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
>  				struct pt_regs *regs)
>  {
> -	uprobe_trace_print(tu, func, regs);
> +	struct ftrace_event_file **file = rcu_dereference_raw(tu->files);
> +
> +	if (unlikely(!file))
> +		return;
> +
> +	while (*file) {
> +		uprobe_trace_print(tu, func, regs, *file);
> +		file++;
> +	}
>  }
> 
>  /* Event entry printers */
> @@ -597,6 +625,26 @@ partial:
>  	return TRACE_TYPE_PARTIAL_LINE;
>  }
> 
> +
> +static int trace_uprobe_nr_files(struct trace_uprobe *tu)
> +{
> +	struct ftrace_event_file **file;
> +	int ret = 0;
> +
> +	/*
> +	 * Since all tu->files updater is protected by uprobe_enable_lock,
> +	 * we don't need to lock an rcu_read_lock.
> +	 */
> +	file = rcu_dereference_raw(tu->files);
> +	if (file)
> +		while (*(file++))
> +			ret++;
> +
> +	return ret;
> +}
> +
> +static DEFINE_MUTEX(uprobe_enable_lock);
> +
>  static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
>  {
>  	return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
> @@ -607,33 +655,123 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self,
>  				struct mm_struct *mm);
> 
>  static int
> -probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
> +probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
> +		   filter_func_t filter)
>  {
> +	int enabled = 0;
>  	int ret = 0;
> 
> +	mutex_lock(&uprobe_enable_lock);
> +
>  	if (is_trace_uprobe_enabled(tu))
> -		return -EINTR;
> +		enabled = 1;
> +
> +	if (file) {
> +		struct ftrace_event_file **new, **old;
> +		int n = trace_uprobe_nr_files(tu);
> +
> +		old = rcu_dereference_raw(tu->files);
> +		/* 1 is for new one and 1 is for stopper */
> +		new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *),
> +			      GFP_KERNEL);
> +		if (!new) {
> +			ret = -ENOMEM;
> +			goto out_unlock;
> +		}
> +		memcpy(new, old, n * sizeof(struct ftrace_event_file *));
> +		new[n] = file;
> +		/* The last one keeps a NULL */
> +
> +		rcu_assign_pointer(tu->files, new);
> +		tu->flags |= TP_FLAG_TRACE;
> +
> +		if (old) {
> +			/* Make sure the probe is done with old files */
> +			synchronize_sched();
> +			kfree(old);
> +		}
> +	} else
> +		tu->flags |= TP_FLAG_PROFILE;
> 
>  	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
> 
> -	tu->flags |= flag;
> -	tu->consumer.filter = filter;
> -	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
> -	if (ret)
> -		tu->flags &= ~flag;
> +	if (!enabled) {
> +		tu->consumer.filter = filter;
> +		ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
> +		if (ret)
> +			tu->flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
> +	}
> 
> + out_unlock:
> +	mutex_unlock(&uprobe_enable_lock);
>  	return ret;
>  }
> 
> -static void probe_event_disable(struct trace_uprobe *tu, int flag)
> +static int
> +trace_uprobe_file_index(struct trace_uprobe *tu, struct ftrace_event_file *file)
>  {
> -	if (!is_trace_uprobe_enabled(tu))
> -		return;
> +	struct ftrace_event_file **files;
> +	int i;
> +
> +	/*
> +	 * Since all tu->files updater is protected by uprobe_enable_lock,
> +	 * we don't need to lock an rcu_read_lock.
> +	 */
> +	files = rcu_dereference_raw(tu->files);
> +	if (files) {
> +		for (i = 0; files[i]; i++)
> +			if (files[i] == file)
> +				return i;
> +	}
> +
> +	return -1;
> +}
> +
> +static void
> +probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file)
> +{
> +	mutex_lock(&uprobe_enable_lock);
> +
> +	if (file) {
> +		struct ftrace_event_file **new, **old;
> +		int n = trace_uprobe_nr_files(tu);
> +		int i, j;
> +
> +		old = rcu_dereference_raw(tu->files);
> +		if (n == 0 || trace_uprobe_file_index(tu, file) < 0)
> +			goto out_unlock;
> +
> +		if (n == 1) {	/* Remove the last file */
> +			tu->flags &= ~TP_FLAG_TRACE;
> +			new = NULL;
> +		} else {
> +			new = kzalloc(n * sizeof(struct ftrace_event_file *),
> +				      GFP_KERNEL);
> +			if (!new)
> +				goto out_unlock;
> +
> +			/* This copy & check loop copies the NULL stopper too */
> +			for (i = 0, j = 0; j < n && i < n + 1; i++)
> +				if (old[i] != file)
> +					new[j++] = old[i];
> +		}
> +
> +		rcu_assign_pointer(tu->files, new);
> +
> +		/* Make sure the probe is done with old files */
> +		synchronize_sched();
> +		kfree(old);
> +	} else
> +		tu->flags &= ~TP_FLAG_PROFILE;
> +
> 
>  	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
> 
> -	uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
> -	tu->flags &= ~flag;
> +	if (!is_trace_uprobe_enabled(tu))
> +		uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
> +
> + out_unlock:
> +	mutex_unlock(&uprobe_enable_lock);
>  }
> 
>  static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
> @@ -869,21 +1007,22 @@ static
>  int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
>  {
>  	struct trace_uprobe *tu = event->data;
> +	struct ftrace_event_file *file = data;
> 
>  	switch (type) {
>  	case TRACE_REG_REGISTER:
> -		return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
> +		return probe_event_enable(tu, file, NULL);
> 
>  	case TRACE_REG_UNREGISTER:
> -		probe_event_disable(tu, TP_FLAG_TRACE);
> +		probe_event_disable(tu, file);
>  		return 0;
> 
>  #ifdef CONFIG_PERF_EVENTS
>  	case TRACE_REG_PERF_REGISTER:
> -		return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
> +		return probe_event_enable(tu, NULL, uprobe_perf_filter);
> 
>  	case TRACE_REG_PERF_UNREGISTER:
> -		probe_event_disable(tu, TP_FLAG_PROFILE);
> +		probe_event_disable(tu, NULL);
>  		return 0;
> 
>  	case TRACE_REG_PERF_OPEN:
> 


-- 
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com




* Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-14 13:21 ` Masami Hiramatsu
@ 2013-06-14 13:51   ` Oleg Nesterov
  2013-06-14 14:09     ` Oleg Nesterov
  2013-06-14 15:31   ` Steven Rostedt
  1 sibling, 1 reply; 19+ messages in thread
From: Oleg Nesterov @ 2013-06-14 13:51 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: zhangwei(Jovi),
	Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
	Srikar Dronamraju, linux-kernel, yrl.pp-manager.tt

On 06/14, Masami Hiramatsu wrote:
>
> Srikar, Oleg, could you check that?

I'll definitely try to read this series on Monday.

Thanks,

Oleg.



* Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-14 13:51   ` Oleg Nesterov
@ 2013-06-14 14:09     ` Oleg Nesterov
  0 siblings, 0 replies; 19+ messages in thread
From: Oleg Nesterov @ 2013-06-14 14:09 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: zhangwei(Jovi),
	Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
	Srikar Dronamraju, linux-kernel, yrl.pp-manager.tt

On 06/14, Oleg Nesterov wrote:
>
> On 06/14, Masami Hiramatsu wrote:
> >
> > Srikar, Oleg, could you check that?
>
> I'll definitely try to read this series on Monday.

Yes, but Masami is certainly right: synchronize_sched() can't help
without an rcu read lock in uprobe_trace_func() etc.

Also, probe_event_enable() could use krealloc(), but this is minor.

Oleg.



* Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-14  1:44 [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer zhangwei(Jovi)
  2013-06-14 13:21 ` Masami Hiramatsu
@ 2013-06-14 14:44 ` Oleg Nesterov
  2013-06-14 16:04   ` ftrace multibuffer && rcu (Was: tracing/uprobes: Support ftrace_event_file base multibuffer) Oleg Nesterov
  2013-06-20 16:43   ` [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer Oleg Nesterov
  1 sibling, 2 replies; 19+ messages in thread
From: Oleg Nesterov @ 2013-06-14 14:44 UTC (permalink / raw)
  To: zhangwei(Jovi)
  Cc: Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
	Masami Hiramatsu, Srikar Dronamraju, linux-kernel

On 06/14, zhangwei(Jovi) wrote:
>
> Support multi-buffer on uprobe-based dynamic events by
> using ftrace_event_file.
>
> The code change is based on kprobe-based dynamic events
> multibuffer support work commited by Masami(commit 41a7dd420c)

And the change in probe_event_enable() doesn't look right, but
let me repeat that I haven't read the patch carefully yet.

> +static DEFINE_MUTEX(uprobe_enable_lock);
> +
>  static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
>  {
>  	return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
> @@ -607,33 +655,123 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self,
>  				struct mm_struct *mm);
> 
>  static int
> -probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
> +probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
> +		   filter_func_t filter)
>  {
> +	int enabled = 0;
>  	int ret = 0;
> 
> +	mutex_lock(&uprobe_enable_lock);

Do we really need this? Can't we rely on the event_mutex held by the caller?

>  	if (is_trace_uprobe_enabled(tu))
> -		return -EINTR;
> +		enabled = 1;
> +
> +	if (file) {
> +		struct ftrace_event_file **new, **old;
> +		int n = trace_uprobe_nr_files(tu);
> +
> +		old = rcu_dereference_raw(tu->files);
> +		/* 1 is for new one and 1 is for stopper */
> +		new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *),
> +			      GFP_KERNEL);
> +		if (!new) {
> +			ret = -ENOMEM;
> +			goto out_unlock;
> +		}
> +		memcpy(new, old, n * sizeof(struct ftrace_event_file *));
> +		new[n] = file;
> +		/* The last one keeps a NULL */
> +
> +		rcu_assign_pointer(tu->files, new);
> +		tu->flags |= TP_FLAG_TRACE;
> +
> +		if (old) {
> +			/* Make sure the probe is done with old files */
> +			synchronize_sched();
> +			kfree(old);
> +		}
> +	} else
> +		tu->flags |= TP_FLAG_PROFILE;

So it can set both TP_FLAG_TRACE and TP_FLAG_PROFILE, yes?

If yes, this is not right, at least until we change the pre-filtering.
Currently TP_FLAG_TRACE/TP_FLAG_PROFILE are mutually exclusive.

I think it makes sense to remove this limitation anyway, and in fact
I do not remember why I didn't do this... But this needs a separate
change.

Oleg.



* Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-14 13:21 ` Masami Hiramatsu
  2013-06-14 13:51   ` Oleg Nesterov
@ 2013-06-14 15:31   ` Steven Rostedt
  2013-06-14 16:21     ` Paul E. McKenney
  1 sibling, 1 reply; 19+ messages in thread
From: Steven Rostedt @ 2013-06-14 15:31 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: zhangwei(Jovi),
	Frederic Weisbecker, Ingo Molnar, Oleg Nesterov,
	Srikar Dronamraju, linux-kernel, yrl.pp-manager.tt,
	Paul E. McKenney

On Fri, 2013-06-14 at 22:21 +0900, Masami Hiramatsu wrote:
> (2013/06/14 10:44), zhangwei(Jovi) wrote:
> > Support multi-buffer on uprobe-based dynamic events by
> > using ftrace_event_file.
> > 
> > The code change is based on kprobe-based dynamic events
> > multibuffer support work commited by Masami(commit 41a7dd420c)
> 
> I'm not so sure that rcu_dereference_raw() can ensure the safety
> of accessing pointer in uprobe handler. ( since kprobes disables
> irq and preempt, it could use rcu.)

Correct.

> 
> Srikar, Oleg, could you check that?
> 
> Thank you,
> 
> > 
> > Signed-off-by: zhangwei(Jovi) <jovi.zhangwei@huawei.com>
> > Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
> > Cc: Frederic Weisbecker <fweisbec@gmail.com>
> > Cc: Oleg Nesterov <oleg@redhat.com>
> > Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
> > ---
> >  kernel/trace/trace_uprobe.c |  183 +++++++++++++++++++++++++++++++++++++------
> >  1 file changed, 161 insertions(+), 22 deletions(-)
> > 
> > diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
> > index d5d0cd3..31f08d6 100644
> > --- a/kernel/trace/trace_uprobe.c
> > +++ b/kernel/trace/trace_uprobe.c
> > @@ -53,6 +53,7 @@ struct trace_uprobe {
> >  	struct list_head		list;
> >  	struct ftrace_event_class	class;
> >  	struct ftrace_event_call	call;
> > +	struct ftrace_event_file * __rcu *files;
> >  	struct trace_uprobe_filter	filter;
> >  	struct uprobe_consumer		consumer;
> >  	struct inode			*inode;
> > @@ -513,7 +514,8 @@ static const struct file_operations uprobe_profile_ops = {
> >  };
> > 
> >  static void uprobe_trace_print(struct trace_uprobe *tu,
> > -				unsigned long func, struct pt_regs *regs)
> > +				unsigned long func, struct pt_regs *regs,
> > +				struct ftrace_event_file *ftrace_file)
> >  {
> >  	struct uprobe_trace_entry_head *entry;
> >  	struct ring_buffer_event *event;
> > @@ -522,9 +524,15 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
> >  	int size, i;
> >  	struct ftrace_event_call *call = &tu->call;
> > 
> > +	WARN_ON(call != ftrace_file->event_call);
> > +
> > +	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
> > +		return;
> > +
> >  	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
> > -	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
> > -						  size + tu->size, 0, 0);
> > +	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
> > +						call->event.type,
> > +						size + tu->size, 0, 0);
> >  	if (!event)
> >  		return;
> > 
> > @@ -548,15 +556,35 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
> >  /* uprobe handler */
> >  static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
> >  {
> > -	if (!is_ret_probe(tu))
> > -		uprobe_trace_print(tu, 0, regs);
> > +	struct ftrace_event_file **file;
> > +
> > +	if (is_ret_probe(tu))
> > +		return 0;
> > +
> > +	file = rcu_dereference_raw(tu->files);

Why are you using rcu_dereference_raw() and not rcu_dereference(). The
_raw() is a bit special, and unless you know what you are doing with RCU
here, don't use it.

As I see you using rcu_dereference_raw() all over this patch, along with
mutexes, I believe that you are not using RCU correctly here.

-- Steve

> > +	if (unlikely(!file))
> > +		return 0;
> > +
> > +	while (*file) {
> > +		uprobe_trace_print(tu, 0, regs, *file);
> > +		file++;
> > +	}
> > +
> >  	return 0;
> >  }
> > 
> >  static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
> >  				struct pt_regs *regs)
> >  {
> > -	uprobe_trace_print(tu, func, regs);
> > +	struct ftrace_event_file **file = rcu_dereference_raw(tu->files);
> > +
> > +	if (unlikely(!file))
> > +		return;
> > +
> > +	while (*file) {
> > +		uprobe_trace_print(tu, func, regs, *file);
> > +		file++;
> > +	}
> >  }
> > 
> >  /* Event entry printers */
> > @@ -597,6 +625,26 @@ partial:
> >  	return TRACE_TYPE_PARTIAL_LINE;
> >  }
> > 
> > +
> > +static int trace_uprobe_nr_files(struct trace_uprobe *tu)
> > +{
> > +	struct ftrace_event_file **file;
> > +	int ret = 0;
> > +
> > +	/*
> > +	 * Since all tu->files updater is protected by uprobe_enable_lock,
> > +	 * we don't need to lock an rcu_read_lock.
> > +	 */
> > +	file = rcu_dereference_raw(tu->files);
> > +	if (file)
> > +		while (*(file++))
> > +			ret++;
> > +
> > +	return ret;
> > +}
> > +
> > +static DEFINE_MUTEX(uprobe_enable_lock);
> > +
> >  static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
> >  {
> >  	return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
> > @@ -607,33 +655,123 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self,
> >  				struct mm_struct *mm);
> > 
> >  static int
> > -probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
> > +probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
> > +		   filter_func_t filter)
> >  {
> > +	int enabled = 0;
> >  	int ret = 0;
> > 
> > +	mutex_lock(&uprobe_enable_lock);
> > +
> >  	if (is_trace_uprobe_enabled(tu))
> > -		return -EINTR;
> > +		enabled = 1;
> > +
> > +	if (file) {
> > +		struct ftrace_event_file **new, **old;
> > +		int n = trace_uprobe_nr_files(tu);
> > +
> > +		old = rcu_dereference_raw(tu->files);
> > +		/* 1 is for new one and 1 is for stopper */
> > +		new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *),
> > +			      GFP_KERNEL);
> > +		if (!new) {
> > +			ret = -ENOMEM;
> > +			goto out_unlock;
> > +		}
> > +		memcpy(new, old, n * sizeof(struct ftrace_event_file *));
> > +		new[n] = file;
> > +		/* The last one keeps a NULL */
> > +
> > +		rcu_assign_pointer(tu->files, new);
> > +		tu->flags |= TP_FLAG_TRACE;
> > +
> > +		if (old) {
> > +			/* Make sure the probe is done with old files */
> > +			synchronize_sched();
> > +			kfree(old);
> > +		}
> > +	} else
> > +		tu->flags |= TP_FLAG_PROFILE;
> > 
> >  	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
> > 
> > -	tu->flags |= flag;
> > -	tu->consumer.filter = filter;
> > -	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
> > -	if (ret)
> > -		tu->flags &= ~flag;
> > +	if (!enabled) {
> > +		tu->consumer.filter = filter;
> > +		ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
> > +		if (ret)
> > +			tu->flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
> > +	}
> > 
> > + out_unlock:
> > +	mutex_unlock(&uprobe_enable_lock);
> >  	return ret;
> >  }
> > 
> > -static void probe_event_disable(struct trace_uprobe *tu, int flag)
> > +static int
> > +trace_uprobe_file_index(struct trace_uprobe *tu, struct ftrace_event_file *file)
> >  {
> > -	if (!is_trace_uprobe_enabled(tu))
> > -		return;
> > +	struct ftrace_event_file **files;
> > +	int i;
> > +
> > +	/*
> > +	 * Since all tu->files updater is protected by uprobe_enable_lock,
> > +	 * we don't need to lock an rcu_read_lock.
> > +	 */
> > +	files = rcu_dereference_raw(tu->files);
> > +	if (files) {
> > +		for (i = 0; files[i]; i++)
> > +			if (files[i] == file)
> > +				return i;
> > +	}
> > +
> > +	return -1;
> > +}
> > +
> > +static void
> > +probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file)
> > +{
> > +	mutex_lock(&uprobe_enable_lock);
> > +
> > +	if (file) {
> > +		struct ftrace_event_file **new, **old;
> > +		int n = trace_uprobe_nr_files(tu);
> > +		int i, j;
> > +
> > +		old = rcu_dereference_raw(tu->files);
> > +		if (n == 0 || trace_uprobe_file_index(tu, file) < 0)
> > +			goto out_unlock;
> > +
> > +		if (n == 1) {	/* Remove the last file */
> > +			tu->flags &= ~TP_FLAG_TRACE;
> > +			new = NULL;
> > +		} else {
> > +			new = kzalloc(n * sizeof(struct ftrace_event_file *),
> > +				      GFP_KERNEL);
> > +			if (!new)
> > +				goto out_unlock;
> > +
> > +			/* This copy & check loop copies the NULL stopper too */
> > +			for (i = 0, j = 0; j < n && i < n + 1; i++)
> > +				if (old[i] != file)
> > +					new[j++] = old[i];
> > +		}
> > +
> > +		rcu_assign_pointer(tu->files, new);
> > +
> > +		/* Make sure the probe is done with old files */
> > +		synchronize_sched();
> > +		kfree(old);
> > +	} else
> > +		tu->flags &= ~TP_FLAG_PROFILE;
> > +
> > 
> >  	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
> > 
> > -	uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
> > -	tu->flags &= ~flag;
> > +	if (!is_trace_uprobe_enabled(tu))
> > +		uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
> > +
> > + out_unlock:
> > +	mutex_unlock(&uprobe_enable_lock);
> >  }
> > 
> >  static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
> > @@ -869,21 +1007,22 @@ static
> >  int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
> >  {
> >  	struct trace_uprobe *tu = event->data;
> > +	struct ftrace_event_file *file = data;
> > 
> >  	switch (type) {
> >  	case TRACE_REG_REGISTER:
> > -		return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
> > +		return probe_event_enable(tu, file, NULL);
> > 
> >  	case TRACE_REG_UNREGISTER:
> > -		probe_event_disable(tu, TP_FLAG_TRACE);
> > +		probe_event_disable(tu, file);
> >  		return 0;
> > 
> >  #ifdef CONFIG_PERF_EVENTS
> >  	case TRACE_REG_PERF_REGISTER:
> > -		return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
> > +		return probe_event_enable(tu, NULL, uprobe_perf_filter);
> > 
> >  	case TRACE_REG_PERF_UNREGISTER:
> > -		probe_event_disable(tu, TP_FLAG_PROFILE);
> > +		probe_event_disable(tu, NULL);
> >  		return 0;
> > 
> >  	case TRACE_REG_PERF_OPEN:
> > 
> 
> 




* ftrace multibuffer && rcu (Was: tracing/uprobes: Support ftrace_event_file base multibuffer)
  2013-06-14 14:44 ` Oleg Nesterov
@ 2013-06-14 16:04   ` Oleg Nesterov
  2013-06-14 16:18     ` Oleg Nesterov
  2013-06-14 16:26     ` Steven Rostedt
  2013-06-20 16:43   ` [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer Oleg Nesterov
  1 sibling, 2 replies; 19+ messages in thread
From: Oleg Nesterov @ 2013-06-14 16:04 UTC (permalink / raw)
  To: zhangwei(Jovi), Steven Rostedt, Masami Hiramatsu
  Cc: Frederic Weisbecker, Ingo Molnar, Srikar Dronamraju, linux-kernel

On 06/14, Oleg Nesterov wrote:
>
> > -probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
> > +probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
> > +		   filter_func_t filter)
> >  {
> > +	int enabled = 0;
> >  	int ret = 0;
> >
> > +	mutex_lock(&uprobe_enable_lock);
>
> Do we really need this? Can't we really on mutex_event hold by the caller?

Looks like kprobes does not need probe_enable_lock either.

Steven, Masami, I just looked at this new multibuffer code. Not sure
I really understand it, but it seems that ftrace_event_file should
help its users.

Let's look at enable_trace_probe(). First, the "ftrace_event_file **files"
array and its add/remove code don't look very nice; a list_head looks more
convenient.

But the main problem is that synchronize_sched() is slow and it is called
under the global event_mutex.

So perhaps something like below (untested) makes sense? With this patch
we can trivially convert trace_kprobe.c to use list_add/del/each_rcu.

What do you think?

Oleg.


--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -294,8 +294,32 @@ struct ftrace_event_file {
 	 */
 	unsigned long		flags;
 	atomic_t		sm_ref;	/* soft-mode reference counter */
+	atomic_t		refcnt;
+	struct rcu_head		rcu;
 };
 
+struct event_file_link {
+	struct ftrace_event_file	*file;
+	struct list_head		list;
+	struct rcu_head			rcu;
+};
+
+extern void rcu_free_event_file_link(struct rcu_head *rcu);
+
+static inline struct event_file_link *
+alloc_event_file_link(struct ftrace_event_file *file)
+{
+	struct event_file_link *link = kmalloc(sizeof(*link), GFP_KERNEL);
+	if (link)
+		link->file = file;
+	return link;
+}
+
+static inline void free_event_file_link(struct event_file_link *link)
+{
+	call_rcu(&link->rcu, rcu_free_event_file_link);
+}
+
 #define __TRACE_EVENT_FLAGS(name, value)				\
 	static int __init trace_init_flags_##name(void)			\
 	{								\
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1542,6 +1542,7 @@ trace_create_new_event(struct ftrace_event_call *call,
 	file->event_call = call;
 	file->tr = tr;
 	atomic_set(&file->sm_ref, 0);
+	atomic_set(&file->refcnt, 1);
 	list_add(&file->list, &tr->events);
 
 	return file;
@@ -2182,6 +2183,17 @@ __trace_early_add_events(struct trace_array *tr)
 	}
 }
 
+static void put_event_file(struct ftrace_event_file *file)
+{
+	if (atomic_dec_and_test(&file->refcnt))
+		kmem_cache_free(file_cachep, file);
+}
+
+static void delayed_put_event_file(struct rcu_head *rcu)
+{
+	put_event_file(container_of(rcu, struct ftrace_event_file, rcu));
+}
+
 /* Remove the event directory structure for a trace directory. */
 static void
 __trace_remove_event_dirs(struct trace_array *tr)
@@ -2192,10 +2204,18 @@ __trace_remove_event_dirs(struct trace_array *tr)
 		list_del(&file->list);
 		debugfs_remove_recursive(file->dir);
 		remove_subsystem(file->system);
-		kmem_cache_free(file_cachep, file);
+		call_rcu(&file->rcu, delayed_put_event_file);
 	}
 }
 
+void rcu_free_event_file_link(struct rcu_head *rcu)
+{
+	struct event_file_link *link =
+			container_of(rcu, struct event_file_link, rcu);
+	put_event_file(link->file);
+	kfree(link);
+}
+
 static void
 __add_event_to_tracers(struct ftrace_event_call *call,
 		       struct ftrace_module_file_ops *file_ops)
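
For illustration only (hypothetical, not part of the draft above), the
reader side after such a conversion could look roughly like this, assuming
the per-probe file array is replaced by a list_head of event_file_link
nodes; names such as trace_probe, tp->files and __kprobe_trace_func() are
used loosely here:

	/*
	 * Hypothetical handler-side walk.  Because free_event_file_link()
	 * uses call_rcu(), the walk needs rcu_read_lock() even in contexts
	 * that already run with preemption disabled.
	 */
	static void kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs)
	{
		struct event_file_link *link;

		rcu_read_lock();
		list_for_each_entry_rcu(link, &tp->files, list)
			__kprobe_trace_func(tp, regs, link->file);
		rcu_read_unlock();
	}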



* Re: ftrace multibuffer && rcu (Was: tracing/uprobes: Support ftrace_event_file base multibuffer)
  2013-06-14 16:04   ` ftrace multibuffer && rcu (Was: tracing/uprobes: Support ftrace_event_file base multibuffer) Oleg Nesterov
@ 2013-06-14 16:18     ` Oleg Nesterov
  2013-06-14 16:26     ` Steven Rostedt
  1 sibling, 0 replies; 19+ messages in thread
From: Oleg Nesterov @ 2013-06-14 16:18 UTC (permalink / raw)
  To: zhangwei(Jovi), Steven Rostedt, Masami Hiramatsu
  Cc: Frederic Weisbecker, Ingo Molnar, Srikar Dronamraju, linux-kernel

On 06/14, Oleg Nesterov wrote:
>
> So perhaps something like below (untested) makes sense? With this patch
> we can trivially convert trace_kprobe.c to use list_add/del/each_rcu.

Forgot to mention: the patch is obviously incomplete; __kprobe_trace_func()
can still see a "dead" file even though its memory can't go away. But this
looks fixable.

> +static inline struct event_file_link *
> +alloc_event_file_link(struct ftrace_event_file *file)
> +{
> +	struct event_file_link *link = kmalloc(sizeof(*link), GFP_KERNEL);
> +	if (link)
> +		link->file = file;
> +	return link;
> +}

And this lacks an atomic_inc(&file->refcnt).

In short, this is just to explain what I meant; the actual change should
probably differ.
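
For example, with the refcount added, the intended usage might look roughly
like this (hypothetical sketch; list names as in the previous mail,
everything called with event_mutex held):

	struct event_file_link *link;

	/* enable path */
	link = alloc_event_file_link(file);
	if (!link)
		return -ENOMEM;
	atomic_inc(&file->refcnt);	/* or do this inside the helper */
	list_add_tail_rcu(&link->list, &tp->files);

	/* disable path */
	list_for_each_entry(link, &tp->files, list) {
		if (link->file == file) {
			list_del_rcu(&link->list);
			/* drops the file reference from the RCU callback */
			free_event_file_link(link);
			break;
		}
	}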

Oleg.



* Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-14 15:31   ` Steven Rostedt
@ 2013-06-14 16:21     ` Paul E. McKenney
  2013-06-14 16:33       ` Steven Rostedt
  0 siblings, 1 reply; 19+ messages in thread
From: Paul E. McKenney @ 2013-06-14 16:21 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Masami Hiramatsu, zhangwei(Jovi),
	Frederic Weisbecker, Ingo Molnar, Oleg Nesterov,
	Srikar Dronamraju, linux-kernel, yrl.pp-manager.tt

On Fri, Jun 14, 2013 at 11:31:58AM -0400, Steven Rostedt wrote:
> On Fri, 2013-06-14 at 22:21 +0900, Masami Hiramatsu wrote:
> > (2013/06/14 10:44), zhangwei(Jovi) wrote:
> > > Support multi-buffer on uprobe-based dynamic events by
> > > using ftrace_event_file.
> > > 
> > > The code change is based on kprobe-based dynamic events
> > > multibuffer support work commited by Masami(commit 41a7dd420c)
> > 
> > I'm not so sure that rcu_dereference_raw() can ensure the safety
> > of accessing pointer in uprobe handler. ( since kprobes disables
> > irq and preempt, it could use rcu.)
> 
> Correct.
> 
> > 
> > Srikar, Oleg, could you check that?
> > 
> > Thank you,
> > 
> > > 
> > > Signed-off-by: zhangwei(Jovi) <jovi.zhangwei@huawei.com>
> > > Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
> > > Cc: Frederic Weisbecker <fweisbec@gmail.com>
> > > Cc: Oleg Nesterov <oleg@redhat.com>
> > > Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
> > > ---
> > >  kernel/trace/trace_uprobe.c |  183 +++++++++++++++++++++++++++++++++++++------
> > >  1 file changed, 161 insertions(+), 22 deletions(-)
> > > 
> > > diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
> > > index d5d0cd3..31f08d6 100644
> > > --- a/kernel/trace/trace_uprobe.c
> > > +++ b/kernel/trace/trace_uprobe.c
> > > @@ -53,6 +53,7 @@ struct trace_uprobe {
> > >  	struct list_head		list;
> > >  	struct ftrace_event_class	class;
> > >  	struct ftrace_event_call	call;
> > > +	struct ftrace_event_file * __rcu *files;
> > >  	struct trace_uprobe_filter	filter;
> > >  	struct uprobe_consumer		consumer;
> > >  	struct inode			*inode;
> > > @@ -513,7 +514,8 @@ static const struct file_operations uprobe_profile_ops = {
> > >  };
> > > 
> > >  static void uprobe_trace_print(struct trace_uprobe *tu,
> > > -				unsigned long func, struct pt_regs *regs)
> > > +				unsigned long func, struct pt_regs *regs,
> > > +				struct ftrace_event_file *ftrace_file)
> > >  {
> > >  	struct uprobe_trace_entry_head *entry;
> > >  	struct ring_buffer_event *event;
> > > @@ -522,9 +524,15 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
> > >  	int size, i;
> > >  	struct ftrace_event_call *call = &tu->call;
> > > 
> > > +	WARN_ON(call != ftrace_file->event_call);
> > > +
> > > +	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
> > > +		return;
> > > +
> > >  	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
> > > -	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
> > > -						  size + tu->size, 0, 0);
> > > +	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
> > > +						call->event.type,
> > > +						size + tu->size, 0, 0);
> > >  	if (!event)
> > >  		return;
> > > 
> > > @@ -548,15 +556,35 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
> > >  /* uprobe handler */
> > >  static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
> > >  {
> > > -	if (!is_ret_probe(tu))
> > > -		uprobe_trace_print(tu, 0, regs);
> > > +	struct ftrace_event_file **file;
> > > +
> > > +	if (is_ret_probe(tu))
> > > +		return 0;
> > > +
> > > +	file = rcu_dereference_raw(tu->files);
> 
> Why are you using rcu_dereference_raw() and not rcu_dereference(). The
> _raw() is a bit special, and unless you know what you are doing with RCU
> here, don't use it.
> 
> As I see you using rcu_dereference_raw() all over this patch, along with
> mutexes, I believe that you are not using RCU correctly here.

If irqs and preempt are disabled, I suggest using rcu_dereference_sched().
That is what it is there for.  ;-)

							Thanx, Paul

> -- Steve
> 
> > > +	if (unlikely(!file))
> > > +		return 0;
> > > +
> > > +	while (*file) {
> > > +		uprobe_trace_print(tu, 0, regs, *file);
> > > +		file++;
> > > +	}
> > > +
> > >  	return 0;
> > >  }
> > > 
> > >  static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
> > >  				struct pt_regs *regs)
> > >  {
> > > -	uprobe_trace_print(tu, func, regs);
> > > +	struct ftrace_event_file **file = rcu_dereference_raw(tu->files);
> > > +
> > > +	if (unlikely(!file))
> > > +		return;
> > > +
> > > +	while (*file) {
> > > +		uprobe_trace_print(tu, func, regs, *file);
> > > +		file++;
> > > +	}
> > >  }
> > > 
> > >  /* Event entry printers */
> > > @@ -597,6 +625,26 @@ partial:
> > >  	return TRACE_TYPE_PARTIAL_LINE;
> > >  }
> > > 
> > > +
> > > +static int trace_uprobe_nr_files(struct trace_uprobe *tu)
> > > +{
> > > +	struct ftrace_event_file **file;
> > > +	int ret = 0;
> > > +
> > > +	/*
> > > +	 * Since all tu->files updater is protected by uprobe_enable_lock,
> > > +	 * we don't need to lock an rcu_read_lock.
> > > +	 */
> > > +	file = rcu_dereference_raw(tu->files);
> > > +	if (file)
> > > +		while (*(file++))
> > > +			ret++;
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static DEFINE_MUTEX(uprobe_enable_lock);
> > > +
> > >  static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
> > >  {
> > >  	return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
> > > @@ -607,33 +655,123 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self,
> > >  				struct mm_struct *mm);
> > > 
> > >  static int
> > > -probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
> > > +probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
> > > +		   filter_func_t filter)
> > >  {
> > > +	int enabled = 0;
> > >  	int ret = 0;
> > > 
> > > +	mutex_lock(&uprobe_enable_lock);
> > > +
> > >  	if (is_trace_uprobe_enabled(tu))
> > > -		return -EINTR;
> > > +		enabled = 1;
> > > +
> > > +	if (file) {
> > > +		struct ftrace_event_file **new, **old;
> > > +		int n = trace_uprobe_nr_files(tu);
> > > +
> > > +		old = rcu_dereference_raw(tu->files);
> > > +		/* 1 is for new one and 1 is for stopper */
> > > +		new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *),
> > > +			      GFP_KERNEL);
> > > +		if (!new) {
> > > +			ret = -ENOMEM;
> > > +			goto out_unlock;
> > > +		}
> > > +		memcpy(new, old, n * sizeof(struct ftrace_event_file *));
> > > +		new[n] = file;
> > > +		/* The last one keeps a NULL */
> > > +
> > > +		rcu_assign_pointer(tu->files, new);
> > > +		tu->flags |= TP_FLAG_TRACE;
> > > +
> > > +		if (old) {
> > > +			/* Make sure the probe is done with old files */
> > > +			synchronize_sched();
> > > +			kfree(old);
> > > +		}
> > > +	} else
> > > +		tu->flags |= TP_FLAG_PROFILE;
> > > 
> > >  	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
> > > 
> > > -	tu->flags |= flag;
> > > -	tu->consumer.filter = filter;
> > > -	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
> > > -	if (ret)
> > > -		tu->flags &= ~flag;
> > > +	if (!enabled) {
> > > +		tu->consumer.filter = filter;
> > > +		ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
> > > +		if (ret)
> > > +			tu->flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
> > > +	}
> > > 
> > > + out_unlock:
> > > +	mutex_unlock(&uprobe_enable_lock);
> > >  	return ret;
> > >  }
> > > 
> > > -static void probe_event_disable(struct trace_uprobe *tu, int flag)
> > > +static int
> > > +trace_uprobe_file_index(struct trace_uprobe *tu, struct ftrace_event_file *file)
> > >  {
> > > -	if (!is_trace_uprobe_enabled(tu))
> > > -		return;
> > > +	struct ftrace_event_file **files;
> > > +	int i;
> > > +
> > > +	/*
> > > +	 * Since all tu->files updater is protected by uprobe_enable_lock,
> > > +	 * we don't need to lock an rcu_read_lock.
> > > +	 */
> > > +	files = rcu_dereference_raw(tu->files);
> > > +	if (files) {
> > > +		for (i = 0; files[i]; i++)
> > > +			if (files[i] == file)
> > > +				return i;
> > > +	}
> > > +
> > > +	return -1;
> > > +}
> > > +
> > > +static void
> > > +probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file)
> > > +{
> > > +	mutex_lock(&uprobe_enable_lock);
> > > +
> > > +	if (file) {
> > > +		struct ftrace_event_file **new, **old;
> > > +		int n = trace_uprobe_nr_files(tu);
> > > +		int i, j;
> > > +
> > > +		old = rcu_dereference_raw(tu->files);
> > > +		if (n == 0 || trace_uprobe_file_index(tu, file) < 0)
> > > +			goto out_unlock;
> > > +
> > > +		if (n == 1) {	/* Remove the last file */
> > > +			tu->flags &= ~TP_FLAG_TRACE;
> > > +			new = NULL;
> > > +		} else {
> > > +			new = kzalloc(n * sizeof(struct ftrace_event_file *),
> > > +				      GFP_KERNEL);
> > > +			if (!new)
> > > +				goto out_unlock;
> > > +
> > > +			/* This copy & check loop copies the NULL stopper too */
> > > +			for (i = 0, j = 0; j < n && i < n + 1; i++)
> > > +				if (old[i] != file)
> > > +					new[j++] = old[i];
> > > +		}
> > > +
> > > +		rcu_assign_pointer(tu->files, new);
> > > +
> > > +		/* Make sure the probe is done with old files */
> > > +		synchronize_sched();
> > > +		kfree(old);
> > > +	} else
> > > +		tu->flags &= ~TP_FLAG_PROFILE;
> > > +
> > > 
> > >  	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
> > > 
> > > -	uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
> > > -	tu->flags &= ~flag;
> > > +	if (!is_trace_uprobe_enabled(tu))
> > > +		uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
> > > +
> > > + out_unlock:
> > > +	mutex_unlock(&uprobe_enable_lock);
> > >  }
> > > 
> > >  static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
> > > @@ -869,21 +1007,22 @@ static
> > >  int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
> > >  {
> > >  	struct trace_uprobe *tu = event->data;
> > > +	struct ftrace_event_file *file = data;
> > > 
> > >  	switch (type) {
> > >  	case TRACE_REG_REGISTER:
> > > -		return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
> > > +		return probe_event_enable(tu, file, NULL);
> > > 
> > >  	case TRACE_REG_UNREGISTER:
> > > -		probe_event_disable(tu, TP_FLAG_TRACE);
> > > +		probe_event_disable(tu, file);
> > >  		return 0;
> > > 
> > >  #ifdef CONFIG_PERF_EVENTS
> > >  	case TRACE_REG_PERF_REGISTER:
> > > -		return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
> > > +		return probe_event_enable(tu, NULL, uprobe_perf_filter);
> > > 
> > >  	case TRACE_REG_PERF_UNREGISTER:
> > > -		probe_event_disable(tu, TP_FLAG_PROFILE);
> > > +		probe_event_disable(tu, NULL);
> > >  		return 0;
> > > 
> > >  	case TRACE_REG_PERF_OPEN:
> > > 
> > 
> > 
> 
> 



* Re: ftrace multibuffer && rcu (Was: tracing/uprobes: Support ftrace_event_file base multibuffer)
  2013-06-14 16:04   ` ftrace multibuffer && rcu (Was: tracing/uprobes: Support ftrace_event_file base multibuffer) Oleg Nesterov
  2013-06-14 16:18     ` Oleg Nesterov
@ 2013-06-14 16:26     ` Steven Rostedt
  2013-06-14 17:02       ` Oleg Nesterov
  1 sibling, 1 reply; 19+ messages in thread
From: Steven Rostedt @ 2013-06-14 16:26 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: zhangwei(Jovi),
	Masami Hiramatsu, Frederic Weisbecker, Ingo Molnar,
	Srikar Dronamraju, linux-kernel

On Fri, 2013-06-14 at 18:04 +0200, Oleg Nesterov wrote:
> On 06/14, Oleg Nesterov wrote:
> >
> > > -probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
> > > +probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
> > > +		   filter_func_t filter)
> > >  {
> > > +	int enabled = 0;
> > >  	int ret = 0;
> > >
> > > +	mutex_lock(&uprobe_enable_lock);
> >
> > Do we really need this? Can't we really on mutex_event hold by the caller?
> 
> Looks like, kprobes do not need probe_enable_lock too.
> 
> Steven, Masami, I just looked at this new multibuffer code. Not sure
> I really understand it, but it seems that ftrace_event_file should
> help its users.
> 
> Lets look at enable_trace_probe(). Firstly, "ftrace_event_file **files"
> and the add/remove code doesn't look very nice, list_head looks more
> convenient.
> 
> But the main problem is, synchronize_sched() is slow and it is called
> under the global event_mutex.

But is that really an issue? event_mutex is used to add or remove
events, and this happens only when we load or unload a module, or add or
remove a probe. These are mostly user operations that take a relatively
long time to complete (but not relative to humans).


> 
> So perhaps something like below (untested) makes sense? With this patch
> we can trivially convert trace_kprobe.c to use list_add/del/each_rcu.
> 
> What do you think?
> 
> Oleg.
> 
> 
> --- a/include/linux/ftrace_event.h
> +++ b/include/linux/ftrace_event.h
> @@ -294,8 +294,32 @@ struct ftrace_event_file {
>  	 */
>  	unsigned long		flags;
>  	atomic_t		sm_ref;	/* soft-mode reference counter */
> +	atomic_t		refcnt;
> +	struct rcu_head		rcu;

I'd like to keep this struct as small as possible, as it is created for
every event we have. There are already over 900 events, and that number is
constantly growing.

Of course one can argue that it's still a relatively small number.

>  };
>  
> +struct event_file_link {
> +	struct ftrace_event_file	*file;
> +	struct list_head		list;
> +	struct rcu_head			rcu;
> +};
> +
> +extern void rcu_free_event_file_link(struct rcu_head *rcu);
> +
> +static inline struct event_file_link *
> +alloc_event_file_link(struct ftrace_event_file *file)
> +{
> +	struct event_file_link *link = kmalloc(sizeof(*link), GFP_KERNEL);
> +	if (link)
> +		link->file = file;
> +	return link;
> +}

I don't see where this is used.

> +
> +static inline void free_event_file_link(struct event_file_link *link)
> +{
> +	call_rcu(&link->rcu, rcu_free_event_file_link);
> +}
> +
>  #define __TRACE_EVENT_FLAGS(name, value)				\
>  	static int __init trace_init_flags_##name(void)			\
>  	{								\
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -1542,6 +1542,7 @@ trace_create_new_event(struct ftrace_event_call *call,
>  	file->event_call = call;
>  	file->tr = tr;
>  	atomic_set(&file->sm_ref, 0);
> +	atomic_set(&file->refcnt, 1);
>  	list_add(&file->list, &tr->events);
>  
>  	return file;
> @@ -2182,6 +2183,17 @@ __trace_early_add_events(struct trace_array *tr)
>  	}
>  }
>  
> +static void put_event_file(struct ftrace_event_file *file)
> +{
> +	if (atomic_dec_and_test(&file->refcnt))
> +		kmem_cache_free(file_cachep, file);
> +}
> +
> +static void delayed_put_event_file(struct rcu_head *rcu)
> +{
> +	put_event_file(container_of(rcu, struct ftrace_event_file, rcu));
> +}
> +
>  /* Remove the event directory structure for a trace directory. */
>  static void
>  __trace_remove_event_dirs(struct trace_array *tr)
> @@ -2192,10 +2204,18 @@ __trace_remove_event_dirs(struct trace_array *tr)
>  		list_del(&file->list);
>  		debugfs_remove_recursive(file->dir);
>  		remove_subsystem(file->system);
> -		kmem_cache_free(file_cachep, file);
> +		call_rcu(&file->rcu, delayed_put_event_file);

This would have to be used by module unloading as well.

Again, is it really a big deal if we do synchronize_sched() under the
event_mutex? It's not a critical lock.

-- Steve

>  	}
>  }
>  
> +void rcu_free_event_file_link(struct rcu_head *rcu)
> +{
> +	struct event_file_link *link =
> +			container_of(rcu, struct event_file_link, rcu);
> +	put_event_file(link->file);
> +	kfree(link);
> +}
> +
>  static void
>  __add_event_to_tracers(struct ftrace_event_call *call,
>  		       struct ftrace_module_file_ops *file_ops)




* Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-14 16:21     ` Paul E. McKenney
@ 2013-06-14 16:33       ` Steven Rostedt
  2013-06-14 17:25         ` Paul E. McKenney
  0 siblings, 1 reply; 19+ messages in thread
From: Steven Rostedt @ 2013-06-14 16:33 UTC (permalink / raw)
  To: paulmck
  Cc: Masami Hiramatsu, zhangwei(Jovi),
	Frederic Weisbecker, Ingo Molnar, Oleg Nesterov,
	Srikar Dronamraju, linux-kernel, yrl.pp-manager.tt

On Fri, 2013-06-14 at 09:21 -0700, Paul E. McKenney wrote:

> > > > @@ -548,15 +556,35 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
> > > >  /* uprobe handler */
> > > >  static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
> > > >  {
> > > > -	if (!is_ret_probe(tu))
> > > > -		uprobe_trace_print(tu, 0, regs);
> > > > +	struct ftrace_event_file **file;
> > > > +
> > > > +	if (is_ret_probe(tu))
> > > > +		return 0;
> > > > +
> > > > +	file = rcu_dereference_raw(tu->files);
> > 
> > Why are you using rcu_dereference_raw() and not rcu_dereference(). The
> > _raw() is a bit special, and unless you know what you are doing with RCU
> > here, don't use it.
> > 
> > As I see you using rcu_dereference_raw() all over this patch, along with
> > mutexes, I believe that you are not using RCU correctly here.
> 
> If irqs and preempt are disabled, I suggest using rcu_dereference_sched().
> That is what it is there for.  ;-)
> 

I believe this just copied what kprobes does, where irqs and preemption
are disabled. I don't believe that these probes have that same luxury.

But that raises the question: should we convert the rcu_dereference_raw()
in the kprobe code to rcu_dereference_sched()?

-- Steve




* Re: ftrace multibuffer && rcu (Was: tracing/uprobes: Support ftrace_event_file base multibuffer)
  2013-06-14 16:26     ` Steven Rostedt
@ 2013-06-14 17:02       ` Oleg Nesterov
  0 siblings, 0 replies; 19+ messages in thread
From: Oleg Nesterov @ 2013-06-14 17:02 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: zhangwei(Jovi),
	Masami Hiramatsu, Frederic Weisbecker, Ingo Molnar,
	Srikar Dronamraju, linux-kernel

On 06/14, Steven Rostedt wrote:
>
> On Fri, 2013-06-14 at 18:04 +0200, Oleg Nesterov wrote:
> >
> > But the main problem is, synchronize_sched() is slow and it is called
> > under the global event_mutex.
>
> But is that really an issue? event_mutex is used to add or remove
> events, and this happens only when we load or unload a module, or add or
> remove a probe. These are mostly user operations that take a relatively
> long time to complete (but not relative to humans).

OK, please ignore then.

Oleg.



* Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-14 16:33       ` Steven Rostedt
@ 2013-06-14 17:25         ` Paul E. McKenney
  2013-06-17  2:54           ` Masami Hiramatsu
  0 siblings, 1 reply; 19+ messages in thread
From: Paul E. McKenney @ 2013-06-14 17:25 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Masami Hiramatsu, zhangwei(Jovi),
	Frederic Weisbecker, Ingo Molnar, Oleg Nesterov,
	Srikar Dronamraju, linux-kernel, yrl.pp-manager.tt

On Fri, Jun 14, 2013 at 12:33:27PM -0400, Steven Rostedt wrote:
> On Fri, 2013-06-14 at 09:21 -0700, Paul E. McKenney wrote:
> 
> > > > > @@ -548,15 +556,35 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
> > > > >  /* uprobe handler */
> > > > >  static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
> > > > >  {
> > > > > -	if (!is_ret_probe(tu))
> > > > > -		uprobe_trace_print(tu, 0, regs);
> > > > > +	struct ftrace_event_file **file;
> > > > > +
> > > > > +	if (is_ret_probe(tu))
> > > > > +		return 0;
> > > > > +
> > > > > +	file = rcu_dereference_raw(tu->files);
> > > 
> > > Why are you using rcu_dereference_raw() and not rcu_dereference(). The
> > > _raw() is a bit special, and unless you know what you are doing with RCU
> > > here, don't use it.
> > > 
> > > As I see you using rcu_dereference_raw() all over this patch, along with
> > > mutexes, I believe that you are not using RCU correctly here.
> > 
> > If irqs and preempt are disabled, I suggest using rcu_dereference_sched().
> > That is what it is there for.  ;-)
> 
> I believe this just copied what kprobes did, where irqs and preemption
> are disabled. I don't believe that these probes have that same luxury.
> 
> But that begs the question. Should we convert the rcu_dereference_raw()
> in the kprobe code to rcu_dereference_sched()?

It makes a lot of sense to me, at least assuming there are no cases
where interrupts are disabled but the checks fail to spot it.  Here
is the check:

	preempt_count() != 0 || irqs_disabled()

(With additional elaboration for if lockdep is enabled.)
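
Just as a rough sketch (the real logic lives in rcu_read_lock_sched_held()
and has more cases; the helper name below is made up for illustration),
the non-lockdep fallback boils down to:

	/*
	 * Illustrative only: treat any non-preemptible or irqs-off
	 * region as a legitimate sched-RCU read-side section.
	 */
	static inline bool in_sched_rcu_read_side(void)
	{
		return preempt_count() != 0 || irqs_disabled();
	}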

							Thanx, Paul


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-14 17:25         ` Paul E. McKenney
@ 2013-06-17  2:54           ` Masami Hiramatsu
  2013-06-17 12:33             ` Steven Rostedt
  0 siblings, 1 reply; 19+ messages in thread
From: Masami Hiramatsu @ 2013-06-17  2:54 UTC (permalink / raw)
  To: paulmck
  Cc: Steven Rostedt, zhangwei(Jovi),
	Frederic Weisbecker, Ingo Molnar, Oleg Nesterov,
	Srikar Dronamraju, linux-kernel, yrl.pp-manager.tt

(2013/06/15 2:25), Paul E. McKenney wrote:
> On Fri, Jun 14, 2013 at 12:33:27PM -0400, Steven Rostedt wrote:
>> On Fri, 2013-06-14 at 09:21 -0700, Paul E. McKenney wrote:
>>
>>>>>> @@ -548,15 +556,35 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
>>>>>>  /* uprobe handler */
>>>>>>  static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
>>>>>>  {
>>>>>> -	if (!is_ret_probe(tu))
>>>>>> -		uprobe_trace_print(tu, 0, regs);
>>>>>> +	struct ftrace_event_file **file;
>>>>>> +
>>>>>> +	if (is_ret_probe(tu))
>>>>>> +		return 0;
>>>>>> +
>>>>>> +	file = rcu_dereference_raw(tu->files);
>>>>
>>>> Why are you using rcu_dereference_raw() and not rcu_dereference(). The
>>>> _raw() is a bit special, and unless you know what you are doing with RCU
>>>> here, don't use it.
>>>>
>>>> As I see you using rcu_dereference_raw() all over this patch, along with
>>>> mutexes, I believe that you are not using RCU correctly here.
>>>
>>> If irqs and preempt are disabled, I suggest using rcu_dereference_sched().
>>> That is what it is there for.  ;-)
>>
>> I believe this just copied what kprobes did, where irqs and preemption
>> are disabled. I don't believe that these probes have that same luxury.
>>
>> But that begs the question. Should we convert the rcu_dereference_raw()
>> in the kprobe code to rcu_dereference_sched()?
> 
> It makes a lot of sense to me, at least assuming there are no cases
> where interrupts are disabled but the checks fail to spot it.  Here
> is the check:
> 
> 	preempt_count() != 0 || irqs_disabled()
> 
> (With additional elaboration for if lockdep is enabled.)

OK, I see. So I'll convert all the rcu_dereference_raw() to
rcu_dereference_sched() except in the kprobe handler, because in the
kprobe handler the above check is always true. :)

Thank you,


-- 
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-17  2:54           ` Masami Hiramatsu
@ 2013-06-17 12:33             ` Steven Rostedt
  2013-06-18  1:31               ` Masami Hiramatsu
  0 siblings, 1 reply; 19+ messages in thread
From: Steven Rostedt @ 2013-06-17 12:33 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: paulmck, zhangwei(Jovi),
	Frederic Weisbecker, Ingo Molnar, Oleg Nesterov,
	Srikar Dronamraju, linux-kernel, yrl.pp-manager.tt

On Mon, 2013-06-17 at 11:54 +0900, Masami Hiramatsu wrote:

> > It makes a lot of sense to me, at least assuming there are no cases
> > where interrupts are disabled but the checks fail to spot it.  Here
> > is the check:
> > 
> > 	preempt_count() != 0 || irqs_disabled()
> > 
> > (With additional elaboration for if lockdep is enabled.)
> 
> OK, I see. So I'll convert all the rcu_dereference_raw() to
> rcu_dereference_sched() except in the kprobe handler, because in the
> kprobe handler the above check is always true. :)

I would convert them all to rcu_dereference_sched(); the above check
only happens when CONFIG_DEBUG_LOCK_ALLOC is set. It also annotates what
is protecting this variable. Please do not avoid a check because "it's
always true here". You also get people copying that code (like for
uprobes) and that will skip the check too.
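
Concretely, the conversion is a one-line change per call site; an
illustrative hunk against the kprobe multibuffer code (tp->files is the
field name used there, shown here only as an example):

	-	file = rcu_dereference_raw(tp->files);
	+	file = rcu_dereference_sched(tp->files);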

The only reason ftrace function tracer uses the raw (and now
raw_notrace) version is because it is extremely invasive, and these
checks done at *every* function call can actually live lock the system.
But other places in tracing use the appropriate rcu_dereference_*()
functions. If they do not, then they need to be fixed too.

-- Steve



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-17 12:33             ` Steven Rostedt
@ 2013-06-18  1:31               ` Masami Hiramatsu
  2013-06-18  2:02                 ` Steven Rostedt
  0 siblings, 1 reply; 19+ messages in thread
From: Masami Hiramatsu @ 2013-06-18  1:31 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: paulmck, zhangwei(Jovi),
	Frederic Weisbecker, Ingo Molnar, Oleg Nesterov,
	Srikar Dronamraju, linux-kernel, yrl.pp-manager.tt

(2013/06/17 21:33), Steven Rostedt wrote:
> On Mon, 2013-06-17 at 11:54 +0900, Masami Hiramatsu wrote:
> 
>>> It makes a lot of sense to me, at least assuming there are no cases
>>> where interrupts are disabled but the checks fail to spot it.  Here
>>> is the check:
>>>
>>> 	preempt_count() != 0 || irqs_disabled()
>>>
>>> (With additional elaboration for if lockdep is enabled.)
>>
>> OK, I see. So I'll convert all the rcu_dereference_raw() to
>> rcu_dereference_sched() except in the kprobe handler, because in the
>> kprobe handler the above check is always true. :)
> 
> I would convert them all to rcu_dereference_sched(); the above check
> only happens when CONFIG_DEBUG_LOCK_ALLOC is set. It also annotates what
> is protecting this variable. Please do not avoid a check because "it's
> always true here". You also get people copying that code (like for
> uprobes) and that will skip the check too.
> 
> The only reason ftrace function tracer uses the raw (and now
> raw_notrace) version is because it is extremely invasive, and these
> checks done at *every* function call can actually live lock the system.
> But other places in tracing use the appropriate rcu_dereference_*()
> functions. If they do not, then they need to be fixed too.

Oh, I see.
Anyway, it will be completely replaced by Oleg's patch.
Or should I fix that before his work?

Thank you,

-- 
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-18  1:31               ` Masami Hiramatsu
@ 2013-06-18  2:02                 ` Steven Rostedt
  0 siblings, 0 replies; 19+ messages in thread
From: Steven Rostedt @ 2013-06-18  2:02 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: paulmck, zhangwei(Jovi),
	Frederic Weisbecker, Ingo Molnar, Oleg Nesterov,
	Srikar Dronamraju, linux-kernel, yrl.pp-manager.tt

On Tue, 2013-06-18 at 10:31 +0900, Masami Hiramatsu wrote:
> (2013/06/17 21:33), Steven Rostedt wrote:

> > The only reason ftrace function tracer uses the raw (and now
> > raw_notrace) version is because it is extremely invasive, and these
> > checks done at *every* function call can actually live lock the system.
> > But other places in tracing use the appropriate rcu_dereference_*()
> > functions. If they do not, then they need to be fixed too.
> 
> Oh, I see.
> Anyway, it will be completely replaced by Oleg's patch.
> Or should I fix that before his work?

It's not that urgent. If Oleg's patch will replace it, I wouldn't
bother.

-- Steve



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-14 14:44 ` Oleg Nesterov
  2013-06-14 16:04   ` ftrace multibuffer && rcu (Was: tracing/uprobes: Support ftrace_event_file base multibuffer) Oleg Nesterov
@ 2013-06-20 16:43   ` Oleg Nesterov
  2013-06-21  8:17     ` zhangwei(Jovi)
  1 sibling, 1 reply; 19+ messages in thread
From: Oleg Nesterov @ 2013-06-20 16:43 UTC (permalink / raw)
  To: zhangwei(Jovi)
  Cc: Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
	Masami Hiramatsu, Srikar Dronamraju, linux-kernel

Hi,

Now that we finished the discussion of the similar code in kprobes,
let me summarize.

On 06/14, Oleg Nesterov wrote:
>
> On 06/14, zhangwei(Jovi) wrote:
> >
> > +		rcu_assign_pointer(tu->files, new);
> > +		tu->flags |= TP_FLAG_TRACE;
> > +
> > +		if (old) {
> > +			/* Make sure the probe is done with old files */
> > +			synchronize_sched();
> > +			kfree(old);
> > +		}
> > +	} else
> > +		tu->flags |= TP_FLAG_PROFILE;
>
> So it can set both TP_FLAG_TRACE and TP_FLAG_PROFILE, yes?
>
> If yes, this is not right. Until we change the pre-filtering at least.
> Currently TP_FLAG_TRACE/TP_FLAG_PROFILE are mutually exclusive.

Yes. So please update this patch so that TP_FLAG_PROFILE can't be
set if TP_FLAG_TRACE is set and vice versa.
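
Something like this in probe_event_enable(), just as a sketch (the flag
argument and the -EINTR return are modelled on the existing enable path;
treat the exact placement as illustrative):

	if ((flag == TP_FLAG_TRACE && (tu->flags & TP_FLAG_PROFILE)) ||
	    (flag == TP_FLAG_PROFILE && (tu->flags & TP_FLAG_TRACE)))
		return -EINTR;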

Once again, this limitation is artificial. But it was always here,
and we need more changes to remove it. I'll try to do this later.
(but if you want to play with this code - welcome ;)

Don't add the mutex, and do not use array-of-pointers (I hope you
noticed the recent discussion).

Locking. Oh, OK. Let it be RCU for now (but please do not forget
that you need rcu_read_lock(); uprobe handlers run in sleepable
context). This is sub-optimal, but it is just another indication
that the uprobes API is not perfect; we can't use uprobe->register_sem.
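
For example, a minimal sketch of the handler-side walk, assuming the
array is replaced by an RCU-protected list as discussed for kprobes
(the event_file_link type and tu->files becoming a list_head are
assumptions; the updater would then pair removal with synchronize_rcu()
or kfree_rcu() rather than synchronize_sched()):

	struct event_file_link {
		struct ftrace_event_file	*file;
		struct list_head		list;
	};

	static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
	{
		struct event_file_link *link;

		if (is_ret_probe(tu))
			return 0;

		/* explicit read-side section; handlers can sleep */
		rcu_read_lock();
		list_for_each_entry_rcu(link, &tu->files, list)
			uprobe_trace_print(tu, 0, regs, link->file);
		rcu_read_unlock();

		return 0;
	}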

Also, when I was reading trace_kprobe.c I noticed the new (and IMHO
useful) soft disable/enable feature. Perhaps you can make a 2nd
patch which adds the FTRACE_EVENT_FL_SOFT_DISABLED_BIT check?

Oleg.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer
  2013-06-20 16:43   ` [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer Oleg Nesterov
@ 2013-06-21  8:17     ` zhangwei(Jovi)
  0 siblings, 0 replies; 19+ messages in thread
From: zhangwei(Jovi) @ 2013-06-21  8:17 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
	Masami Hiramatsu, Srikar Dronamraju, linux-kernel

On 2013/6/21 0:43, Oleg Nesterov wrote:
> Hi,
> 
> Now that we finished the discussion of the similar code in kprobes,
> let me summarize.
> 
> On 06/14, Oleg Nesterov wrote:
>>
>> On 06/14, zhangwei(Jovi) wrote:
>>>
>>> +		rcu_assign_pointer(tu->files, new);
>>> +		tu->flags |= TP_FLAG_TRACE;
>>> +
>>> +		if (old) {
>>> +			/* Make sure the probe is done with old files */
>>> +			synchronize_sched();
>>> +			kfree(old);
>>> +		}
>>> +	} else
>>> +		tu->flags |= TP_FLAG_PROFILE;
>>
>> So it can set both TP_FLAG_TRACE and TP_FLAG_PROFILE, yes?
>>
>> If yes, this is not right. Until we change the pre-filtering at least.
>> Currently TP_FLAG_TRACE/TP_FLAG_PROFILE are mutually exclusive.
> 
> Yes. So please update this patch so that TP_FLAG_PROFILE can't be
> set if TP_FLAG_TRACE is set and vice versa.
> 
> Once again, this limitation is artificial. But it was always here,
> and we need more changes to remove it. I'll try to do this later.
> (but if you want to play with this code - welcome ;)
> 
> Don't add the mutex, and do not use array-of-pointers (I hope you
> noticed the recent discussion).
> 
> Locking. Oh, OK. Let it be RCU for now (but please do not forget
> that you need rcu_read_lock(); uprobe handlers run in sleepable
> context). This is sub-optimal, but it is just another indication
> that the uprobes API is not perfect; we can't use uprobe->register_sem.
> 
> Also, when I was reading trace_kprobe.c I noticed the new (and IMHO
> useful) soft disable/enable feature. Perhaps you can make a 2nd
> patch which adds the FTRACE_EVENT_FL_SOFT_DISABLED_BIT check?
> 
> Oleg.
> 
Thanks Oleg, I will send a v2 patch soon.

.jovi





^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2013-06-21  8:18 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-06-14  1:44 [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer zhangwei(Jovi)
2013-06-14 13:21 ` Masami Hiramatsu
2013-06-14 13:51   ` Oleg Nesterov
2013-06-14 14:09     ` Oleg Nesterov
2013-06-14 15:31   ` Steven Rostedt
2013-06-14 16:21     ` Paul E. McKenney
2013-06-14 16:33       ` Steven Rostedt
2013-06-14 17:25         ` Paul E. McKenney
2013-06-17  2:54           ` Masami Hiramatsu
2013-06-17 12:33             ` Steven Rostedt
2013-06-18  1:31               ` Masami Hiramatsu
2013-06-18  2:02                 ` Steven Rostedt
2013-06-14 14:44 ` Oleg Nesterov
2013-06-14 16:04   ` ftrace multibuffer && rcu (Was: tracing/uprobes: Support ftrace_event_file base multibuffer) Oleg Nesterov
2013-06-14 16:18     ` Oleg Nesterov
2013-06-14 16:26     ` Steven Rostedt
2013-06-14 17:02       ` Oleg Nesterov
2013-06-20 16:43   ` [PATCH] tracing/uprobes: Support ftrace_event_file base multibuffer Oleg Nesterov
2013-06-21  8:17     ` zhangwei(Jovi)
