All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics
@ 2016-02-03 20:28 Martin KaFai Lau
  2016-02-04 10:24 ` 平松雅巳 / HIRAMATU,MASAMI
  2016-02-09 12:20 ` [tip:perf/core] kprobes: Optimize hot path by using percpu counter to collect 'nhit' statistics tip-bot for Martin KaFai Lau
  0 siblings, 2 replies; 5+ messages in thread
From: Martin KaFai Lau @ 2016-02-03 20:28 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Masami Hiramatsu, Steven Rostedt,
	Alexei Starovoitov, Josef Bacik, Kernel Team

When doing ebpf+kprobe on some hot TCP functions (e.g.
tcp_rcv_established), the kprobe_dispatcher shows up in 'perf report'.

In kprobe_dispatcher(), there is a lot of cache bouncing
in 'tk->nhit++'.  'tk->nhit' and 'tk->tp.flags' also share
the same cacheline.

perf report (cycles:pp):
8.30%  ipv4_dst_check
4.74%  copy_user_enhanced_fast_string
3.93%  dst_release
2.80%  tcp_v4_rcv
2.31%  queued_spin_lock_slowpath
2.30%  _raw_spin_lock
1.88%  mlx4_en_process_rx_cq
1.84%  eth_get_headlen
1.81%  ip_rcv_finish
~~~~
1.71%  kprobe_dispatcher
~~~~
1.55%  mlx4_en_xmit
1.09%  __probe_kernel_read

perf report after patch:
9.15%  ipv4_dst_check
5.00%  copy_user_enhanced_fast_string
4.12%  dst_release
2.96%  tcp_v4_rcv
2.50%  _raw_spin_lock
2.39%  queued_spin_lock_slowpath
2.11%  eth_get_headlen
2.03%  mlx4_en_process_rx_cq
1.69%  mlx4_en_xmit
1.19%  ip_rcv_finish
1.12%  __probe_kernel_read
1.02%  ehci_hcd_cleanup

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
---
 kernel/trace/trace_kprobe.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c995644..21b81a4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -30,7 +30,7 @@
 struct trace_kprobe {
 	struct list_head	list;
 	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
-	unsigned long 		nhit;
+	unsigned long __percpu *nhit;
 	const char		*symbol;	/* symbol name */
 	struct trace_probe	tp;
 };
@@ -274,6 +274,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
 	if (!tk)
 		return ERR_PTR(ret);
 
+	tk->nhit = alloc_percpu(unsigned long);
+	if (!tk->nhit)
+		goto error;
+
 	if (symbol) {
 		tk->symbol = kstrdup(symbol, GFP_KERNEL);
 		if (!tk->symbol)
@@ -313,6 +317,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
 error:
 	kfree(tk->tp.call.name);
 	kfree(tk->symbol);
+	free_percpu(tk->nhit);
 	kfree(tk);
 	return ERR_PTR(ret);
 }
@@ -327,6 +332,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
 	kfree(tk->tp.call.class->system);
 	kfree(tk->tp.call.name);
 	kfree(tk->symbol);
+	free_percpu(tk->nhit);
 	kfree(tk);
 }
 
@@ -874,9 +880,14 @@ static const struct file_operations kprobe_events_ops = {
 static int probes_profile_seq_show(struct seq_file *m, void *v)
 {
 	struct trace_kprobe *tk = v;
+	unsigned long nhit = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		nhit += *per_cpu_ptr(tk->nhit, cpu);
 
 	seq_printf(m, "  %-44s %15lu %15lu\n",
-		   trace_event_name(&tk->tp.call), tk->nhit,
+		   trace_event_name(&tk->tp.call), nhit,
 		   tk->rp.kp.nmissed);
 
 	return 0;
@@ -1225,7 +1236,7 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
 
-	tk->nhit++;
+	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kprobe_trace_func(tk, regs);
@@ -1242,7 +1253,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
 
-	tk->nhit++;
+	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kretprobe_trace_func(tk, ri, regs);
-- 
2.5.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* RE: [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics
  2016-02-03 20:28 [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics Martin KaFai Lau
@ 2016-02-04 10:24 ` 平松雅巳 / HIRAMATU,MASAMI
  2016-02-04 13:19   ` Steven Rostedt
  2016-02-09 12:20 ` [tip:perf/core] kprobes: Optimize hot path by using percpu counter to collect 'nhit' statistics tip-bot for Martin KaFai Lau
  1 sibling, 1 reply; 5+ messages in thread
From: 平松雅巳 / HIRAMATU,MASAMI @ 2016-02-04 10:24 UTC (permalink / raw)
  To: 'Martin KaFai Lau', linux-kernel, Ingo Molnar
  Cc: Steven Rostedt, Alexei Starovoitov, Josef Bacik, Kernel Team

From: Martin KaFai Lau [mailto:kafai@fb.com]
>
>When doing ebpf+kprobe on some hot TCP functions (e.g.
>tcp_rcv_established), the kprobe_dispatcher shows up in 'perf report'.
>
>In kprobe_dispatcher(), there is a lot of cache bouncing
>in 'tk->nhit++'.  'tk->nhit' and 'tk->tp.flags' also share
>the same cacheline.
>

Ingo, could you consider to merge this?

Thanks,


>perf report (cycles:pp):
>8.30%  ipv4_dst_check
>4.74%  copy_user_enhanced_fast_string
>3.93%  dst_release
>2.80%  tcp_v4_rcv
>2.31%  queued_spin_lock_slowpath
>2.30%  _raw_spin_lock
>1.88%  mlx4_en_process_rx_cq
>1.84%  eth_get_headlen
>1.81%  ip_rcv_finish
>~~~~
>1.71%  kprobe_dispatcher
>~~~~
>1.55%  mlx4_en_xmit
>1.09%  __probe_kernel_read
>
>perf report after patch:
>9.15%  ipv4_dst_check
>5.00%  copy_user_enhanced_fast_string
>4.12%  dst_release
>2.96%  tcp_v4_rcv
>2.50%  _raw_spin_lock
>2.39%  queued_spin_lock_slowpath
>2.11%  eth_get_headlen
>2.03%  mlx4_en_process_rx_cq
>1.69%  mlx4_en_xmit
>1.19%  ip_rcv_finish
>1.12%  __probe_kernel_read
>1.02%  ehci_hcd_cleanup
>
>Signed-off-by: Martin KaFai Lau <kafai@fb.com>
>Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
>---
> kernel/trace/trace_kprobe.c | 19 +++++++++++++++----
> 1 file changed, 15 insertions(+), 4 deletions(-)
>
>diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
>index c995644..21b81a4 100644
>--- a/kernel/trace/trace_kprobe.c
>+++ b/kernel/trace/trace_kprobe.c
>@@ -30,7 +30,7 @@
> struct trace_kprobe {
> 	struct list_head	list;
> 	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
>-	unsigned long 		nhit;
>+	unsigned long __percpu *nhit;
> 	const char		*symbol;	/* symbol name */
> 	struct trace_probe	tp;
> };
>@@ -274,6 +274,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
> 	if (!tk)
> 		return ERR_PTR(ret);
>
>+	tk->nhit = alloc_percpu(unsigned long);
>+	if (!tk->nhit)
>+		goto error;
>+
> 	if (symbol) {
> 		tk->symbol = kstrdup(symbol, GFP_KERNEL);
> 		if (!tk->symbol)
>@@ -313,6 +317,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
> error:
> 	kfree(tk->tp.call.name);
> 	kfree(tk->symbol);
>+	free_percpu(tk->nhit);
> 	kfree(tk);
> 	return ERR_PTR(ret);
> }
>@@ -327,6 +332,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
> 	kfree(tk->tp.call.class->system);
> 	kfree(tk->tp.call.name);
> 	kfree(tk->symbol);
>+	free_percpu(tk->nhit);
> 	kfree(tk);
> }
>
>@@ -874,9 +880,14 @@ static const struct file_operations kprobe_events_ops = {
> static int probes_profile_seq_show(struct seq_file *m, void *v)
> {
> 	struct trace_kprobe *tk = v;
>+	unsigned long nhit = 0;
>+	int cpu;
>+
>+	for_each_possible_cpu(cpu)
>+		nhit += *per_cpu_ptr(tk->nhit, cpu);
>
> 	seq_printf(m, "  %-44s %15lu %15lu\n",
>-		   trace_event_name(&tk->tp.call), tk->nhit,
>+		   trace_event_name(&tk->tp.call), nhit,
> 		   tk->rp.kp.nmissed);
>
> 	return 0;
>@@ -1225,7 +1236,7 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
> {
> 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
>
>-	tk->nhit++;
>+	raw_cpu_inc(*tk->nhit);
>
> 	if (tk->tp.flags & TP_FLAG_TRACE)
> 		kprobe_trace_func(tk, regs);
>@@ -1242,7 +1253,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
> {
> 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
>
>-	tk->nhit++;
>+	raw_cpu_inc(*tk->nhit);
>
> 	if (tk->tp.flags & TP_FLAG_TRACE)
> 		kretprobe_trace_func(tk, ri, regs);
>--
>2.5.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics
  2016-02-04 10:24 ` 平松雅巳 / HIRAMATU,MASAMI
@ 2016-02-04 13:19   ` Steven Rostedt
  2016-02-09 10:09     ` Ingo Molnar
  0 siblings, 1 reply; 5+ messages in thread
From: Steven Rostedt @ 2016-02-04 13:19 UTC (permalink / raw)
  To: 平松雅巳 / HIRAMATU,MASAMI
  Cc: 'Martin KaFai Lau',
	linux-kernel, Ingo Molnar, Alexei Starovoitov, Josef Bacik,
	Kernel Team

On Thu, 4 Feb 2016 10:24:12 +0000
平松雅巳 / HIRAMATU,MASAMI <masami.hiramatsu.pt@hitachi.com> wrote:

> From: Martin KaFai Lau [mailto:kafai@fb.com]
> >
> >When doing ebpf+kprobe on some hot TCP functions (e.g.
> >tcp_rcv_established), the kprobe_dispatcher shows up in 'perf report'.
> >
> >In kprobe_dispatcher(), there is a lot of cache bouncing
> >in 'tk->nhit++'.  'tk->nhit' and 'tk->tp.flags' also share
> >the same cacheline.
> >  
> 
> Ingo, could you consider to merge this?
> 

Ingo, you can also add my

 Acked-by: Steven Rostedt <rostedt@goodmis.org>

-- Steve

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics
  2016-02-04 13:19   ` Steven Rostedt
@ 2016-02-09 10:09     ` Ingo Molnar
  0 siblings, 0 replies; 5+ messages in thread
From: Ingo Molnar @ 2016-02-09 10:09 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: 平松雅巳 / HIRAMATU,MASAMI,
	'Martin KaFai Lau',
	linux-kernel, Ingo Molnar, Alexei Starovoitov, Josef Bacik,
	Kernel Team


* Steven Rostedt <rostedt@goodmis.org> wrote:

> On Thu, 4 Feb 2016 10:24:12 +0000
> 平松雅巳 / HIRAMATU,MASAMI <masami.hiramatsu.pt@hitachi.com> wrote:
> 
> > From: Martin KaFai Lau [mailto:kafai@fb.com]
> > >
> > >When doing ebpf+kprobe on some hot TCP functions (e.g.
> > >tcp_rcv_established), the kprobe_dispatcher shows up in 'perf report'.
> > >
> > >In kprobe_dispatcher(), there is a lot of cache bouncing
> > >in 'tk->nhit++'.  'tk->nhit' and 'tk->tp.flags' also share
> > >the same cacheline.
> > >  
> > 
> > Ingo, could you consider to merge this?
> > 
> 
> Ingo, you can also add my
> 
>  Acked-by: Steven Rostedt <rostedt@goodmis.org>

Yeah, nice optimization, picked it up - thanks guys!

	Ingo

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [tip:perf/core] kprobes: Optimize hot path by using percpu counter to collect 'nhit' statistics
  2016-02-03 20:28 [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics Martin KaFai Lau
  2016-02-04 10:24 ` 平松雅巳 / HIRAMATU,MASAMI
@ 2016-02-09 12:20 ` tip-bot for Martin KaFai Lau
  1 sibling, 0 replies; 5+ messages in thread
From: tip-bot for Martin KaFai Lau @ 2016-02-09 12:20 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, tglx, jbacik, hpa, kafai, kernel-team,
	masami.hiramatsu.pt, torvalds, rostedt, mingo, linux-kernel,
	alexei.starovoitov

Commit-ID:  a7636d9ecfa3ab7800a7c04c1f89378229eff609
Gitweb:     http://git.kernel.org/tip/a7636d9ecfa3ab7800a7c04c1f89378229eff609
Author:     Martin KaFai Lau <kafai@fb.com>
AuthorDate: Wed, 3 Feb 2016 12:28:28 -0800
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 9 Feb 2016 11:08:58 +0100

kprobes: Optimize hot path by using percpu counter to collect 'nhit' statistics

When doing ebpf+kprobe on some hot TCP functions (e.g.
tcp_rcv_established), the kprobe_dispatcher() function
shows up in 'perf report'.

In kprobe_dispatcher(), there is a lot of cache bouncing
on 'tk->nhit++'.  'tk->nhit' and 'tk->tp.flags' also share
the same cacheline.

perf report (cycles:pp):

	8.30%  ipv4_dst_check
	4.74%  copy_user_enhanced_fast_string
	3.93%  dst_release
	2.80%  tcp_v4_rcv
	2.31%  queued_spin_lock_slowpath
	2.30%  _raw_spin_lock
	1.88%  mlx4_en_process_rx_cq
	1.84%  eth_get_headlen
	1.81%  ip_rcv_finish
	~~~~
	1.71%  kprobe_dispatcher
	~~~~
	1.55%  mlx4_en_xmit
	1.09%  __probe_kernel_read

perf report after patch:

	9.15%  ipv4_dst_check
	5.00%  copy_user_enhanced_fast_string
	4.12%  dst_release
	2.96%  tcp_v4_rcv
	2.50%  _raw_spin_lock
	2.39%  queued_spin_lock_slowpath
	2.11%  eth_get_headlen
	2.03%  mlx4_en_process_rx_cq
	1.69%  mlx4_en_xmit
	1.19%  ip_rcv_finish
	1.12%  __probe_kernel_read
	1.02%  ehci_hcd_cleanup

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Kernel Team <kernel-team@fb.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1454531308-2441898-1-git-send-email-kafai@fb.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/trace/trace_kprobe.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c995644..21b81a4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -30,7 +30,7 @@
 struct trace_kprobe {
 	struct list_head	list;
 	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
-	unsigned long 		nhit;
+	unsigned long __percpu *nhit;
 	const char		*symbol;	/* symbol name */
 	struct trace_probe	tp;
 };
@@ -274,6 +274,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
 	if (!tk)
 		return ERR_PTR(ret);
 
+	tk->nhit = alloc_percpu(unsigned long);
+	if (!tk->nhit)
+		goto error;
+
 	if (symbol) {
 		tk->symbol = kstrdup(symbol, GFP_KERNEL);
 		if (!tk->symbol)
@@ -313,6 +317,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
 error:
 	kfree(tk->tp.call.name);
 	kfree(tk->symbol);
+	free_percpu(tk->nhit);
 	kfree(tk);
 	return ERR_PTR(ret);
 }
@@ -327,6 +332,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
 	kfree(tk->tp.call.class->system);
 	kfree(tk->tp.call.name);
 	kfree(tk->symbol);
+	free_percpu(tk->nhit);
 	kfree(tk);
 }
 
@@ -874,9 +880,14 @@ static const struct file_operations kprobe_events_ops = {
 static int probes_profile_seq_show(struct seq_file *m, void *v)
 {
 	struct trace_kprobe *tk = v;
+	unsigned long nhit = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		nhit += *per_cpu_ptr(tk->nhit, cpu);
 
 	seq_printf(m, "  %-44s %15lu %15lu\n",
-		   trace_event_name(&tk->tp.call), tk->nhit,
+		   trace_event_name(&tk->tp.call), nhit,
 		   tk->rp.kp.nmissed);
 
 	return 0;
@@ -1225,7 +1236,7 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
 
-	tk->nhit++;
+	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kprobe_trace_func(tk, regs);
@@ -1242,7 +1253,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
 
-	tk->nhit++;
+	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kretprobe_trace_func(tk, ri, regs);

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2016-02-09 12:20 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-03 20:28 [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics Martin KaFai Lau
2016-02-04 10:24 ` 平松雅巳 / HIRAMATU,MASAMI
2016-02-04 13:19   ` Steven Rostedt
2016-02-09 10:09     ` Ingo Molnar
2016-02-09 12:20 ` [tip:perf/core] kprobes: Optimize hot path by using percpu counter to collect 'nhit' statistics tip-bot for Martin KaFai Lau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.