linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] perf uprobe: Skip prologue if program compiled without optimization
@ 2016-07-28 14:31 Ravi Bangoria
  2016-07-30  3:04 ` Masami Hiramatsu
  0 siblings, 1 reply; 3+ messages in thread
From: Ravi Bangoria @ 2016-07-28 14:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: peterz, mingo, acme, alexander.shishkin, mhiramat, wangnan0,
	hemant, Ravi Bangoria

Function prologue prepares stack and registers before executing function
logic. When target program is compiled without optimization, function
parameter information is only valid after prologue. When we probe entrypc
of the function, and try to record function parameter, it contains
garbage value.

For example,
  $ vim test.c
    #include <stdio.h>

    void foo(int i)
    {
       printf("i: %d\n", i);
    }

    int main()
    {
      foo(42);
      return 0;
    }

  $ gcc -g test.c -o test
  $ objdump -dl test | less
    foo():
    /home/ravi/test.c:4
      400536:       55                      push   %rbp
      400537:       48 89 e5                mov    %rsp,%rbp
      40053a:       48 83 ec 10             sub    -bashx10,%rsp
      40053e:       89 7d fc                mov    %edi,-0x4(%rbp)
    /home/ravi/test.c:5
      400541:       8b 45 fc                mov    -0x4(%rbp),%eax
    ...
    ...
    main():
    /home/ravi/test.c:9
      400558:       55                      push   %rbp
      400559:       48 89 e5                mov    %rsp,%rbp
    /home/ravi/test.c:10
      40055c:       bf 2a 00 00 00          mov    -bashx2a,%edi
      400561:       e8 d0 ff ff ff          callq  400536 <foo>
    /home/ravi/test.c:11

  $ ./perf probe -x ./test 'foo i'
  $ cat /sys/kernel/debug/tracing/uprobe_events
     p:probe_test/foo /home/ravi/test:0x0000000000000536 i=-12(%sp):s32

  $ ./perf record -e probe_test:foo ./test
  $ ./perf script
     test  5778 [001]  4918.562027: probe_test:foo: (400536) i=0

Here variable 'i' is passed via stack which is pushed on stack at
0x40053e. But we are probing at 0x400536.

To resolve this issues, we need to probe on next instruction after
prologue. gdb and systemtap also does same thing. I've implemented
this patch based on approach systemtap has used.

After applying patch:

  $ ./perf probe -x ./test 'foo i'
  $ cat /sys/kernel/debug/tracing/uprobe_events
    p:probe_test/foo /home/ravi/test:0x0000000000000541 i=-4(%bp):s32

  $ ./perf record -e probe_test:foo ./test
  $ ./perf script
    test  6300 [001]  5877.879327: probe_test:foo: (400541) i=42

No need to skip prologue for optimized case since debug info is correct
for each instructions for -O2 -g. For more details please visit:
        https://bugzilla.redhat.com/show_bug.cgi?id=612253#c6

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
---
 tools/perf/util/probe-finder.c | 156 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 156 insertions(+)

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index f2d9ff0..a788b9c2 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -892,6 +892,161 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
 	return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
 }
 
+static bool var_has_loclist(Dwarf_Die *die)
+{
+	Dwarf_Attribute loc;
+	int tag = dwarf_tag(die);
+
+	if (tag != DW_TAG_formal_parameter &&
+	    tag != DW_TAG_variable)
+		return false;
+
+	return (dwarf_attr_integrate(die, DW_AT_location, &loc) &&
+		dwarf_whatform(&loc) == DW_FORM_sec_offset);
+}
+
+/*
+ * For any object in given CU whose DW_AT_location is a location list,
+ * target program is compiled with optimization.
+ */
+static bool optimized_target(Dwarf_Die *die)
+{
+	Dwarf_Die tmp_die;
+
+	if (var_has_loclist(die))
+		return true;
+
+	if (!dwarf_child(die, &tmp_die) && optimized_target(&tmp_die))
+		return true;
+
+	if (!dwarf_siblingof(die, &tmp_die) && optimized_target(&tmp_die))
+		return true;
+
+	return false;
+}
+
+static bool get_entrypc_idx(Dwarf_Lines *lines, unsigned long nr_lines,
+			    Dwarf_Addr pf_addr, unsigned long *entrypc_idx)
+{
+	unsigned long i;
+	Dwarf_Addr addr;
+
+	for (i = 0; i < nr_lines; i++) {
+		if (dwarf_lineaddr(dwarf_onesrcline(lines, i), &addr))
+			return false;
+
+		if (addr == pf_addr) {
+			*entrypc_idx = i;
+			return true;
+		}
+	}
+	return false;
+}
+
+static bool get_postprologue_addr(unsigned long entrypc_idx,
+				  Dwarf_Lines *lines,
+				  unsigned long nr_lines,
+				  Dwarf_Addr highpc,
+				  Dwarf_Addr *postprologue_addr)
+{
+	unsigned long i;
+	int entrypc_lno, lno;
+	Dwarf_Line *line;
+	Dwarf_Addr addr;
+	bool p_end;
+
+	/* entrypc_lno is actual source line number */
+	line = dwarf_onesrcline(lines, entrypc_idx);
+	if (dwarf_lineno(line, &entrypc_lno))
+		return false;
+
+	for (i = entrypc_idx; i < nr_lines; i++) {
+		line = dwarf_onesrcline(lines, i);
+
+		if (dwarf_lineaddr(line, &addr) ||
+		    dwarf_lineno(line, &lno)    ||
+		    dwarf_lineprologueend(line, &p_end))
+			return false;
+
+		/* highpc is exclusive. [entrypc,highpc) */
+		if (addr >= highpc)
+			break;
+
+		/* clang supports prologue-end marker */
+		if (p_end)
+			break;
+
+		/* Actual next line in source */
+		if (lno != entrypc_lno)
+			break;
+
+		/*
+		 * Single source line can have multiple line records.
+		 * For Example,
+		 *     void foo() { printf("hello\n"); }
+		 * contains two line records. One points to declaration and
+		 * other points to printf() line. Variable 'lno' won't get
+		 * incremented in this case but 'i' will.
+		 */
+		if (i != entrypc_idx)
+			break;
+	}
+
+	dwarf_lineaddr(line, postprologue_addr);
+	if (*postprologue_addr >= highpc)
+		dwarf_lineaddr(dwarf_onesrcline(lines, i - 1),
+			       postprologue_addr);
+
+	return true;
+}
+
+static void __skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+	unsigned long nr_lines = 0, entrypc_idx = 0;
+	Dwarf_Lines *lines = NULL;
+	Dwarf_Addr postprologue_addr;
+	Dwarf_Addr highpc;
+
+	if (dwarf_highpc(sp_die, &highpc))
+		return;
+
+	if (dwarf_getsrclines(&pf->cu_die, &lines, &nr_lines))
+		return;
+
+	if (!get_entrypc_idx(lines, nr_lines, pf->addr, &entrypc_idx))
+		return;
+
+	if (!get_postprologue_addr(entrypc_idx, lines, nr_lines,
+				   highpc, &postprologue_addr))
+		return;
+
+	pf->addr = postprologue_addr;
+}
+
+static void skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+	struct perf_probe_point *pp = &pf->pev->point;
+
+	/* Not uprobe? */
+	if (!pf->pev->uprobes)
+		return;
+
+	/* Compiled with optimization? */
+	if (optimized_target(&pf->cu_die))
+		return;
+
+	/* Don't know entrypc? */
+	if (!pf->addr)
+		return;
+
+	/* Only FUNC and FUNC@SRC are eligible. */
+	if (!pp->function || pp->line || pp->retprobe || pp->lazy_line ||
+	    pp->offset || pp->abs_address)
+		return;
+
+	__skip_prologue(sp_die, pf);
+}
+
 static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
 {
 	struct probe_finder *pf = data;
@@ -954,6 +1109,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
 		if (pp->lazy_line)
 			param->retval = find_probe_point_lazy(sp_die, pf);
 		else {
+			skip_prologue(sp_die, pf);
 			pf->addr += pp->offset;
 			/* TODO: Check the address in this function */
 			param->retval = call_probe_finder(sp_die, pf);
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [RFC] perf uprobe: Skip prologue if program compiled without optimization
  2016-07-28 14:31 [RFC] perf uprobe: Skip prologue if program compiled without optimization Ravi Bangoria
@ 2016-07-30  3:04 ` Masami Hiramatsu
  2016-08-01  9:01   ` Ravi Bangoria
  0 siblings, 1 reply; 3+ messages in thread
From: Masami Hiramatsu @ 2016-07-30  3:04 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: linux-kernel, peterz, mingo, acme, alexander.shishkin, mhiramat,
	wangnan0, hemant

On Thu, 28 Jul 2016 20:01:51 +0530
Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com> wrote:

> Function prologue prepares stack and registers before executing function
> logic. When target program is compiled without optimization, function
> parameter information is only valid after prologue. When we probe entrypc
> of the function, and try to record function parameter, it contains
> garbage value.

Right! :)

> For example,
>   $ vim test.c
>     #include <stdio.h>
> 
>     void foo(int i)
>     {
>        printf("i: %d\n", i);
>     }
> 
>     int main()
>     {
>       foo(42);
>       return 0;
>     }
> 
>   $ gcc -g test.c -o test
>   $ objdump -dl test | less
>     foo():
>     /home/ravi/test.c:4
>       400536:       55                      push   %rbp
>       400537:       48 89 e5                mov    %rsp,%rbp
>       40053a:       48 83 ec 10             sub    -bashx10,%rsp
>       40053e:       89 7d fc                mov    %edi,-0x4(%rbp)
>     /home/ravi/test.c:5
>       400541:       8b 45 fc                mov    -0x4(%rbp),%eax
>     ...
>     ...
>     main():
>     /home/ravi/test.c:9
>       400558:       55                      push   %rbp
>       400559:       48 89 e5                mov    %rsp,%rbp
>     /home/ravi/test.c:10
>       40055c:       bf 2a 00 00 00          mov    -bashx2a,%edi
>       400561:       e8 d0 ff ff ff          callq  400536 <foo>
>     /home/ravi/test.c:11
> 
>   $ ./perf probe -x ./test 'foo i'
>   $ cat /sys/kernel/debug/tracing/uprobe_events
>      p:probe_test/foo /home/ravi/test:0x0000000000000536 i=-12(%sp):s32
> 
>   $ ./perf record -e probe_test:foo ./test
>   $ ./perf script
>      test  5778 [001]  4918.562027: probe_test:foo: (400536) i=0
> 
> Here variable 'i' is passed via stack which is pushed on stack at
> 0x40053e. But we are probing at 0x400536.
> 
> To resolve this issues, we need to probe on next instruction after
> prologue. gdb and systemtap also does same thing. I've implemented
> this patch based on approach systemtap has used.
> 
> After applying patch:
> 
>   $ ./perf probe -x ./test 'foo i'
>   $ cat /sys/kernel/debug/tracing/uprobe_events
>     p:probe_test/foo /home/ravi/test:0x0000000000000541 i=-4(%bp):s32
> 
>   $ ./perf record -e probe_test:foo ./test
>   $ ./perf script
>     test  6300 [001]  5877.879327: probe_test:foo: (400541) i=42

It is great!
And I think we also should give a notice message for users about
skipping prologue, so that they can understand why the probe point
is not on the function entry address ;)


> No need to skip prologue for optimized case since debug info is correct
> for each instructions for -O2 -g. For more details please visit:
>         https://bugzilla.redhat.com/show_bug.cgi?id=612253#c6
> 
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
> ---
>  tools/perf/util/probe-finder.c | 156 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 156 insertions(+)
> 
> diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
> index f2d9ff0..a788b9c2 100644
> --- a/tools/perf/util/probe-finder.c
> +++ b/tools/perf/util/probe-finder.c
> @@ -892,6 +892,161 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
>  	return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
>  }
>  
> +static bool var_has_loclist(Dwarf_Die *die)
> +{
> +	Dwarf_Attribute loc;
> +	int tag = dwarf_tag(die);
> +
> +	if (tag != DW_TAG_formal_parameter &&
> +	    tag != DW_TAG_variable)
> +		return false;
> +
> +	return (dwarf_attr_integrate(die, DW_AT_location, &loc) &&
> +		dwarf_whatform(&loc) == DW_FORM_sec_offset);
> +}
> +
> +/*
> + * For any object in given CU whose DW_AT_location is a location list,
> + * target program is compiled with optimization.
> + */

OK, anyway if it has loclist, we can ensure the variable is available at that address.

> +static bool optimized_target(Dwarf_Die *die)
> +{
> +	Dwarf_Die tmp_die;
> +
> +	if (var_has_loclist(die))
> +		return true;
> +
> +	if (!dwarf_child(die, &tmp_die) && optimized_target(&tmp_die))
> +		return true;
> +
> +	if (!dwarf_siblingof(die, &tmp_die) && optimized_target(&tmp_die))
> +		return true;
> +
> +	return false;
> +}
> +
> +static bool get_entrypc_idx(Dwarf_Lines *lines, unsigned long nr_lines,
> +			    Dwarf_Addr pf_addr, unsigned long *entrypc_idx)
> +{
> +	unsigned long i;
> +	Dwarf_Addr addr;
> +
> +	for (i = 0; i < nr_lines; i++) {
> +		if (dwarf_lineaddr(dwarf_onesrcline(lines, i), &addr))
> +			return false;
> +
> +		if (addr == pf_addr) {
> +			*entrypc_idx = i;
> +			return true;
> +		}
> +	}
> +	return false;
> +}
> +
> +static bool get_postprologue_addr(unsigned long entrypc_idx,
> +				  Dwarf_Lines *lines,
> +				  unsigned long nr_lines,
> +				  Dwarf_Addr highpc,
> +				  Dwarf_Addr *postprologue_addr)
> +{
> +	unsigned long i;
> +	int entrypc_lno, lno;
> +	Dwarf_Line *line;
> +	Dwarf_Addr addr;
> +	bool p_end;
> +
> +	/* entrypc_lno is actual source line number */
> +	line = dwarf_onesrcline(lines, entrypc_idx);
> +	if (dwarf_lineno(line, &entrypc_lno))
> +		return false;
> +
> +	for (i = entrypc_idx; i < nr_lines; i++) {
> +		line = dwarf_onesrcline(lines, i);
> +
> +		if (dwarf_lineaddr(line, &addr) ||
> +		    dwarf_lineno(line, &lno)    ||
> +		    dwarf_lineprologueend(line, &p_end))
> +			return false;
> +
> +		/* highpc is exclusive. [entrypc,highpc) */
> +		if (addr >= highpc)
> +			break;
> +
> +		/* clang supports prologue-end marker */
> +		if (p_end)
> +			break;
> +
> +		/* Actual next line in source */
> +		if (lno != entrypc_lno)
> +			break;
> +
> +		/*
> +		 * Single source line can have multiple line records.
> +		 * For Example,
> +		 *     void foo() { printf("hello\n"); }
> +		 * contains two line records. One points to declaration and
> +		 * other points to printf() line. Variable 'lno' won't get
> +		 * incremented in this case but 'i' will.
> +		 */
> +		if (i != entrypc_idx)
> +			break;
> +	}
> +
> +	dwarf_lineaddr(line, postprologue_addr);
> +	if (*postprologue_addr >= highpc)
> +		dwarf_lineaddr(dwarf_onesrcline(lines, i - 1),
> +			       postprologue_addr);
> +
> +	return true;
> +}
> +
> +static void __skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf)
> +{
> +	unsigned long nr_lines = 0, entrypc_idx = 0;
> +	Dwarf_Lines *lines = NULL;
> +	Dwarf_Addr postprologue_addr;
> +	Dwarf_Addr highpc;
> +
> +	if (dwarf_highpc(sp_die, &highpc))
> +		return;
> +
> +	if (dwarf_getsrclines(&pf->cu_die, &lines, &nr_lines))
> +		return;
> +
> +	if (!get_entrypc_idx(lines, nr_lines, pf->addr, &entrypc_idx))
> +		return;
> +
> +	if (!get_postprologue_addr(entrypc_idx, lines, nr_lines,
> +				   highpc, &postprologue_addr))
> +		return;
> +
> +	pf->addr = postprologue_addr;
> +}
> +
> +static void skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf)
> +{
> +	struct perf_probe_point *pp = &pf->pev->point;
> +
> +	/* Not uprobe? */
> +	if (!pf->pev->uprobes)
> +		return;
> +
> +	/* Compiled with optimization? */
> +	if (optimized_target(&pf->cu_die))
> +		return;
> +
> +	/* Don't know entrypc? */
> +	if (!pf->addr)
> +		return;
> +
> +	/* Only FUNC and FUNC@SRC are eligible. */
> +	if (!pp->function || pp->line || pp->retprobe || pp->lazy_line ||
> +	    pp->offset || pp->abs_address)
> +		return;
> +
> +	__skip_prologue(sp_die, pf);
> +}
> +
>  static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
>  {
>  	struct probe_finder *pf = data;
> @@ -954,6 +1109,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
>  		if (pp->lazy_line)
>  			param->retval = find_probe_point_lazy(sp_die, pf);
>  		else {
> +			skip_prologue(sp_die, pf);

Hmm, this seems no good. prologue skipping must be done only if the function
parameter is specified. Please consider the case of probing prologue like single
stepping debug.

Thank you,


>  			pf->addr += pp->offset;
>  			/* TODO: Check the address in this function */
>  			param->retval = call_probe_finder(sp_die, pf);
> -- 
> 2.5.5
> 


-- 
Masami Hiramatsu <mhiramat@kernel.org>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [RFC] perf uprobe: Skip prologue if program compiled without optimization
  2016-07-30  3:04 ` Masami Hiramatsu
@ 2016-08-01  9:01   ` Ravi Bangoria
  0 siblings, 0 replies; 3+ messages in thread
From: Ravi Bangoria @ 2016-08-01  9:01 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: linux-kernel, peterz, mingo, acme, alexander.shishkin, wangnan0, hemant


On Saturday 30 July 2016 08:34 AM, Masami Hiramatsu wrote:
> On Thu, 28 Jul 2016 20:01:51 +0530
> Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com> wrote:
>
>> Function prologue prepares stack and registers before executing function
>> logic. When target program is compiled without optimization, function
>> parameter information is only valid after prologue. When we probe entrypc
>> of the function, and try to record function parameter, it contains
>> garbage value.
> Right! :)

Thanks Masami for review.

I've sent patch with changes you suggested. Please review it.

-Ravi

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-08-01  9:01 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-28 14:31 [RFC] perf uprobe: Skip prologue if program compiled without optimization Ravi Bangoria
2016-07-30  3:04 ` Masami Hiramatsu
2016-08-01  9:01   ` Ravi Bangoria

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).