linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] uprobes/x86: emulate push insns for uprobe on x86
@ 2017-11-09  8:01 Yonghong Song
  2017-11-09 13:44 ` Oleg Nesterov
  2017-11-09 14:47 ` Oleg Nesterov
  0 siblings, 2 replies; 7+ messages in thread
From: Yonghong Song @ 2017-11-09  8:01 UTC (permalink / raw)
  To: mingo, tglx, oleg, peterz, linux-kernel, x86, netdev, ast; +Cc: kernel-team

Uprobe is a tracing mechanism for userspace programs.
Typical uprobe will incur overhead of two traps.
First trap is caused by replaced trap insn, and
the second trap is to execute the original displaced
insn in user space.

To reduce the overhead, kernel provides hooks
for architectures to emulate the original insn
and skip the second trap. In x86, emulation
is done for certain branch insns.

This patch extends the emulation to "push <reg>"
insns. These insns are typical in the beginning
of the function. For example, bcc
in https://github.com/iovisor/bcc repo provides
tools to measure funclantency, detect memleak, etc.
The tools will place uprobes in the beginning of
function and possibly uretprobes at the end of function.
This patch is able to reduce the trap overhead for
uprobe from 2 to 1.

Without this patch, uretprobe will typically incur
three traps. With this patch, if the function starts
with "push" insn, the number of traps can be
reduced from 3 to 2.

An experiment was conducted on two local VMs,
fedora 26 64-bit VM and 32-bit VM, both 4 processors
and 4GB memory, booted with latest x86/urgent (and this patch).
The host is MacBook with intel i7 processor.

The test program looks like
  #include <stdio.h>
  #include <stdlib.h>
  #include <time.h>
  #include <sys/time.h>

  static void test() __attribute__((noinline));
  void test() {}
  int main() {
    struct timeval start, end;

    gettimeofday(&start, NULL);
    for (int i = 0; i < 1000000; i++) {
      test();
    }
    gettimeofday(&end, NULL);

    printf("%ld\n", ((end.tv_sec * 1000000 + end.tv_usec)
                     - (start.tv_sec * 1000000 + start.tv_usec)));
    return 0;
  }

The program is compiled without optimization, and
the first insn for function "test" is "push %rbp".
The host is relatively idle.

Before the test run, the uprobe is inserted as below for uprobe:
  echo 'p <binary>:<test_func_offset>' > /sys/kernel/debug/tracing/uprobe_events
  echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable
and for uretprobe:
  echo 'r <binary>:<test_func_offset>' > /sys/kernel/debug/tracing/uprobe_events
  echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable

Unit: microsecond(usec) per loop iteration

x86_64          W/ this patch   W/O this patch
uprobe          1.55            3.1
uretprobe       2.0             3.6

x86_32          W/ this patch   W/O this patch
uprobe          1.41            3.5
uretprobe       1.75            4.0

You can see that this patch significantly reduced the overhead,
50% for uprobe and 44% for uretprobe on x86_64, and even more
on x86_32.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 arch/x86/include/asm/uprobes.h |  10 ++++
 arch/x86/kernel/uprobes.c      | 115 +++++++++++++++++++++++++++++++++++------
 2 files changed, 109 insertions(+), 16 deletions(-)

Changelog:
  . Make commit subject more appropriate

diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 74f4c2f..f9d2b43 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -33,6 +33,11 @@ typedef u8 uprobe_opcode_t;
 #define UPROBE_SWBP_INSN		0xcc
 #define UPROBE_SWBP_INSN_SIZE		   1
 
+enum uprobe_insn_t {
+	UPROBE_BRANCH_INSN	= 0,
+	UPROBE_PUSH_INSN	= 1,
+};
+
 struct uprobe_xol_ops;
 
 struct arch_uprobe {
@@ -42,6 +47,7 @@ struct arch_uprobe {
 	};
 
 	const struct uprobe_xol_ops	*ops;
+	enum uprobe_insn_t		insn_class;
 
 	union {
 		struct {
@@ -53,6 +59,10 @@ struct arch_uprobe {
 			u8	fixups;
 			u8	ilen;
 		} 			defparam;
+		struct {
+			u8	rex_prefix;
+			u8	opc1;
+		}			push;
 	};
 };
 
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index a3755d2..5ace65c 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -640,11 +640,71 @@ static bool check_jmp_cond(struct arch_uprobe *auprobe, struct pt_regs *regs)
 #undef	COND
 #undef	CASE_COND
 
-static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+static unsigned long *get_push_reg_ptr(struct arch_uprobe *auprobe,
+				       struct pt_regs *regs)
 {
-	unsigned long new_ip = regs->ip += auprobe->branch.ilen;
-	unsigned long offs = (long)auprobe->branch.offs;
+#if defined(CONFIG_X86_64)
+	switch (auprobe->push.opc1) {
+	case 0x50:
+		return auprobe->push.rex_prefix ? &regs->r8 : &regs->ax;
+	case 0x51:
+		return auprobe->push.rex_prefix ? &regs->r9 : &regs->cx;
+	case 0x52:
+		return auprobe->push.rex_prefix ? &regs->r10 : &regs->dx;
+	case 0x53:
+		return auprobe->push.rex_prefix ? &regs->r11 : &regs->bx;
+	case 0x54:
+		return auprobe->push.rex_prefix ? &regs->r12 : &regs->sp;
+	case 0x55:
+		return auprobe->push.rex_prefix ? &regs->r13 : &regs->bp;
+	case 0x56:
+		return auprobe->push.rex_prefix ? &regs->r14 : &regs->si;
+	}
+
+	/* opc1 0x57 */
+	return auprobe->push.rex_prefix ? &regs->r15 : &regs->di;
+#else
+	switch (auprobe->push.opc1) {
+	case 0x50:
+		return &regs->ax;
+	case 0x51:
+		return &regs->cx;
+	case 0x52:
+		return &regs->dx;
+	case 0x53:
+		return &regs->bx;
+	case 0x54:
+		return &regs->sp;
+	case 0x55:
+		return &regs->bp;
+	case 0x56:
+		return &regs->si;
+	}
 
+	/* opc1 0x57 */
+	return &regs->di;
+#endif
+}
+
+static bool sstep_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	int reg_width, insn_class = auprobe->insn_class;
+	unsigned long *src_ptr, new_ip, offs, sp;
+
+	if (insn_class == UPROBE_PUSH_INSN) {
+		src_ptr = get_push_reg_ptr(auprobe, regs);
+		reg_width = sizeof_long();
+		sp = regs->sp;
+		if (copy_to_user((void __user *)(sp - reg_width), src_ptr, reg_width))
+			return false;
+
+		regs->sp = sp - reg_width;
+		regs->ip += 1 + (auprobe->push.rex_prefix != 0);
+		return true;
+	}
+
+	new_ip = regs->ip += auprobe->branch.ilen;
+	offs = (long)auprobe->branch.offs;
 	if (branch_is_call(auprobe)) {
 		/*
 		 * If it fails we execute this (mangled, see the comment in
@@ -665,14 +725,18 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	return true;
 }
 
-static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+static int sstep_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	BUG_ON(!branch_is_call(auprobe));
+	BUG_ON(auprobe->insn_class != UPROBE_PUSH_INSN &&
+	       !branch_is_call(auprobe));
 	/*
-	 * We can only get here if branch_emulate_op() failed to push the ret
-	 * address _and_ another thread expanded our stack before the (mangled)
-	 * "call" insn was executed out-of-line. Just restore ->sp and restart.
-	 * We could also restore ->ip and try to call branch_emulate_op() again.
+	 * We can only get here if
+	 * - for push operation, sstep_emulate_op() failed to push the stack, or
+	 * - for branch operation, sstep_emulate_op() failed to push the ret address
+	 *   _and_ another thread expanded our stack before the (mangled)
+	 *   "call" insn was executed out-of-line.
+	 * Just restore ->sp and restart. We could also restore ->ip and try to
+	 * call sstep_emulate_op() again.
 	 */
 	regs->sp += sizeof_long();
 	return -ERESTART;
@@ -698,17 +762,18 @@ static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn)
 		0, insn->immediate.nbytes);
 }
 
-static const struct uprobe_xol_ops branch_xol_ops = {
-	.emulate  = branch_emulate_op,
-	.post_xol = branch_post_xol_op,
+static const struct uprobe_xol_ops sstep_xol_ops = {
+	.emulate  = sstep_emulate_op,
+	.post_xol = sstep_post_xol_op,
 };
 
-/* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */
-static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
+/* Returns -ENOSYS if sstep_xol_ops doesn't handle this insn */
+static int sstep_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 opc1 = OPCODE1(insn);
 	int i;
 
+	auprobe->insn_class = UPROBE_BRANCH_INSN;
 	switch (opc1) {
 	case 0xeb:	/* jmp 8 */
 	case 0xe9:	/* jmp 32 */
@@ -719,6 +784,23 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 		branch_clear_offset(auprobe, insn);
 		break;
 
+	case 0x50 ... 0x57:
+		if (insn->length > 2)
+			return -ENOSYS;
+		if (insn->length == 2) {
+			/* only support rex_prefix 0x41 (x64 only) */
+			if (insn->rex_prefix.nbytes != 1 ||
+			    insn->rex_prefix.bytes[0] != 0x41)
+				return -ENOSYS;
+			auprobe->push.rex_prefix = 0x41;
+		} else {
+			auprobe->push.rex_prefix = 0;
+		}
+
+		auprobe->insn_class = UPROBE_PUSH_INSN;
+		auprobe->push.opc1 = opc1;
+		goto set_ops;
+
 	case 0x0f:
 		if (insn->opcode.nbytes != 2)
 			return -ENOSYS;
@@ -746,7 +828,8 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 	auprobe->branch.ilen = insn->length;
 	auprobe->branch.offs = insn->immediate.value;
 
-	auprobe->ops = &branch_xol_ops;
+set_ops:
+	auprobe->ops = &sstep_xol_ops;
 	return 0;
 }
 
@@ -767,7 +850,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	if (ret)
 		return ret;
 
-	ret = branch_setup_xol_ops(auprobe, &insn);
+	ret = sstep_setup_xol_ops(auprobe, &insn);
 	if (ret != -ENOSYS)
 		return ret;
 
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] uprobes/x86: emulate push insns for uprobe on x86
  2017-11-09  8:01 [PATCH] uprobes/x86: emulate push insns for uprobe on x86 Yonghong Song
@ 2017-11-09 13:44 ` Oleg Nesterov
  2017-11-09 14:04   ` Oleg Nesterov
  2017-11-09 21:53   ` Yonghong Song
  2017-11-09 14:47 ` Oleg Nesterov
  1 sibling, 2 replies; 7+ messages in thread
From: Oleg Nesterov @ 2017-11-09 13:44 UTC (permalink / raw)
  To: Yonghong Song
  Cc: mingo, tglx, peterz, linux-kernel, x86, netdev, ast, kernel-team

On 11/09, Yonghong Song wrote:
>
> This patch extends the emulation to "push <reg>"
> insns. These insns are typical in the beginning
> of the function. For example, bcc
> in https://github.com/iovisor/bcc repo provides
> tools to measure funclantency, detect memleak, etc.
> The tools will place uprobes in the beginning of
> function and possibly uretprobes at the end of function.
> This patch is able to reduce the trap overhead for
> uprobe from 2 to 1.

OK. but to be honest I do not like the implementation, please see below.

> +enum uprobe_insn_t {
> +	UPROBE_BRANCH_INSN	= 0,
> +	UPROBE_PUSH_INSN	= 1,
> +};
> +
>  struct uprobe_xol_ops;
>
>  struct arch_uprobe {
> @@ -42,6 +47,7 @@ struct arch_uprobe {
>  	};
>
>  	const struct uprobe_xol_ops	*ops;
> +	enum uprobe_insn_t		insn_class;

Why?

I'd suggest to leave branch_xol_ops alone and add the new push_xol_ops{},
the code will look much simpler.

The only thing they can share is branch_post_xol_op() which is just

	regs->sp += sizeof_long();
	return -ERESTART;

I think a bit of code duplication would be fine in this case.

And. Do you really need ->post_xol() method to emulate "push"? Why we can't
simply execute it out-of-line if copy_to_user() fails?

branch_post_xol_op() is needed because we can't execute "call" out-of-line,
we need to restart and try again if copy_to_user() fails, but I don not
understand why it is needed to emulate "push".

Oleg.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] uprobes/x86: emulate push insns for uprobe on x86
  2017-11-09 13:44 ` Oleg Nesterov
@ 2017-11-09 14:04   ` Oleg Nesterov
  2017-11-09 21:53   ` Yonghong Song
  1 sibling, 0 replies; 7+ messages in thread
From: Oleg Nesterov @ 2017-11-09 14:04 UTC (permalink / raw)
  To: Yonghong Song
  Cc: mingo, tglx, peterz, linux-kernel, x86, netdev, ast, kernel-team

On 11/09, Oleg Nesterov wrote:
>
> And. Do you really need ->post_xol() method to emulate "push"? Why we can't
> simply execute it out-of-line if copy_to_user() fails?
>
> branch_post_xol_op() is needed because we can't execute "call" out-of-line,
> we need to restart and try again if copy_to_user() fails, but I don not
> understand why it is needed to emulate "push".

If I wasn't clear, please see the comment in branch_clear_offset().

Oleg.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] uprobes/x86: emulate push insns for uprobe on x86
  2017-11-09  8:01 [PATCH] uprobes/x86: emulate push insns for uprobe on x86 Yonghong Song
  2017-11-09 13:44 ` Oleg Nesterov
@ 2017-11-09 14:47 ` Oleg Nesterov
  1 sibling, 0 replies; 7+ messages in thread
From: Oleg Nesterov @ 2017-11-09 14:47 UTC (permalink / raw)
  To: Yonghong Song
  Cc: mingo, tglx, peterz, linux-kernel, x86, netdev, ast, kernel-team

On 11/09, Yonghong Song wrote:
>
> +	if (insn_class == UPROBE_PUSH_INSN) {
> +		src_ptr = get_push_reg_ptr(auprobe, regs);
> +		reg_width = sizeof_long();
> +		sp = regs->sp;
> +		if (copy_to_user((void __user *)(sp - reg_width), src_ptr, reg_width))
> +			return false;
> +
> +		regs->sp = sp - reg_width;
> +		regs->ip += 1 + (auprobe->push.rex_prefix != 0);
> +		return true;

Another nit... You can rename push_ret_address() and use it here

		src_ptr = ...;
		if (push_ret_address(regs, *src_ptr))
			return false;

		regs->ip += ...;
		return true;

and I think get_push_reg_ptr() should just return "unsigned long", not the
pointer.

And again, please make a separate method for this code. Let me repeat, the
main reason for branch_xol_ops/etc is that we simply can not execute these
insns out-of-line, we have to emulate them. "push" differs, the only reason
why we may want to emulate it is optimization.

Oleg.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] uprobes/x86: emulate push insns for uprobe on x86
  2017-11-09 13:44 ` Oleg Nesterov
  2017-11-09 14:04   ` Oleg Nesterov
@ 2017-11-09 21:53   ` Yonghong Song
  1 sibling, 0 replies; 7+ messages in thread
From: Yonghong Song @ 2017-11-09 21:53 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: mingo, tglx, peterz, linux-kernel, x86, netdev, ast, kernel-team



On 11/9/17 5:44 AM, Oleg Nesterov wrote:
> On 11/09, Yonghong Song wrote:
>>
>> This patch extends the emulation to "push <reg>"
>> insns. These insns are typical in the beginning
>> of the function. For example, bcc
>> in https://github.com/iovisor/bcc repo provides
>> tools to measure funclantency, detect memleak, etc.
>> The tools will place uprobes in the beginning of
>> function and possibly uretprobes at the end of function.
>> This patch is able to reduce the trap overhead for
>> uprobe from 2 to 1.
> 
> OK. but to be honest I do not like the implementation, please see below.
> 
>> +enum uprobe_insn_t {
>> +	UPROBE_BRANCH_INSN	= 0,
>> +	UPROBE_PUSH_INSN	= 1,
>> +};
>> +
>>   struct uprobe_xol_ops;
>>
>>   struct arch_uprobe {
>> @@ -42,6 +47,7 @@ struct arch_uprobe {
>>   	};
>>
>>   	const struct uprobe_xol_ops	*ops;
>> +	enum uprobe_insn_t		insn_class;
> 
> Why?
> 
> I'd suggest to leave branch_xol_ops alone and add the new push_xol_ops{},
> the code will look much simpler.
> 
> The only thing they can share is branch_post_xol_op() which is just
> 
> 	regs->sp += sizeof_long();
> 	return -ERESTART;
> 
> I think a bit of code duplication would be fine in this case.

Just prototyped. Agreed, having seperate uprobe_xol_ops for "push" 
emulation is clean and better.

> 
> And. Do you really need ->post_xol() method to emulate "push"? Why we can't
> simply execute it out-of-line if copy_to_user() fails?

Thanks for pointing it out. Agreed, we do not really need post_xol for 
"push". xol execution is just fine.

Will address your other comments as well in the next revision.

> 
> branch_post_xol_op() is needed because we can't execute "call" out-of-line,
> we need to restart and try again if copy_to_user() fails, but I don not
> understand why it is needed to emulate "push".
> 
> Oleg.
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] uprobes/x86: emulate push insns for uprobe on x86
  2017-11-09 23:02 Yonghong Song
@ 2017-11-10 13:06 ` Oleg Nesterov
  0 siblings, 0 replies; 7+ messages in thread
From: Oleg Nesterov @ 2017-11-10 13:06 UTC (permalink / raw)
  To: Yonghong Song
  Cc: mingo, tglx, peterz, linux-kernel, x86, netdev, ast, kernel-team

Yonghong,

The patch looks good to me, but I'll try to read it carefully later.
Just a couple of cosmetic nits for now.

On 11/09, Yonghong Song wrote:
>
> --- a/arch/x86/include/asm/uprobes.h
> +++ b/arch/x86/include/asm/uprobes.h
> @@ -53,6 +53,10 @@ struct arch_uprobe {
>  			u8	fixups;
>  			u8	ilen;
>  		} 			defparam;
> +		struct {
> +			u8	src_offset;	/* to the start of pt_regs */
> +			u8	ilen;
> +		}			push;
>  	};
>  };

I know it is very easy to blame the naming ;) but src_offset doesn't
look nice. How about reg_offset ?

> +static bool push_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
> +{
> +	void *src_ptr = (void *)regs + auprobe->push.src_offset;
> +
> +	if (emulate_push_stack(regs, *(unsigned long *)src_ptr))
> +		return false;

You can declare src_ptr as

	unsigned long *src_ptr = ...;

and avoid the casting below.

> +static int uprobe_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
>  {
>  	u8 opc1 = OPCODE1(insn);
>  	int i;
>
>  	switch (opc1) {
> +	case 0x50 ... 0x57:
> +		return uprobe_setup_push_ops(auprobe, insn, opc1);
> +
>  	case 0xeb:	/* jmp 8 */
>  	case 0xe9:	/* jmp 32 */
>  	case 0x90:	/* prefix* + nop; same as jmp with .offs = 0 */
> @@ -767,7 +863,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
>  	if (ret)
>  		return ret;
>
> -	ret = branch_setup_xol_ops(auprobe, &insn);
> +	ret = uprobe_setup_xol_ops(auprobe, &insn);
>  	if (ret != -ENOSYS)
>  		return ret;

Well... again, this is cosmetic and I won't insist, but to me it would be
more clean to not change/rename branch_setup_xol_ops(). Instead, you can
just add

	ret = uprobe_setup_push_ops(...);
	if (ret != -ENOSYS)
		return ret;

at the start of arch_uprobe_analyze_insn(), right after the similar
branch_setup_xol_ops() call.

Btw... please add v2 into the subject when you send the new version.

Oleg.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] uprobes/x86: emulate push insns for uprobe on x86
@ 2017-11-09 23:02 Yonghong Song
  2017-11-10 13:06 ` Oleg Nesterov
  0 siblings, 1 reply; 7+ messages in thread
From: Yonghong Song @ 2017-11-09 23:02 UTC (permalink / raw)
  To: mingo, tglx, oleg, peterz, linux-kernel, x86, netdev, ast; +Cc: kernel-team

Uprobe is a tracing mechanism for userspace programs.
Typical uprobe will incur overhead of two traps.
First trap is caused by replaced trap insn, and
the second trap is to execute the original displaced
insn in user space.

To reduce the overhead, kernel provides hooks
for architectures to emulate the original insn
and skip the second trap. In x86, emulation
is done for certain branch insns.

This patch extends the emulation to "push <reg>"
insns. These insns are typical in the beginning
of the function. For example, bcc
in https://github.com/iovisor/bcc repo provides
tools to measure funclantency, detect memleak, etc.
The tools will place uprobes in the beginning of
function and possibly uretprobes at the end of function.
This patch is able to reduce the trap overhead for
uprobe from 2 to 1.

Without this patch, uretprobe will typically incur
three traps. With this patch, if the function starts
with "push" insn, the number of traps can be
reduced from 3 to 2.

An experiment was conducted on two local VMs,
fedora 26 64-bit VM and 32-bit VM, both 4 processors
and 4GB memory, booted with latest tip repo (and this patch).
The host is MacBook with intel i7 processor.

The test program looks like
  #include <stdio.h>
  #include <stdlib.h>
  #include <time.h>
  #include <sys/time.h>

  static void test() __attribute__((noinline));
  void test() {}
  int main() {
    struct timeval start, end;

    gettimeofday(&start, NULL);
    for (int i = 0; i < 1000000; i++) {
      test();
    }
    gettimeofday(&end, NULL);

    printf("%ld\n", ((end.tv_sec * 1000000 + end.tv_usec)
                     - (start.tv_sec * 1000000 + start.tv_usec)));
    return 0;
  }

The program is compiled without optimization, and
the first insn for function "test" is "push %rbp".
The host is relatively idle.

Before the test run, the uprobe is inserted as below for uprobe:
  echo 'p <binary>:<test_func_offset>' > /sys/kernel/debug/tracing/uprobe_events
  echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable
and for uretprobe:
  echo 'r <binary>:<test_func_offset>' > /sys/kernel/debug/tracing/uprobe_events
  echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable

Unit: microsecond(usec) per loop iteration

x86_64          W/ this patch   W/O this patch
uprobe          1.55            3.1
uretprobe       2.0             3.6

x86_32          W/ this patch   W/O this patch
uprobe          1.41            3.5
uretprobe       1.75            4.0

You can see that this patch significantly reduced the overhead,
50% for uprobe and 44% for uretprobe on x86_64, and even more
on x86_32.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 arch/x86/include/asm/uprobes.h |   4 ++
 arch/x86/kernel/uprobes.c      | 110 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 107 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 74f4c2f..a90090c 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -53,6 +53,10 @@ struct arch_uprobe {
 			u8	fixups;
 			u8	ilen;
 		} 			defparam;
+		struct {
+			u8	src_offset;	/* to the start of pt_regs */
+			u8	ilen;
+		}			push;
 	};
 };
 
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index a3755d2..1ee8b59 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -528,11 +528,11 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	return 0;
 }
 
-static int push_ret_address(struct pt_regs *regs, unsigned long ip)
+static int emulate_push_stack(struct pt_regs *regs, unsigned long val)
 {
 	unsigned long new_sp = regs->sp - sizeof_long();
 
-	if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
+	if (copy_to_user((void __user *)new_sp, &val, sizeof_long()))
 		return -EFAULT;
 
 	regs->sp = new_sp;
@@ -566,7 +566,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
 		regs->ip += correction;
 	} else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
 		regs->sp += sizeof_long(); /* Pop incorrect return address */
-		if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen))
+		if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen))
 			return -ERESTART;
 	}
 	/* popf; tell the caller to not touch TF */
@@ -655,7 +655,7 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 		 *
 		 * But there is corner case, see the comment in ->post_xol().
 		 */
-		if (push_ret_address(regs, new_ip))
+		if (emulate_push_stack(regs, new_ip))
 			return false;
 	} else if (!check_jmp_cond(auprobe, regs)) {
 		offs = 0;
@@ -665,6 +665,16 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	return true;
 }
 
+static bool push_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	void *src_ptr = (void *)regs + auprobe->push.src_offset;
+
+	if (emulate_push_stack(regs, *(unsigned long *)src_ptr))
+		return false;
+	regs->ip += auprobe->push.ilen;
+	return true;
+}
+
 static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	BUG_ON(!branch_is_call(auprobe));
@@ -703,13 +713,99 @@ static const struct uprobe_xol_ops branch_xol_ops = {
 	.post_xol = branch_post_xol_op,
 };
 
-/* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */
-static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
+static const struct uprobe_xol_ops push_xol_ops = {
+	.emulate  = push_emulate_op,
+};
+
+static int uprobe_setup_push_ops(struct arch_uprobe *auprobe, struct insn *insn,
+				 u8 opc1)
+{
+	u8 src_offset = 0;
+
+	if (insn->length > 2)
+		return -ENOSYS;
+	if (insn->length == 2) {
+		/* only support rex_prefix 0x41 (x64 only) */
+#ifdef CONFIG_X86_64
+		if (insn->rex_prefix.nbytes != 1 ||
+		    insn->rex_prefix.bytes[0] != 0x41)
+			return -ENOSYS;
+
+		auprobe->push.ilen = 2;
+		switch (opc1) {
+		case 0x50:
+			src_offset = offsetof(struct pt_regs, r8);
+			break;
+		case 0x51:
+			src_offset = offsetof(struct pt_regs, r9);
+			break;
+		case 0x52:
+			src_offset = offsetof(struct pt_regs, r10);
+			break;
+		case 0x53:
+			src_offset = offsetof(struct pt_regs, r11);
+			break;
+		case 0x54:
+			src_offset = offsetof(struct pt_regs, r12);
+			break;
+		case 0x55:
+			src_offset = offsetof(struct pt_regs, r13);
+			break;
+		case 0x56:
+			src_offset = offsetof(struct pt_regs, r14);
+			break;
+		case 0x57:
+			src_offset = offsetof(struct pt_regs, r15);
+			break;
+		}
+#else
+		return -ENOSYS;
+#endif
+	} else {
+		auprobe->push.ilen = 1;
+		switch (opc1) {
+		case 0x50:
+			src_offset = offsetof(struct pt_regs, ax);
+			break;
+		case 0x51:
+			src_offset = offsetof(struct pt_regs, cx);
+			break;
+		case 0x52:
+			src_offset = offsetof(struct pt_regs, dx);
+			break;
+		case 0x53:
+			src_offset = offsetof(struct pt_regs, bx);
+			break;
+		case 0x54:
+			src_offset = offsetof(struct pt_regs, sp);
+			break;
+		case 0x55:
+			src_offset = offsetof(struct pt_regs, bp);
+			break;
+		case 0x56:
+			src_offset = offsetof(struct pt_regs, si);
+			break;
+		case 0x57:
+			src_offset = offsetof(struct pt_regs, di);
+			break;
+		}
+	}
+
+	auprobe->push.src_offset = src_offset;
+	auprobe->ops = &push_xol_ops;
+	return 0;
+}
+
+/* Returns -ENOSYS if {branch|push}_xol_ops doesn't handle this insn */
+static int uprobe_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 opc1 = OPCODE1(insn);
 	int i;
 
 	switch (opc1) {
+	case 0x50 ... 0x57:
+		return uprobe_setup_push_ops(auprobe, insn, opc1);
+
 	case 0xeb:	/* jmp 8 */
 	case 0xe9:	/* jmp 32 */
 	case 0x90:	/* prefix* + nop; same as jmp with .offs = 0 */
@@ -767,7 +863,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	if (ret)
 		return ret;
 
-	ret = branch_setup_xol_ops(auprobe, &insn);
+	ret = uprobe_setup_xol_ops(auprobe, &insn);
 	if (ret != -ENOSYS)
 		return ret;
 
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-11-10 13:06 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-09  8:01 [PATCH] uprobes/x86: emulate push insns for uprobe on x86 Yonghong Song
2017-11-09 13:44 ` Oleg Nesterov
2017-11-09 14:04   ` Oleg Nesterov
2017-11-09 21:53   ` Yonghong Song
2017-11-09 14:47 ` Oleg Nesterov
2017-11-09 23:02 Yonghong Song
2017-11-10 13:06 ` Oleg Nesterov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).