All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr
@ 2019-04-13  1:59 Nicholas Piggin
  2019-04-15  9:42 ` Naveen N. Rao
  2019-05-13  3:25 ` Michael Ellerman
  0 siblings, 2 replies; 11+ messages in thread
From: Nicholas Piggin @ 2019-04-13  1:59 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

The new mprofile-kernel mcount sequence is

  mflr	r0
  bl	_mcount

Dynamic ftrace patches the branch instruction with a noop, but leaves
the mflr. mflr is executed by the branch unit that can only execute one
per cycle on POWER9 and shared with branches, so it would be nice to
avoid it where possible.

This patch is a hacky proof of concept to nop out the mflr. Can we do
this or are there races or other issues with it?
---
 arch/powerpc/kernel/trace/ftrace.c | 77 +++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 52ee24fd353f..ecc75baef23e 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -172,6 +172,19 @@ __ftrace_make_nop(struct module *mod,
 		pr_err("Unexpected instruction %08x around bl _mcount\n", op);
 		return -EINVAL;
 	}
+
+	if (patch_instruction((unsigned int *)ip, pop)) {
+		pr_err("Patching NOP failed.\n");
+		return -EPERM;
+	}
+
+	if (op == PPC_INST_MFLR) {
+		if (patch_instruction((unsigned int *)(ip - 4), pop)) {
+			pr_err("Patching NOP failed.\n");
+			return -EPERM;
+		}
+	}
+
 #else
 	/*
 	 * Our original call site looks like:
@@ -202,13 +215,14 @@ __ftrace_make_nop(struct module *mod,
 		pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op);
 		return -EINVAL;
 	}
-#endif /* CONFIG_MPROFILE_KERNEL */
 
 	if (patch_instruction((unsigned int *)ip, pop)) {
 		pr_err("Patching NOP failed.\n");
 		return -EPERM;
 	}
 
+#endif /* CONFIG_MPROFILE_KERNEL */
+
 	return 0;
 }
 
@@ -421,6 +435,20 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
 		return -EPERM;
 	}
 
+#ifdef CONFIG_MPROFILE_KERNEL
+	if (probe_kernel_read(&op, (void *)(ip - 4), 4)) {
+		pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+		return -EFAULT;
+	}
+
+	if (op == PPC_INST_MFLR) {
+		if (patch_instruction((unsigned int *)(ip - 4), PPC_INST_NOP)) {
+			pr_err("Patching NOP failed.\n");
+			return -EPERM;
+		}
+	}
+#endif
+
 	return 0;
 }
 
@@ -437,9 +465,20 @@ int ftrace_make_nop(struct module *mod,
 	 */
 	if (test_24bit_addr(ip, addr)) {
 		/* within range */
+		int rc;
+
 		old = ftrace_call_replace(ip, addr, 1);
 		new = PPC_INST_NOP;
-		return ftrace_modify_code(ip, old, new);
+		rc = ftrace_modify_code(ip, old, new);
+		if (rc)
+			return rc;
+#ifdef CONFIG_MPROFILE_KERNEL
+		old = PPC_INST_MFLR;
+		new = PPC_INST_NOP;
+		ftrace_modify_code(ip - 4, old, new);
+		/* old mprofile kernel will error because no mflr */
+#endif
+		return rc;
 	} else if (core_kernel_text(ip))
 		return __ftrace_make_nop_kernel(rec, addr);
 
@@ -562,6 +601,20 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		return -EINVAL;
 	}
 
+#ifdef CONFIG_MPROFILE_KERNEL
+	if (probe_kernel_read(op, (ip - 4), 4)) {
+		pr_err("Fetching instruction at %lx failed.\n", (unsigned long)(ip - 4));
+		return -EFAULT;
+	}
+
+	if (op[0] == PPC_INST_NOP) {
+		if (patch_instruction((ip - 4), PPC_INST_MFLR)) {
+			pr_err("Patching mflr failed.\n");
+			return -EINVAL;
+		}
+	}
+#endif
+
 	if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
 		pr_err("REL24 out of range!\n");
 		return -EINVAL;
@@ -650,6 +703,20 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
 		return -EINVAL;
 	}
 
+#ifdef CONFIG_MPROFILE_KERNEL
+	if (probe_kernel_read(&op, (ip - 4), 4)) {
+		pr_err("Fetching instruction at %lx failed.\n", (unsigned long)(ip - 4));
+		return -EFAULT;
+	}
+
+	if (op == PPC_INST_NOP) {
+		if (patch_instruction((ip - 4), PPC_INST_MFLR)) {
+			pr_err("Patching mflr failed.\n");
+			return -EINVAL;
+		}
+	}
+#endif
+
 	if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
 		pr_err("Error patching branch to ftrace tramp!\n");
 		return -EINVAL;
@@ -670,6 +737,12 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	 */
 	if (test_24bit_addr(ip, addr)) {
 		/* within range */
+#ifdef CONFIG_MPROFILE_KERNEL
+		old = PPC_INST_NOP;
+		new = PPC_INST_MFLR;
+		ftrace_modify_code(ip - 4, old, new);
+		/* old mprofile-kernel sequence will error because no nop */
+#endif
 		old = PPC_INST_NOP;
 		new = ftrace_call_replace(ip, addr, 1);
 		return ftrace_modify_code(ip, old, new);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr
  2019-04-13  1:59 [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr Nicholas Piggin
@ 2019-04-15  9:42 ` Naveen N. Rao
  2019-05-13  3:25 ` Michael Ellerman
  1 sibling, 0 replies; 11+ messages in thread
From: Naveen N. Rao @ 2019-04-15  9:42 UTC (permalink / raw)
  To: linuxppc-dev, Nicholas Piggin

Hi Nick,

Nicholas Piggin wrote:
> The new mprofile-kernel mcount sequence is
> 
>   mflr	r0
>   bl	_mcount
> 
> Dynamic ftrace patches the branch instruction with a noop, but leaves
> the mflr. mflr is executed by the branch unit that can only execute one
> per cycle on POWER9 and shared with branches, so it would be nice to
> avoid it where possible.
> 
> This patch is a hacky proof of concept to nop out the mflr. Can we do
> this or are there races or other issues with it?

Thanks for implementing this. I don't see a problem with this.

- Naveen



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr
  2019-04-13  1:59 [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr Nicholas Piggin
  2019-04-15  9:42 ` Naveen N. Rao
@ 2019-05-13  3:25 ` Michael Ellerman
  2019-05-13  6:47   ` Naveen N. Rao
  1 sibling, 1 reply; 11+ messages in thread
From: Michael Ellerman @ 2019-05-13  3:25 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev; +Cc: Nicholas Piggin

Nicholas Piggin <npiggin@gmail.com> writes:
> The new mprofile-kernel mcount sequence is
>
>   mflr	r0
>   bl	_mcount
>
> Dynamic ftrace patches the branch instruction with a noop, but leaves
> the mflr. mflr is executed by the branch unit that can only execute one
> per cycle on POWER9 and shared with branches, so it would be nice to
> avoid it where possible.
>
> This patch is a hacky proof of concept to nop out the mflr. Can we do
> this or are there races or other issues with it?

There's a race, isn't there?

We have a function foo which currently has tracing disabled, so the mflr
and bl are nop'ed out.

  CPU 0			CPU 1
  ==================================
  bl foo
  nop (ie. not mflr)
  -> interrupt
  something else	enable tracing for foo
  ...			patch mflr and branch
  <- rfi
  bl _mcount

So we end up in _mcount() but with r0 not populated.

Or else what am I missing?

cheers


> diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
> index 52ee24fd353f..ecc75baef23e 100644
> --- a/arch/powerpc/kernel/trace/ftrace.c
> +++ b/arch/powerpc/kernel/trace/ftrace.c
> @@ -172,6 +172,19 @@ __ftrace_make_nop(struct module *mod,
>  		pr_err("Unexpected instruction %08x around bl _mcount\n", op);
>  		return -EINVAL;
>  	}
> +
> +	if (patch_instruction((unsigned int *)ip, pop)) {
> +		pr_err("Patching NOP failed.\n");
> +		return -EPERM;
> +	}
> +
> +	if (op == PPC_INST_MFLR) {
> +		if (patch_instruction((unsigned int *)(ip - 4), pop)) {
> +			pr_err("Patching NOP failed.\n");
> +			return -EPERM;
> +		}
> +	}
> +
>  #else
>  	/*
>  	 * Our original call site looks like:
> @@ -202,13 +215,14 @@ __ftrace_make_nop(struct module *mod,
>  		pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op);
>  		return -EINVAL;
>  	}
> -#endif /* CONFIG_MPROFILE_KERNEL */
>  
>  	if (patch_instruction((unsigned int *)ip, pop)) {
>  		pr_err("Patching NOP failed.\n");
>  		return -EPERM;
>  	}
>  
> +#endif /* CONFIG_MPROFILE_KERNEL */
> +
>  	return 0;
>  }
>  
> @@ -421,6 +435,20 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
>  		return -EPERM;
>  	}
>  
> +#ifdef CONFIG_MPROFILE_KERNEL
> +	if (probe_kernel_read(&op, (void *)(ip - 4), 4)) {
> +		pr_err("Fetching instruction at %lx failed.\n", ip - 4);
> +		return -EFAULT;
> +	}
> +
> +	if (op == PPC_INST_MFLR) {
> +		if (patch_instruction((unsigned int *)(ip - 4), PPC_INST_NOP)) {
> +			pr_err("Patching NOP failed.\n");
> +			return -EPERM;
> +		}
> +	}
> +#endif
> +
>  	return 0;
>  }
>  
> @@ -437,9 +465,20 @@ int ftrace_make_nop(struct module *mod,
>  	 */
>  	if (test_24bit_addr(ip, addr)) {
>  		/* within range */
> +		int rc;
> +
>  		old = ftrace_call_replace(ip, addr, 1);
>  		new = PPC_INST_NOP;
> -		return ftrace_modify_code(ip, old, new);
> +		rc = ftrace_modify_code(ip, old, new);
> +		if (rc)
> +			return rc;
> +#ifdef CONFIG_MPROFILE_KERNEL
> +		old = PPC_INST_MFLR;
> +		new = PPC_INST_NOP;
> +		ftrace_modify_code(ip - 4, old, new);
> +		/* old mprofile kernel will error because no mflr */
> +#endif
> +		return rc;
>  	} else if (core_kernel_text(ip))
>  		return __ftrace_make_nop_kernel(rec, addr);
>  
> @@ -562,6 +601,20 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>  		return -EINVAL;
>  	}
>  
> +#ifdef CONFIG_MPROFILE_KERNEL
> +	if (probe_kernel_read(op, (ip - 4), 4)) {
> +		pr_err("Fetching instruction at %lx failed.\n", (unsigned long)(ip - 4));
> +		return -EFAULT;
> +	}
> +
> +	if (op[0] == PPC_INST_NOP) {
> +		if (patch_instruction((ip - 4), PPC_INST_MFLR)) {
> +			pr_err("Patching mflr failed.\n");
> +			return -EINVAL;
> +		}
> +	}
> +#endif
> +
>  	if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
>  		pr_err("REL24 out of range!\n");
>  		return -EINVAL;
> @@ -650,6 +703,20 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
>  		return -EINVAL;
>  	}
>  
> +#ifdef CONFIG_MPROFILE_KERNEL
> +	if (probe_kernel_read(&op, (ip - 4), 4)) {
> +		pr_err("Fetching instruction at %lx failed.\n", (unsigned long)(ip - 4));
> +		return -EFAULT;
> +	}
> +
> +	if (op == PPC_INST_NOP) {
> +		if (patch_instruction((ip - 4), PPC_INST_MFLR)) {
> +			pr_err("Patching mflr failed.\n");
> +			return -EINVAL;
> +		}
> +	}
> +#endif
> +
>  	if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
>  		pr_err("Error patching branch to ftrace tramp!\n");
>  		return -EINVAL;
> @@ -670,6 +737,12 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>  	 */
>  	if (test_24bit_addr(ip, addr)) {
>  		/* within range */
> +#ifdef CONFIG_MPROFILE_KERNEL
> +		old = PPC_INST_NOP;
> +		new = PPC_INST_MFLR;
> +		ftrace_modify_code(ip - 4, old, new);
> +		/* old mprofile-kernel sequence will error because no nop */
> +#endif
>  		old = PPC_INST_NOP;
>  		new = ftrace_call_replace(ip, addr, 1);
>  		return ftrace_modify_code(ip, old, new);
> -- 
> 2.20.1

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr
  2019-05-13  3:25 ` Michael Ellerman
@ 2019-05-13  6:47   ` Naveen N. Rao
  2019-05-13 11:34     ` Michael Ellerman
  0 siblings, 1 reply; 11+ messages in thread
From: Naveen N. Rao @ 2019-05-13  6:47 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman, Nicholas Piggin

Michael Ellerman wrote:
> Nicholas Piggin <npiggin@gmail.com> writes:
>> The new mprofile-kernel mcount sequence is
>>
>>   mflr	r0
>>   bl	_mcount
>>
>> Dynamic ftrace patches the branch instruction with a noop, but leaves
>> the mflr. mflr is executed by the branch unit that can only execute one
>> per cycle on POWER9 and shared with branches, so it would be nice to
>> avoid it where possible.
>>
>> This patch is a hacky proof of concept to nop out the mflr. Can we do
>> this or are there races or other issues with it?
> 
> There's a race, isn't there?
> 
> We have a function foo which currently has tracing disabled, so the mflr
> and bl are nop'ed out.
> 
>   CPU 0			CPU 1
>   ==================================
>   bl foo
>   nop (ie. not mflr)
>   -> interrupt
>   something else	enable tracing for foo
>   ...			patch mflr and branch
>   <- rfi
>   bl _mcount
> 
> So we end up in _mcount() but with r0 not populated.

Good catch! Looks like we need to patch the mflr with a "b +8" similar 
to what we do in __ftrace_make_nop().


- Naveen



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr
  2019-05-13  6:47   ` Naveen N. Rao
@ 2019-05-13 11:34     ` Michael Ellerman
  2019-05-14  8:32       ` Naveen N. Rao
  0 siblings, 1 reply; 11+ messages in thread
From: Michael Ellerman @ 2019-05-13 11:34 UTC (permalink / raw)
  To: Naveen N. Rao, linuxppc-dev, Nicholas Piggin

"Naveen N. Rao" <naveen.n.rao@linux.ibm.com> writes:
> Michael Ellerman wrote:
>> Nicholas Piggin <npiggin@gmail.com> writes:
>>> The new mprofile-kernel mcount sequence is
>>>
>>>   mflr	r0
>>>   bl	_mcount
>>>
>>> Dynamic ftrace patches the branch instruction with a noop, but leaves
>>> the mflr. mflr is executed by the branch unit that can only execute one
>>> per cycle on POWER9 and shared with branches, so it would be nice to
>>> avoid it where possible.
>>>
>>> This patch is a hacky proof of concept to nop out the mflr. Can we do
>>> this or are there races or other issues with it?
>> 
>> There's a race, isn't there?
>> 
>> We have a function foo which currently has tracing disabled, so the mflr
>> and bl are nop'ed out.
>> 
>>   CPU 0			CPU 1
>>   ==================================
>>   bl foo
>>   nop (ie. not mflr)
>>   -> interrupt
>>   something else	enable tracing for foo
>>   ...			patch mflr and branch
>>   <- rfi
>>   bl _mcount
>> 
>> So we end up in _mcount() but with r0 not populated.
>
> Good catch! Looks like we need to patch the mflr with a "b +8" similar 
> to what we do in __ftrace_make_nop().

Would that actually make it any faster though? Nick?

cheers

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr
  2019-05-13 11:34     ` Michael Ellerman
@ 2019-05-14  8:32       ` Naveen N. Rao
  2019-05-15 12:20         ` Michael Ellerman
  2019-05-16  5:49         ` Nicholas Piggin
  0 siblings, 2 replies; 11+ messages in thread
From: Naveen N. Rao @ 2019-05-14  8:32 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman, Nicholas Piggin

Michael Ellerman wrote:
> "Naveen N. Rao" <naveen.n.rao@linux.ibm.com> writes:
>> Michael Ellerman wrote:
>>> Nicholas Piggin <npiggin@gmail.com> writes:
>>>> The new mprofile-kernel mcount sequence is
>>>>
>>>>   mflr	r0
>>>>   bl	_mcount
>>>>
>>>> Dynamic ftrace patches the branch instruction with a noop, but leaves
>>>> the mflr. mflr is executed by the branch unit that can only execute one
>>>> per cycle on POWER9 and shared with branches, so it would be nice to
>>>> avoid it where possible.
>>>>
>>>> This patch is a hacky proof of concept to nop out the mflr. Can we do
>>>> this or are there races or other issues with it?
>>> 
>>> There's a race, isn't there?
>>> 
>>> We have a function foo which currently has tracing disabled, so the mflr
>>> and bl are nop'ed out.
>>> 
>>>   CPU 0			CPU 1
>>>   ==================================
>>>   bl foo
>>>   nop (ie. not mflr)
>>>   -> interrupt
>>>   something else	enable tracing for foo
>>>   ...			patch mflr and branch
>>>   <- rfi
>>>   bl _mcount
>>> 
>>> So we end up in _mcount() but with r0 not populated.
>>
>> Good catch! Looks like we need to patch the mflr with a "b +8" similar 
>> to what we do in __ftrace_make_nop().
> 
> Would that actually make it any faster though? Nick?

Ok, how about doing this as a 2-step process?
1. patch 'mflr r0' with a 'b +8'
   synchronize_rcu_tasks()
2. convert 'b +8' to a 'nop'

- Naveen



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr
  2019-05-14  8:32       ` Naveen N. Rao
@ 2019-05-15 12:20         ` Michael Ellerman
  2019-05-16  5:49         ` Nicholas Piggin
  1 sibling, 0 replies; 11+ messages in thread
From: Michael Ellerman @ 2019-05-15 12:20 UTC (permalink / raw)
  To: Naveen N. Rao, linuxppc-dev, Nicholas Piggin

"Naveen N. Rao" <naveen.n.rao@linux.ibm.com> writes:
> Michael Ellerman wrote:
>> "Naveen N. Rao" <naveen.n.rao@linux.ibm.com> writes:
>>> Michael Ellerman wrote:
>>>> Nicholas Piggin <npiggin@gmail.com> writes:
>>>>> The new mprofile-kernel mcount sequence is
>>>>>
>>>>>   mflr	r0
>>>>>   bl	_mcount
>>>>>
>>>>> Dynamic ftrace patches the branch instruction with a noop, but leaves
>>>>> the mflr. mflr is executed by the branch unit that can only execute one
>>>>> per cycle on POWER9 and shared with branches, so it would be nice to
>>>>> avoid it where possible.
>>>>>
>>>>> This patch is a hacky proof of concept to nop out the mflr. Can we do
>>>>> this or are there races or other issues with it?
>>>> 
>>>> There's a race, isn't there?
>>>> 
>>>> We have a function foo which currently has tracing disabled, so the mflr
>>>> and bl are nop'ed out.
>>>> 
>>>>   CPU 0			CPU 1
>>>>   ==================================
>>>>   bl foo
>>>>   nop (ie. not mflr)
>>>>   -> interrupt
>>>>   something else	enable tracing for foo
>>>>   ...			patch mflr and branch
>>>>   <- rfi
>>>>   bl _mcount
>>>> 
>>>> So we end up in _mcount() but with r0 not populated.
>>>
>>> Good catch! Looks like we need to patch the mflr with a "b +8" similar 
>>> to what we do in __ftrace_make_nop().
>> 
>> Would that actually make it any faster though? Nick?
>
> Ok, how about doing this as a 2-step process?
> 1. patch 'mflr r0' with a 'b +8'
>    synchronize_rcu_tasks()
> 2. convert 'b +8' to a 'nop'

I think that would work, if I understand synchronize_rcu_tasks().

I worry that it will make the enable/disable expensive. But could be
worth trying.

cheers

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr
  2019-05-14  8:32       ` Naveen N. Rao
  2019-05-15 12:20         ` Michael Ellerman
@ 2019-05-16  5:49         ` Nicholas Piggin
  2019-05-16 18:22           ` Naveen N. Rao
  1 sibling, 1 reply; 11+ messages in thread
From: Nicholas Piggin @ 2019-05-16  5:49 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman, Naveen N. Rao

Naveen N. Rao's on May 14, 2019 6:32 pm:
> Michael Ellerman wrote:
>> "Naveen N. Rao" <naveen.n.rao@linux.ibm.com> writes:
>>> Michael Ellerman wrote:
>>>> Nicholas Piggin <npiggin@gmail.com> writes:
>>>>> The new mprofile-kernel mcount sequence is
>>>>>
>>>>>   mflr	r0
>>>>>   bl	_mcount
>>>>>
>>>>> Dynamic ftrace patches the branch instruction with a noop, but leaves
>>>>> the mflr. mflr is executed by the branch unit that can only execute one
>>>>> per cycle on POWER9 and shared with branches, so it would be nice to
>>>>> avoid it where possible.
>>>>>
>>>>> This patch is a hacky proof of concept to nop out the mflr. Can we do
>>>>> this or are there races or other issues with it?
>>>> 
>>>> There's a race, isn't there?
>>>> 
>>>> We have a function foo which currently has tracing disabled, so the mflr
>>>> and bl are nop'ed out.
>>>> 
>>>>   CPU 0			CPU 1
>>>>   ==================================
>>>>   bl foo
>>>>   nop (ie. not mflr)
>>>>   -> interrupt
>>>>   something else	enable tracing for foo
>>>>   ...			patch mflr and branch
>>>>   <- rfi
>>>>   bl _mcount
>>>> 
>>>> So we end up in _mcount() but with r0 not populated.
>>>
>>> Good catch! Looks like we need to patch the mflr with a "b +8" similar 
>>> to what we do in __ftrace_make_nop().
>> 
>> Would that actually make it any faster though? Nick?
> 
> Ok, how about doing this as a 2-step process?
> 1. patch 'mflr r0' with a 'b +8'
>    synchronize_rcu_tasks()
> 2. convert 'b +8' to a 'nop'

Good idea. Well the mflr r0 is harmless, so you can leave that in.
You just need to ensure it's not removed before the bl is. So nop
the bl _mcount, then synchronize_rcu_tasks(), then nop the mflr?

Thanks,
Nick


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr
  2019-05-16  5:49         ` Nicholas Piggin
@ 2019-05-16 18:22           ` Naveen N. Rao
  2019-05-17  9:12             ` Nicholas Piggin
  0 siblings, 1 reply; 11+ messages in thread
From: Naveen N. Rao @ 2019-05-16 18:22 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman, Nicholas Piggin

Nicholas Piggin wrote:
> Naveen N. Rao's on May 14, 2019 6:32 pm:
>> Michael Ellerman wrote:
>>> "Naveen N. Rao" <naveen.n.rao@linux.ibm.com> writes:
>>>> Michael Ellerman wrote:
>>>>> Nicholas Piggin <npiggin@gmail.com> writes:
>>>>>> The new mprofile-kernel mcount sequence is
>>>>>>
>>>>>>   mflr	r0
>>>>>>   bl	_mcount
>>>>>>
>>>>>> Dynamic ftrace patches the branch instruction with a noop, but leaves
>>>>>> the mflr. mflr is executed by the branch unit that can only execute one
>>>>>> per cycle on POWER9 and shared with branches, so it would be nice to
>>>>>> avoid it where possible.
>>>>>>
>>>>>> This patch is a hacky proof of concept to nop out the mflr. Can we do
>>>>>> this or are there races or other issues with it?
>>>>> 
>>>>> There's a race, isn't there?
>>>>> 
>>>>> We have a function foo which currently has tracing disabled, so the mflr
>>>>> and bl are nop'ed out.
>>>>> 
>>>>>   CPU 0			CPU 1
>>>>>   ==================================
>>>>>   bl foo
>>>>>   nop (ie. not mflr)
>>>>>   -> interrupt
>>>>>   something else	enable tracing for foo
>>>>>   ...			patch mflr and branch
>>>>>   <- rfi
>>>>>   bl _mcount
>>>>> 
>>>>> So we end up in _mcount() but with r0 not populated.
>>>>
>>>> Good catch! Looks like we need to patch the mflr with a "b +8" similar 
>>>> to what we do in __ftrace_make_nop().
>>> 
>>> Would that actually make it any faster though? Nick?
>> 
>> Ok, how about doing this as a 2-step process?
>> 1. patch 'mflr r0' with a 'b +8'
>>    synchronize_rcu_tasks()
>> 2. convert 'b +8' to a 'nop'
> 
> Good idea. Well the mflr r0 is harmless, so you can leave that in.
> You just need to ensure it's not removed before the bl is. So nop
> the bl _mcount, then synchronize_rcu_tasks(), then nop the mflr?

The problem actually seems to be when we try to patch in the branch to 
_mcount(), rather than when we are patching in the nop instructions 
(i.e., the race is when we try to enable the function tracer, rather 
than while disabling it).

When we disable ftrace, we only need to ensure we patch out the branch 
to _mcount() before patching out the preceding 'mflr r0'. I don't think 
we need a synchronize_rcu_tasks() in that case.

While enabling ftrace, we will first need to patch the preceding 'mflr 
r0' (which would now be a 'nop') with 'b +8', then use 
synchronize_rcu_tasks() and finally patch in 'bl _mcount()' followed by 
'mflr r0'.

I think that's what you meant, just that my reference to patching 'mflr 
r0' with a 'b +8' should have called out that the mflr would have been 
nop'ed out.

- Naveen


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr
  2019-05-16 18:22           ` Naveen N. Rao
@ 2019-05-17  9:12             ` Nicholas Piggin
  2019-05-17 17:25               ` Naveen N. Rao
  0 siblings, 1 reply; 11+ messages in thread
From: Nicholas Piggin @ 2019-05-17  9:12 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman, Naveen N. Rao

Naveen N. Rao's on May 17, 2019 4:22 am:
> Nicholas Piggin wrote:
>> Naveen N. Rao's on May 14, 2019 6:32 pm:
>>> Michael Ellerman wrote:
>>>> "Naveen N. Rao" <naveen.n.rao@linux.ibm.com> writes:
>>>>> Michael Ellerman wrote:
>>>>>> Nicholas Piggin <npiggin@gmail.com> writes:
>>>>>>> The new mprofile-kernel mcount sequence is
>>>>>>>
>>>>>>>   mflr	r0
>>>>>>>   bl	_mcount
>>>>>>>
>>>>>>> Dynamic ftrace patches the branch instruction with a noop, but leaves
>>>>>>> the mflr. mflr is executed by the branch unit that can only execute one
>>>>>>> per cycle on POWER9 and shared with branches, so it would be nice to
>>>>>>> avoid it where possible.
>>>>>>>
>>>>>>> This patch is a hacky proof of concept to nop out the mflr. Can we do
>>>>>>> this or are there races or other issues with it?
>>>>>> 
>>>>>> There's a race, isn't there?
>>>>>> 
>>>>>> We have a function foo which currently has tracing disabled, so the mflr
>>>>>> and bl are nop'ed out.
>>>>>> 
>>>>>>   CPU 0			CPU 1
>>>>>>   ==================================
>>>>>>   bl foo
>>>>>>   nop (ie. not mflr)
>>>>>>   -> interrupt
>>>>>>   something else	enable tracing for foo
>>>>>>   ...			patch mflr and branch
>>>>>>   <- rfi
>>>>>>   bl _mcount
>>>>>> 
>>>>>> So we end up in _mcount() but with r0 not populated.
>>>>>
>>>>> Good catch! Looks like we need to patch the mflr with a "b +8" similar 
>>>>> to what we do in __ftrace_make_nop().
>>>> 
>>>> Would that actually make it any faster though? Nick?
>>> 
>>> Ok, how about doing this as a 2-step process?
>>> 1. patch 'mflr r0' with a 'b +8'
>>>    synchronize_rcu_tasks()
>>> 2. convert 'b +8' to a 'nop'
>> 
>> Good idea. Well the mflr r0 is harmless, so you can leave that in.
>> You just need to ensure it's not removed before the bl is. So nop
>> the bl _mcount, then synchronize_rcu_tasks(), then nop the mflr?
> 
> The problem actually seems to be when we try to patch in the branch to 
> _mcount(), rather than when we are patching in the nop instructions 
> (i.e., the race is when we try to enable the function tracer, rather 
> than while disabling it).
> 
> When we disable ftrace, we only need to ensure we patch out the branch 
> to _mcount() before patching out the preceding 'mflr r0'. I don't think 
> we need a synchronize_rcu_tasks() in that case.

That's probably right.

> While enabling ftrace, we will first need to patch the preceding 'mflr 
> r0' (which would now be a 'nop') with 'b +8', then use 
> synchronize_rcu_tasks() and finally patch in 'bl _mcount()' followed by 
> 'mflr r0'.
> 
> I think that's what you meant, just that my reference to patching 'mflr 
> r0' with a 'b +8' should have called out that the mflr would have been 
> nop'ed out.

I meant that we don't need the b +8 anywhere, because the mflr r0
is harmless. Enabling ftrace just needs to patch in 'mflr r0', and
then synchronize_rcu_tasks(), and then patch in 'bl _mcount'. I think?

Thanks,
Nick


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr
  2019-05-17  9:12             ` Nicholas Piggin
@ 2019-05-17 17:25               ` Naveen N. Rao
  0 siblings, 0 replies; 11+ messages in thread
From: Naveen N. Rao @ 2019-05-17 17:25 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman, Nicholas Piggin

Nicholas Piggin wrote:
> Naveen N. Rao's on May 17, 2019 4:22 am:
> 
>> While enabling ftrace, we will first need to patch the preceding 'mflr 
>> r0' (which would now be a 'nop') with 'b +8', then use 
>> synchronize_rcu_tasks() and finally patch in 'bl _mcount()' followed by 
>> 'mflr r0'.
>> 
>> I think that's what you meant, just that my reference to patching 'mflr 
>> r0' with a 'b +8' should have called out that the mflr would have been 
>> nop'ed out.
> 
> I meant that we don't need the b +8 anywhere, because the mflr r0
> is harmless. Enabling ftrace just needs to patch in 'mflr r0', and
> then synchronize_rcu_tasks(), and then patch in 'bl _mcount'. I think?

Ah, that's a good point! That should be enough and it simplifies this 
further.


Thanks,
Naveen



^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2019-05-17 17:39 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-13  1:59 [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr Nicholas Piggin
2019-04-15  9:42 ` Naveen N. Rao
2019-05-13  3:25 ` Michael Ellerman
2019-05-13  6:47   ` Naveen N. Rao
2019-05-13 11:34     ` Michael Ellerman
2019-05-14  8:32       ` Naveen N. Rao
2019-05-15 12:20         ` Michael Ellerman
2019-05-16  5:49         ` Nicholas Piggin
2019-05-16 18:22           ` Naveen N. Rao
2019-05-17  9:12             ` Nicholas Piggin
2019-05-17 17:25               ` Naveen N. Rao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.