All of lore.kernel.org
 help / color / mirror / Atom feed
From: Masami Hiramatsu <mhiramat@kernel.org>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	x86@kernel.org, linux-kernel@vger.kernel.org, bristot@redhat.com,
	jbaron@akamai.com, torvalds@linux-foundation.org,
	tglx@linutronix.de, namit@vmware.com, hpa@zytor.com,
	luto@kernel.org, ard.biesheuvel@linaro.org, jpoimboe@redhat.com,
	jeyu@kernel.org, alexei.starovoitov@gmail.com
Subject: Re: [PATCH -tip 1/2] x86/alternative: Sync bp_patching update for avoiding NULL pointer exception
Date: Wed, 11 Dec 2019 01:44:01 +0900	[thread overview]
Message-ID: <20191211014401.2f0c27f259a83d1f32aa6f2e@kernel.org> (raw)
In-Reply-To: <20191209143940.GI2810@hirez.programming.kicks-ass.net>

Hi Peter,

On Mon, 9 Dec 2019 15:39:40 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> On Wed, Nov 27, 2019 at 02:56:52PM +0900, Masami Hiramatsu wrote:
> 
> > diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> > index 4552795a8df4..9505096e2cd1 100644
> > --- a/arch/x86/kernel/alternative.c
> > +++ b/arch/x86/kernel/alternative.c
> > @@ -1134,8 +1134,14 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
> >  	 * sync_core() implies an smp_mb() and orders this store against
> >  	 * the writing of the new instruction.
> >  	 */
> > -	bp_patching.vec = NULL;
> >  	bp_patching.nr_entries = 0;
> > +	/*
> > +	 * This sync_core () ensures that all int3 handlers in progress
> > +	 * have finished. This allows poke_int3_handler () after this to
> > +	 * avoid touching bp_paching.vec by checking nr_entries == 0.
> > +	 */
> > +	text_poke_sync();
> > +	bp_patching.vec = NULL;
> >  }
> 
> How's something like this instead? Under the assumption that it is rare
> to actually hit the INT3 and even more rare to actually hit this race,
> the below should be a lot cheaper.

Ah, this reminds me of my atomic-refcounter idea for kpatch
and module unloading.

This looks good, but I feel it is a bit complicated.

If we use atomic (and spin-wait) here, can we first use atomic_inc_not_zero()
in poke_int3_handler() to check whether the bp_patching is in
operation or not?
I think it makes things simpler, like below.

---------
atomic_t bp_refcnt;

poke_int3_handler()
{
	smp_rmb();
	if (!READ_ONCE(bp_patching.nr_entries))
		return 0;
	if (!atomic_inc_not_zero(&bp_refcnt))
		return 0;
	smp_mb__after_atomic();
	[use bp_patching]
	atomic_dec(&bp_refcnt);
}

text_poke_bp_batch()
{
	bp_patching.vec = tp;
	bp_patching.nr_entries = nr_entries;
	smp_wmb();
	atomic_inc(&bp_refcnt);
	...
	atomic_dec(&bp_refcnt);
	/* wait for all running poke_int3_handler(). */
	atomic_cond_read_acquire(&bp_refcnt, !VAL);
	bp_patching.vec = NULL;
	bp_patching.nr_entries = 0;
}
---------

Thank you,


> 
> ---
>  arch/x86/kernel/alternative.c | 69 +++++++++++++++++++++++++++++++++----------
>  1 file changed, 53 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index 30e86730655c..12f2d193109d 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -953,6 +953,8 @@ static struct bp_patching_desc {
>  	int nr_entries;
>  } bp_patching;
>  
> +static atomic_t bp_handlers;
> +
>  static inline void *text_poke_addr(struct text_poke_loc *tp)
>  {
>  	return _stext + tp->rel_addr;
> @@ -973,8 +975,8 @@ NOKPROBE_SYMBOL(patch_cmp);
>  int notrace poke_int3_handler(struct pt_regs *regs)
>  {
>  	struct text_poke_loc *tp;
> +	int nr, len, ret = 0;
>  	void *ip;
> -	int len;
>  
>  	/*
>  	 * Having observed our INT3 instruction, we now must observe
> @@ -987,12 +989,21 @@ int notrace poke_int3_handler(struct pt_regs *regs)
>  	 * Idem for other elements in bp_patching.
>  	 */
>  	smp_rmb();
> -
> -	if (likely(!bp_patching.nr_entries))
> +	if (!READ_ONCE(bp_patching.nr_entries))
>  		return 0;
>  
> +	atomic_inc(&bp_handlers);
> +	/*
> +	 * 'ACQUIRE', everything happens after the increment.
> +	 */
> +	smp_mb__after_atomic();
> +
> +	nr = smp_load_acquire(&bp_patching.nr_entries);
> +	if (likely(!nr))
> +		goto out;
> +
>  	if (user_mode(regs))
> -		return 0;
> +		goto out;
>  
>  	/*
>  	 * Discount the INT3. See text_poke_bp_batch().
> @@ -1002,16 +1013,16 @@ int notrace poke_int3_handler(struct pt_regs *regs)
>  	/*
>  	 * Skip the binary search if there is a single member in the vector.
>  	 */
> -	if (unlikely(bp_patching.nr_entries > 1)) {
> -		tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
> +	if (unlikely(nr > 1)) {
> +		tp = bsearch(ip, bp_patching.vec, nr,
>  			     sizeof(struct text_poke_loc),
>  			     patch_cmp);
>  		if (!tp)
> -			return 0;
> +			goto out;
>  	} else {
>  		tp = bp_patching.vec;
>  		if (text_poke_addr(tp) != ip)
> -			return 0;
> +			goto out;
>  	}
>  
>  	len = text_opcode_size(tp->opcode);
> @@ -1023,7 +1034,7 @@ int notrace poke_int3_handler(struct pt_regs *regs)
>  		 * Someone poked an explicit INT3, they'll want to handle it,
>  		 * do not consume.
>  		 */
> -		return 0;
> +		goto out;
>  
>  	case CALL_INSN_OPCODE:
>  		int3_emulate_call(regs, (long)ip + tp->rel32);
> @@ -1038,7 +1049,14 @@ int notrace poke_int3_handler(struct pt_regs *regs)
>  		BUG();
>  	}
>  
> -	return 1;
> +	ret = 1;
> +out:
> +	/*
> +	 * 'RELEASE', everything happens before the decrement.
> +	 */
> +	smp_mb__before_atomic();
> +	atomic_dec(&bp_handlers);
> +	return ret;
>  }
>  NOKPROBE_SYMBOL(poke_int3_handler);
>  
> @@ -1076,7 +1094,12 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
>  	lockdep_assert_held(&text_mutex);
>  
>  	bp_patching.vec = tp;
> -	bp_patching.nr_entries = nr_entries;
> +	/*
> +	 * bp_patching.vec = tp			nr = bp_patching.nr_entries
> +	 * REL					ACQ
> +	 * bp_patching.nr_entries = nr_entries	tp = bp_patching.vec[]
> +	 */
> +	smp_store_release(&bp_patching.nr_entries, nr_entries);
>  
>  	/*
>  	 * Corresponding read barrier in int3 notifier for making sure the
> @@ -1134,13 +1157,27 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
>  	 * sync_core() implies an smp_mb() and orders this store against
>  	 * the writing of the new instruction.
>  	 */
> -	bp_patching.nr_entries = 0;
> +	WRITE_ONCE(bp_patching.nr_entries, 0);
>  	/*
> -	 * This sync_core () call ensures that all INT3 handlers in progress
> -	 * have finished. This allows poke_int3_handler() after this to
> -	 * avoid touching bp_paching.vec by checking nr_entries == 0.
> +	 * nr_entries = 0	bp_handlers++
> +	 * MB			MB
> +	 * VAL = bp_handlers	nr = nr_entries
> +	 */
> +	smp_mb();
> +	/*
> +	 * Guarantee all poke_int3_handler()s that have observed
> +	 * @bp_patching.nr_entries have completed before we clear
> +	 * bp_patching.vec.
> +	 *
> +	 * We can't do this before text_poke_sync() because then there
> +	 * might still be observable INT3 instructions.
> +	 */
> +	atomic_cond_read_acquire(&bp_handlers, !VAL);
> +	/*
> +	 * bp_handlers == 0		tp = bp_patching.vec[]
> +	 * ACQ				MB
> +	 * bp_patching.vec = NULL	bp_handlers--;
>  	 */
> -	text_poke_sync();
>  	bp_patching.vec = NULL;
>  }
>  


-- 
Masami Hiramatsu <mhiramat@kernel.org>

  reply	other threads:[~2019-12-10 16:44 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-27  5:56 [PATCH -tip 0/2] x86/kprobes: Fix 2 issues related to text_poke_bp and optprobe Masami Hiramatsu
2019-11-27  5:56 ` [PATCH -tip 1/2] x86/alternative: Sync bp_patching update for avoiding NULL pointer exception Masami Hiramatsu
2019-12-02  9:15   ` Peter Zijlstra
2019-12-02 11:50     ` Masami Hiramatsu
2019-12-02 13:43       ` Peter Zijlstra
2019-12-02 14:39         ` Masami Hiramatsu
2019-12-04  8:33   ` [tip: core/kprobes] x86/alternatives: " tip-bot2 for Masami Hiramatsu
2019-12-09 14:39   ` [PATCH -tip 1/2] x86/alternative: " Peter Zijlstra
2019-12-10 16:44     ` Masami Hiramatsu [this message]
2019-12-10 17:32       ` Peter Zijlstra
2019-12-11  0:09         ` Peter Zijlstra
2019-12-11  8:09           ` Masami Hiramatsu
2019-12-11  9:12             ` Daniel Bristot de Oliveira
2019-11-27  5:57 ` [PATCH -tip 2/2] kprobes: Set unoptimized flag after unoptimizing code Masami Hiramatsu
2019-11-27  6:19   ` Alexei Starovoitov
2019-11-27  6:49     ` Ingo Molnar
2019-12-02 21:55       ` Alexei Starovoitov
2019-11-27  6:56     ` Masami Hiramatsu
2019-12-04  8:33   ` [tip: core/kprobes] " tip-bot2 for Masami Hiramatsu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191211014401.2f0c27f259a83d1f32aa6f2e@kernel.org \
    --to=mhiramat@kernel.org \
    --cc=alexei.starovoitov@gmail.com \
    --cc=ard.biesheuvel@linaro.org \
    --cc=bristot@redhat.com \
    --cc=hpa@zytor.com \
    --cc=jbaron@akamai.com \
    --cc=jeyu@kernel.org \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=namit@vmware.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.