All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jiang Liu <jiang.liu@linux.intel.com>
To: Thomas Gleixner <tglx@linutronix.de>,
	Joe Lawrence <joe.lawrence@stratus.com>
Cc: LKML <linux-kernel@vger.kernel.org>, x86@kernel.org
Subject: Re: irq_desc use-after-free in smp_irq_move_cleanup_interrupt
Date: Fri, 27 Nov 2015 16:06:42 +0800	[thread overview]
Message-ID: <56580F12.8070607@linux.intel.com> (raw)
In-Reply-To: <alpine.DEB.2.11.1511252112010.12555@nanos>



On 2015/11/26 5:12, Thomas Gleixner wrote:
> On Wed, 25 Nov 2015, Thomas Gleixner wrote:
>> So if CPU1 gets the IPI _BEFORE_ move_in_progress is set to 0, and
>> does not get another IPI before the next move ..... That has been that
>> way forever.
>>
>> Duh. Working on a real fix this time.
> 
> Here you go. Completely untested of course.
> 
> Larger than I hoped for, but the simple fix of just clearing the
> move_in_progress flag before sending the IPI does not work because:
> 
> CPU0	    	     	      CPU1			CPU2
> data->move_in_progress=0
> sendIPI()			
> 			      set_affinity()
> 			      lock_vector()		handle_IPI
> 			      move_in_progress = 1	lock_vector()
> 			      unlock_vector()
> 							move_in_progress == 1
> 							-> no cleanup
> 
> So we are back to square one. Now one might think that taking vector
> lock prevents that issue:
> 
> CPU0	    	     	      CPU1			CPU2
> lock_vector()
> data->move_in_progress=0
> sendIPI()			
> unlock_vector()
> 			      set_affinity()
> 			      assign_irq_vector()
> 			      lock_vector()		handle_IPI
> 			      move_in_progress = 1	lock_vector()
> 			      unlock_vector()
> 							move_in_progress == 1
> Not really. 
> 
> So now the solution is:
> 
> CPU0	    	     	      CPU1			CPU2
> lock_vector()
> data->move_in_progress=0
> data->cleanup_mask = data->old_domain
> sendIPI()			
> unlock_vector()
> 			      set_affinity()
> 			      assign_irq_vector()
> 			      lock_vector()		
> 			      if (move_in_progress ||
> 			      	  !empty(cleanup_mask)) {
> 				 unlock_vector()
> 			      	 return -EBUSY;		handle_IPI
> 			      }	 			lock_vector()
> 							move_in_progress == 0
> 							cpu is set in cleanup mask
> 							->cleanup vector
> 
> Looks a bit overkill with the extra cpumask. I tried a simple counter
> but that does not work versus cpu unplug as we do not know whether the
> outgoing cpu is involved in the cleanup or not. And if the cpu is
> involved we starve assign_irq_vector() ....
> 
> The upside of this is that we get rid of that atomic allocation in
> __send_cleanup_vector().
Hi Thomas,
	Maybe more headache for you now:)
	It seems there is still room for improvement. First, it
seems we could just reuse old_domain instead of adding cleanup_mask.
Second I found another race window among x86_vector_free_irqs(),
__send_cleanup_vector() and smp_irq_move_cleanup_interrupt().
I'm trying to refine your patch based on the following rules:
1) move_in_progress controls whether we need to send IPIs
2) old_domain controls which CPUs we should do clean up
3) assign_irq_vector checks both move_in_progress and old_domain.
Will send out the patch soon for comments:)
Thanks,
Gerry			

> 
> Brain hurts by now. 
> 
> Not-Yet-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  arch/x86/kernel/apic/vector.c |   37 ++++++++++++++++---------------------
>  1 file changed, 16 insertions(+), 21 deletions(-)
> 
> --- a/arch/x86/kernel/apic/vector.c
> +++ b/arch/x86/kernel/apic/vector.c
> @@ -25,6 +25,7 @@ struct apic_chip_data {
>  	struct irq_cfg		cfg;
>  	cpumask_var_t		domain;
>  	cpumask_var_t		old_domain;
> +	cpumask_var_t		cleanup_mask;
>  	u8			move_in_progress : 1;
>  };
>  
> @@ -83,7 +84,11 @@ static struct apic_chip_data *alloc_apic
>  		goto out_data;
>  	if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node))
>  		goto out_domain;
> +	if (!zalloc_cpumask_var_node(&data->cleanup_mask, GFP_KERNEL, node))
> +		goto out_old;
>  	return data;
> +out_old:
> +	free_cpumask_var(data->old_domain);
>  out_domain:
>  	free_cpumask_var(data->domain);
>  out_data:
> @@ -96,6 +101,7 @@ static void free_apic_chip_data(struct a
>  	if (data) {
>  		free_cpumask_var(data->domain);
>  		free_cpumask_var(data->old_domain);
> +		free_cpumask_var(data->cleanup_mask);
>  		kfree(data);
>  	}
>  }
> @@ -118,7 +124,7 @@ static int __assign_irq_vector(int irq,
>  	static int current_offset = VECTOR_OFFSET_START % 16;
>  	int cpu, err;
>  
> -	if (d->move_in_progress)
> +	if (d->move_in_progress || !cpumask_empty(d->cleanup_mask))
>  		return -EBUSY;
>  
>  	/* Only try and allocate irqs on cpus that are present */
> @@ -511,20 +517,11 @@ static struct irq_chip lapic_controller
>  #ifdef CONFIG_SMP
>  static void __send_cleanup_vector(struct apic_chip_data *data)
>  {
> -	cpumask_var_t cleanup_mask;
> -
> -	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
> -		unsigned int i;
> -
> -		for_each_cpu_and(i, data->old_domain, cpu_online_mask)
> -			apic->send_IPI_mask(cpumask_of(i),
> -					    IRQ_MOVE_CLEANUP_VECTOR);
> -	} else {
> -		cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
> -		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
> -		free_cpumask_var(cleanup_mask);
> -	}
> +	raw_spin_lock(&vector_lock);
> +	cpumask_and(data->cleanup_mask, data->old_domain, cpu_online_mask);
>  	data->move_in_progress = 0;
> +	apic->send_IPI_mask(data->cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
> +	raw_spin_unlock(&vector_lock);
>  }
>  
>  void send_cleanup_vector(struct irq_cfg *cfg)
> @@ -568,14 +565,11 @@ asmlinkage __visible void smp_irq_move_c
>  			goto unlock;
>  
>  		/*
> -		 * Check if the irq migration is in progress. If so, we
> -		 * haven't received the cleanup request yet for this irq.
> +		 * Nothing to cleanup if irq migration is in progress
> +		 * or this cpu is not set in the cleanup mask.
>  		 */
> -		if (data->move_in_progress)
> -			goto unlock;
> -
> -		if (vector == data->cfg.vector &&
> -		    cpumask_test_cpu(me, data->domain))
> +		if (data->move_in_progress ||
> +		    !cpumask_test_cpu(me, data->cleanup_mask))
>  			goto unlock;
>  
>  		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
> @@ -591,6 +585,7 @@ asmlinkage __visible void smp_irq_move_c
>  			goto unlock;
>  		}
>  		__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
> +		cpumask_clear_cpu(me, data->cleanup_mask);
>  unlock:
>  		raw_spin_unlock(&desc->lock);
>  	}
> 

  parent reply	other threads:[~2015-11-27  8:07 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-24  0:59 irq_desc use-after-free in smp_irq_move_cleanup_interrupt Joe Lawrence
2015-11-25 18:16 ` Thomas Gleixner
2015-11-25 19:31   ` Thomas Gleixner
2015-11-25 21:12     ` Thomas Gleixner
2015-11-25 22:02       ` Joe Lawrence
2015-11-27  8:06       ` Jiang Liu [this message]
2015-11-27  8:25         ` Thomas Gleixner
2015-11-30  8:09           ` [Bugfix 1/5] x86/irq: Do not reuse struct apic_chip_data.old_domain as temporary buffer Jiang Liu
2015-11-30  8:09             ` [Bugfix 2/5] x86/irq: Enhance __assign_irq_vector() to rollback in case of failure Jiang Liu
2015-12-10 18:39               ` [tip:x86/urgent] " tip-bot for Jiang Liu
2015-11-30  8:09             ` [Bugfix 3/5] x86/irq: Fix a race window in x86_vector_free_irqs() Jiang Liu
2015-12-10 18:40               ` [tip:x86/urgent] " tip-bot for Jiang Liu
2015-11-30  8:09             ` [Bugfix 4/5] x86/irq: Fix a race condition between vector assigning and cleanup Jiang Liu
2015-12-01 22:46               ` Joe Lawrence
2015-12-08  0:29                 ` Joe Lawrence
2015-12-08 21:31                   ` Thomas Gleixner
2015-12-10 18:40               ` [tip:x86/urgent] " tip-bot for Jiang Liu
2015-11-30  8:09             ` [Bugfix 5/5] x86/irq: Trivial cleanups for x86 vector allocation code Jiang Liu
2015-12-10 18:42               ` [tip:x86/apic] " tip-bot for Jiang Liu
2015-12-10 18:39             ` [tip:x86/urgent] x86/irq: Do not reuse struct apic_chip_data.old_domain as temporary buffer tip-bot for Jiang Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56580F12.8070607@linux.intel.com \
    --to=jiang.liu@linux.intel.com \
    --cc=joe.lawrence@stratus.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.