Re: [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous

From: Borislav Petkov <bp@alien8.de>
To: Andy Lutomirski <luto@kernel.org>
Cc: "security@kernel.org" <security@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	X86 ML <x86@kernel.org>,
	linux-kernel@vger.kernel.org,
	Steven Rostedt <rostedt@goodmis.org>,
	xen-devel <xen-devel@lists.xen.org>,
	stable@vger.kernel.org, Jan Beulich <jbeulich@suse.com>,
	Sasha Levin <sasha.levin@oracle.com>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: Re: [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous
Date: Fri, 24 Jul 2015 17:29:55 +0200	[thread overview]
Message-ID: <20150724152955.GC21441__38443.5661406771$1437751890$gmane$org@nazgul.tnic> (raw)
In-Reply-To: <049fdbab8ae2ecac1c8b40ecd558e9df45ccd5d3.1437592883.git.luto@kernel.org>

On Wed, Jul 22, 2015 at 12:23:46PM -0700, Andy Lutomirski wrote:
> modify_ldt has questionable locking and does not synchronize
> threads.  Improve it: redesign the locking and synchronize all
> threads' LDTs using an IPI on all modifications.
> 
> This will dramatically slow down modify_ldt in multithreaded
> programs, but there shouldn't be any multithreaded programs that
> care about modify_ldt's performance in the first place.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Andy Lutomirski <luto@kernel.org>

Just minor stylistic nitpicks below. Otherwise looks ok to me.

...

> diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
> index c37886d759cc..3ae308029dee 100644
> --- a/arch/x86/kernel/ldt.c
> +++ b/arch/x86/kernel/ldt.c
> @@ -12,6 +12,7 @@
>  #include <linux/string.h>
>  #include <linux/mm.h>
>  #include <linux/smp.h>
> +#include <linux/slab.h>
>  #include <linux/vmalloc.h>
>  #include <linux/uaccess.h>
>  
> @@ -20,82 +21,83 @@
>  #include <asm/mmu_context.h>
>  #include <asm/syscalls.h>
>  
> -#ifdef CONFIG_SMP
>  static void flush_ldt(void *current_mm)
>  {
> -	if (current->active_mm == current_mm)
> -		load_LDT(&current->active_mm->context);
> +	if (current->active_mm == current_mm) {

Save indentation level:

	if (current->active_mm != current_mm)
		return;

> +		/* context.lock is held for us, so we don't need any locking. */

Stick that comment above the function name.

> +		mm_context_t *pc = &current->active_mm->context;
> +		set_ldt(pc->ldt->entries, pc->ldt->size);
> +	}
>  }
> -#endif
>  
> -static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
> +/* The caller must call finalize_ldt_struct on the result. */
> +static struct ldt_struct *alloc_ldt_struct(int size)
>  {
> -	void *oldldt, *newldt;
> -	int oldsize;
> +	struct ldt_struct *new_ldt;
> +	int alloc_size;
>  
> -	if (mincount <= pc->size)
> -		return 0;
> -	oldsize = pc->size;
> -	mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
> -			(~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
> -	if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
> -		newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
> +	if (size > LDT_ENTRIES)
> +		return NULL;
> +
> +	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
> +	if (!new_ldt)
> +		return NULL;
> +
> +	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
> +	alloc_size = size * LDT_ENTRY_SIZE;
> +
> +	if (alloc_size > PAGE_SIZE)
> +		new_ldt->entries = vmalloc(alloc_size);
>  	else
> -		newldt = (void *)__get_free_page(GFP_KERNEL);
> +		new_ldt->entries = (void *)__get_free_page(GFP_KERNEL);

newline here.

> +	if (!new_ldt->entries) {
> +		kfree(new_ldt);
> +		return NULL;
> +	}
>  
> -	if (!newldt)
> -		return -ENOMEM;
> +	new_ldt->size = size;
> +	return new_ldt;
> +}
> +
> +/* After calling this, the LDT is immutable. */
> +static void finalize_ldt_struct(struct ldt_struct *ldt)
> +{
> +	paravirt_alloc_ldt(ldt->entries, ldt->size);
> +}
>  
> -	if (oldsize)
> -		memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
> -	oldldt = pc->ldt;
> -	memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
> -	       (mincount - oldsize) * LDT_ENTRY_SIZE);
> +static void install_ldt(struct mm_struct *current_mm,
> +			struct ldt_struct *ldt)
> +{
> +	/* context.lock is held */
> +	preempt_disable();
>  
> -	paravirt_alloc_ldt(newldt, mincount);
> +	/* Synchronizes with lockless_dereference in load_mm_ldt. */

Good.

> +	smp_store_release(&current_mm->context.ldt, ldt);
>  
> -#ifdef CONFIG_X86_64
> -	/* CHECKME: Do we really need this ? */
> -	wmb();
> -#endif
> -	pc->ldt = newldt;
> -	wmb();
> -	pc->size = mincount;
> -	wmb();
> +	/* Activate for this CPU. */
> +	flush_ldt(current->mm);
>  
> -	if (reload) {
>  #ifdef CONFIG_SMP
> -		preempt_disable();
> -		load_LDT(pc);
> -		if (!cpumask_equal(mm_cpumask(current->mm),
> -				   cpumask_of(smp_processor_id())))
> -			smp_call_function(flush_ldt, current->mm, 1);
> -		preempt_enable();
> -#else
> -		load_LDT(pc);
> +	/* Synchronize with other CPUs. */
> +	if (!cpumask_equal(mm_cpumask(current_mm),
> +			   cpumask_of(smp_processor_id())))

Let it stick out:

	if (!cpumask_equal(mm_cpumask(current_mm), cpumask_of(smp_processor_id())))
		smp_call_function(flush_ldt, current_mm, 1);
>  #endif
> -	}
> -	if (oldsize) {
> -		paravirt_free_ldt(oldldt, oldsize);
> -		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
> -			vfree(oldldt);
> -		else
> -			put_page(virt_to_page(oldldt));
> -	}
> -	return 0;
> +	preempt_enable();
>  }
>  
> -static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
> +static void free_ldt_struct(struct ldt_struct *ldt)
>  {
> -	int err = alloc_ldt(new, old->size, 0);
> -	int i;
> -
> -	if (err < 0)
> -		return err;
> -
> -	for (i = 0; i < old->size; i++)
> -		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
> -	return 0;
> +	if (unlikely(ldt)) {

Save an indentation level:

	int alloc_size;

	if (!ldt)
		return;

	alloc_size = sizeof(struct ldt_struct) + ldt->size * LDT_ENTRY_SIZE;

	...

> +		paravirt_free_ldt(ldt->entries, ldt->size);
> +		if (alloc_size > PAGE_SIZE)
> +			vfree(ldt->entries);
> +		else
> +			put_page(virt_to_page(ldt->entries));
> +		kfree(ldt);
> +	}
>  }
>  
>  /*
> @@ -108,13 +110,32 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
>  	int retval = 0;
>  
>  	mutex_init(&mm->context.lock);
> -	mm->context.size = 0;
>  	old_mm = current->mm;
> -	if (old_mm && old_mm->context.size > 0) {
> -		mutex_lock(&old_mm->context.lock);
> -		retval = copy_ldt(&mm->context, &old_mm->context);
> -		mutex_unlock(&old_mm->context.lock);
> +	if (!old_mm) {
> +		mm->context.ldt = NULL;
> +		return 0;
> +	}
> +
> +	mutex_lock(&old_mm->context.lock);
> +	if (old_mm->context.ldt) {

Same here:

	if (!old_mm->context.ldt) {
		mm->context.ldt = NULL;
		goto out_unlock;
	}

	new_ldt = ...

> +		struct ldt_struct *new_ldt =
> +			alloc_ldt_struct(old_mm->context.ldt->size);
> +		if (!new_ldt) {
> +			retval = -ENOMEM;
> +			goto out_unlock;
> +		}
> +
> +		memcpy(new_ldt->entries, old_mm->context.ldt->entries,
> +		       new_ldt->size * LDT_ENTRY_SIZE);
> +		finalize_ldt_struct(new_ldt);
> +
> +		mm->context.ldt = new_ldt;
> +	} else {
> +		mm->context.ldt = NULL;
>  	}
> +
> +out_unlock:
> +	mutex_unlock(&old_mm->context.lock);
>  	return retval;
>  }
>  
> @@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
>   */
>  void destroy_context(struct mm_struct *mm)
>  {
> -	if (mm->context.size) {
> -#ifdef CONFIG_X86_32
> -		/* CHECKME: Can this ever happen ? */
> -		if (mm == current->active_mm)
> -			clear_LDT();
> -#endif
> -		paravirt_free_ldt(mm->context.ldt, mm->context.size);
> -		if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
> -			vfree(mm->context.ldt);
> -		else
> -			put_page(virt_to_page(mm->context.ldt));
> -		mm->context.size = 0;
> -	}
> +	free_ldt_struct(mm->context.ldt);
> +	mm->context.ldt = NULL;
>  }
>  
>  static int read_ldt(void __user *ptr, unsigned long bytecount)
>  {
> -	int err;
> +	int retval;
>  	unsigned long size;
>  	struct mm_struct *mm = current->mm;
>  
> -	if (!mm->context.size)
> -		return 0;
> +	mutex_lock(&mm->context.lock);
> +
> +	if (!mm->context.ldt) {
> +		retval = 0;
> +		goto out_unlock;
> +	}
> +
>  	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
>  		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
>  
> -	mutex_lock(&mm->context.lock);
> -	size = mm->context.size * LDT_ENTRY_SIZE;
> +	size = mm->context.ldt->size * LDT_ENTRY_SIZE;
>  	if (size > bytecount)
>  		size = bytecount;
>  
> -	err = 0;
> -	if (copy_to_user(ptr, mm->context.ldt, size))
> -		err = -EFAULT;
> -	mutex_unlock(&mm->context.lock);
> -	if (err < 0)
> -		goto error_return;
> +	if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
> +		retval = -EFAULT;
> +		goto out_unlock;
> +	}
> +
>  	if (size != bytecount) {
> -		/* zero-fill the rest */
> +		/* Zero-fill the rest and pretend we read bytecount bytes. */
>  		if (clear_user(ptr + size, bytecount - size) != 0) {

Make that:

		if (clear_user(ptr + size, bytecount - size))

> -			err = -EFAULT;
> -			goto error_return;
> +			retval = -EFAULT;
> +			goto out_unlock;
>  		}
>  	}
> -	return bytecount;
> -error_return:
> -	return err;
> +	retval = bytecount;
> +
> +out_unlock:
> +	mutex_unlock(&mm->context.lock);
> +	return retval;
>  }
>  
>  static int read_default_ldt(void __user *ptr, unsigned long bytecount)
> @@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
>  	struct desc_struct ldt;
>  	int error;
>  	struct user_desc ldt_info;
> +	int oldsize, newsize;
> +	struct ldt_struct *new_ldt, *old_ldt;
>  
>  	error = -EINVAL;
>  	if (bytecount != sizeof(ldt_info))
> @@ -213,34 +230,43 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
>  			goto out;
>  	}
>  
> -	mutex_lock(&mm->context.lock);
> -	if (ldt_info.entry_number >= mm->context.size) {
> -		error = alloc_ldt(&current->mm->context,
> -				  ldt_info.entry_number + 1, 1);
> -		if (error < 0)
> -			goto out_unlock;
> -	}
> -
> -	/* Allow LDTs to be cleared by the user. */
> -	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
> -		if (oldmode || LDT_empty(&ldt_info)) {
> -			memset(&ldt, 0, sizeof(ldt));
> -			goto install;
> +	if ((oldmode && ldt_info.base_addr == 0 && ldt_info.limit == 0) ||

Shorten:

	if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||

> +	    LDT_empty(&ldt_info)) {
> +		/* The user wants to clear the entry. */
> +		memset(&ldt, 0, sizeof(ldt));
> +	} else {
> +		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
> +			error = -EINVAL;
> +			goto out;
>  		}
> +
> +		fill_ldt(&ldt, &ldt_info);
> +		if (oldmode)
> +			ldt.avl = 0;
>  	}
>  
> -	if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
> -		error = -EINVAL;
> +	mutex_lock(&mm->context.lock);
> +
> +	old_ldt = mm->context.ldt;
> +	oldsize = old_ldt ? old_ldt->size : 0;
> +	newsize = max((int)(ldt_info.entry_number + 1), oldsize);
> +
> +	error = -ENOMEM;
> +	new_ldt = alloc_ldt_struct(newsize);
> +	if (!new_ldt)
>  		goto out_unlock;
> -	}
>  
> -	fill_ldt(&ldt, &ldt_info);
> -	if (oldmode)
> -		ldt.avl = 0;
> +	if (old_ldt) {
> +		memcpy(new_ldt->entries, old_ldt->entries,
> +		       oldsize * LDT_ENTRY_SIZE);
> +	}

Single if-statement doesn't need {} and you don't absolutely need to
keep 80cols. Just let it stick out.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--