linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andy Lutomirski <luto@kernel.org>
To: Thomas Gleixner <tglx@linutronix.de>,
	LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Stephen Hemminger <stephen@networkplumber.org>,
	Willy Tarreau <w@1wt.eu>, Juergen Gross <jgross@suse.com>,
	Sean Christopherson <sean.j.christopherson@intel.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	"H. Peter Anvin" <hpa@zytor.com>
Subject: Re: [patch 4/9] x86/io: Speedup schedule out of I/O bitmap user
Date: Fri, 8 Nov 2019 16:24:37 -0800	[thread overview]
Message-ID: <b44feb60-130a-0672-5f14-5789de57a246@kernel.org> (raw)
In-Reply-To: <20191106202806.133597409@linutronix.de>

On 11/6/19 11:35 AM, Thomas Gleixner wrote:
> There is no requirement to update the TSS I/O bitmap when a thread using it is
> scheduled out and the incoming thread does not use it.
> 
> For the permission check based on the TSS I/O bitmap the CPU calculates the memory
> location of the I/O bitmap by the address of the TSS and the io_bitmap_base member
> of the tss_struct. The easiest way to invalidate the I/O bitmap is to switch the
> offset to an address outside of the TSS limit.
> 
> If an I/O instruction is issued from user space the TSS limit causes #GP to be
> raised in the same way as a valid I/O bitmap with all bits set to 1 would do.
> 
> This removes the extra work when an I/O bitmap using task is scheduled out
> and puts the burden on the rare I/O bitmap users when they are scheduled
> in.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  arch/x86/include/asm/processor.h |   38 +++++++++++++++++--------
>  arch/x86/kernel/cpu/common.c     |    3 +
>  arch/x86/kernel/doublefault.c    |    2 -
>  arch/x86/kernel/process.c        |   59 +++++++++++++++++++++------------------
>  4 files changed, 61 insertions(+), 41 deletions(-)
> 
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -330,8 +330,23 @@ struct x86_hw_tss {
>  #define IO_BITMAP_BITS			65536
>  #define IO_BITMAP_BYTES			(IO_BITMAP_BITS/8)
>  #define IO_BITMAP_LONGS			(IO_BITMAP_BYTES/sizeof(long))
> -#define IO_BITMAP_OFFSET		(offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
> -#define INVALID_IO_BITMAP_OFFSET	0x8000
> +
> +#define IO_BITMAP_OFFSET_VALID				\
> +	(offsetof(struct tss_struct, io_bitmap) -	\
> +	 offsetof(struct tss_struct, x86_tss))
> +
> +/*
> + * sizeof(unsigned long) coming from an extra "long" at the end
> + * of the iobitmap.
> + *
> + * -1? seg base+limit should be pointing to the address of the
> + * last valid byte

What's with the '?'

> + */
> +#define __KERNEL_TSS_LIMIT	\
> +	(IO_BITMAP_OFFSET_VALID + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
> +
> +/* Base offset outside of TSS_LIMIT so unprivileged IO causes #GP */
> +#define IO_BITMAP_OFFSET_INVALID	(__KERNEL_TSS_LIMIT + 1)
>  
>  struct entry_stack {
>  	unsigned long		words[64];
> @@ -350,6 +365,15 @@ struct tss_struct {
>  	struct x86_hw_tss	x86_tss;
>  
>  	/*
> +	 * Store the dirty size of the last io bitmap offender. The next
> +	 * one will have to do the cleanup as the switch out to a non
> +	 * io bitmap user will just set x86_tss.io_bitmap_base to a value
> +	 * outside of the TSS limit. So for sane tasks there is no need
> +	 * to actually touch the io_bitmap at all.
> +	 */
> +	unsigned int		io_bitmap_prev_max;

Hmm.  I'm wondering if a clearer way to say this would be:

io_bitmap_max_allowed_byte: offset from the start of the io bitmap to
the first byte that might contain a zero bit.  If we switch from a task
that uses ioperm() to one that does not, we will invalidate the io
bitmap by changing the offset.  The next task that starts using the io
bitmap again will need to make sure it updates all the bytes through
io_bitmap_max_allowed_byte.

But your description is okay, too.

It's worth noting that, due to Meltdown, this patch leaks the io bitmap
of io bitmap-using tasks.  I'm not sure I care.

> +
> +	/*
>  	 * The extra 1 is there because the CPU will access an
>  	 * additional byte beyond the end of the IO permission
>  	 * bitmap. The extra byte must be all 1 bits, and must
> @@ -360,16 +384,6 @@ struct tss_struct {
>  
>  DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
>  
> -/*
> - * sizeof(unsigned long) coming from an extra "long" at the end
> - * of the iobitmap.
> - *
> - * -1? seg base+limit should be pointing to the address of the
> - * last valid byte
> - */
> -#define __KERNEL_TSS_LIMIT	\
> -	(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
> -
>  /* Per CPU interrupt stacks */
>  struct irq_stack {
>  	char		stack[IRQ_STACK_SIZE];
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1863,7 +1863,8 @@ void cpu_init(void)
>  
>  	/* Initialize the TSS. */
>  	tss_setup_ist(tss);
> -	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
> +	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
> +	tss->io_bitmap_prev_max = 0;
>  	memset(tss->io_bitmap, 0xff, sizeof(tss->io_bitmap));
>  	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
>  
> --- a/arch/x86/kernel/doublefault.c
> +++ b/arch/x86/kernel/doublefault.c
> @@ -54,7 +54,7 @@ struct x86_hw_tss doublefault_tss __cach
>  	.sp0		= STACK_START,
>  	.ss0		= __KERNEL_DS,
>  	.ldt		= 0,
> -	.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
> +	.io_bitmap_base	= IO_BITMAP_OFFSET_INVALID,
>  
>  	.ip		= (unsigned long) doublefault_fn,
>  	/* 0x2 bit is always set */
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -72,18 +72,9 @@
>  #ifdef CONFIG_X86_32
>  		.ss0 = __KERNEL_DS,
>  		.ss1 = __KERNEL_CS,
> -		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
>  #endif
> +		.io_bitmap_base	= IO_BITMAP_OFFSET_INVALID,
>  	 },
> -#ifdef CONFIG_X86_32
> -	 /*
> -	  * Note that the .io_bitmap member must be extra-big. This is because
> -	  * the CPU will access an additional byte beyond the end of the IO
> -	  * permission bitmap. The extra byte must be all 1 bits, and must
> -	  * be within the limit.
> -	  */
> -	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },
> -#endif
>  };
>  EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
>  
> @@ -112,18 +103,18 @@ void exit_thread(struct task_struct *tsk
>  	struct thread_struct *t = &tsk->thread;
>  	unsigned long *bp = t->io_bitmap_ptr;
>  	struct fpu *fpu = &t->fpu;
> +	struct tss_struct *tss;
>  
>  	if (bp) {
> -		struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
> +		preempt_disable();
> +		tss = this_cpu_ptr(&cpu_tss_rw);
>  
>  		t->io_bitmap_ptr = NULL;
> -		clear_thread_flag(TIF_IO_BITMAP);
> -		/*
> -		 * Careful, clear this in the TSS too:
> -		 */
> -		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
>  		t->io_bitmap_max = 0;
> -		put_cpu();
> +		clear_thread_flag(TIF_IO_BITMAP);
> +		/* Invalidate the io bitmap base in the TSS */
> +		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;

The first time I read this, I thought this code was all unnecessary.
But now I think I understand.  How about a comment:

/*
 * We're about to free the IO bitmap.  We need to disable it in the TSS
 * too so that switch_to() doesn't see an inconsistent state.
 */

In any case:

Acked-by: Andy Lutomirski <luto@kernel.org>

  parent reply	other threads:[~2019-11-09  0:24 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-06 19:34 [patch 0/9] x86/iopl: Prevent user space from using CLI/STI with iopl(3) Thomas Gleixner
2019-11-06 19:35 ` [patch 1/9] x86/ptrace: Prevent truncation of bitmap size Thomas Gleixner
2019-11-07  7:31   ` Ingo Molnar
2019-11-06 19:35 ` [patch 2/9] x86/process: Unify copy_thread_tls() Thomas Gleixner
2019-11-08 22:31   ` Andy Lutomirski
2019-11-08 23:43     ` Thomas Gleixner
2019-11-10 12:36       ` Thomas Gleixner
2019-11-10 16:56         ` Andy Lutomirski
2019-11-11  8:52           ` Peter Zijlstra
2019-11-06 19:35 ` [patch 3/9] x86/cpu: Unify cpu_init() Thomas Gleixner
2019-11-08 22:34   ` Andy Lutomirski
2019-11-06 19:35 ` [patch 4/9] x86/io: Speedup schedule out of I/O bitmap user Thomas Gleixner
2019-11-07  9:12   ` Peter Zijlstra
2019-11-07 14:04     ` Thomas Gleixner
2019-11-07 14:08       ` Thomas Gleixner
2019-11-08 22:41         ` Andy Lutomirski
2019-11-08 23:45           ` Thomas Gleixner
2019-11-09  3:32             ` Andy Lutomirski
2019-11-10 12:43               ` Thomas Gleixner
2019-11-09  0:24   ` Andy Lutomirski [this message]
2019-11-06 19:35 ` [patch 5/9] x86/ioport: Reduce ioperm impact for sane usage further Thomas Gleixner
2019-11-07  1:11   ` Linus Torvalds
2019-11-07  7:44     ` Thomas Gleixner
2019-11-07  8:25     ` Ingo Molnar
2019-11-07  9:17       ` Willy Tarreau
2019-11-07 10:00         ` Thomas Gleixner
2019-11-07 10:13           ` Willy Tarreau
2019-11-07 10:19           ` hpa
2019-11-07 10:27             ` Willy Tarreau
2019-11-07 10:50               ` hpa
2019-11-07 12:56                 ` Willy Tarreau
2019-11-07 16:45                   ` Eric W. Biederman
2019-11-07 16:53                     ` Linus Torvalds
2019-11-07 16:57                     ` Willy Tarreau
2019-11-10 17:17       ` Andy Lutomirski
2019-11-07  7:37   ` Ingo Molnar
2019-11-07  7:45     ` Thomas Gleixner
2019-11-07  8:16   ` Ingo Molnar
2019-11-07 18:02     ` Thomas Gleixner
2019-11-07 19:24   ` Brian Gerst
2019-11-07 19:54     ` Linus Torvalds
2019-11-07 21:00       ` Brian Gerst
2019-11-07 21:32         ` Thomas Gleixner
2019-11-07 23:20           ` hpa
2019-11-07 21:44         ` Linus Torvalds
2019-11-08  1:12           ` H. Peter Anvin
2019-11-08  2:12             ` Brian Gerst
2019-11-10 17:21           ` Andy Lutomirski
2019-11-06 19:35 ` [patch 6/9] x86/iopl: Fixup misleading comment Thomas Gleixner
2019-11-06 19:35 ` [patch 7/9] x86/iopl: Restrict iopl() permission scope Thomas Gleixner
2019-11-07  9:09   ` Peter Zijlstra
2019-11-10 17:26   ` Andy Lutomirski
2019-11-10 20:31     ` Thomas Gleixner
2019-11-10 21:05       ` Andy Lutomirski
2019-11-10 21:21         ` Thomas Gleixner
2019-11-06 19:35 ` [patch 8/9] x86/iopl: Remove legacy IOPL option Thomas Gleixner
2019-11-07  6:11   ` Jürgen Groß
2019-11-07  6:26     ` hpa
2019-11-07 16:44     ` Stephen Hemminger
2019-11-07  9:13   ` Peter Zijlstra
2019-11-06 19:35 ` [patch 9/9] selftests/x86/iopl: Verify that CLI/STI result in #GP Thomas Gleixner
2019-11-07  7:28 ` [patch] x86/iopl: Remove unused local variable, update comments in ksys_ioperm() Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b44feb60-130a-0672-5f14-5789de57a246@kernel.org \
    --to=luto@kernel.org \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sean.j.christopherson@intel.com \
    --cc=stephen@networkplumber.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=w@1wt.eu \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).